[Ocfs2-tools-commits] zab commits r351 - in trunk/fsck.ocfs2: . include

svn-commits at oss.oracle.com svn-commits at oss.oracle.com
Thu Oct 28 19:46:40 CDT 2004


Author: zab
Date: 2004-10-28 19:46:38 -0500 (Thu, 28 Oct 2004)
New Revision: 351

Modified:
   trunk/fsck.ocfs2/fsck.c
   trunk/fsck.ocfs2/include/util.h
   trunk/fsck.ocfs2/pass0.c
   trunk/fsck.ocfs2/pass4.c
Log:
strengthen inode allocator checking
                                                                                
o check and fix more blkno fields
o more forcefully fix descs into chains instead of orphaning them
o write back modified inodes and descs
o total up chain bits in inode
o properly fix up count/free in the chain rec


Modified: trunk/fsck.ocfs2/fsck.c
===================================================================
--- trunk/fsck.ocfs2/fsck.c	2004-10-28 21:29:36 UTC (rev 350)
+++ trunk/fsck.ocfs2/fsck.c	2004-10-29 00:46:38 UTC (rev 351)
@@ -25,14 +25,6 @@
  * Roughly o2fsck performs the following operations.  Each pass' file has
  * more details.
  * 
- * - replay the journals if needed
- * 	- walk the journal extents looking for simple inconsistencies
- * 		- loops, doubly referenced blocks
- * 		- need this code later anyway for verifying files
- * 		  and i_clusters/i_size
- * 	- prompt to proceed if errors (mention backup superblock)
- * 		- ignore entirely or partially replay?
- *
  * - pass0: clean up the inode allocators
  * 	- kill loops, chains can't share groups
  * 	- move local allocs back to the global or something?

Modified: trunk/fsck.ocfs2/include/util.h
===================================================================
--- trunk/fsck.ocfs2/include/util.h	2004-10-28 21:29:36 UTC (rev 350)
+++ trunk/fsck.ocfs2/include/util.h	2004-10-29 00:46:38 UTC (rev 351)
@@ -44,6 +44,12 @@
 	exit(FSCK_ERROR);						\
 } while (0)
 
+#define maybe_fatal(errcode, fmt...) do {				\
+	errcode_t _err = errcode;					\
+	if (_err == OCFS2_ET_NO_MEMORY || _err == OCFS2_ET_IO)		\
+		fatal_error(_err, fmt);					\
+} while (0)
+
 void o2fsck_write_inode(ocfs2_filesys *fs, uint64_t blkno, ocfs2_dinode *di);
 
 #endif /* __O2FSCK_UTIL_H__ */

Modified: trunk/fsck.ocfs2/pass0.c
===================================================================
--- trunk/fsck.ocfs2/pass0.c	2004-10-28 21:29:36 UTC (rev 350)
+++ trunk/fsck.ocfs2/pass0.c	2004-10-29 00:46:38 UTC (rev 351)
@@ -21,12 +21,19 @@
  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  * Boston, MA 021110-1307, USA.
  *
- * Pass 0 verifies that the inode suballocators can be iterated over by
- * latter passes without risk of running into corruption.  Usually our passes
- * are analagous to ext{2,3} but ocfs2's allocation is more dynamic.  As 
- * pass 0 it only makes sure that basic iteration in pass 1 will work.  After
- * building a set of active inodes Pass 1 will call back into us to sync the
- * bitmaps with the active inodes.
+ * Pass 0 verifies that the inode suballocators can be iterated over by later
+ * passes without risk of running into corruption.  This is so the passes can
+ * build up state without having to worry about tearing it down half way
+ * through to clean up the suballocators.  For now fsck treats failure to find
+ * and verify the suballocator inodes themselves as fatal.  It will only clean
+ * up the data they point to.
+ *
+ * pass0 updates group descriptor chains on disk.
+ *
+ * XXX
+ * 	track used blocks that iteration won't see?
+ * 	verify more inode fields?
+ * 	use prompt to mark soft errors
  */
 
 #include <string.h>
@@ -55,13 +62,16 @@
 			    struct chain_state *cs, ocfs2_group_desc *bg,
 			    uint64_t blkno)
 {
+	int changed = 0;
+
 	verbosef("checking desc at %"PRIu64"; blkno %"PRIu64" size %u bits %u "
 		 "free_bits %u chain %u generation %u\n", blkno, bg->bg_blkno,
 		 bg->bg_size, bg->bg_bits, bg->bg_free_bits_count, 
 		 bg->bg_chain, bg->bg_generation);
 
-	/* We'll only consider this a valid descriptor if its signature,
-	 * parent inode, and generation all check out */
+	/* Once we think it's a valid group desc we aggressively tie it
+	 * into the inode that pointed to it for fear of losing any
+	 * descriptors. */
 	if (memcmp(bg->bg_signature, OCFS2_GROUP_DESC_SIGNATURE,
 		   strlen(OCFS2_GROUP_DESC_SIGNATURE))) {
 		printf("Group descriptor at block %"PRIu64" has an invalid "
@@ -72,39 +82,66 @@
 	/* XXX maybe for advanced pain we could check to see if these 
 	 * kinds of descs have valid generations for the inodes they
 	 * reference */
-	if (bg->bg_parent_dinode != di->i_blkno) {
-		printf("Group descriptor at block %"PRIu64" is referenced by "
-			"inode %"PRIu64" but thinks its parent inode is "
-			"%"PRIu64"\n", blkno, di->i_blkno, 
-			bg->bg_parent_dinode);
-		return -1;
+	if ((bg->bg_parent_dinode != di->i_blkno) &&
+	    prompt(ost, PY, "Group descriptor at block %"PRIu64" is "
+		   "referenced by inode %"PRIu64" but thinks its parent inode "
+		   "is %"PRIu64".  Fix the descriptor's parent inode?", blkno,
+		   di->i_blkno, bg->bg_parent_dinode)) {
+		bg->bg_parent_dinode = di->i_blkno;
+		changed = 1;
 	}
 
-	if (bg->bg_generation != di->i_generation) {
-		printf("Group descriptor at block %"PRIu64" is referenced by "
-			"inode %"PRIu64" who has a generation of %u, but "
-			"the descriptor has a generation of %u\n",blkno, 
-			di->i_blkno, di->i_generation, bg->bg_generation);
-		return -1;
+	if ((bg->bg_generation != di->i_generation) &&
+	    prompt(ost, PY, "Group descriptor at block %"PRIu64" is "
+		   "referenced by inode %"PRIu64" who has a generation of "
+		   "%u, but the descriptor has a generation of %u.  Update "
+		   "the descriptor's generation?", blkno, di->i_blkno,
+		   di->i_generation, bg->bg_generation)) {
+		bg->bg_generation = di->i_generation;
+		changed = 1;
 	}
 
-	/* XXX check bg_blkno */
+	if ((bg->bg_blkno != blkno) &&
+	    prompt(ost, PY, "Group descriptor read from block %"PRIu64" "
+		   "claims to be located at block %"PRIu64".  Update its "
+		   "recorded block location?", blkno, di->i_blkno)) {
+		bg->bg_blkno = blkno;
+		changed = 1;
+	}
 
-	/* XXX check bg_chain */
+	if ((bg->bg_chain != cs->cs_chain_no) &&
+	    prompt(ost, PY, "Group descriptor at block %"PRIu64" was "
+		   "found in chain %u but it claims to be in chain %u. Update "
+		   "the descriptor's recorded chain?", blkno, cs->cs_chain_no,
+		   bg->bg_chain)) {
+		bg->bg_chain = cs->cs_chain_no;
+		changed = 1;
+	}
 
-	/* XXX check _chain and worry about cpg/bpc lining up with bg_bits. 
-	 * ah, bpc/cpg changes between the global bitmap and inode allocators,
-	 * not within an inode allocator.  and its variable for clustersize/
-	 * blocksize. */
+	if ((bg->bg_free_bits_count > bg->bg_bits) &&
+	    prompt(ost, PY, "Group descriptor at block %"PRIu64" claims to "
+		   "have %u free bits which is more than its %u total bits. "
+		   "Drop its free bit count down to the total?", blkno,
+		   bg->bg_free_bits_count, bg->bg_bits)) {
+		bg->bg_free_bits_count = bg->bg_bits;
+		changed = 1;
+	}
 
-#if 0
-	/* XXX hmm, do we care about these checks?  if we want to be able
-	 * to use the allocator, I think so.  This means walking them and
-	 * fixing up the bitmaps.  maybe we'll fix them up after we've
-	 * iterated through inodes but before we start allocating? */
-	if (bg->bg_bits != (u32)chain->cl_cpg * (u32)chain->cl_bpc) {
+	/* XXX check bg_bits vs cpg/bpc. */
+
+	if (changed) {
+		errcode_t ret;
+		/* XXX maybe a helper.. */
+		ret = ocfs2_write_group_desc(ost->ost_fs, bg->bg_blkno,
+					     (char *)bg);
+		if (ret) {
+			fatal_error(ret, "while writing a group descriptor to "
+				    "block %"PRIu64" somewhere in chain %d in "
+				    "group allocator inode %"PRIu64, 
+				    bg->bg_blkno, cs->cs_chain_no,
+				    di->i_blkno);
+		}
 	}
-#endif
 
 	cs->cs_total_bits += bg->bg_bits;
 	cs->cs_free_bits += bg->bg_free_bits_count;
@@ -112,12 +149,14 @@
 	return 0;
 }
 
+/* returns non-zero if the chain_rec was updated */
 static int check_chain(o2fsck_state *ost, ocfs2_dinode *di,
 		       struct chain_state *cs, ocfs2_chain_rec *chain,
 		       char *buf1, char *buf2)
 {
 	ocfs2_group_desc *bg1 = (ocfs2_group_desc *)buf1;
 	ocfs2_group_desc *bg2 = (ocfs2_group_desc *)buf2;
+	ocfs2_group_desc *write_bg = NULL;
 	uint64_t blkno = chain->c_blkno;
 	errcode_t ret;
 	int rc;
@@ -125,84 +164,121 @@
 	verbosef("free %u total %u blkno %"PRIu64"\n", chain->c_free,
 		 chain->c_total, chain->c_blkno);
 
-	if (ocfs2_block_out_of_range(ost->ost_fs, blkno))
+	if (chain->c_blkno == 0)
 		return 0;
 
+	if (ocfs2_block_out_of_range(ost->ost_fs, blkno)) {
+		if (!prompt(ost, PY, "Chain record %d in group allocator inode "
+			    "%"PRIu64" points to block %"PRIu64" which is out "
+ 			    "of range.  fsck can't continue without deleting "
+			    "this chain.  Delete it?", cs->cs_chain_no,
+			    di->i_blkno, blkno)) 
+			exit(FSCK_ERROR);
+
+		chain->c_blkno = 0;
+		return 1;
+	}
+
 	ret = ocfs2_read_group_desc(ost->ost_fs, blkno, buf1);
 	if (ret) {
-		/* trans or persis io error hmm. */
-		rc = -1;
+		maybe_fatal(ret, "while reading a group descriptor from block "
+			    "%"PRIu64" as pointed to by chain record %d in "
+			    "group allocator inode %"PRIu64, blkno, 
+			    cs->cs_chain_no, di->i_blkno);
+		if (!prompt(ost, PY, "fsck can't continue without deleting "
+		    "this chain.  Delete it?"))
+			exit(FSCK_ERROR);
+
+		chain->c_blkno = 0;
+		return 1;
 	}
 
 	rc = check_group_desc(ost, di, cs, bg1, blkno);
-	if (rc < 0 && prompt(ost, PY, "Chain %d in group allocator inode "
+	if (rc < 0) {
+		if (!prompt(ost, PY, "Chain %d in group allocator inode "
 			     "%"PRIu64" points to an invalid descriptor block "
-			     "at %"PRIu64".  Truncate this chain by removing "
-			     " this reference?", cs->cs_chain_no, di->i_blkno,
-			     blkno)) {
-		/* this essentially frees this chain. */
-		chain->c_free = 0;
-		chain->c_total = 0;
+			     "at %"PRIu64".  fsck can't continue without "
+			     "deleting this chain.  Delete it?",
+			     cs->cs_chain_no, di->i_blkno, blkno))
+			exit(FSCK_ERROR);
+
 		chain->c_blkno = 0;
 		return 1;
 	}
-	if (rc > 1) {
-		/* XXX write */
-	}
 
+	/* read in each group desc and check it.  if we see an error we try
+	 * to truncate the list after the last good desc */
 	while (bg1->bg_next_group) {
 		ret = ocfs2_read_group_desc(ost->ost_fs, bg1->bg_next_group,
 					    buf2);
 		if (ret) {
-			/* trans or persis io error hmm. */
-			rc = -1;
+			maybe_fatal(ret, "while reading a group descriptor "
+				    "from block %"PRIu64" as pointed to by "
+				    "chain record %d in group allocator inode "
+				    "%"PRIu64, bg1->bg_next_group, 
+				    cs->cs_chain_no, di->i_blkno);
+		} else {
+			rc = check_group_desc(ost, di, cs, bg2, 
+					      bg1->bg_next_group);
+			if (rc == 0) {
+				memcpy(buf1, buf2, ost->ost_fs->fs_blocksize);
+				continue;
+			}
+			/* fall through if check_group_desc fails */
 		}
 
-		rc = check_group_desc(ost, di, cs, bg2, bg1->bg_next_group);
-		if (rc > 1) {
-			/* XXX write */
-		}
+		if (!prompt(ost, PY, "fsck can't continue without truncating "
+			    "this chain by removing the link to the offending "
+			    "block. Truncate it?"))
+			exit(FSCK_ERROR);
 
-		if (rc == 0) {
-			blkno = bg1->bg_next_group;
-			memcpy(buf1, buf2, ost->ost_fs->fs_blocksize);
+		bg1->bg_next_group = 0;
+		write_bg = bg1;
+		break;
+	}
 
-			continue;
+	if (write_bg) {
+		ret = ocfs2_write_group_desc(ost->ost_fs, write_bg->bg_blkno,
+					     (char *)write_bg);
+		if (ret) {
+			fatal_error(ret, "while writing a group descriptor to "
+				    "block %"PRIu64" somewhere in chain %d in "
+				    "group allocator inode %"PRIu64, 
+				    write_bg->bg_blkno, cs->cs_chain_no,
+				    di->i_blkno);
 		}
-
-		if (prompt(ost, PY, "Desc %"PRIu64" points to an invalid "
-			   "descriptor at block %"PRIu64".  Truncate this "
-			   "chain by removing this reference?", blkno,
-			   bg1->bg_next_group)) {
-			bg1->bg_next_group = 0;
-			/* XXX write */
-			return 1;
-		}
 	}
 
+	/* XXX exit if it isn't updated? */
 	if (cs->cs_total_bits != chain->c_total ||
 	    cs->cs_free_bits != chain->c_free) {
 		if (prompt(ost, PY, "Chain %d in allocator inode %"PRIu64" "
 			   "has %u bits marked free out of %d total bits "
 			   "but the block groups in the chain have %u "
-			   "recorded out of %u total.  Fix this by updating "
+			   "free out of %u total.  Fix this by updating "
 			   "the chain record?", cs->cs_chain_no, di->i_blkno,
 			   chain->c_free, chain->c_total, cs->cs_free_bits,
 			   cs->cs_total_bits)) {
 			chain->c_total = cs->cs_total_bits;
 			chain->c_free = cs->cs_free_bits;
+			return 1;
 		}
 	}
 
 	return 0;
 }
 
+/* If this returns 0 then the inode allocator had better be amenable to
+ * iteration. */
 static errcode_t verify_inode_alloc(o2fsck_state *ost, ocfs2_dinode *di,
 				    char *buf1, char *buf2)
 {
 	struct chain_state cs = {0, };
 	ocfs2_chain_list *cl;
-	uint16_t i, max_chain_rec;
+	uint16_t i, max_count;
+	ocfs2_chain_rec *cr;
+	uint32_t free = 0, total = 0;
+	int changed = 0;
 	errcode_t ret;
 
 	if (memcmp(di->i_signature, OCFS2_INODE_SIGNATURE,
@@ -231,33 +307,88 @@
 
 	verbosef("cl count %u next %u\n", cl->cl_count, cl->cl_next_free_rec);
 
-	max_chain_rec = (ost->ost_fs->fs_blocksize - 
-			offsetof(ocfs2_dinode, id2.i_chain.cl_recs)) / 
-				sizeof(ocfs2_chain_rec);
+	max_count = ocfs2_chain_recs_per_inode(ost->ost_fs->fs_blocksize);
 
-	if (cl->cl_next_free_rec > max_chain_rec) {
-		if (prompt(ost, PY, "Allocator inode %"PRIu64" claims %u "
-			   "as the next free chain record, but it can only "
-			   "have %u total.  Set the next record value?",
-			   di->i_blkno, cl->cl_next_free_rec, max_chain_rec)) {
-			cl->cl_next_free_rec = max_chain_rec;
-		}
-	} else
-		max_chain_rec = cl->cl_next_free_rec;
+	if (cl->cl_count > max_count) {
+		if (!prompt(ost, PY, "Allocator inode %"PRIu64" claims to "
+			    "have %u chains, but the maximum is %u. Fix the "
+			    "inode's count and keep checking?", di->i_blkno,
+			    cl->cl_count, max_count))
+			exit(FSCK_ERROR);
 
-	for (i = 0; i < max_chain_rec; i++) {
-		/* clear it for each run */
+		cl->cl_count = max_count;
+		changed = 1;
+	}
+
+	if (cl->cl_next_free_rec > cl->cl_count) {
+		if (!prompt(ost, PY, "Allocator inode %"PRIu64" claims %u "
+			   "as the next free chain record, but the inode only "
+			   "has %u chains. Clamp the next record value and "
+			   "keep checking?",
+			   di->i_blkno, cl->cl_next_free_rec, cl->cl_count))
+			exit(FSCK_ERROR);
+
+		cl->cl_next_free_rec = cl->cl_count;
+		changed = 1;
+	}
+
+	for (i = 0; i < cl->cl_next_free_rec; i++) {
+		cr = &cl->cl_recs[i];
+
+		/* reset for each run */
 		cs = (struct chain_state) {
 			.cs_chain_no = i,
 		};
-		ret = check_chain(ost, di, &cs, &cl->cl_recs[i], buf1, buf2);
-		/* XXX do things :) */
+		changed |= check_chain(ost, di, &cs, cr, buf1, buf2);
+
+		/* replace this deleted chain with the last valid one, if
+		 * present, and check this 'i' again.  If there isn't one to
+		 * move in place the loop will terminate */
+		if (cr->c_blkno == 0) {
+			if (i < (cl->cl_next_free_rec - 1)) {
+				cl->cl_next_free_rec--;
+				*cr = cl->cl_recs[cl->cl_next_free_rec];
+				changed = 1;
+				i--;
+			}
+			continue;
+		}
+
+		free += cs.cs_free_bits;
+		total += cs.cs_total_bits;
 	}
 
+	if (di->id1.bitmap1.i_total != total || 
+	    (di->id1.bitmap1.i_used != total - free)) {
+		if (prompt(ost, PY, "Allocator inode %"PRIu64" has %u bits "
+			   "marked used out of %d total bits but the chains "
+			   "have %u used out of %u total.  Fix this by "
+			   "updating the inode counts?", di->i_blkno,
+			   di->id1.bitmap1.i_used, di->id1.bitmap1.i_total,
+			   total - free, total)) {
+			   di->id1.bitmap1.i_used = total - free;
+			   di->id1.bitmap1.i_total = total;
+			   changed = 1;
+		}
+	}
+
+	if (changed) {
+		/* if we're writing it anyway, we might as well clear the
+		 * unused chain entries */ 
+		if (cl->cl_next_free_rec != max_count)
+			memset(&cl->cl_recs[cl->cl_next_free_rec], 0,
+			       (max_count - cl->cl_next_free_rec) * 
+			       sizeof(ocfs2_chain_rec));
+
+		ret = ocfs2_write_inode(ost->ost_fs, di->i_blkno, (char *)di);
+		if (ret)
+			fatal_error(ret, "while writing inode alloc inode "
+				    "%"PRIu64, di->i_blkno);
+	}
+
 	return 0;
 }
 
-
 errcode_t o2fsck_pass0(o2fsck_state *ost)
 {
 	errcode_t ret;
@@ -267,7 +398,7 @@
 	ocfs2_filesys *fs = ost->ost_fs;
 	int i, type;
 
-	printf("Pass 1: Checking allocation structures\n");
+	printf("Pass 0: Checking allocation structures\n");
 
 	ret = ocfs2_malloc_blocks(fs->fs_io, 3, &blocks);
 	if (ret) {
@@ -305,13 +436,19 @@
 					 (ost->ost_fs->fs_blocksize * 2));
 
 		/* XXX maybe helped by the alternate super block */
-		if (ret) {
-		}
+		if (ret)
+			goto out;
 
 		type = INODE_ALLOC_SYSTEM_INODE;
 	} while (++i < OCFS2_RAW_SB(fs->fs_super)->s_max_nodes);
 
 out:
+	/* errors are only returned to us here if they're fatal -- memory
+	 * alloc or IO errors.  whoever returned the error was responsible
+	 * for describing it at the source. */
+	if (ret)
+		exit(FSCK_ERROR);
+
 	if (di)
 		ocfs2_free(&di);
 	if (blocks)

Modified: trunk/fsck.ocfs2/pass4.c
===================================================================
--- trunk/fsck.ocfs2/pass4.c	2004-10-28 21:29:36 UTC (rev 350)
+++ trunk/fsck.ocfs2/pass4.c	2004-10-29 00:46:38 UTC (rev 351)
@@ -64,7 +64,7 @@
 			/* XXX offer to remove files/dirs with no data? */
 			if (prompt(ost, PY, "Inode %"PRIu64" isn't referenced "
 				   "by any directory entries.  Move it to "
-				   "lost+found?")) {
+				   "lost+found?", ino)) {
 				o2fsck_reconnect_file(ost, ino);
 				refs = o2fsck_icount_get(ost->ost_icount_refs,
 						ino);



More information about the Ocfs2-tools-commits mailing list