[Ocfs2-tools-commits] zab commits r472 - in trunk: fsck.ocfs2 fsck.ocfs2/include libocfs2/include

svn-commits at oss.oracle.com svn-commits at oss.oracle.com
Thu Dec 9 13:57:36 CST 2004


Author: zab
Date: 2004-12-09 13:57:34 -0600 (Thu, 09 Dec 2004)
New Revision: 472

Modified:
   trunk/fsck.ocfs2/fsck.c
   trunk/fsck.ocfs2/include/journal.h
   trunk/fsck.ocfs2/include/util.h
   trunk/fsck.ocfs2/journal.c
   trunk/fsck.ocfs2/pass0.c
   trunk/fsck.ocfs2/util.c
   trunk/libocfs2/include/ocfs2_fs.h
Log:
o check the journal's dirty flag instead of the publish mount flag
o add an o2fsck_bitcount() for counting bits set in bytes
o rework the freaking chain repair code again to account for bugs found by
  fswreck.  we are much more careful about linking in descs.  *knocks on wood*
o update the dinode from ocfs2, will break the build for a bit


Modified: trunk/fsck.ocfs2/fsck.c
===================================================================
--- trunk/fsck.ocfs2/fsck.c	2004-12-09 19:50:43 UTC (rev 471)
+++ trunk/fsck.ocfs2/fsck.c	2004-12-09 19:57:34 UTC (rev 472)
@@ -347,10 +347,6 @@
 	if (ret)
 		goto out;
 
-	ret = o2fsck_read_publish(ost);
-	if (ret)
-		goto out;
-
 out:
 	return ret;
 }
@@ -359,12 +355,16 @@
 				       int open_flags, uint64_t blkno,
 				       uint64_t blksize)
 {	
-	int replayed = 0;
+	int replayed = 0, should = 0;
 	errcode_t ret = 0;
 	char *whoami = __FUNCTION__;
 
-	if (!ost->ost_stale_mounts)
+	ret = o2fsck_should_replay_journals(ost->ost_fs, ost->ost_publish,
+					    &should);
+	if (ret)
 		goto out;
+	if (!should)
+		goto out;
 
 	if (!(ost->ost_fs->fs_flags & OCFS2_FLAG_RW)) {
 		printf("** Skipping journal replay because -n was "

Modified: trunk/fsck.ocfs2/include/journal.h
===================================================================
--- trunk/fsck.ocfs2/include/journal.h	2004-12-09 19:50:43 UTC (rev 471)
+++ trunk/fsck.ocfs2/include/journal.h	2004-12-09 19:57:34 UTC (rev 472)
@@ -28,6 +28,8 @@
 
 errcode_t o2fsck_replay_journals(ocfs2_filesys *fs, ocfs_publish *pub,
 				 int *replayed);
+errcode_t o2fsck_should_replay_journals(ocfs2_filesys *fs, ocfs_publish *pub,
+					int *should);
 
 #endif /* __O2FSCK_JOURNAL_H__ */
 

Modified: trunk/fsck.ocfs2/include/util.h
===================================================================
--- trunk/fsck.ocfs2/include/util.h	2004-12-09 19:50:43 UTC (rev 471)
+++ trunk/fsck.ocfs2/include/util.h	2004-12-09 19:57:34 UTC (rev 472)
@@ -44,5 +44,6 @@
 errcode_t o2fsck_type_from_dinode(o2fsck_state *ost, uint64_t ino,
 				  uint8_t *type);
 errcode_t o2fsck_read_publish(o2fsck_state *ost);
+unsigned long o2fsck_bitcount(void *bytes, size_t len);
 
 #endif /* __O2FSCK_UTIL_H__ */

Modified: trunk/fsck.ocfs2/journal.c
===================================================================
--- trunk/fsck.ocfs2/journal.c	2004-12-09 19:50:43 UTC (rev 471)
+++ trunk/fsck.ocfs2/journal.c	2004-12-09 19:57:34 UTC (rev 472)
@@ -504,6 +504,57 @@
 	return err;
 }
 
+errcode_t o2fsck_should_replay_journals(ocfs2_filesys *fs, ocfs_publish *pub,
+					int *should)
+{
+	uint16_t i, max_nodes;
+	ocfs2_dinode *di;
+	char *buf = NULL;
+	uint64_t blkno;
+	errcode_t ret;
+
+	*should = 0;
+	max_nodes = OCFS2_RAW_SB(fs->fs_super)->s_max_nodes;
+
+	ret = ocfs2_malloc_block(fs->fs_io, &buf);
+	if (ret) {
+		com_err(whoami, ret, "while allocating room to read journal "
+			    "blocks");
+		goto out;
+	}
+
+	di = (ocfs2_dinode *)buf;
+
+	for (i = 0; i < max_nodes; i++) {
+		ret = ocfs2_lookup_system_inode(fs, JOURNAL_SYSTEM_INODE, i,
+						&blkno);
+		if (ret) {
+			com_err(whoami, ret, "while looking up the journal "
+				"inode for node %d", i);
+			goto out;
+		}
+
+		ret = ocfs2_read_inode(fs, blkno, buf);
+		if (ret) {
+			com_err(whoami, ret, "while reading cached inode "
+				"%"PRIu64" for node %d's journal", blkno, i);
+			goto out;
+		}
+		
+		verbosef("node %d JOURNAL_DIRTY_FL: %d\n", i,
+			 di->id1.journal1.i_flags & OCFS2_JOURNAL_DIRTY_FL);
+
+		if (di->id1.journal1.i_flags & OCFS2_JOURNAL_DIRTY_FL) 
+			*should = 1;
+	}
+
+out:
+	if (buf)
+		ocfs2_free(&buf);
+	return ret;
+	
+}
+
 /* Try and replay the nodes journals if they're dirty.  This only returns
  * a non-zero error if the caller should not continue. */
 errcode_t o2fsck_replay_journals(ocfs2_filesys *fs, ocfs_publish *pub,

Modified: trunk/fsck.ocfs2/pass0.c
===================================================================
--- trunk/fsck.ocfs2/pass0.c	2004-12-09 19:50:43 UTC (rev 471)
+++ trunk/fsck.ocfs2/pass0.c	2004-12-09 19:57:34 UTC (rev 472)
@@ -49,6 +49,7 @@
 #include <time.h>
 
 #include "ocfs2.h"
+#include "bitops.h"
 
 #include "dirblocks.h"
 #include "dirparents.h"
@@ -68,10 +69,12 @@
 	uint16_t	cs_cpg;
 };
 
-static int check_group_desc(o2fsck_state *ost, ocfs2_dinode *di,
-			    struct chain_state *cs, ocfs2_group_desc *bg,
-			    uint64_t blkno)
+static errcode_t repair_group_desc(o2fsck_state *ost, ocfs2_dinode *di,
+				   struct chain_state *cs,
+				   ocfs2_group_desc *bg,
+				   uint64_t blkno)
 {
+	errcode_t ret = 0;
 	int changed = 0;
 
 	verbosef("checking desc at %"PRIu64"; blkno %"PRIu64" size %u bits %u "
@@ -79,31 +82,15 @@
 		 bg->bg_size, bg->bg_bits, bg->bg_free_bits_count, 
 		 bg->bg_chain, bg->bg_generation);
 
-	/* Once we think it's a valid group desc we aggressively tie it
-	 * into the inode that pointed to it for fear of losing any
-	 * descriptors. */
-	if (memcmp(bg->bg_signature, OCFS2_GROUP_DESC_SIGNATURE,
-		   strlen(OCFS2_GROUP_DESC_SIGNATURE))) {
-		printf("Group descriptor at block %"PRIu64" has an invalid "
-			"signature.\n", blkno);
-		return 1;
-	}
+	if (bg->bg_generation != ost->ost_fs_generation &&
+	    prompt(ost, PY, 0, "Group descriptor at block %"PRIu64" has "
+		   "a generation of %"PRIx32" which doesn't match the "
+		   "volume's generation of %"PRIx32".  Change the generation "
+		   "in the descriptor to match the volume?", blkno,
+		   bg->bg_generation, ost->ost_fs_generation)) {
 
-	if (bg->bg_generation != ost->ost_fs_generation) {
-		if (prompt(ost, PY, 0, "Group descriptor at block %"PRIu64" has "
-			   "a generation of %"PRIx32" which doesn't match the "
-			   "volume's generation of %"PRIx32".  Delete this "
-			   "group descriptor?", blkno, bg->bg_generation,
-			   ost->ost_fs_generation)) {
-
-			return 1;
-		}
-		if (prompt(ost, PY, 0, "Update the descriptor's generation to "
-			   "match the volume?")) {
-
-			bg->bg_generation = ost->ost_fs_generation;
-			changed = 1;
-		}
+		bg->bg_generation = ost->ost_fs_generation;
+		changed = 1;
 	}
 
 	/* XXX maybe for advanced pain we could check to see if these 
@@ -157,7 +144,6 @@
 	/* XXX check bg_bits vs cpg/bpc. */
 
 	if (changed) {
-		errcode_t ret;
 		ret = ocfs2_write_group_desc(ost->ost_fs, bg->bg_blkno,
 					     (char *)bg);
 		if (ret) {
@@ -172,189 +158,169 @@
 	cs->cs_total_bits += bg->bg_bits;
 	cs->cs_free_bits += bg->bg_free_bits_count;
 
-	return 0;
+	return ret;
 }
 
-struct chain_block_res {
-	unsigned	cb_new_next_blkno:1;
-	uint64_t	cb_next_blkno;
-	errcode_t	cb_err;
-};
-
-static void read_chain_block(o2fsck_state *ost, ocfs2_dinode *di,
-		       struct chain_state *cs, uint64_t blkno,
-		       ocfs2_group_desc *bg, ocfs2_bitmap *allowed,
-		       struct chain_block_res *cbr)
+#if 0
+/* XXX should be a real helper somewhere? */
+static void unlink_group_desc(o2fsck_state *ost,
+			      ocfs2_dinode *di,
+			      ocfs2_group_desc *bg)
 {
-	int was_set;
-	errcode_t ret;
-
-	memset(cbr, 0, sizeof(*cbr));
-
-	if (ocfs2_block_out_of_range(ost->ost_fs, blkno)) {
-		if (prompt(ost, PY, 8, "Chain %d in allocator at inode "
-			   "%"PRIu64" points to block %"PRIu64" which is out "
-			   "of range. Truncate this chain by deleting this "
-			   "invalid block reference?", cs->cs_chain_no,
-			   di->i_blkno, blkno))  {
-
-			cbr->cb_new_next_blkno = 1;
-			cbr->cb_next_blkno = 0;
-		} else {
-			cbr->cb_err = OCFS2_ET_BAD_BLKNO;
-		}
-
-		goto out;
-	}
-
-#if 0 /* XXX plausible + used test */
-	if (o2fsck_test_block_used(ost, blkno) &&
-	    prompt(ost, PY, 9, "Chain %d in allocator at inode %"PRIu64" "
-			   "points to block %"PRIu64" which has already been "
-			   "used by another part of the file system. "
-			   "Truncate this chain by deleting this invalid "
-			   "block reference?", cs->cs_chain_no, di->i_blkno,
-			   blkno))  {
-		cbr->cb_new_next_blkno = 1;
-		cbr->cb_next_blkno = 0;
-	}
+}
 #endif
 
-	if (allowed) {
-		ocfs2_bitmap_test(allowed, blkno, &was_set);
-		if (!was_set &&
-		    prompt(ost, PY, 10, "Chain %d in allocator at inode "
-			   "%"PRIu64" points to block %"PRIu64" which should "
-			   "not be found in the allocator.  Truncate this "
-			   "chain by deleting this invalid block reference?",
-			   cs->cs_chain_no, di->i_blkno, blkno))  {
-
-			cbr->cb_new_next_blkno = 1;
-			/* will set next after reading */
-		} 
-	}
-
-	ret = ocfs2_read_group_desc(ost->ost_fs, blkno, (char *)bg);
-	if (ret) {
-		com_err(whoami, ret, "while reading a group descriptor from "
-			"block %"PRIu64" as pointed to by chain %d in "
-			"allocator at inode %"PRIu64, blkno, 
-			cs->cs_chain_no, di->i_blkno);
-		cbr->cb_err = ret;
-		goto out;
-	}
-
-	if (cbr->cb_new_next_blkno) {
-		cbr->cb_next_blkno = bg->bg_next_group;
-		goto out;
-	}
-
-	if (check_group_desc(ost, di, cs, bg, blkno) &&
-	    prompt(ost, PY, 11, "Chain %d in allocator at inode %"PRIu64" "
-		   "refers to an invalid descriptor block at %"PRIu64". "
-		   "Truncate the chain by removing this reference?",
-		   cs->cs_chain_no,
-		   di->i_blkno, blkno)) {
-
-		cbr->cb_new_next_blkno = 1;
-		cbr->cb_next_blkno = 0;
-	}
-
-out:
-	return;
-}
-
 static void mark_group_used(o2fsck_state *ost, struct chain_state *cs,
-			    ocfs2_group_desc *bg, ocfs2_bitmap *allowed)
+			    uint64_t blkno, int just_desc)
 {
-	uint16_t clusters = cs->cs_cpg;
+	uint16_t clusters;
 
-	if (allowed) {
-		ocfs2_bitmap_clear(allowed, bg->bg_blkno, NULL);
-		/* only mark the desc cluster as in use */
+	if (just_desc)
 		clusters = 1;
-	}
+	else
+		clusters = cs->cs_cpg;
 
 	o2fsck_mark_clusters_allocated(ost, 
-		ocfs2_blocks_to_clusters(ost->ost_fs, bg->bg_blkno),
-		clusters);
+				ocfs2_blocks_to_clusters(ost->ost_fs, blkno),
+				clusters);
 }
 
-/*
- * this function is pretty hairy.  for dynamic chain allocators
- * it is just walking the chains to verify the group descs
- * and truncates a chain when it sees a link it can't follow. 
- * The only complexity in that case is the different language for
- * the head of the chain and the links in the chain.
- *
- * For static chain allocators (the cluster bitmap) it has a bitmap
- * of blocks that should be in the chains.  it will ask to remove
- * blocks in the chains that aren't in the bitmap and will clear
- * the bits in the bitmaps for blocks it finds in the chains.
- */
-static int check_chain(o2fsck_state *ost, ocfs2_dinode *di,
-		       struct chain_state *cs, ocfs2_chain_rec *chain,
-		       char *buf1, char *buf2, ocfs2_bitmap *allowed)
+/* this takes a slightly ridiculous number of arguments :/ */
+static errcode_t check_chain(o2fsck_state *ost,
+			     ocfs2_dinode *di,
+			     struct chain_state *cs,
+			     ocfs2_chain_rec *chain,
+			     char *buf1,
+			     char *buf2,
+			     int *chain_changed,
+			     ocfs2_bitmap *allowed,
+			     ocfs2_bitmap *forbidden)
 {
 	ocfs2_group_desc *bg1 = (ocfs2_group_desc *)buf1;
 	ocfs2_group_desc *bg2 = (ocfs2_group_desc *)buf2;
 	uint64_t blkno;
-	errcode_t ret;
-	int changed = 0;
-	struct chain_block_res cbr;
+	errcode_t ret = 0;
+	int changed = 0, depth = 0, clear_ref = 0;
 
 	verbosef("free %u total %u blkno %"PRIu64"\n", chain->c_free,
 		 chain->c_total, chain->c_blkno);
 
-new_head:
-	blkno = chain->c_blkno;
+	while(1) {
+		/* fetch the next reference */
+		if (depth == 0)
+			blkno = chain->c_blkno;
+		else {
+			/* we only mark a group as used if it wasn't
+			 * contentious.  if we weren't supposed to find it we
+			 * mark it for a future pass to consider.  we do
+			 * this here just as we're about to take the reference
+			 * to the next group, implying that we've just
+			 * decided that bg1 is valid. */
+			blkno = bg1->bg_blkno;
+			if (allowed) {
+				int was_set;
+				ocfs2_bitmap_test(allowed, blkno, &was_set);
+				if (was_set) {
+					ocfs2_bitmap_clear(allowed, blkno,
+							   &was_set);
+					mark_group_used(ost, cs, bg1->bg_blkno,
+							allowed != NULL);
+				} else if (forbidden)
+					ocfs2_bitmap_set(forbidden, blkno,
+							 &was_set);
+			} else
+				mark_group_used(ost, cs, bg1->bg_blkno,
+						allowed != NULL);
+			blkno = bg1->bg_next_group;
+		}
 
-	if (blkno == 0)
-		goto out;
+		/* we're done */
+		if (blkno == 0)
+			break;
 
-	read_chain_block(ost, di, cs, blkno, bg1, allowed, &cbr);
-	if (cbr.cb_err) {
-		ret = cbr.cb_err;
-		goto out;
-	}
+		/* is it even feasible? */
+		if (ocfs2_block_out_of_range(ost->ost_fs, blkno)) {
+			if (prompt(ost, PY, 8, "Chain %d in allocator at inode "
+				   "%"PRIu64" contains a reference at depth "
+				   "%d to block %"PRIu64" which is out "
+				   "of range. Truncate this chain?",
+				   cs->cs_chain_no, di->i_blkno, depth,
+				   blkno))  {
 
-	if (cbr.cb_new_next_blkno) {
-		chain->c_blkno = cbr.cb_new_next_blkno;
-		changed = 1;
-		goto new_head;
-	}
+				clear_ref = 1;
+				break;
+			}
+			/* this will just result in a bad blkno from
+			 * the read below.. */
+		}
 
-	mark_group_used(ost, cs, bg1, allowed);
+		ret = ocfs2_read_group_desc(ost->ost_fs, blkno, (char *)bg2);
+		if (ret == OCFS2_ET_BAD_GROUP_DESC_MAGIC) {
+			if (prompt(ost, PY, 8, "Chain %d in allocator at inode "
+				   "%"PRIu64" contains a reference at depth "
+				   "%d to block %"PRIu64" which doesn't have "
+				   "a valid checksum.  Truncate this chain?",
+				   cs->cs_chain_no, di->i_blkno, depth,
+				   blkno))  {
 
-	/* read in each group desc and check it.  In this loop bg1 is 
-	 * verified and in the chain.  it's bg2 that is considered.  if
-	 * bg2 is found lacking we overwrite bg1's next_group and check
-	 * again */
-	while (bg1->bg_next_group) {
-
-		read_chain_block(ost, di, cs, bg1->bg_next_group, bg2, allowed,
-				 &cbr);
-		if (cbr.cb_err) {
-			ret = cbr.cb_err;
+				clear_ref = 1;
+				break;
+			}
+			
+			/* we're not interested in following a broken desc */
+			ret = 0;
+			break;
+		}
+		if (ret) {
+			com_err(whoami, ret, "while reading a group "
+				"descriptor from block %"PRIu64" as pointed "
+				"to by chain %d in allocator at inode "
+				"%"PRIu64" at depth %d", blkno, 
+				cs->cs_chain_no, di->i_blkno, depth);
 			goto out;
 		}
 
-		if (!cbr.cb_new_next_blkno) {
-			mark_group_used(ost, cs, bg2, allowed);
-			memcpy(buf1, buf2, ost->ost_fs->fs_blocksize);
-			continue;
+		if (bg2->bg_generation != ost->ost_fs_generation &&
+		    prompt(ost, PY, 0, "Group descriptor at block %"PRIu64" "
+			   "has a generation of %"PRIx32" which doesn't match "
+			   "the volume's generation of %"PRIx32".  Delete "
+			   "this group descriptor?", blkno, bg2->bg_generation,
+			   ost->ost_fs_generation)) {
+
+			clear_ref = 1;
+			break;
 		}
 
-		bg1->bg_next_group = cbr.cb_new_next_blkno;
+		ret = repair_group_desc(ost, di, cs, bg2, blkno);
+		if (ret)
+			goto out;
 
-		ret = ocfs2_write_group_desc(ost->ost_fs, bg1->bg_blkno,
-					     (char *)bg1);
-		if (ret) {
-			com_err(whoami, ret, "while writing a group "
-				"descriptor to block %"PRIu64" somewhere in "
-				"chain %d in group allocator inode %"PRIu64, 
-				bg1->bg_blkno, cs->cs_chain_no, di->i_blkno);
-			ost->ost_saw_error = 1;
+
+		/* the loop will now start by reading bg1->next_group */
+		memcpy(buf1, buf2, ost->ost_fs->fs_blocksize);
+		depth++;
+	}
+
+	/* we hit the premature end of a chain.. clear the last
+	 * ref we were working from */
+	if (clear_ref) {
+		if (depth == 0) {
+			chain->c_blkno = blkno;
+			*chain_changed = 1;
+		} else {
+			bg1->bg_next_group = blkno;
+			ret = ocfs2_write_group_desc(ost->ost_fs,
+					             bg1->bg_blkno,
+						     (char *)bg1);
+			if (ret) {
+				com_err(whoami, ret, "while writing a group "
+					"descriptor at depth %d in chain %d "
+					"in group allocator inode %"PRIu64" "
+					"to block %"PRIu64, depth,
+					cs->cs_chain_no, di->i_blkno,
+					bg1->bg_blkno);
+				ost->ost_saw_error = 1;
+			}
 		}
 	}
 
@@ -374,14 +340,15 @@
 	}
 
 out:
-	return changed;
+	return ret;
 }
 
 /* If this returns 0 then the inode allocator had better be amenable to
  * iteration. */
 static errcode_t verify_chain_alloc(o2fsck_state *ost, ocfs2_dinode *di,
 				    char *buf1, char *buf2,
-				    ocfs2_bitmap *allowed)
+				    ocfs2_bitmap *allowed,
+				    ocfs2_bitmap *forbidden)
 {
 	struct chain_state cs = {0, };
 	ocfs2_chain_list *cl;
@@ -423,6 +390,22 @@
 
 	max_count = ocfs2_chain_recs_per_inode(ost->ost_fs->fs_blocksize);
 
+	/* first, no rec should have a totally invalid blkno */
+	for (i = 0; i < max_count; i++) {
+		cr = &cl->cl_recs[i];
+
+		if (cr->c_blkno != 0&&
+		    ocfs2_block_out_of_range(ost->ost_fs, cr->c_blkno) &&
+		    prompt(ost, PY, 6, "Chain %d in allocator inode %"PRIu64" "
+			   "contains an initial block reference to %"PRIu64" "
+			   "which is out of range.  Clear this reference?",
+			   i, di->i_blkno, cr->c_blkno)) {
+
+			cr->c_blkno = 0;
+			changed = 1;
+		}
+	}
+
 	/* make sure cl_count is clamped to the size of the inode */
 	if (cl->cl_count > max_count &&
 	    prompt(ost, PY, 4, "Allocator inode %"PRIu64" claims to have %u "
@@ -461,7 +444,8 @@
 			.cs_chain_no = i,
 			.cs_cpg = cl->cl_cpg,
 		};
-		changed |= check_chain(ost, di, &cs, cr, buf1, buf2, allowed);
+		ret = check_chain(ost, di, &cs, cr, buf1, buf2, &changed,
+				  allowed, forbidden);
 
 		if (cr->c_blkno != 0) {
 			free += cs.cs_free_bits;
@@ -480,7 +464,12 @@
 				continue;
 			}
 
-			/* move later lists down if there are any */
+			/* when we move a chain to a different rec we have
+			 * to update bg_chain in all the descs in the chain.
+			 * we copy the last chain into the missing spot
+			 * instead of shifting everyone over a spot 
+			 * to minimize the number of chains we have to
+			 * update */
 			if (i < (cl->cl_next_free_rec - 1)) {
 				*cr = cl->cl_recs[cl->cl_next_free_rec - 1];
 				i--;
@@ -504,6 +493,7 @@
 			   total - free, total)) {
 			   di->id1.bitmap1.i_used = total - free;
 			   di->id1.bitmap1.i_total = total;
+
 			   changed = 1;
 		}
 	}
@@ -531,44 +521,66 @@
 				     char *buf1, char *buf2)
 {
 	struct ocfs2_cluster_group_sizes cgs;
-	uint32_t i, max_recs;
-	uint16_t bits;
+	uint16_t i, max_recs;
+	uint16_t bits, chain;
 	uint64_t blkno;
 	ocfs2_group_desc *bg = (ocfs2_group_desc *)buf1;
 	errcode_t ret;
 	struct chain_state cs;
-	int changed = 0;
 	ocfs2_chain_rec *rec;
-	ocfs2_bitmap *bitmap_descs = NULL;
+	ocfs2_bitmap *allowed = NULL, *forbidden = NULL;
+	int was_set;
 
-	ret = ocfs2_block_bitmap_new(ost->ost_fs, "bitmap group descriptors",
-				     &bitmap_descs);
+	/* XXX ugh, only used by mark_ */
+	cs.cs_cpg = di->id2.i_chain.cl_cpg;
+
+	ret = ocfs2_block_bitmap_new(ost->ost_fs, "allowed group descriptors",
+				     &allowed);
 	if (ret) {
-		com_err(whoami, ret, "while allocating bitmap descs bitmap");
+		com_err(whoami, ret, "while allocating allowed bitmap descs "
+			"bitmap");
 		goto out;
 	}
+	ret = ocfs2_block_bitmap_new(ost->ost_fs, "forbidden group "
+				     "descriptors", &forbidden);
+	if (ret) {
+		com_err(whoami, ret, "while allocating forbidden descs "
+			"bitmap");
+		goto out;
+	}
 	
 	ocfs2_calc_cluster_groups(ost->ost_fs->fs_clusters,
 				  ost->ost_fs->fs_blocksize, &cgs);
 
 	max_recs = ocfs2_chain_recs_per_inode(ost->ost_fs->fs_blocksize);
 
-	/* the first groups desc is specified in the super block, the 
-	 * rest are in the first cluster/block of the group */
 	for (i = 0, blkno = ost->ost_fs->fs_first_cg_blkno;
 	     i < cgs.cgs_cluster_groups; 
 	     i++, blkno = i * ocfs2_clusters_to_blocks(ost->ost_fs,
 						       cgs.cgs_cpg)) {
+		ocfs2_bitmap_set(allowed, blkno, NULL);
+	}
 
-		verbosef("looking for cluster bitmap desc at %"PRIu64"\n",
-			 blkno);
+	ret = verify_chain_alloc(ost, di, buf1, buf2, allowed, forbidden);
+	if (ret) {
+		com_err(whoami, ret, "while looking up chain allocator inode "
+			"%"PRIu64, di->i_blkno);
+		goto out;
+	}
 
-		if (i == cgs.cgs_cluster_groups - 1)
-			bits = cgs.cgs_tail_group_bits;
-		else
-			bits = cgs.cgs_cpg;
+	/* remove descs that we found in the chain that we didn't expect */
+	for (blkno = ost->ost_fs->fs_first_cg_blkno;
+	     !ocfs2_bitmap_find_next_set(forbidden, blkno, &blkno);
+	     blkno++) {
+		if (!prompt(ost, PY, 3, "Block %"PRIu64" is a group "
+			    "descriptor in the bitmap chain allocator but it "
+			    "isn't at one of the pre-determined location and "
+			    "so shouldn't be in the allocator.  Remove it "
+			    "from the chain?", blkno)) {
 
-		cs.cs_chain_no = i % max_recs;
+			mark_group_used(ost, &cs, blkno, 1);
+			continue;
+		}
 
 		ret = ocfs2_read_group_desc(ost->ost_fs, blkno, (char *)bg);
 		if (ret) {
@@ -578,71 +590,78 @@
 			continue;
 		}
 
-		/* XXX this is kind of awkward.  check_group_desc may change
-		 * bg_chain for a given descriptor but won't update the
-		 * linkage.  So when we call in from the iterator we'll just
-		 * link it back under a given chain.  I'm willing to live with
-		 * that for now. */
-		ret = check_group_desc(ost, di, &cs, bg, blkno);
-		if (ret == OCFS2_ET_BAD_GROUP_DESC_MAGIC &&
-		    prompt(ost, PY, 2, "Cluster group descriptor at block "
-			   "%"PRIu64" doesn't even have a valid signature. "
-			   "Initialize it and mark it for inclusion in the "
-			   "cluster group chain?", blkno)) {
-
-			ocfs2_init_group_desc(ost->ost_fs, bg, blkno,
-					      di->i_generation, di->i_blkno,
-					      bits, cs.cs_chain_no);
-
-			ret = ocfs2_write_group_desc(ost->ost_fs, bg->bg_blkno,
-						     (char *)bg);
-			if (ret) {
-				com_err(whoami, ret, "while writing a cluster "
-					"group descriptor at block %"PRIu64,
-					blkno);
-				ost->ost_saw_error = 1;
-				continue;
-			}
-		}
-		if (ret == 0)
-			ocfs2_bitmap_set(bitmap_descs, blkno, NULL);
+		/* poo, worry about this. */
+//		o2fsck_unlink_group_desc(ost, di, bg);
 	}
 
-	ret = verify_chain_alloc(ost, di, buf1, buf2, bitmap_descs);
-	if (ret) {
-		com_err(whoami, ret, "while looking up chain allocator inode "
-			"%"PRIu64, di->i_blkno);
-		goto out;
-	}
-
 	/* find the blocks that we think should have been in the chains
 	 * but which weren't found */
-	for (blkno = ost->ost_fs->fs_first_cg_blkno;
-	     !ocfs2_bitmap_find_next_set(bitmap_descs, blkno, &blkno);
-	     blkno++) {
+	for (i = 0, blkno = ost->ost_fs->fs_first_cg_blkno;
+	     i < cgs.cgs_cluster_groups; 
+	     i++, blkno = i * ocfs2_clusters_to_blocks(ost->ost_fs,
+						       cgs.cgs_cpg)) {
 
+		if (ocfs2_bitmap_test(allowed, blkno, &was_set))
+			continue;
+		if (!was_set)
+			continue;
+
 		if (!prompt(ost, PY, 3, "Block %"PRIu64" should be a group "
 			    "descriptor for the bitmap chain allocator but it "
-			    "wasn't found in any chains.  Link it into the "
-			    "chain allocator?", blkno))
+			    "wasn't found in any chains.  Reinitialize it as "
+			    "a group desc and link it into the bitmap "
+			    "allocator?", blkno))
 			continue;
 
+		/* some input that init_desc might need */
+		if (i == cgs.cgs_cluster_groups - 1)
+			bits = cgs.cgs_tail_group_bits;
+		else
+			bits = cgs.cgs_cpg;
+		chain = i % max_recs;
+
+		/* we've been asked to link in this desc specifically. we're
+		 * using the predictability of the group descs to rebuild
+		 * its values.. we only preserve the bitmap if the signature
+		 * and generation match this volume */
 		ret = ocfs2_read_group_desc(ost->ost_fs, blkno, (char *)bg);
+		if (ret == OCFS2_ET_BAD_GROUP_DESC_MAGIC ||
+		    bg->bg_generation != ost->ost_fs_generation) {
+			memset(bg, 0, ost->ost_fs->fs_blocksize);
+			ocfs2_init_group_desc(ost->ost_fs, bg, blkno,
+					      ost->ost_fs_generation,
+					      di->i_blkno,
+					      bits, chain);
+			ret = 0;
+		}
 		if (ret) {
 			com_err(whoami, ret, "while reading a cluster bitmap "
 				"group descriptor from block %"PRIu64,
 				blkno);
 			continue;
 		}
-		
-		/* XXX the rest of this block links a desc into the chain
-		 * and should probably be in libocfs2 */
-		/* XXX should be more paranoid in verifying the desc? */
+
+		/* first some easy fields */
+		bg->bg_size = ocfs2_group_bitmap_size(ost->ost_fs->fs_blocksize);
+		bg->bg_bits = bits;
+		bg->bg_parent_dinode = di->i_blkno;
+		bg->bg_blkno = blkno;
+		ocfs2_set_bit(0, bg->bg_bitmap);
+		bg->bg_free_bits_count = bg->bg_bits - 
+					 o2fsck_bitcount(bg->bg_bitmap,
+							 (bg->bg_bits + 7)/ 8);
+
+		/* we have to be kind of careful with the chain */
+		chain = ocfs2_min(chain,
+				  di->id2.i_chain.cl_next_free_rec);
+		chain = ocfs2_min(chain, max_recs);
+		bg->bg_chain = chain;
+
+		/* now really link it in */
 		rec = &di->id2.i_chain.cl_recs[bg->bg_chain];
 		bg->bg_next_group = rec->c_blkno;
 
-		ret = ocfs2_write_group_desc(ost->ost_fs, bg->bg_blkno,
-					     (char *)bg);
+		ret = ocfs2_write_group_desc(ost->ost_fs, blkno, (char *)bg);
 		if (ret) {
 			com_err(whoami, ret, "while writing a cluster group "
 				"descriptor at block %"PRIu64, blkno);
@@ -650,33 +669,35 @@
 			continue;
 		}
 
+		/* and update the calling inode */
 		rec->c_free += bg->bg_free_bits_count;
 		rec->c_total += bg->bg_bits;
 		rec->c_blkno = blkno;
-		if (di->id2.i_chain.cl_next_free_rec <= bg->bg_chain)
-			di->id2.i_chain.cl_next_free_rec = bg->bg_chain;
+
+		/* ugh */
+		if (di->id2.i_chain.cl_next_free_rec == bg->bg_chain &&
+		    di->id2.i_chain.cl_next_free_rec < max_recs)
+			di->id2.i_chain.cl_next_free_rec++;
+
 		di->id1.bitmap1.i_used += bg->bg_bits - bg->bg_free_bits_count;
 		di->id1.bitmap1.i_total += bg->bg_bits;
-		changed = 1;
-	}
 
-	/* XXX maybe we should verify the chain again now.  we might have
-	 * inserted a desc at its fixed position after some chains that
-	 * weren't in use. */
-
-	if (changed) {
 		ret = ocfs2_write_inode(ost->ost_fs, di->i_blkno, (char *)di);
 		if (ret) {
 			com_err(whoami, ret, "while writing inode alloc inode "
 				    "%"PRIu64, di->i_blkno);
 			ost->ost_saw_error = 1;
-			ret = 0;
+			goto out;
 		}
+
+		mark_group_used(ost, &cs, bg->bg_blkno, 1);
 	}
 
 out:
-	if (bitmap_descs)
-		ocfs2_bitmap_free(bitmap_descs);
+	if (allowed)
+		ocfs2_bitmap_free(allowed);
+	if (forbidden)
+		ocfs2_bitmap_free(forbidden);
 	return ret;
 }
 
@@ -762,7 +783,8 @@
 		ret = verify_chain_alloc(ost, di,
 					 blocks + ost->ost_fs->fs_blocksize,
 					 blocks + 
-					 (ost->ost_fs->fs_blocksize * 2), NULL);
+					 (ost->ost_fs->fs_blocksize * 2), 
+					 NULL, NULL);
 
 		/* XXX maybe helped by the alternate super block */
 		if (ret)
@@ -814,7 +836,8 @@
 		ret = verify_chain_alloc(ost, di,
 					 blocks + ost->ost_fs->fs_blocksize,
 					 blocks + 
-					 (ost->ost_fs->fs_blocksize * 2), NULL);
+					 (ost->ost_fs->fs_blocksize * 2), 
+					 NULL, NULL);
 
 		/* XXX maybe helped by the alternate super block */
 		if (ret)

Modified: trunk/fsck.ocfs2/util.c
===================================================================
--- trunk/fsck.ocfs2/util.c	2004-12-09 19:50:43 UTC (rev 471)
+++ trunk/fsck.ocfs2/util.c	2004-12-09 19:57:34 UTC (rev 472)
@@ -102,55 +102,24 @@
 	return ret;
 }
 
-errcode_t o2fsck_read_publish(o2fsck_state *ost)
+#define BITCOUNT(x)     (((BX_(x)+(BX_(x)>>4)) & 0x0F0F0F0F) % 255)
+#define BX_(x)          ((x) - (((x)>>1)&0x77777777) \
+			     - (((x)>>2)&0x33333333) \
+			     - (((x)>>3)&0x11111111))
+unsigned long o2fsck_bitcount(void *bytes, size_t len)
 {
-	uint16_t i, max_nodes;
-	char *hb_buf = NULL;
-	uint64_t hb_ino;
-	errcode_t ret;
-	int buflen;
-	char *whoami = "read_publish";
+	uint32_t val;
+	unsigned long total = 0;
+	size_t this;
 
-	if (ost->ost_publish)
-		ocfs2_free(&ost->ost_publish);
-
-	max_nodes = OCFS2_RAW_SB(ost->ost_fs->fs_super)->s_max_nodes;
-
-	ret = ocfs2_malloc0(sizeof(ocfs_publish) * max_nodes,
-			    &ost->ost_publish);
-	if (ret) {
-		com_err(whoami, ret, "while allocating an array to store each "
-			"node's publish block");
-		goto out;
+	while(len) {
+		val = 0;
+		this = ocfs2_min(len, sizeof(val));
+		memcpy(&val, bytes, this);
+		total += BITCOUNT(val);
+		len -= this;
 	}
-
-	ret = ocfs2_lookup_system_inode(ost->ost_fs, HEARTBEAT_SYSTEM_INODE,
-					0, &hb_ino);
-	if (ret) {
-		com_err(whoami, ret, "while looking up the dlm system inode");
-		goto out;
-	}
-
-	ret = ocfs2_read_whole_file(ost->ost_fs, hb_ino, &hb_buf, &buflen);
-	if (ret) {
-		com_err(whoami, ret, "while reading dlm file");
-		goto out;
-	}
-
-	/* I have no idea what that magic math is really doing. */
-	for (i = 0; i < max_nodes; i++) {
-		int b_bits = OCFS2_RAW_SB(ost->ost_fs->fs_super)->s_blocksize_bits;
-
-		memcpy(&ost->ost_publish[i],
-		       hb_buf + ((2 + 4 + max_nodes + i) << b_bits),
-		       sizeof(ocfs_publish));
-		if (ost->ost_publish[i].mounted)
-			ost->ost_stale_mounts = 1;
-	}
-out:
-	if (ret && ost->ost_publish)
-		ocfs2_free(&ost->ost_publish);
-	if (hb_buf)
-		ocfs2_free(&hb_buf);
-	return ret;
+	return total;
 }
+#undef BITCOUNT
+#undef BX_

Modified: trunk/libocfs2/include/ocfs2_fs.h
===================================================================
--- trunk/libocfs2/include/ocfs2_fs.h	2004-12-09 19:50:43 UTC (rev 471)
+++ trunk/libocfs2/include/ocfs2_fs.h	2004-12-09 19:57:34 UTC (rev 472)
@@ -367,7 +367,7 @@
 					   belongs to */
 	__u16 i_suballoc_bit;		/* Bit offset in suballocater
 					   block group */
-/*10*/	ocfs2_disk_lock i_disk_lock;	/* Lock structure */
+	__u32 i_reserved0;
 /*14*/	__u32 i_clusters;		/* Cluster count */
 /*18*/	__u32 i_uid;			/* Owner UID */
 	__u32 i_gid;			/* Owning GID */
@@ -383,8 +383,8 @@
 	__u64 i_last_eb_blk;		/* Pointer to last extent
 					   block */
 /*60*/	__u32 i_fs_generation;		/* Generation per fs-instance */
-	__u32 i_reserved0;		/* Generation per fs-instance */
-/*68*/	__u64 i_reserved1[10];
+	__u32 i_reserved1;		/* Generation per fs-instance */
+/*68*/	__u64 i_reserved2[10];
 /*B8*/	union {
 		__u64 i_pad1;		/* Generic way to refer to this
 					   64bit union */
@@ -397,6 +397,11 @@
 			__u32 i_total;	/* Total bits (clusters)
 					   available */
 		} bitmap1;
+		struct {		/* Info for journal system
+					   inodes */
+			__u32 i_flags;	/* Mounted, version, etc.    */
+			__u32 i_j_pad;
+		} journal1;
 	} id1;				/* Inode type dependant 1 */
 /*C0*/	union {
 		ocfs2_super_block i_super;



More information about the Ocfs2-tools-commits mailing list