[Ocfs2-tools-commits] zab commits r406 - in trunk/fsck.ocfs2: . include

svn-commits at oss.oracle.com svn-commits at oss.oracle.com
Wed Nov 17 13:11:27 CST 2004


Author: zab
Date: 2004-11-17 13:11:25 -0600 (Wed, 17 Nov 2004)
New Revision: 406

Modified:
   trunk/fsck.ocfs2/Makefile
   trunk/fsck.ocfs2/fsck.c
   trunk/fsck.ocfs2/include/fsck.h
   trunk/fsck.ocfs2/include/pass1.h
   trunk/fsck.ocfs2/include/util.h
   trunk/fsck.ocfs2/journal.c
   trunk/fsck.ocfs2/pass0.c
   trunk/fsck.ocfs2/pass1.c
   trunk/fsck.ocfs2/util.c
Log:
o move used block tracking helpers into util.c
o track blocks used by journals separately so that later inode iteration
  doesn't have to special case tracking blocks used by inodes
o explicitly mark blocks 0, 1, and SUPER as used very early.
o greatly simplify the code that was iterating down chains of group descs while
  teaching it to mark the group desc blocks as used
o mark blocks off of inodes as used
o use the blocks we think are used to determine which clusters we think
  should be marked as allocated in the cluster bitmap
o use cutting-edge "dependency" technology so that freaking fsck freaking
  relinks when freaking libocfs2 is freaking rebuilt


Modified: trunk/fsck.ocfs2/Makefile
===================================================================
--- trunk/fsck.ocfs2/Makefile	2004-11-17 18:45:05 UTC (rev 405)
+++ trunk/fsck.ocfs2/Makefile	2004-11-17 19:11:25 UTC (rev 406)
@@ -6,6 +6,7 @@
 
 INCLUDES = -Iinclude -I$(TOPDIR)/libocfs2/include
 LIBOCFS2_LIBS = -L$(TOPDIR)/libocfs2 -locfs2
+LIBOCFS2_DEPS = $(TOPDIR)/libocfs2/libocfs2.a
 
 ifdef OCFS_DEBUG
 OPTS += -ggdb
@@ -54,7 +55,7 @@
 dist-subdircreate:
 	$(TOPDIR)/mkinstalldirs $(DIST_DIR)/include
 
-fsck.ocfs2: $(OBJS)
+fsck.ocfs2: $(OBJS) $(LIBOCFS2_DEPS)
 	$(LINK) $(LIBOCFS2_LIBS) $(COM_ERR_LIBS)
 
 include $(TOPDIR)/Postamble.make

Modified: trunk/fsck.ocfs2/fsck.c
===================================================================
--- trunk/fsck.ocfs2/fsck.c	2004-11-17 18:45:05 UTC (rev 405)
+++ trunk/fsck.ocfs2/fsck.c	2004-11-17 19:11:25 UTC (rev 406)
@@ -330,6 +330,10 @@
 
 	exit_if_skipping(ost);
 
+	o2fsck_mark_block_used(ost, 0);
+	o2fsck_mark_block_used(ost, 1);
+	o2fsck_mark_block_used(ost, OCFS2_SUPER_BLOCK_BLKNO);
+
 	/* XXX we don't use the bad blocks inode, do we? */
 
 	printf("Checking OCFS2 filesystem in %s:\n", filename);

Modified: trunk/fsck.ocfs2/include/fsck.h
===================================================================
--- trunk/fsck.ocfs2/include/fsck.h	2004-11-17 18:45:05 UTC (rev 405)
+++ trunk/fsck.ocfs2/include/fsck.h	2004-11-17 19:11:25 UTC (rev 406)
@@ -59,7 +59,9 @@
 			ost_force:1,	/* -f supplied; force check */
 			ost_write_inode_alloc_asked:1,
 			ost_write_inode_alloc:1,
-			ost_write_error:1;
+			ost_write_error:1,
+			ost_write_cluster_alloc_asked:1,
+			ost_write_cluster_alloc:1;
 } o2fsck_state;
 
 /* The idea is to let someone off-site run fsck and have it give us 

Modified: trunk/fsck.ocfs2/include/pass1.h
===================================================================
--- trunk/fsck.ocfs2/include/pass1.h	2004-11-17 18:45:05 UTC (rev 405)
+++ trunk/fsck.ocfs2/include/pass1.h	2004-11-17 19:11:25 UTC (rev 406)
@@ -27,7 +27,6 @@
 #include "fsck.h"
 
 errcode_t o2fsck_pass1(o2fsck_state *ost);
-int o2fsck_mark_block_used(o2fsck_state *ost, uint64_t blkno);
 void o2fsck_free_inode_allocs(o2fsck_state *ost);
 
 #endif /* __O2FSCK_PASS1_H__ */

Modified: trunk/fsck.ocfs2/include/util.h
===================================================================
--- trunk/fsck.ocfs2/include/util.h	2004-11-17 18:45:05 UTC (rev 405)
+++ trunk/fsck.ocfs2/include/util.h	2004-11-17 19:11:25 UTC (rev 406)
@@ -44,6 +44,8 @@
 } while (0)
 
 void o2fsck_write_inode(o2fsck_state *ost, uint64_t blkno, ocfs2_dinode *di);
+int o2fsck_mark_block_used(o2fsck_state *ost, uint64_t blkno);
+int o2fsck_test_block_used(o2fsck_state *ost, uint64_t blkno);
 
 #endif /* __O2FSCK_UTIL_H__ */
 

Modified: trunk/fsck.ocfs2/journal.c
===================================================================
--- trunk/fsck.ocfs2/journal.c	2004-11-17 18:45:05 UTC (rev 405)
+++ trunk/fsck.ocfs2/journal.c	2004-11-17 19:11:25 UTC (rev 406)
@@ -63,6 +63,9 @@
 
 	unsigned		ji_set_final_seq:1;
 	uint32_t		ji_final_seq;
+
+	/* we keep our own bitmap for detecting overlapping journal blocks */
+	ocfs2_bitmap		*ji_used_blocks;
 };
 
 struct revoke_entry {
@@ -233,23 +236,31 @@
 				      uint64_t *blkno,
 				      int check_dup)
 {
-	errcode_t err;
+	errcode_t ret;
 	int contig;
+	int was_set;
 
-	err = ocfs2_extent_map_get_blocks(ji->ji_cinode, blkoff, 1, blkno,
+	ret = ocfs2_extent_map_get_blocks(ji->ji_cinode, blkoff, 1, blkno,
 					  &contig);
-	if (err) 
-		com_err(whoami, err, "while looking up logical block "
+	if (ret) {
+		com_err(whoami, ret, "while looking up logical block "
 			"%"PRIu64" in node %d's journal", blkoff, ji->ji_node);
+		goto out;
+	}
 
-	if (check_dup && o2fsck_mark_block_used(ost, *blkno)) {
-		printf("Logical block %"PRIu64" in node %d's journal maps to "
-		       "block %"PRIu64" which has already been used in "
-		       "another journal.\n", blkoff, ji->ji_node, *blkno);
-		err = OCFS2_ET_DUPLICATE_BLOCK;
+	if (check_dup) {
+		ocfs2_bitmap_set(ji->ji_used_blocks, *blkno, &was_set);
+		if (was_set)  {
+			printf("Logical block %"PRIu64" in node %d's journal "
+			       "maps to block %"PRIu64" which has already "
+			       "been used in another journal.\n", blkoff,
+			       ji->ji_node, *blkno);
+			ret = OCFS2_ET_DUPLICATE_BLOCK;
+		}
 	}
 
-	return err;
+out:
+	return ret;
 }
 
 static errcode_t read_journal_block(o2fsck_state *ost, 
@@ -517,9 +528,17 @@
 	char *buf = NULL, *dlm_buf = NULL;
 	int i, max_nodes, buflen, journal_trouble = 0;
 	uint64_t dlm_ino;
+	ocfs2_bitmap *used_blocks = NULL;
 
 	max_nodes = OCFS2_RAW_SB(ost->ost_fs->fs_super)->s_max_nodes;
 
+	ret = ocfs2_block_bitmap_new(ost->ost_fs, "journal blocks",
+				     &used_blocks);
+	if (ret) {
+		com_err(whoami, ret, "while allocating journal block bitmap"); 
+		goto out;
+	}
+
 	ret = ocfs2_malloc_blocks(ost->ost_fs->fs_io, 1, &buf);
 	if (ret) {
 		com_err(whoami, ret, "while allocating room to read journal "
@@ -555,8 +574,8 @@
 			continue;
 		}
 		ji->ji_replay = 1;
+		ji->ji_used_blocks = used_blocks;
 
-		/* check mounted bits in the publish doo-dah. */
 		err = prep_journal_info(ost, i, ji);
 		if (err == 0)
 			err = walk_journal(ost, i, ji, buf, 0);
@@ -633,6 +652,8 @@
 		ocfs2_free(&buf);
 	if (dlm_buf)
 		ocfs2_free(&dlm_buf);
+	if (used_blocks)
+		ocfs2_bitmap_free(used_blocks);
 
 	return ret;
 }

Modified: trunk/fsck.ocfs2/pass0.c
===================================================================
--- trunk/fsck.ocfs2/pass0.c	2004-11-17 18:45:05 UTC (rev 405)
+++ trunk/fsck.ocfs2/pass0.c	2004-11-17 19:11:25 UTC (rev 406)
@@ -67,10 +67,9 @@
 	uint32_t	cs_chain_no;
 };
 
-/* returns 0 if the group desc is valid */
-static int check_group_desc(o2fsck_state *ost, ocfs2_dinode *di,
-			    struct chain_state *cs, ocfs2_group_desc *bg,
-			    uint64_t blkno)
+static errcode_t check_group_desc(o2fsck_state *ost, ocfs2_dinode *di,
+				  struct chain_state *cs, ocfs2_group_desc *bg,
+				  uint64_t blkno)
 {
 	int changed = 0;
 
@@ -158,6 +157,92 @@
 	return 0;
 }
 
+struct chain_block_res {
+	unsigned	cb_new_next_blkno:1;	/* 1 if the ref must change */
+	uint64_t	cb_next_blkno;		/* new ref; 0 truncates */
+	errcode_t	cb_err;			/* non-zero stops the walk */
+};
+
+/* Examine blkno, which chain cs of the allocator at inode di refers to, and
+ * read its group descriptor into bg.  cbr reports the verdict: cb_err means
+ * give up on the chain, cb_new_next_blkno means replace the reference to
+ * blkno with cb_next_blkno, and neither means blkno stays in the chain. */
+static void read_chain_block(o2fsck_state *ost, ocfs2_dinode *di,
+		       struct chain_state *cs, uint64_t blkno,
+		       ocfs2_group_desc *bg, ocfs2_bitmap *allowed,
+		       struct chain_block_res *cbr)
+{
+	int was_set;
+	errcode_t ret;
+
+	memset(cbr, 0, sizeof(*cbr));
+
+	if (ocfs2_block_out_of_range(ost->ost_fs, blkno)) {
+		if (prompt(ost, PY, "Chain %d in allocator at inode %"PRIu64" "
+			   "points to block %"PRIu64" which is out of range. "
+			   "Truncate this chain by deleting this invalid "
+			   "block reference?", cs->cs_chain_no, di->i_blkno,
+			   blkno))  {
+			cbr->cb_new_next_blkno = 1;
+			cbr->cb_next_blkno = 0;
+		} else {
+			cbr->cb_err = OCFS2_ET_BAD_BLKNO;
+		}
+		return;
+	}
+
+	if (o2fsck_test_block_used(ost, blkno) &&
+	    prompt(ost, PY, "Chain %d in allocator at inode %"PRIu64" "
+			   "points to block %"PRIu64" which has already been "
+			   "used by another part of the file system. "
+			   "Truncate this chain by deleting this invalid "
+			   "block reference?", cs->cs_chain_no, di->i_blkno,
+			   blkno))  {
+		cbr->cb_new_next_blkno = 1;
+		cbr->cb_next_blkno = 0;
+		return;	/* truncated; never follow blkno's next_group */
+	}
+
+	if (allowed) {
+		ocfs2_bitmap_test(allowed, blkno, &was_set);
+		if (!was_set &&
+		    prompt(ost, PY, "Chain %d in allocator at inode %"PRIu64" "
+			   "points to block %"PRIu64" which should not be "
+			   "found in the allocator.  Truncate this chain by "
+			   "deleting this invalid block reference?",
+			   cs->cs_chain_no, di->i_blkno, blkno))  {
+			cbr->cb_new_next_blkno = 1;
+			/* will set next after reading */
+		}
+	}
+
+	ret = ocfs2_read_group_desc(ost->ost_fs, blkno, (char *)bg);
+	if (ret) {
+		com_err(whoami, ret, "while reading a group descriptor from "
+			"block %"PRIu64" as pointed to by chain %d in "
+			"allocator at inode %"PRIu64, blkno,
+			cs->cs_chain_no, di->i_blkno);
+		cbr->cb_err = ret;
+		return;
+	}
+
+	if (cbr->cb_new_next_blkno) {
+		cbr->cb_next_blkno = bg->bg_next_group;
+		return;
+	}
+
+	ret = check_group_desc(ost, di, cs, bg, blkno);
+	if (ret &&
+	    prompt(ost, PY, "Chain %d in allocator at inode %"PRIu64" "
+		   "refers to an invalid descriptor block at "
+		   "%"PRIu64".  Truncate the chain by removing this "
+		   "reference?", cs->cs_chain_no, di->i_blkno,
+		   blkno)) {
+		cbr->cb_new_next_blkno = 1;
+		cbr->cb_next_blkno = 0;
+	}
+}
+
 /*
  * this function is pretty hairy.  for dynamic chain allocators
  * it is just walking the chains to verify the group descs
@@ -178,8 +263,8 @@
 	ocfs2_group_desc *bg2 = (ocfs2_group_desc *)buf2;
 	uint64_t blkno;
 	errcode_t ret;
-	int rc, changed = 0, remove = 0;
-	int was_set;
+	int changed = 0;
+	struct chain_block_res cbr;
 
 	verbosef("free %u total %u blkno %"PRIu64"\n", chain->c_free,
 		 chain->c_total, chain->c_blkno);
@@ -190,135 +275,54 @@
 	if (blkno == 0)
 		goto out;
 
-	if (ocfs2_block_out_of_range(ost->ost_fs, blkno)) {
-		if (prompt(ost, PY, "Chain record %d in group allocator inode "
-			    "%"PRIu64" points to block %"PRIu64" which is out "
- 			    "of range.  Empty this chain by deleting this "
-			    "invalid block reference?", cs->cs_chain_no,
-			    di->i_blkno, blkno))  {
-			chain->c_blkno = 0;
-			changed = 1;
-		}
-
+	read_chain_block(ost, di, cs, blkno, bg1, allowed, &cbr);
+	if (cbr.cb_err) {
+		ret = cbr.cb_err;
 		goto out;
 	}
 
-	if (allowed) {
-		ocfs2_bitmap_test(allowed, blkno, &was_set);
-		if (!was_set &&
-		    prompt(ost, PY, "Chain record %d in chain allocator inode "
-			   "%"PRIu64" points to group descriptor block "
-			   "%"PRIu64" which should not be found in the "
-			   "allocator.  Remove this group descriptor block?",
-			   cs->cs_chain_no, di->i_blkno, blkno))  {
-			remove = 1;
-		}
-	}
-
-	ret = ocfs2_read_group_desc(ost->ost_fs, blkno, buf1);
-	if (ret) {
-		com_err(whoami, ret, "while reading a group descriptor from "
-			"block %"PRIu64" as pointed to by chain record %d in "
-			"group allocator inode %"PRIu64, blkno, 
-			cs->cs_chain_no, di->i_blkno);
-		goto out;
-	}
-
-	if (remove) {
-		chain->c_blkno = bg1->bg_next_group;
+	if (cbr.cb_new_next_blkno) {
+		chain->c_blkno = cbr.cb_next_blkno;	/* not the flag */
 		changed = 1;
-		remove = 0;
 		goto new_head;
 	}
 
-	ret = check_group_desc(ost, di, cs, bg1, blkno);
-	if (ret) {
-		if (prompt(ost, PY, "Chain %d in group allocator inode "
-			   "%"PRIu64" points to an invalid descriptor block "
-			   "at %"PRIu64".  Delete the chain?",
-			   cs->cs_chain_no, di->i_blkno, blkno)) {
-			chain->c_blkno = 0;
-			changed = 1;
-		}
-		goto out;
-	}
-
 	if (allowed)
 		ocfs2_bitmap_clear(allowed, chain->c_blkno, NULL);
+	o2fsck_mark_block_used(ost, chain->c_blkno);
 
 	/* read in each group desc and check it.  In this loop bg1 is 
 	 * verified and in the chain.  it's bg2 that is considered.  if
 	 * bg2 is found lacking we overwrite bg1's next_group and check
 	 * again */
 	while (bg1->bg_next_group) {
-		int write = 0;
-		/* see if we're about to reference a block that we shouldn't */
-		if (allowed) {
-			ocfs2_bitmap_test(allowed, bg1->bg_next_group, 
-					  &was_set);
-			if (!was_set &&
-			    prompt(ost, PY, "Chain %d in chain allocator "
-				   "inode %"PRIu64" points to group "
-				   "descriptor block %"PRIu64" which should "
-				   "not be found in the allocator.  Remove "
-				   "this group descriptor block?",
-				   cs->cs_chain_no, di->i_blkno, blkno))  {
-				remove = 1;
-			}
-		}
-		/* read the next desc in either to verify it or just to
-		 * grab the reference to the desc after it */
-		ret = ocfs2_read_group_desc(ost->ost_fs, bg1->bg_next_group,
-					    buf2);
-		if (ret) {
-			com_err(whoami, ret, "while reading a group descriptor "
-				    "from block %"PRIu64" as pointed to by "
-				    "chain record %d in group allocator inode "
-				    "%"PRIu64, bg1->bg_next_group, 
-				    cs->cs_chain_no, di->i_blkno);
+
+		read_chain_block(ost, di, cs, bg1->bg_next_group, bg2, allowed,
+				 &cbr);
+		if (cbr.cb_err) {
+			ret = cbr.cb_err;
 			goto out;
-		} 
-
-		/* skip over this desc that we've been told to remove it */
-		if (remove) {
-			bg1->bg_next_group = bg2->bg_next_group;
-			write = 1;
-			remove = 0;
-			goto write_bg1;
 		}
 
-		rc = check_group_desc(ost, di, cs, bg2, bg1->bg_next_group);
-		if (rc == 0) {
+		if (!cbr.cb_new_next_blkno) {
 			if (allowed)
 				ocfs2_bitmap_clear(allowed, bg2->bg_blkno,
 						   NULL);
+			o2fsck_mark_block_used(ost, bg2->bg_blkno);
 			memcpy(buf1, buf2, ost->ost_fs->fs_blocksize);
 			continue;
 		}
 
-		if (prompt(ost, PY, "Chain %d in group allocator inode "
-			   "%"PRIu64" contains an invalid descriptor block "
-			   "at %"PRIu64".  Truncate the chain to the last "
-			   "valid descriptor block?", cs->cs_chain_no,
-			   di->i_blkno, bg1->bg_next_group)) {
-			bg1->bg_next_group = 0;
-			write = 1;
-		}
+		/* link bg1 to the replacement block, not to the 1-bit flag */
+		bg1->bg_next_group = cbr.cb_next_blkno;
-write_bg1:
-		if (write) {
-			ret = ocfs2_write_group_desc(ost->ost_fs, 
-						     bg1->bg_blkno,
-						     (char *)bg1);
-			if (ret) {
-				com_err(whoami, ret, "while writing a group "
-					"descriptor to block %"PRIu64" "
-					"somewhere in chain %d in group "
-					"allocator inode %"PRIu64, 
-					bg1->bg_blkno, cs->cs_chain_no,
-					di->i_blkno);
-				ost->ost_write_error = 1;
-			}
+		ret = ocfs2_write_group_desc(ost->ost_fs, bg1->bg_blkno,
+					     (char *)bg1);
+		if (ret) {
+			com_err(whoami, ret, "while writing a group "
+				"descriptor to block %"PRIu64" somewhere in "
+				"chain %d in group allocator inode %"PRIu64, 
+				bg1->bg_blkno, cs->cs_chain_no, di->i_blkno);
+			ost->ost_write_error = 1;
 		}
 	}
 

Modified: trunk/fsck.ocfs2/pass1.c
===================================================================
--- trunk/fsck.ocfs2/pass1.c	2004-11-17 18:45:05 UTC (rev 405)
+++ trunk/fsck.ocfs2/pass1.c	2004-11-17 19:11:25 UTC (rev 406)
@@ -37,6 +37,12 @@
  *  - bitmap of which inodes are directories or regular files
  *  - directory blocks that it finds off of directory inodes
  *
+ * The end of Pass 1 is when the found block bitmap should contain all the
+ * blocks in the system that are in use.  This is used to derive the set of
+ * clusters that should be allocated.  The cluster chain allocator is loaded
+ * and synced up with this set and potentially written back.  After that point
+ * fsck can use libocfs2 to allocate and free clusters as usual.
+ *
  * XXX
  * 	check many, many, more i_ fields for each inode type
  * 	make sure the inode's dtime/count/valid match in update_inode_alloc
@@ -59,19 +65,6 @@
 
 static const char *whoami = "pass1";
 
-/* XXX need to, you know, do things with this. */
-int o2fsck_mark_block_used(o2fsck_state *ost, uint64_t blkno)
-{
-	int was_set;
-
-	ocfs2_bitmap_set(ost->ost_found_blocks, blkno, &was_set);
-
-	if (was_set) /* XX can go away one all callers handle this */
-		verbosef("!! duplicate block %"PRIu64"\n", blkno);
-
-	return was_set;
-}
-
 void o2fsck_free_inode_allocs(o2fsck_state *ost)
 {
 	uint16_t i;
@@ -125,6 +118,10 @@
 			continue;
 		}
 
+		/* hmm, de hmm.  it would be kind of nice if the bitmaps
+		 * didn't use 'int' but rather some real boolean construct */
+		oldval = !!oldval;
+
 		/* this node covers the inode.  see if we've changed the 
 		 * bitmap and if the user wants us to keep tracking it and
 		 * write back the new map */
@@ -367,7 +364,7 @@
 
 	if ((blkno < OCFS2_SUPER_BLOCK_BLKNO) || (blkno > fs->fs_blocks)) {
 		vb->vb_errors++;
-#if 0 /* ext2 does this by returning a value to libext2 which clears the 
+#if 0 /* XXX ext2 does this by returning a value to libext2 which clears the 
 	 block from the inode's allocation */
 		if (prompt(ost, PY, "inode %"PRIu64" references bad physical "
 			   "block %"PRIu64" at logical block %"PRIu64", "
@@ -387,6 +384,8 @@
 		}
 	}
 
+	o2fsck_mark_block_used(ost, blkno);
+
 	if (S_ISDIR(di->i_mode)) {
 		verbosef("adding dir block %"PRIu64"\n", blkno);
 		o2fsck_add_dir_block(&ost->ost_dirblocks, di->i_blkno, blkno,
@@ -401,13 +400,17 @@
 	return 0;
 }
 
+/* XXX this is only really building up the vb data so that the caller can
+ * verify the chain allocator inode's fields.  I wonder if we shouldn't have
+ * already done that in pass 0. */
 static int check_gd_block(ocfs2_filesys *fs, uint64_t gd_blkno, int chain_num,
 			   void *priv_data)
 {
 	struct verifying_blocks *vb = priv_data;
 	verbosef("found gd block %"PRIu64"\n", gd_blkno);
+	/* XXX should arguably be verifying that pass 0 marked the group desc
+	 * blocks found */
 	/* don't have bcount */
-	o2fsck_mark_block_used(vb->vb_ost, gd_blkno);
 	vb_saw_block(vb, vb->vb_num_blocks);
 	return 0;
 }
@@ -505,6 +508,149 @@
 	}
 }
 
+/* once we've iterated all the inodes we should have the current working
+ * set of which blocks we think are in use.  we use this to derive the set
+ * of clusters that should be allocated in the cluster chain allocators.  we
+ * don't iterate over all clusters like we do inodes.. */
+static void write_cluster_alloc(o2fsck_state *ost)
+{
+	ocfs2_cached_inode *ci = NULL;
+	errcode_t ret;
+	uint64_t blkno, last_cbit, cbit, cbit_found;
+	struct ocfs2_cluster_group_sizes cgs;
+
+	ocfs2_calc_cluster_groups(ost->ost_fs->fs_clusters,
+				  ost->ost_fs->fs_blocksize, &cgs);
+
+	/* first load the cluster chain alloc so we can compare */
+	ret = ocfs2_lookup_system_inode(ost->ost_fs,
+					GLOBAL_BITMAP_SYSTEM_INODE, 0, &blkno);
+	if (ret) {
+		com_err(whoami, ret, "while looking up the cluster bitmap "
+			"allocator inode");
+		goto out;
+	}
+
+	/* load in the cluster chain allocator */
+	ret = ocfs2_read_cached_inode(ost->ost_fs, blkno, &ci);
+	if (ret) {
+		com_err(whoami, ret, "while reading the cluster bitmap "
+			"allocator inode from block %"PRIu64, blkno);
+		goto out;
+	}
+
+	ret = ocfs2_load_chain_allocator(ost->ost_fs, ci);
+	if (ret) {
+		com_err(whoami, ret, "while loading the cluster bitmap "
+			"allocator from block %"PRIu64, blkno);
+		goto out;
+	}
+
+	/* we walk our found blocks bitmap to find clusters that we think
+	 * are in use.  each time we find a block in a cluster we skip ahead
+	 * to the first block of the next cluster when looking for the next.
+	 *
+	 * once we have a cluster we think is allocated we walk the cluster
+	 * chain alloc bitmaps from the last cluster we thought was allocated
+	 * to make sure that all the bits are cleared on the way.
+	 *
+	 * we special case the number of clusters as the cluster offset which
+	 * indicates that the rest of the bits to the end of the bitmap
+	 * should be clear.
+	 */
+	last_cbit = 0;
+	blkno = 0;
+	cbit = 0;
+	for ( ; cbit < ost->ost_fs->fs_clusters;
+	        blkno = ocfs2_clusters_to_blocks(ost->ost_fs, cbit + 1),
+		last_cbit = cbit + 1) {
+
+		verbosef("starting with blkno %"PRIu64"\n", blkno);
+
+		ret = ocfs2_bitmap_find_next_set(ost->ost_found_blocks, blkno,
+						 &blkno);
+
+		/* clear to the end */
+		if (ret == OCFS2_ET_BIT_NOT_FOUND)
+			cbit = ost->ost_fs->fs_clusters;
+		else {
+			uint64_t cgroup, cluster;
+			/* the libocfs2 bitmap interfaces names bit ranges
+			 * by the block the desc starts on.  so to find
+			 * a bit number for a given cluster we find the
+			 * block offset of the start of the cluster group
+			 * and add the offset of the cluster in its cluster
+			 * group */
+			cluster = ocfs2_blocks_to_clusters(ost->ost_fs, blkno);
+			cgroup = cluster - (cluster % cgs.cgs_cpg);
+			cbit = ocfs2_clusters_to_blocks(ost->ost_fs, cgroup);
+
+			cbit += cluster % cgs.cgs_cpg;
+		}
+
+		ret = ocfs2_bitmap_find_next_set(ci->ci_chains, last_cbit,
+						 &cbit_found);
+		if (ret == OCFS2_ET_BIT_NOT_FOUND)
+			cbit_found = ost->ost_fs->fs_clusters;
+
+		verbosef("blkno %"PRIu64" cbit %"PRIu64" last_cbit %"PRIu64" "
+			 "cbit_found %"PRIu64"\n", blkno, cbit, last_cbit,
+			 cbit_found);
+
+		if (cbit_found == cbit)
+			continue;
+
+		if (!ost->ost_write_cluster_alloc_asked) {
+			int yn;
+			yn = prompt(ost, PY, "The cluster bitmap doesn't "
+				    "match what fsck thinks should be in use "
+				    "and freed.  Update the bitmap on disk?");
+			ost->ost_write_cluster_alloc_asked = 1;
+			ost->ost_write_cluster_alloc = !!yn;
+			if (!ost->ost_write_cluster_alloc)
+				goto out;
+		}
+
+		/* clear set bits that should have been clear up to cbit,
+		 * resuming each search just past the bit we cleared */
+		while (cbit_found < cbit) {
+			ret = ocfs2_chain_force_val(ost->ost_fs, ci,
+						    cbit_found, 0, NULL);
+			if (ret) {
+				com_err(whoami, ret, "while trying to clear "
+					"bit %"PRIu64" in the cluster bitmap.",
+					cbit_found);
+				goto out;
+			}
+			cbit_found++;
+			ret = ocfs2_bitmap_find_next_set(ci->ci_chains,
+						cbit_found, &cbit_found);
+			if (ret == OCFS2_ET_BIT_NOT_FOUND)
+				cbit_found = ost->ost_fs->fs_clusters;
+		}
+		/* make sure cbit is set before moving on */
+		if (cbit_found != cbit && cbit != ost->ost_fs->fs_clusters) {
+			ret = ocfs2_chain_force_val(ost->ost_fs, ci, cbit, 1,
+						    NULL);
+			if (ret) {
+				com_err(whoami, ret, "while trying to set bit "
+					"%"PRIu64" in the cluster bitmap.",
+					cbit);
+				goto out;
+			}
+		}
+	}
+
+	ret = ocfs2_write_chain_allocator(ost->ost_fs, ci);
+	if (ret)
+		com_err(whoami, ret, "while trying to write back the cluster "
+			"bitmap allocator");
+
+out:
+	if (ci)
+		ocfs2_free_cached_inode(ost->ost_fs, ci);
+}
+
 static void write_inode_alloc(o2fsck_state *ost)
 {
 	int max_nodes = OCFS2_RAW_SB(ost->ost_fs->fs_super)->s_max_nodes;
@@ -579,6 +725,8 @@
 
 		valid = 0;
 
+		o2fsck_mark_block_used(ost, blkno);
+
 		/* scanners have to skip over uninitialized inodes */
 		if (!memcmp(di->i_signature, OCFS2_INODE_SIGNATURE,
 		    strlen(OCFS2_INODE_SIGNATURE)) &&
@@ -600,6 +748,7 @@
 		fatal_error(OCFS2_ET_INTERNAL_FAILURE, "duplicate blocks "
 				"found, need to learn to fix.");
 
+	write_cluster_alloc(ost);
 	write_inode_alloc(ost);
 
 out_close_scan:

Modified: trunk/fsck.ocfs2/util.c
===================================================================
--- trunk/fsck.ocfs2/util.c	2004-11-17 18:45:05 UTC (rev 405)
+++ trunk/fsck.ocfs2/util.c	2004-11-17 19:11:25 UTC (rev 406)
@@ -50,3 +50,23 @@
 		ost->ost_write_error = 1;
 	}
 }
+
+/* record blkno as in use; returns non-zero if it was already marked */
+int o2fsck_mark_block_used(o2fsck_state *ost, uint64_t blkno)
+{
+	int was_set = 0; /* in case the bitmap call fails without filling it */
+
+	ocfs2_bitmap_set(ost->ost_found_blocks, blkno, &was_set);
+	if (was_set) /* XX can go away once all callers handle this */
+		com_err(__FUNCTION__, OCFS2_ET_INTERNAL_FAILURE,
+			"!! duplicate block %"PRIu64, blkno);
+	return was_set;
+}
+
+/* returns non-zero if blkno is marked in ost_found_blocks */
+int o2fsck_test_block_used(o2fsck_state *ost, uint64_t blkno)
+{
+	int was_set = 0; /* in case the bitmap call fails without filling it */
+	ocfs2_bitmap_test(ost->ost_found_blocks, blkno, &was_set);
+	return was_set;
+}



More information about the Ocfs2-tools-commits mailing list