[Ocfs2-tools-commits] zab commits r348 - in trunk: fsck.ocfs2 fsck.ocfs2/include libocfs2 libocfs2/include

Wed Oct 27 18:37:31 CDT 2004

Author: zab
Date: 2004-10-27 18:37:29 -0500 (Wed, 27 Oct 2004)
New Revision: 348

Modified:
   trunk/fsck.ocfs2/fsck.c
   trunk/fsck.ocfs2/include/pass1.h
   trunk/fsck.ocfs2/include/problem.h
   trunk/fsck.ocfs2/journal.c
   trunk/fsck.ocfs2/pass0.c
   trunk/fsck.ocfs2/pass1.c
   trunk/libocfs2/include/ocfs2.h
   trunk/libocfs2/ocfs2_err.et.in
Log:
strengthen journal replay and misc fixes encountered along the way.
                                                                                
o add format checking to prompt(), fix up some bonehead callers
o have mark_block_used return whether or not the block was already used
o add comments to the top of journal.c
o don't replay a journal that has bad tags
o don't replay a journal that references another's blocks
o add some prompting to journal replay, improve messages
o add a '-y' option to force yes
o add ocfs2_{clusters,blocks}_in_{blocks,bytes}()



Modified: trunk/fsck.ocfs2/fsck.c
===================================================================

--- trunk/fsck.ocfs2/fsck.c	2004-10-27 06:48:43 UTC (rev 347)
+++ trunk/fsck.ocfs2/fsck.c	2004-10-27 23:37:29 UTC (rev 348)
@@ -21,7 +21,45 @@
  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  * Boston, MA 021110-1307, USA.
  *
- * Authors: Zach Brown
+ * --
+ * Roughly o2fsck performs the following operations.  Each pass' file has
+ * more details.
+ * 
+ * - replay the journals if needed
+ * 	- walk the journal extents looking for simple inconsistencies
+ * 		- loops, doubly referenced blocks
+ * 		- need this code later anyway for verifying files
+ * 		  and i_clusters/i_size
+ * 	- prompt to proceed if errors (mention backup superblock)
+ * 		- ignore entirely or partially replay?
+ *
+ * - pass0: clean up the inode allocators
+ * 	- kill loops, chains can't share groups
+ * 	- move local allocs back to the global or something?
+ * 	- verify just enough of the fields to make iterating work
+ *
+ * - pass1: walk inodes
+ * 	- record all valid clusters that inodes point to
+ * 	- make sure extent trees in inodes are consistent
+ * 	- inconsistencies mark inodes for deletion
+ * 	- update cluster bitmap
+ * 		- have bits reflect our set of referenced clusters
+ * 		- again, how to resolve local/global?
+ * 		* from this point on the library can trust the cluster bitmap
+ *
+ * 	- update the inode allocators
+ * 		- make sure our set of valid inodes matches the bits
+ * 		- make sure all the bit totals add up
+ * 		* from this point on the library can trust the inode allocators
+ *
+ * This makes it so only these early passes need to have global 
+ * allocation goo in memory.  The rest can use the library as 
+ * usual.
+ *
+ * so what do we do about the extent metadata allocators?  track them in
+ * the same way we track inodes in the inode suballocators, I guess.  store
+ * with whatever key they have.  do the suballocators only allocate extent
+ * list blocks that are only owned by a tree?  that'd make it pretty easy.
  */
 #include <getopt.h>
 #include <limits.h>
@@ -184,7 +222,7 @@
 
 	initialize_ocfs_error_table();
 
-	while((c = getopt(argc, argv, "b:B:npv")) != EOF) {
+	while((c = getopt(argc, argv, "b:B:npvy")) != EOF) {
 		switch (c) {
 			case 'b':
 				blkno = read_number(optarg);
@@ -224,6 +262,11 @@
 				ost->ost_answer = 1;
 				break;
 
+			case 'y':
+				ost->ost_ask = 0;
+				ost->ost_answer = 1;
+				break;
+
 			case 'v':
 				verbose = 1;
 				break;

Modified: trunk/fsck.ocfs2/include/pass1.h
===================================================================
--- trunk/fsck.ocfs2/include/pass1.h	2004-10-27 06:48:43 UTC (rev 347)
+++ trunk/fsck.ocfs2/include/pass1.h	2004-10-27 23:37:29 UTC (rev 348)
@@ -27,7 +27,7 @@
 #include "fsck.h"
 
 errcode_t o2fsck_pass1(o2fsck_state *ost);
-void o2fsck_mark_block_used(o2fsck_state *ost, uint64_t blkno);
+int o2fsck_mark_block_used(o2fsck_state *ost, uint64_t blkno);
 
 #endif /* __O2FSCK_PASS1_H__ */
 

Modified: trunk/fsck.ocfs2/include/problem.h
===================================================================
--- trunk/fsck.ocfs2/include/problem.h	2004-10-27 06:48:43 UTC (rev 347)
+++ trunk/fsck.ocfs2/include/problem.h	2004-10-27 23:37:29 UTC (rev 348)
@@ -33,7 +33,8 @@
 /* returns non-zero for yes and zero for no.  The caller is expected to
  * provide a thorough description of the state and the action that will
  * be taken depending on the answer.  Without \n termination. */
-int prompt(o2fsck_state *ost, unsigned flags, const char *fmt, ...);
+int prompt(o2fsck_state *ost, unsigned flags, const char *fmt, ...)
+	 __attribute__ ((format (printf, 3, 4)));
 
 #endif /* __O2FSCK_PROBLEM_H__ */
 

Modified: trunk/fsck.ocfs2/journal.c
===================================================================
--- trunk/fsck.ocfs2/journal.c	2004-10-27 06:48:43 UTC (rev 347)
+++ trunk/fsck.ocfs2/journal.c	2004-10-27 23:37:29 UTC (rev 348)
@@ -21,8 +21,22 @@
  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  * Boston, MA 021110-1307, USA.
  *
- * Authors: Zach Brown
+ * --
+ * This replays the jbd journals for each node.  First all the journals are
+ * walked to detect inconsistencies.  Only journals with no problems will be
+ * replayed.  IO errors during replay will just result in partial journal
+ * replay, just like jbd does in the kernel.  Journals that don't pass
+ * consistency checks, like having overlapping blocks or strange fields, are
+ * ignored and left for later passes to clean up.  Overlap testing is done
+ * using o2fsck_state's used block bitmap.  
+ *
+ * XXX
+ * 	future passes need to guarantee journals exist and are the same size 
+ * 	pass fsck trigger back up, write dirty fs, always zap/write
+ * 	revocation code is totally untested
+ * 	some setup errors, like finding the dlm system inode, are fatal
  */
+
 #include <stdint.h>
 #include <string.h>
 #include <inttypes.h>
@@ -36,12 +50,15 @@
 #include "ocfs2.h"
 #include "ocfs2_disk_dlm.h"
 #include "pass1.h"
+#include "problem.h"
 #include "util.h"
 
 static char *whoami = "journal recovery";
 
 struct journal_info {
 	int			ji_node;
+	unsigned		ji_replay:1;
+
 	uint64_t		ji_ino;
 	struct rb_root		ji_revoke;
 	journal_superblock_t	*ji_jsb;
@@ -185,19 +202,24 @@
 	return block;
 }
 
-static uint64_t count_tags(char *buf, size_t size)
+static errcode_t count_tags(o2fsck_state *ost, char *buf, size_t size,
+			    uint64_t *nr_ret)
 {
 	journal_block_tag_t *tag, *last;
 	uint64_t nr = 0;
 
 	if (size < sizeof(journal_header_t) + sizeof(*tag))
-		return 0;
+		return OCFS2_ET_BAD_JOURNAL_TAG;
 
        	tag = (journal_block_tag_t *)&buf[sizeof(journal_header_t)];
        	last = (journal_block_tag_t *)&buf[size - sizeof(*tag)];
 
 	for(; tag <= last; tag++) {
 		nr++;
+		if (ocfs2_block_out_of_range(ost->ost_fs, 
+					     be32_to_cpu(tag->t_blocknr)))
+			return OCFS2_ET_BAD_JOURNAL_TAG;
+
 		if (tag->t_flags & cpu_to_be32(JFS_FLAG_LAST_TAG))
 			break;
 		/* inline uuids are 16 bytes, tags are 8 */
@@ -205,34 +227,45 @@
 			tag += 2;
 	}
 
-	return nr;
+	*nr_ret = nr;
+	return 0;
 }
 
 static errcode_t lookup_journal_block(o2fsck_state *ost, 
 				      struct journal_info *ji, 
 				      uint64_t blkoff,
-				      uint64_t *blkno)
+				      uint64_t *blkno,
+				      int check_dup)
 {
 	errcode_t err;
 	int contig;
 
-	err = ocfs2_extent_map_get_blocks(ji->ji_cinode, blkoff,
-					  1, blkno, &contig);
+	err = ocfs2_extent_map_get_blocks(ji->ji_cinode, blkoff, 1, blkno,
+					  &contig);
 	if (err) 
 		com_err(whoami, err, "while looking up logical block "
 			"%"PRIu64" in node %d's journal", blkoff, ji->ji_node);
+
+	if (check_dup && o2fsck_mark_block_used(ost, *blkno)) {
+		printf("Logical block %"PRIu64" in node %d's journal maps to "
+		       "block %"PRIu64" which has already been used in "
+		       "another journal.\n", blkoff, ji->ji_node, *blkno);
+		err = OCFS2_ET_DUPLICATE_BLOCK;
+	}
+
 	return err;
 }
 
 static errcode_t read_journal_block(o2fsck_state *ost, 
 				    struct journal_info *ji, 
 				    uint64_t blkoff, 
-				    char *buf)
+				    char *buf,
+				    int check_dup)
 {
 	errcode_t err;
 	uint64_t	blkno;
 
-	err = lookup_journal_block(ost, ji, blkoff, &blkno);
+	err = lookup_journal_block(ost, ji, blkoff, &blkno, check_dup);
 	if (err)
 		return err;
 
@@ -275,7 +308,7 @@
 		if (revoke_this_block(&ji->ji_revoke, tag.t_blocknr, seq))
 			goto skip_io;
 
-		err = read_journal_block(ost, ji, *next_block, io_buf);
+		err = read_journal_block(ost, ji, *next_block, io_buf, 1);
 		if (err) {
 			ret = err;
 			goto skip_io;
@@ -308,9 +341,9 @@
 static errcode_t walk_journal(o2fsck_state *ost, int node, 
 			      struct journal_info *ji, char *buf, int recover)
 {
-	errcode_t err = 0;
+	errcode_t err, ret = 0;
 	uint32_t next_seq;
-	uint64_t next_block;
+	uint64_t next_block, nr;
 	journal_superblock_t *jsb = ji->ji_jsb;
 	journal_header_t jh;
 
@@ -321,16 +354,23 @@
 	if (next_block == 0)
 		return 0;
 
-	while(1) {
-		verbosef("next_seq %"PRIu32" next_block %"PRIu64"\n", next_seq, 
+	/* ret is set when bad tags are seen in the first scan and when there
+	 * are io errors in the recovery scan.  Only stop walking the journal
+	 * when bad tags are seen in the first scan. */
+	while(recover || !ret) {
+		verbosef("next_seq %"PRIu32" final_seq %"PRIu32" next_block "
+			 "%"PRIu64"\n", next_seq, ji->ji_final_seq,
 			 next_block);
 
 		if (recover && seq_geq(next_seq, ji->ji_final_seq))
 			break;
 
-		err = read_journal_block(ost, ji, next_block, buf);
-		if (err)
+		/* only mark the blocks used on the first pass */
+		err = read_journal_block(ost, ji, next_block, buf, !recover);
+		if (err) {
+			ret = err;
 			break;
+		}
 
 		next_block = jwrap(jsb, next_block + 1);
 
@@ -353,14 +393,21 @@
 		switch(jh.h_blocktype) {
 		case JFS_DESCRIPTOR_BLOCK:
 			verbosef("found a desc type %x\n", jh.h_blocktype);
-			if (!recover) {
-				next_block = jwrap(jsb, next_block + 
-					    count_tags(buf, jsb->s_blocksize));
+			/* replay the blocks described in the desc block */
+			if (recover) {
+				err = replay_blocks(ost, ji, buf, next_seq, 
+						    &next_block);
+				if (err)
+					ret = err;
 				continue;
 			}
 
-			err = replay_blocks(ost, ji, buf, next_seq, 
-					    &next_block);
+			/* just record the blocks as used and carry on */ 
+			err = count_tags(ost, buf, jsb->s_blocksize, &nr);
+			if (err)
+				ret = err;
+			else
+				next_block = jwrap(jsb, next_block + nr);
 			break;
 
 		case JFS_COMMIT_BLOCK:
@@ -386,14 +433,15 @@
 		ji->ji_set_final_seq = 1;
 		ji->ji_final_seq = next_seq;
 	} else if (ji->ji_final_seq != next_seq) {
-		err = OCFS2_ET_IO;
-		com_err(whoami, err, "while recovering the journal and found "
-			"that we arrived at seq %"PRIu32" instead of seq "
-			"%"PRIu32" as we expected from a previous scan.",
-			next_seq, ji->ji_final_seq);
+		printf("Replaying node %d's journal stopped at seq %"PRIu32" "
+		       "but an initial scan indicated that it should have "
+		       "stopped at seq %"PRIu32"\n", ji->ji_node, next_seq,
+		       ji->ji_final_seq);
+		if (ret == 0)
+			err = OCFS2_ET_IO;
 	}
 
-	return err;
+	return ret;
 }
 
 static errcode_t prep_journal_info(o2fsck_state *ost, int node,
@@ -430,7 +478,7 @@
 		goto out;
 	}
 
-	err = lookup_journal_block(ost, ji, 0, &ji->ji_jsb_block);
+	err = lookup_journal_block(ost, ji, 0, &ji->ji_jsb_block, 1);
 	if (err)
 		goto out;
 
@@ -463,34 +511,14 @@
 	return pub->mounted;
 }
 
-/* XXX The only job this has is to replay the journal if it can.  It doesn't
- * participate in book-keeping and doesn't try to fix up the journals.  It is
- * just replaying as much as it can for the main fsck passes.
- *
- * For now this is very simple and paranoid.  Any errors encountered
- * are fatal and stop fsck.  I propose:
- *
- * - Allocation errors are always fatal.  If we can't allocate what little
- *   we need to replay the journals there's no way we will be able to
- *   perform a full fsck.  Instead of wiping the journal we should leave
- *   the task to someone with enough mem (it won't be a lot, this is largely
- *   academic.)
- *
- * - block IO errors should only effect the bits of journal recovery
- *   they hit.  The rest should be recovered and fsck can pick up
- *   the pieces.  remapping around bad blocks, etc.
- *
- * - Missing journals, insane fields, etc, should be cleared and left
- *   for fsck to pick up.
- *
- * XXX pass fsck trigger back up, write dirty fs, always zap/write */
+/* XXX be more strict with the error codes that trickle up to here */
 errcode_t o2fsck_replay_journals(o2fsck_state *ost)
 {
 	errcode_t err = 0, ret = 0;
 	struct journal_info *jis, *ji;
 	journal_superblock_t *jsb;
 	char *buf = NULL, *dlm_buf = NULL;
-	int i, max_nodes, buflen;
+	int i, max_nodes, buflen, journal_trouble = 0;
 	uint64_t dlm_ino;
 
 	max_nodes = OCFS2_RAW_SB(ost->ost_fs->fs_super)->s_max_nodes;
@@ -522,40 +550,43 @@
 		goto out;
 	}
 
-	for (i = 0; i < max_nodes ; i++) {
+	printf("Checking each node's journal.\n");
+
+	for (i = 0, ji = jis; i < max_nodes; i++, ji++) {
 		if (!publish_mounted_set(ost->ost_fs, dlm_buf, i, max_nodes)) {
 			verbosef("node %d is clean\n", i);
 			continue;
 		}
+		ji->ji_replay = 1;
+
 		/* check mounted bits in the publish doo-dah. */
-		err = prep_journal_info(ost, i, &jis[i]);
-		if (err) {
-			ret = err;
-			continue;
-		}
+		err = prep_journal_info(ost, i, ji);
+		if (err == 0)
+			err = walk_journal(ost, i, ji, buf, 0);
 
-		err = walk_journal(ost, i, &jis[i], buf, 0);
 		if (err) {
-			ret = err;
-			continue;
+			ji->ji_replay = 0;
+			printf("Node %d's journal can not be replayed.\n", i);
+			journal_trouble = 1;
 		}
 	}
 
-	for (i = 0, ji = jis; ret == 0 && i < max_nodes; i++, ji++) {
-		if (!ji->ji_ino)
+	for (i = 0, ji = jis; i < max_nodes; i++, ji++) {
+		if (!ji->ji_replay)
 			continue;
 
+		if (!prompt(ost, PY, "Node %d's journal needs to be replayed. "
+			    "Do so?", i)) {
+			journal_trouble = 1;
+			continue;
+		}
+
 		err = walk_journal(ost, i, ji, buf, 1);
 		if (err) {
-			ret = err;
+			journal_trouble = 1;
 			continue;
 		} 
 
-		/* only write back the journal super block if we were
-		 * able to replay the journal */
-		if (ji->ji_jsb_block == 0)
-			continue;
-
 		jsb = ji->ji_jsb;
 		/* reset the journal */
 		jsb->s_start = 0;
@@ -563,14 +594,40 @@
 		if (ji->ji_set_final_seq)
 			jsb->s_sequence = ji->ji_final_seq + 1;
 
+		/* we don't write back a clean 'mounted' bit here.  That would
+		 * have to also include having recovered the orphan dir.  we
+		 * updated s_start, though, so we won't replay the journal
+		 * again. */
 		err = ocfs2_write_journal_superblock(ost->ost_fs,
 						     ji->ji_jsb_block,
 						     (char *)ji->ji_jsb);
-		if (err)
-			ret = err;
+		if (err) {
+			com_err(whoami, err, "while writing node %d's journal "
+				"super block", i);
+			journal_trouble = 1;
+		}
+
+		printf("Node %d's journal replayed successfully.\n", i);
 	}
 
+	/* XXX make sure we maintain journal_trouble in all cases */
+	if (journal_trouble && 
+	    !prompt(ost, PN, "There were problems replaying journals.  This "
+		    "means that the file system is almost certainly badly "
+		    "damanged and that fsck might do more harm than good if "
+		    "it continues to try and repair.  Should fsck continue "
+		    "trying to repair the filesystem?")) {
+		printf("Exiting.\n");
+		exit(FSCK_ERROR);
+	}
+
 out:
+	if (ret) {
+		printf("fsck does not deal gracefully with failure to even "
+		       "discover a volume's journals.  Exiting.\n");
+		exit(FSCK_ERROR);
+	}
+
 	if (jis) {
 		for (i = 0, ji = jis; ret == 0 && i < max_nodes; i++, ji++) {
 			if (ji->ji_jsb)

Modified: trunk/fsck.ocfs2/pass0.c
===================================================================
--- trunk/fsck.ocfs2/pass0.c	2004-10-27 06:48:43 UTC (rev 347)
+++ trunk/fsck.ocfs2/pass0.c	2004-10-27 23:37:29 UTC (rev 348)
@@ -21,8 +21,14 @@
  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  * Boston, MA 021110-1307, USA.
  *
- * Authors: Zach Brown
+ * Pass 0 verifies that the inode suballocators can be iterated over by
+ * later passes without risk of running into corruption.  Usually our passes
+ * are analogous to ext{2,3} but ocfs2's allocation is more dynamic.  As
+ * pass 0 it only makes sure that basic iteration in pass 1 will work.  After
+ * building a set of active inodes Pass 1 will call back into us to sync the
+ * bitmaps with the active inodes.
  */
+
 #include <string.h>
 #include <inttypes.h>
 #include <time.h>
@@ -49,8 +55,10 @@
 			    struct chain_state *cs, ocfs2_group_desc *bg,
 			    uint64_t blkno)
 {
-	verbosef("checking desc at %"PRIu64" bg: %"PRIu64"\n", blkno, 
-		 bg->bg_blkno);
+	verbosef("checking desc at %"PRIu64"; blkno %"PRIu64" size %u bits %u "
+		 "free_bits %u chain %u generation %u\n", blkno, bg->bg_blkno,
+		 bg->bg_size, bg->bg_bits, bg->bg_free_bits_count, 
+		 bg->bg_chain, bg->bg_generation);
 
 	/* We'll only consider this a valid descriptor if its signature,
 	 * parent inode, and generation all check out */
@@ -114,6 +122,9 @@
 	errcode_t ret;
 	int rc;
 
+	verbosef("free %u total %u blkno %"PRIu64"\n", chain->c_free,
+		 chain->c_total, chain->c_blkno);
+
 	if (ocfs2_block_out_of_range(ost->ost_fs, blkno))
 		return 0;
 
@@ -235,7 +246,10 @@
 		max_chain_rec = cl->cl_next_free_rec;
 
 	for (i = 0; i < max_chain_rec; i++) {
-		cs.cs_chain_no = i;
+		/* clear it for each run */
+		cs = (struct chain_state) {
+			.cs_chain_no = i,
+		};
 		ret = check_chain(ost, di, &cs, &cl->cl_recs[i], buf1, buf2);
 		/* XXX do things :) */
 	}
@@ -243,42 +257,6 @@
 	return 0;
 }
 
-/* 
- * here's a little rough-draft of what I think the procedure should
- * look like.  I'm probably missing things.
- *
- * - replay the journals if needed
- * 	- walk the journal extents looking for simple inconsistencies
- * 		- loops, doubly referenced blocks
- * 		- need this code later anyway for verifying files
- * 		  and i_clusters/i_size
- * 	- prompt to proceed if errors (mention backup superblock)
- * 		- ignore entirely or partially replay?
- *
- * - clean up the inode allocators
- * 	- kill loops, chains can't share groups
- * 	- move local allocs back to the global or something?
- * 	- verify just enough of the fields to make iterating work
- *
- * - walk inodes
- * 	- record all valid clusters that inodes point to
- * 	- make sure extent trees in inodes are consistent
- * 	- inconsistencies mark inodes for deletion
- *
- * - update cluster bitmap
- * 	- have bits reflect our set of referenced clusters
- * 	- again, how to resolve local/global?
- * 	* from this point on the library can trust the cluster bitmap
- *
- * - update the inode allocators
- * 	- make sure our set of valid inodes matches the bits
- * 	- make sure all the bit totals add up
- * 	* from this point on the library can trust the inode allocators
- *
- * This makes it so only these early passes need to have global 
- * allocation goo in memory.  The rest can use the library as 
- * usual.
- */
 
 errcode_t o2fsck_pass0(o2fsck_state *ost)
 {
@@ -325,6 +303,7 @@
 					 blocks + ost->ost_fs->fs_blocksize,
 					 blocks + 
 					 (ost->ost_fs->fs_blocksize * 2));
+
 		/* XXX maybe helped by the alternate super block */
 		if (ret) {
 		}

Modified: trunk/fsck.ocfs2/pass1.c
===================================================================
--- trunk/fsck.ocfs2/pass1.c	2004-10-27 06:48:43 UTC (rev 347)
+++ trunk/fsck.ocfs2/pass1.c	2004-10-27 23:37:29 UTC (rev 348)
@@ -39,14 +39,17 @@
 
 static const char *whoami = "pass1";
 
-void o2fsck_mark_block_used(o2fsck_state *ost, uint64_t blkno)
+/* XXX need to, you know, do things with this. */
+int o2fsck_mark_block_used(o2fsck_state *ost, uint64_t blkno)
 {
 	int was_set;
+
 	ocfs2_bitmap_set(ost->ost_found_blocks, blkno, &was_set);
-	if (was_set) {
-		verbosef("duplicate block %"PRIu64"\n", blkno);
-		ocfs2_bitmap_set(ost->ost_dup_blocks, blkno, NULL);
-	}
+
+	if (was_set) /* XX can go away one all callers handle this */
+		verbosef("!! duplicate block %"PRIu64"\n", blkno);
+
+	return was_set;
 }
 
 /* XXX should walk down all the i_fields to make sure we're veryfying
@@ -178,14 +181,6 @@
 	ocfs2_free(&buf);
 }
 
-/* XXX maybe this should be a helper in libocfs2? */
-static uint64_t blocks_holding_bytes(ocfs2_filesys *fs, uint64_t bytes)
-{
-	int b_bits = OCFS2_RAW_SB(fs->fs_super)->s_blocksize_bits;
-
-	return (bytes +  fs->fs_blocksize - 1) >> b_bits;
-}
-
 static void check_link_data(struct verifying_blocks *vb)
 {
 	ocfs2_dinode *di = vb->vb_di;
@@ -216,7 +211,7 @@
 		}
 	}
 
-	expected = blocks_holding_bytes(ost->ost_fs, vb->vb_link_len + 1);
+	expected = ocfs2_blocks_in_bytes(ost->ost_fs, vb->vb_link_len + 1);
 
 	if (di->i_size != vb->vb_link_len) {
 		if (prompt(ost, PY, "The target of symlink inode %"PRIu64" "
@@ -235,8 +230,8 @@
 	if (vb->vb_num_blocks != expected) {
 		if (prompt(ost, PN, "The target of symlink inode %"PRIu64" "
 			   "fits in %"PRIu64" blocks but the inode has "
-			   "%"PRIu64" allocated.  Clear the inode?", expected,
-			   di->i_blkno)) {
+			   "%"PRIu64" allocated.  Clear the inode?", 
+			   di->i_blkno, expected, di->i_blkno)) {
 			vb->vb_clear = 1;
 			return;
 		}
@@ -290,15 +285,6 @@
 	return 0;
 }
 
-/* XXX maybe this should be a helper in libocfs2? */
-static uint64_t clusters_holding_blocks(ocfs2_filesys *fs, uint64_t num_blocks)
-{
-	int c_to_b_bits = OCFS2_RAW_SB(fs->fs_super)->s_clustersize_bits -
-		          OCFS2_RAW_SB(fs->fs_super)->s_blocksize_bits;
-
-	return (num_blocks + ((1 << c_to_b_bits) - 1)) >> c_to_b_bits;
-}
-
 static int check_gd_block(ocfs2_filesys *fs, uint64_t gd_blkno, int chain_num,
 			   void *priv_data)
 {
@@ -392,10 +378,10 @@
 #endif
 
 	if (vb.vb_num_blocks > 0)
-		expected = clusters_holding_blocks(fs, vb.vb_last_block + 1);
+		expected = ocfs2_clusters_in_blocks(fs, vb.vb_last_block + 1);
 
 	if (di->i_clusters < expected &&
-	    prompt(ost, PY, "inode %"PRIu64" has %"PRIu64" clusters but its "
+	    prompt(ost, PY, "inode %"PRIu64" has %"PRIu32" clusters but its "
 		   "blocks fit in %"PRIu64" clusters.  Correct the number of "
 		   "clusters?", di->i_blkno, di->i_clusters, expected)) {
 		di->i_clusters = expected;

Modified: trunk/libocfs2/include/ocfs2.h
===================================================================
--- trunk/libocfs2/include/ocfs2.h	2004-10-27 06:48:43 UTC (rev 347)
+++ trunk/libocfs2/include/ocfs2.h	2004-10-27 23:37:29 UTC (rev 348)
@@ -403,6 +403,15 @@
 					  void *priv_data),
 			      void *priv_data);
 
+/* 
+ * ${foo}_to_${bar} is a floor function.  blocks_to_clusters will
+ * return the cluster that contains a block, not the number of clusters
+ * that hold a given number of blocks.
+ *
+ * ${foo}_in_${bar} is a ceiling function.  clusters_in_blocks will give
+ * the number of clusters needed to hold a given number of blocks.
+ */
+
 static inline uint64_t ocfs2_clusters_to_blocks(ocfs2_filesys *fs,
 						uint32_t clusters)
 {
@@ -423,10 +432,35 @@
 	return (uint32_t)(blocks >> b_to_c_bits);
 }
 
+/* Blocks needed to hold 'bytes' (ceiling); clamps the sum if it wraps. */
+static inline uint64_t ocfs2_blocks_in_bytes(ocfs2_filesys *fs, uint64_t bytes)
+{
+	uint64_t ret = bytes + fs->fs_blocksize - 1;
+
+	if (ret < bytes) /* deal with wrapping */
+		ret = UINT64_MAX;
+
+	return ret >> OCFS2_RAW_SB(fs->fs_super)->s_blocksize_bits;
+}
+
+/* Clusters needed to hold 'blocks' (ceiling); clamps the sum if it wraps. */
+static inline uint64_t ocfs2_clusters_in_blocks(ocfs2_filesys *fs,
+						uint64_t blocks)
+{
+	int c_to_b_bits = OCFS2_RAW_SB(fs->fs_super)->s_clustersize_bits -
+		          OCFS2_RAW_SB(fs->fs_super)->s_blocksize_bits;
+	uint64_t ret = blocks + ((1 << c_to_b_bits) - 1);
+
+	if (ret < blocks) /* deal with wrapping */
+		ret = UINT64_MAX;
+	return ret >> c_to_b_bits;
+}
+
 static inline int ocfs2_block_out_of_range(ocfs2_filesys *fs, uint64_t block)
 {
 	return (block < OCFS2_SUPER_BLOCK_BLKNO) || (block > fs->fs_blocks);
 }
 
 
+
 #endif  /* _FILESYS_H */

Modified: trunk/libocfs2/ocfs2_err.et.in
===================================================================
--- trunk/libocfs2/ocfs2_err.et.in	2004-10-27 06:48:43 UTC (rev 347)
+++ trunk/libocfs2/ocfs2_err.et.in	2004-10-27 23:37:29 UTC (rev 348)
@@ -62,6 +62,9 @@
 ec	OCFS2_ET_BAD_JOURNAL_REVOKE,
 	"Invalid revoke record in journal"
 
+ec	OCFS2_ET_BAD_JOURNAL_TAG,
+	"Invalid block tag in journal"
+
 ec	OCFS2_ET_INODE_NOT_VALID,
 	"Inode is not valid (active)"
 
@@ -107,4 +110,7 @@
 ec	OCFS2_ET_EXTENT_NOT_FOUND,
 	"No mapping exists for the given extent range"
 
+ec	OCFS2_ET_DUPLICATE_BLOCK,
+	"Duplicate block discovered"
+
 	end