[Ocfs2-tools-commits] zab commits r462 - in trunk: fsck.ocfs2 fsck.ocfs2/include libocfs2/include

Sat Dec 4 14:38:46 CST 2004

Author: zab
Date: 2004-12-04 14:38:44 -0600 (Sat, 04 Dec 2004)
New Revision: 462

Modified:
   trunk/fsck.ocfs2/fsck.c
   trunk/fsck.ocfs2/fsck.ocfs2.8.in
   trunk/fsck.ocfs2/fsck.ocfs2.checks.8.in
   trunk/fsck.ocfs2/include/fsck.h
   trunk/fsck.ocfs2/include/journal.h
   trunk/fsck.ocfs2/include/problem.h
   trunk/fsck.ocfs2/include/util.h
   trunk/fsck.ocfs2/journal.c
   trunk/fsck.ocfs2/pass0.c
   trunk/fsck.ocfs2/pass1.c
   trunk/fsck.ocfs2/problem.c
   trunk/fsck.ocfs2/util.c
   trunk/libocfs2/include/ocfs2_fs.h
Log:
o always replay a node's journal when its publish block still shows it as mounted
o only do a full check when the mount count or check interval is exceeded, or when -f is given
o add some messages to let the user know what is going on
o don't give replay access to o2fsck_state, it doesn't use it
o clean up the setting and clearing of bits in the global bitmap
o update the lastcheck field in the superblock and write it out


Modified: trunk/fsck.ocfs2/fsck.c
===================================================================

--- trunk/fsck.ocfs2/fsck.c	2004-12-04 03:00:43 UTC (rev 461)
+++ trunk/fsck.ocfs2/fsck.c	2004-12-04 20:38:44 UTC (rev 462)
@@ -140,11 +140,12 @@
 	return 0;
 }
 
-static errcode_t check_superblock(char *whoami, o2fsck_state *ost)
+static errcode_t check_superblock(o2fsck_state *ost)
 {
 	ocfs2_dinode *di = ost->ost_fs->fs_super;
 	ocfs2_super_block *sb = OCFS2_RAW_SB(di);
 	errcode_t ret = 0;
+	char *whoami = __FUNCTION__;
 
 	if (sb->s_max_nodes == 0) {
 		printf("The superblock max_nodes field is set to 0.\n");
@@ -166,14 +167,102 @@
 	return ret;
 }
 
-static void exit_if_skipping(o2fsck_state *ost)
+static errcode_t write_out_superblock(o2fsck_state *ost)
 {
+	ocfs2_dinode *di = ost->ost_fs->fs_super;
+	ocfs2_super_block *sb = OCFS2_RAW_SB(di);
+
+	sb->s_errors = ost->ost_saw_error;
+	sb->s_lastcheck = time(NULL);
+	sb->s_mnt_count = 0;
+
+	return ocfs2_write_super(ost->ost_fs);
+}
+
+static void scale_time(time_t secs, unsigned *scaled, char **units)
+{
+	if (secs < 60) {
+		*units = "seconds";
+		goto done;
+	}
+	secs /= 60;
+
+	if (secs < 60) {
+		*units = "minutes";
+		goto done;
+	}
+	secs /= 60;
+
+	if (secs < 24) {
+		*units = "hours";
+		goto done;
+	}
+	secs /= 24;
+	*units = "days";
+
+done:
+	*scaled = secs;
+}
+
+/* avoid "warning: `%c' yields only last 2 digits of year in some locales" */
+static size_t ftso_strftime(char *s, size_t max, const char *fmt,
+			    const struct tm *tm) {
+	return strftime(s, max, fmt, tm);
+}
+
+static int fs_is_clean(o2fsck_state *ost, char *filename)
+{
+	ocfs2_super_block *sb = OCFS2_RAW_SB(ost->ost_fs->fs_super);
+	time_t now = time(NULL);
+	time_t next = sb->s_lastcheck + sb->s_checkinterval;
+	static char reason[4096] = {'\0', };
+	struct tm local;
+
 	if (ost->ost_force)
-		return;
+		strcpy(reason, "was run with -f");
+	else if (sb->s_state & OCFS2_ERROR_FS)
+		strcpy(reason, "contains a file system with errors");
+	else if (sb->s_max_mnt_count > 0 &&
+		 sb->s_mnt_count > sb->s_max_mnt_count) {
+		sprintf(reason, "has been mounted %u times without being "
+			"checked", sb->s_mnt_count);
+	} else if (sb->s_checkinterval > 0 && now >= next) {
+		unsigned scaled_time;
+		char *scaled_units;
 
-	/* XXX do something with s_state, _mnt_count, checkinterval,
-	 * etc. */
-	return;
+		scale_time(now - sb->s_lastcheck, &scaled_time, &scaled_units);
+		sprintf(reason, "has gone %u %s without being checked",
+			scaled_time, scaled_units);
+	}
+
+	if (reason[0]) {
+		printf("%s %s, check forced.\n", filename, reason);
+		return 0;
+	}
+
+	reason[0] = '\0';
+
+	if (sb->s_max_mnt_count > 0)
+		sprintf(reason, "after %u additional mounts", 
+			sb->s_max_mnt_count - sb->s_mnt_count);
+
+	if (sb->s_checkinterval > 0) {
+		localtime_r(&next, &local);
+
+		if (reason[0])
+			ftso_strftime(reason + strlen(reason),
+				 sizeof(reason) - strlen(reason),
+			 	 " or by %c, whichever comes first", &local);
+		else
+			ftso_strftime(reason, sizeof(reason), "by %c", &local);
+	}
+
+	printf("%s is clean.", filename);
+
+	if (reason[0])
+		printf("  It will be checked %s.\n", reason);
+
+	return 1;
 }
 
 static void print_label(o2fsck_state *ost)
@@ -236,13 +325,91 @@
 	exit(FSCK_USAGE);
 }
 
+static errcode_t open_and_check(o2fsck_state *ost, char *filename,
+				int open_flags, uint64_t blkno,
+				uint64_t blksize)
+{
+	errcode_t ret;
+	char *whoami = __FUNCTION__;
+
+	ret = ocfs2_open(filename, open_flags, blkno, blksize, &ost->ost_fs);
+	if (ret) {
+		com_err(whoami, ret, "while opening file \"%s\"", filename);
+		goto out;
+	}
+
+	ret = check_superblock(ost);
+	if (ret)
+		goto out;
+
+	ret = o2fsck_read_publish(ost);
+	if (ret)
+		goto out;
+
+out:
+	return ret;
+}
+
+static errcode_t maybe_replay_journals(o2fsck_state *ost, char *filename,
+				       int open_flags, uint64_t blkno,
+				       uint64_t blksize)
+{	
+	int replayed = 0;
+	errcode_t ret = 0;
+	char *whoami = __FUNCTION__;
+
+	if (!ost->ost_stale_mounts)
+		goto out;
+
+	if (!(ost->ost_fs->fs_flags & OCFS2_FLAG_RW)) {
+		printf("** Skipping journal replay because -n was "
+		       "given.  There may be spurious errors that "
+		       "journal replay would fix. **\n");
+		goto out;
+	}
+
+	printf("%s wasn't cleanly unmounted by all nodes.  Attempting to "
+	       "replay the journals for nodes that didn't unmount cleanly",
+	       filename);
+
+	/* journal replay is careful not to use ost as we only really
+	 * build it up after spraying the journal all over the disk
+	 * and reopening */
+	ret = o2fsck_replay_journals(ost->ost_fs, ost->ost_publish,
+				     &replayed);
+	if (ret)
+		goto out;
+
+	/* if the journals were replayed we close the fs and start
+	 * over */
+	if (!replayed)
+		goto out;
+
+	ret = ocfs2_close(ost->ost_fs);
+	if (ret) {
+		com_err(whoami, ret, "while closing \"%s\"", filename);
+		goto out;
+	}
+
+	ret = open_and_check(ost, filename, open_flags, blkno, blksize);
+	if (ret) {
+		printf("fsck saw unrecoverable errors while "
+		       "re-opening the super block and will not "
+		       "continue.\n");
+		goto out;
+	}
+out:
+	return ret;
+}
+
 int main(int argc, char **argv)
 {
 	char *filename;
 	int64_t blkno, blksize;
 	o2fsck_state _ost, *ost = &_ost;
-	int c, ret, open_flags = OCFS2_FLAG_RW;
+	int c, open_flags = OCFS2_FLAG_RW;
 	int fsck_mask = FSCK_OK;
+	errcode_t ret;
 
 	memset(ost, 0, sizeof(o2fsck_state));
 	ost->ost_ask = 1;
@@ -257,7 +424,7 @@
 	setlinebuf(stderr);
 	setlinebuf(stdout);
 
-	while((c = getopt(argc, argv, "b:B:GnpuvVy")) != EOF) {
+	while((c = getopt(argc, argv, "b:B:fGnpuvVy")) != EOF) {
 		switch (c) {
 			case 'b':
 				blkno = read_number(optarg);
@@ -346,30 +513,47 @@
 
 	filename = argv[optind];
 
-	ret = ocfs2_open(filename, open_flags, blkno, blksize, &ost->ost_fs);
+	ret = open_and_check(ost, filename, open_flags, blkno, blksize);
 	if (ret) {
-		com_err(argv[0], ret,
-			"while opening file \"%s\"", filename);
+		printf("fsck saw unrecoverable errors in the super block and "
+		       "will not continue.\n");
 		fsck_mask |= FSCK_ERROR;
 		goto out;
 	}
 
+	printf("Checking OCFS2 filesystem in %s:\n", filename);
+	printf("  label:              ");
+	print_label(ost);
+	printf("  uuid:               ");
+	print_uuid(ost);
+	printf("  number of blocks:   %"PRIu64"\n", ost->ost_fs->fs_blocks);
+	printf("  bytes per block:    %u\n", ost->ost_fs->fs_blocksize);
+	printf("  number of clusters: %"PRIu32"\n", ost->ost_fs->fs_clusters);
+	printf("  bytes per cluster:  %u\n", ost->ost_fs->fs_clustersize);
+	printf("  max nodes:          %u\n\n", 
+	       OCFS2_RAW_SB(ost->ost_fs->fs_super)->s_max_nodes);
+
+	ret = maybe_replay_journals(ost, filename, open_flags, blkno, blksize);
+	if (ret) {
+		printf("fsck encountered unrecoverable errors while "
+		       "replaying the journals and will not continue\n");
+		fsck_mask |= FSCK_ERROR;
+		goto out;
+	}
+
+	/* allocate all this junk after we've replayed the journal and the
+	 * sb should be stable */
 	if (o2fsck_state_init(ost->ost_fs, argv[0], ost)) {
 		fprintf(stderr, "error allocating run-time state, exiting..\n");
 		fsck_mask |= FSCK_ERROR;
 		goto out;
 	}
 
-	ret = check_superblock(argv[0], ost);
-	if (ret) {
-		printf("fsck saw unrecoverable errors in the super block and "
-		       "will not continue.\n");
-		fsck_mask |= FSCK_ERROR;
+	if (fs_is_clean(ost, filename)) {
+		fsck_mask = FSCK_OK;
 		goto out;
 	}
 
-	exit_if_skipping(ost);
-
 #if 0
 	o2fsck_mark_block_used(ost, 0);
 	o2fsck_mark_block_used(ost, 1);
@@ -379,60 +563,61 @@
 
 	/* XXX we don't use the bad blocks inode, do we? */
 
-	printf("Checking OCFS2 filesystem in %s:\n", filename);
-	printf("  label:              ");
-	print_label(ost);
-	printf("  uuid:               ");
-	print_uuid(ost);
-	printf("  number of blocks:   %"PRIu64"\n", ost->ost_fs->fs_blocks);
-	printf("  bytes per block:    %u\n", ost->ost_fs->fs_blocksize);
-	printf("  number of clusters: %"PRIu32"\n", ost->ost_fs->fs_clusters);
-	printf("  bytes per cluster:  %u\n", ost->ost_fs->fs_clustersize);
-	printf("  max nodes:          %u\n", 
-	       OCFS2_RAW_SB(ost->ost_fs->fs_super)->s_max_nodes);
 
-	ret = o2fsck_replay_journals(ost);
+	/* XXX for now it is assumed that errors returned from a pass
+	 * are fatal.  these can be fixed over time. */
+	ret = o2fsck_pass0(ost);
 	if (ret) {
-		printf("fsck encountered unrecoverable errors while replaying "
-		       "the journals and will not continue\n");
-		fsck_mask |= FSCK_ERROR;
-		goto out;
+		com_err(argv[0], ret, "while performing pass 0");
+		goto done;
 	}
 
-	/* XXX think harder about these error cases. */
-	ret = o2fsck_pass0(ost);
+	ret = o2fsck_pass1(ost);
 	if (ret) {
-		printf("fsck encountered unrecoverable errors in pass 0 and "
-		       "will not continue\n");
-		fsck_mask |= FSCK_ERROR;
-		goto out;
+		com_err(argv[0], ret, "while performing pass 1");
+		goto done;
 	}
 
-	ret = o2fsck_pass1(ost);
-	if (ret)
-		com_err(argv[0], ret, "pass1 failed");
-
 	ret = o2fsck_pass2(ost);
-	if (ret)
-		com_err(argv[0], ret, "pass2 failed");
+	if (ret) {
+		com_err(argv[0], ret, "while performing pass 2");
+		goto done;
+	}
 
 	ret = o2fsck_pass3(ost);
-	if (ret)
-		com_err(argv[0], ret, "pass3 failed");
+	if (ret) {
+		com_err(argv[0], ret, "while performing pass 3");
+		goto done;
+	}
 
 	ret = o2fsck_pass4(ost);
+	if (ret) {
+		com_err(argv[0], ret, "while performing pass 4");
+		goto done;
+	}
+
+done:
 	if (ret)
-		com_err(argv[0], ret, "pass4 failed");
+		fsck_mask |= FSCK_ERROR;
+	else {
+		ost->ost_saw_error = 0;
+		printf("All passes succeeded.\n");
+	}
 
+	if (ost->ost_fs->fs_flags & OCFS2_FLAG_RW) {
+		ret = write_out_superblock(ost);
+		if (ret)
+			com_err(argv[0], ret, "while writing back the "
+				"superblock");
+	}
+
 	ret = ocfs2_close(ost->ost_fs);
 	if (ret) {
-		com_err(argv[0], ret,
-			"while closing file \"%s\"", filename);
-	}
+		com_err(argv[0], ret, "while closing file \"%s\"", filename);
+		/* XXX I wonder about this error.. */
+		fsck_mask |= FSCK_ERROR;
+	} 
 
-	/* XXX check if the fs is modified and yell something. */
-	printf("fsck completed successfully.\n");
-
 out:
 	return fsck_mask;
 }

Modified: trunk/fsck.ocfs2/fsck.ocfs2.8.in
===================================================================
--- trunk/fsck.ocfs2/fsck.ocfs2.8.in	2004-12-04 03:00:43 UTC (rev 461)
+++ trunk/fsck.ocfs2/fsck.ocfs2.8.in	2004-12-04 20:38:44 UTC (rev 462)
@@ -2,7 +2,7 @@
 .SH "NAME"
 fsck.ocfs2 \- Check an OCFS2 file system.
 .SH "SYNOPSIS"
-\fBfsck.ocfs2\fR [ \fB\-Gnpuvy\fR ] [ \fB\-b\fR \fIsuperblock block\fR ] [ \fB\-B\fR \fIblock size\fR ] \fIdevice\fR
+\fBfsck.ocfs2\fR [ \fB\-fGnpuvy\fR ] [ \fB\-b\fR \fIsuperblock block\fR ] [ \fB\-B\fR \fIblock size\fR ] \fIdevice\fR
 .SH "DESCRIPTION"
 .PP 
 \fBfsck.ocfs2\fR is used to check an OCFS2 file system.

Modified: trunk/fsck.ocfs2/fsck.ocfs2.checks.8.in
===================================================================
--- trunk/fsck.ocfs2/fsck.ocfs2.checks.8.in	2004-12-04 03:00:43 UTC (rev 461)
+++ trunk/fsck.ocfs2/fsck.ocfs2.checks.8.in	2004-12-04 20:38:44 UTC (rev 462)
@@ -16,14 +16,6 @@
 
 .SH "CHECKS"
 
-.SS "1"
-Each node's journals are usually empty after an orderly shut down of the
-file system.  A node was marked as not having shut down properly so its
-journal should be replayed.
-
-Answering yes replays the given node's journal.  Each block in the journal will be written
-to its intended location in the file system. 
-
 .SS "2"
 The group descriptors that make up the global bitmap chain allocator are
 static and should be found in specific chains in the allocator.  A given

Modified: trunk/fsck.ocfs2/include/fsck.h
===================================================================
--- trunk/fsck.ocfs2/include/fsck.h	2004-12-04 03:00:43 UTC (rev 461)
+++ trunk/fsck.ocfs2/include/fsck.h	2004-12-04 20:38:44 UTC (rev 462)
@@ -26,6 +26,7 @@
 
 #include "icount.h"
 #include "dirblocks.h"
+#include "ocfs2_disk_dlm.h"
 
 typedef struct _o2fsck_state {
 	ocfs2_filesys 	*ost_fs;
@@ -49,6 +50,8 @@
 	 * entries, including '.' and '..'. */
 	o2fsck_icount	*ost_icount_refs;
 
+	ocfs_publish	*ost_publish;
+
 	o2fsck_dirblocks	ost_dirblocks;
 
 	uint32_t	ost_fs_generation;
@@ -63,7 +66,12 @@
 			ost_write_inode_alloc:1,
 			ost_write_error:1,
 			ost_write_cluster_alloc_asked:1,
-			ost_write_cluster_alloc:1,
+ 			ost_write_cluster_alloc:1,
+ 			ost_saw_error:1, /* if we think there are still errors
+ 					  * on disk we'll mark the sb as having
+ 					  * errors as we exit */
+ 			ost_stale_mounts:1, /* set when reading publish blocks
+ 					     * that still indicated mounted */
 			ost_fix_fs_gen:1;
 } o2fsck_state;
 

Modified: trunk/fsck.ocfs2/include/journal.h
===================================================================
--- trunk/fsck.ocfs2/include/journal.h	2004-12-04 03:00:43 UTC (rev 461)
+++ trunk/fsck.ocfs2/include/journal.h	2004-12-04 20:38:44 UTC (rev 462)
@@ -26,7 +26,8 @@
 
 #include "fsck.h"
 
-errcode_t o2fsck_replay_journals(o2fsck_state *ost);
+errcode_t o2fsck_replay_journals(ocfs2_filesys *fs, ocfs_publish *pub,
+				 int *replayed);
 
 #endif /* __O2FSCK_JOURNAL_H__ */
 

Modified: trunk/fsck.ocfs2/include/problem.h
===================================================================
--- trunk/fsck.ocfs2/include/problem.h	2004-12-04 03:00:43 UTC (rev 461)
+++ trunk/fsck.ocfs2/include/problem.h	2004-12-04 20:38:44 UTC (rev 462)
@@ -33,7 +33,7 @@
 /* returns non-zero for yes and zero for no.  The caller is expected to
  * provide a thorough description of the state and the action that will
  * be taken depending on the answer.  Without \n termination. */
-int prompt(o2fsck_state *ost, unsigned flags, unsigned code, 
+int prompt(o2fsck_state *ost, unsigned flags, uint16_t code, 
 	   const char *fmt, ...)
 	 __attribute__ ((format (printf, 4, 5)));
 

Modified: trunk/fsck.ocfs2/include/util.h
===================================================================
--- trunk/fsck.ocfs2/include/util.h	2004-12-04 03:00:43 UTC (rev 461)
+++ trunk/fsck.ocfs2/include/util.h	2004-12-04 20:38:44 UTC (rev 462)
@@ -49,5 +49,6 @@
 				    uint32_t num);
 errcode_t o2fsck_type_from_dinode(o2fsck_state *ost, uint64_t ino,
 				  uint8_t *type);
+errcode_t o2fsck_read_publish(o2fsck_state *ost);
 
 #endif /* __O2FSCK_UTIL_H__ */

Modified: trunk/fsck.ocfs2/journal.c
===================================================================
--- trunk/fsck.ocfs2/journal.c	2004-12-04 03:00:43 UTC (rev 461)
+++ trunk/fsck.ocfs2/journal.c	2004-12-04 20:38:44 UTC (rev 462)
@@ -23,8 +23,7 @@
  * replayed.  IO errors during replay will just result in partial journal
  * replay, just like jbd does in the kernel.  Journals that don't pass
  * consistency checks, like having overlapping blocks or strange fields, are
- * ignored and left for later passes to clean up.  Overlap testing is done
- * using o2fsck_state's used block bitmap.  
+ * ignored and left for later passes to clean up.  
 
  * XXX
  * 	future passes need to guarantee journals exist and are the same size 
@@ -44,7 +43,6 @@
 #undef be32_to_cpu
 #include "jfs_user.h"
 #include "ocfs2.h"
-#include "ocfs2_disk_dlm.h"
 #include "pass1.h"
 #include "problem.h"
 #include "util.h"
@@ -201,7 +199,7 @@
 	return block;
 }
 
-static errcode_t count_tags(o2fsck_state *ost, char *buf, size_t size,
+static errcode_t count_tags(ocfs2_filesys *fs, char *buf, size_t size,
 			    uint64_t *nr_ret)
 {
 	journal_block_tag_t *tag, *last;
@@ -215,7 +213,7 @@
 
 	for(; tag <= last; tag++) {
 		nr++;
-		if (ocfs2_block_out_of_range(ost->ost_fs, 
+		if (ocfs2_block_out_of_range(fs, 
 					     be32_to_cpu(tag->t_blocknr)))
 			return OCFS2_ET_BAD_JOURNAL_TAG;
 
@@ -230,7 +228,7 @@
 	return 0;
 }
 
-static errcode_t lookup_journal_block(o2fsck_state *ost, 
+static errcode_t lookup_journal_block(ocfs2_filesys *fs, 
 				      struct journal_info *ji, 
 				      uint64_t blkoff,
 				      uint64_t *blkno,
@@ -263,7 +261,7 @@
 	return ret;
 }
 
-static errcode_t read_journal_block(o2fsck_state *ost, 
+static errcode_t read_journal_block(ocfs2_filesys *fs, 
 				    struct journal_info *ji, 
 				    uint64_t blkoff, 
 				    char *buf,
@@ -272,11 +270,11 @@
 	errcode_t err;
 	uint64_t	blkno;
 
-	err = lookup_journal_block(ost, ji, blkoff, &blkno, check_dup);
+	err = lookup_journal_block(fs, ji, blkoff, &blkno, check_dup);
 	if (err)
 		return err;
 
-	err = io_read_block(ost->ost_fs->fs_io, blkno, 1, buf);
+	err = io_read_block(fs->fs_io, blkno, 1, buf);
 	if (err)
 		com_err(whoami, err, "while reading block %"PRIu64" of node "
 			"%d's journal", blkno, ji->ji_node);
@@ -284,7 +282,7 @@
 	return err;
 }
 
-static errcode_t replay_blocks(o2fsck_state *ost, struct journal_info *ji,
+static errcode_t replay_blocks(ocfs2_filesys *fs, struct journal_info *ji,
 			       char *buf, uint64_t seq, uint64_t *next_block)
 {
 	journal_block_tag_t tag, *tagp;
@@ -296,7 +294,7 @@
 	num = (ji->ji_jsb->s_blocksize - sizeof(journal_header_t)) / 
 	      sizeof(tag);
 
-	ret = ocfs2_malloc_blocks(ost->ost_fs->fs_io, 1, &io_buf);
+	ret = ocfs2_malloc_blocks(fs->fs_io, 1, &io_buf);
 	if (ret) {
 		com_err(whoami, ret, "while allocating a block buffer");
 		goto out;
@@ -315,7 +313,7 @@
 		if (revoke_this_block(&ji->ji_revoke, tag.t_blocknr, seq))
 			goto skip_io;
 
-		err = read_journal_block(ost, ji, *next_block, io_buf, 1);
+		err = read_journal_block(fs, ji, *next_block, io_buf, 1);
 		if (err) {
 			ret = err;
 			goto skip_io;
@@ -326,7 +324,7 @@
 			memcpy(io_buf, &magic, sizeof(magic));
 		}
 
-		err = io_write_block(ost->ost_fs->fs_io, tag.t_blocknr, 1, 
+		err = io_write_block(fs->fs_io, tag.t_blocknr, 1, 
 				     io_buf);
 		if (err)
 			ret = err;
@@ -345,7 +343,7 @@
 	return ret;
 }
 
-static errcode_t walk_journal(o2fsck_state *ost, int node, 
+static errcode_t walk_journal(ocfs2_filesys *fs, int node, 
 			      struct journal_info *ji, char *buf, int recover)
 {
 	errcode_t err, ret = 0;
@@ -373,7 +371,7 @@
 			break;
 
 		/* only mark the blocks used on the first pass */
-		err = read_journal_block(ost, ji, next_block, buf, !recover);
+		err = read_journal_block(fs, ji, next_block, buf, !recover);
 		if (err) {
 			ret = err;
 			break;
@@ -402,7 +400,7 @@
 			verbosef("found a desc type %x\n", jh.h_blocktype);
 			/* replay the blocks described in the desc block */
 			if (recover) {
-				err = replay_blocks(ost, ji, buf, next_seq, 
+				err = replay_blocks(fs, ji, buf, next_seq, 
 						    &next_block);
 				if (err)
 					ret = err;
@@ -410,7 +408,7 @@
 			}
 
 			/* just record the blocks as used and carry on */ 
-			err = count_tags(ost, buf, jsb->s_blocksize, &nr);
+			err = count_tags(fs, buf, jsb->s_blocksize, &nr);
 			if (err)
 				ret = err;
 			else
@@ -451,7 +449,7 @@
 	return ret;
 }
 
-static errcode_t prep_journal_info(o2fsck_state *ost, int node,
+static errcode_t prep_journal_info(ocfs2_filesys *fs, int node,
 			           struct journal_info *ji)
 {
 	errcode_t err;
@@ -459,12 +457,12 @@
 	ji->ji_revoke = RB_ROOT;
 	ji->ji_node = node;
 
-	err = ocfs2_malloc_blocks(ost->ost_fs->fs_io, 1, &ji->ji_jsb);
+	err = ocfs2_malloc_blocks(fs->fs_io, 1, &ji->ji_jsb);
 	if (err)
 		com_err(whoami, err, "while allocating space for node %d's "
 			    "journal superblock", node);
 
-	err = ocfs2_lookup_system_inode(ost->ost_fs, JOURNAL_SYSTEM_INODE,
+	err = ocfs2_lookup_system_inode(fs, JOURNAL_SYSTEM_INODE,
 					node, &ji->ji_ino);
 	if (err) {
 		com_err(whoami, err, "while looking up the journal inode for "
@@ -472,26 +470,26 @@
 		goto out;
 	}
 
-	err = ocfs2_read_cached_inode(ost->ost_fs, ji->ji_ino, &ji->ji_cinode);
+	err = ocfs2_read_cached_inode(fs, ji->ji_ino, &ji->ji_cinode);
 	if (err) {
 		com_err(whoami, err, "while reading cached inode %"PRIu64" "
 			"for node %d's journal", ji->ji_ino, node);
 		goto out;
 	}
 
-	err = ocfs2_extent_map_init(ost->ost_fs, ji->ji_cinode);
+	err = ocfs2_extent_map_init(fs, ji->ji_cinode);
 	if (err) {
 		com_err(whoami, err, "while initializing extent map");
 		goto out;
 	}
 
-	err = lookup_journal_block(ost, ji, 0, &ji->ji_jsb_block, 1);
+	err = lookup_journal_block(fs, ji, 0, &ji->ji_jsb_block, 1);
 	if (err)
 		goto out;
 
 	/* XXX be smarter about reading in the whole super block if it
 	 * spans multiple blocks */
-	err = ocfs2_read_journal_superblock(ost->ost_fs, ji->ji_jsb_block, 
+	err = ocfs2_read_journal_superblock(fs, ji->ji_jsb_block, 
 					    (char *)ji->ji_jsb);
 	if (err) {
 		com_err(whoami, err, "while reading block %"PRIu64" as node "
@@ -500,46 +498,35 @@
 		goto out;
 	}
 
-	verbosef("jsb start %u maxlen %u\n", ji->ji_jsb->s_start,
-		 ji->ji_jsb->s_maxlen);
+	verbosef("node: %d jsb start %u maxlen %u\n", node,
+		 ji->ji_jsb->s_start, ji->ji_jsb->s_maxlen);
 out:
 	return err;
 }
 
-static int publish_mounted_set(ocfs2_filesys *fs, char *buf, int node, 
-			       int max_nodes)
-{
-	int b_bits = OCFS2_RAW_SB(fs->fs_super)->s_blocksize_bits;
-	/* magic! */
-	ocfs_publish *pub = (ocfs_publish *)(buf + ((2 + 4 + max_nodes + node)
-			       			<< b_bits));
-	verbosef("node %d's publish: %u\n", node, le32_to_cpu(pub->mounted));
-
-	return pub->mounted;
-}
-
 /* Try and replay the nodes journals if they're dirty.  This only returns
  * a non-zero error if the caller should not continue. */
-errcode_t o2fsck_replay_journals(o2fsck_state *ost)
+errcode_t o2fsck_replay_journals(ocfs2_filesys *fs, ocfs_publish *pub,
+				 int *replayed)
 {
 	errcode_t err = 0, ret = 0;
 	struct journal_info *jis, *ji;
 	journal_superblock_t *jsb;
-	char *buf = NULL, *dlm_buf = NULL;
-	int i, max_nodes, buflen, journal_trouble = 0;
-	uint64_t dlm_ino;
+	char *buf = NULL;
+	int journal_trouble = 0;
+	uint16_t i, max_nodes;
 	ocfs2_bitmap *used_blocks = NULL;
 
-	max_nodes = OCFS2_RAW_SB(ost->ost_fs->fs_super)->s_max_nodes;
+	max_nodes = OCFS2_RAW_SB(fs->fs_super)->s_max_nodes;
 
-	ret = ocfs2_block_bitmap_new(ost->ost_fs, "journal blocks",
+	ret = ocfs2_block_bitmap_new(fs, "journal blocks",
 				     &used_blocks);
 	if (ret) {
 		com_err(whoami, ret, "while allocating journal block bitmap"); 
 		goto out;
 	}
 
-	ret = ocfs2_malloc_blocks(ost->ost_fs->fs_io, 1, &buf);
+	ret = ocfs2_malloc_blocks(fs->fs_io, 1, &buf);
 	if (ret) {
 		com_err(whoami, ret, "while allocating room to read journal "
 			    "blocks");
@@ -553,32 +540,19 @@
 		goto out;
 	}
 
-	ret = ocfs2_lookup_system_inode(ost->ost_fs, DLM_SYSTEM_INODE,
-					0, &dlm_ino);
-	if (ret) {
-		com_err(whoami, ret, "while looking up the dlm system inode");
-		goto out;
-	}
-
-	ret = ocfs2_read_whole_file(ost->ost_fs, dlm_ino, &dlm_buf, &buflen);
-	if (ret) {
-		com_err(whoami, ret, "while reading dlm file");
-		goto out;
-	}
-
 	printf("Checking each node's journal.\n");
 
 	for (i = 0, ji = jis; i < max_nodes; i++, ji++) {
-		if (!publish_mounted_set(ost->ost_fs, dlm_buf, i, max_nodes)) {
+		if (!pub[i].mounted) {
 			verbosef("node %d is clean\n", i);
 			continue;
 		}
 		ji->ji_replay = 1;
 		ji->ji_used_blocks = used_blocks;
 
-		err = prep_journal_info(ost, i, ji);
+		err = prep_journal_info(fs, i, ji);
 		if (err == 0)
-			err = walk_journal(ost, i, ji, buf, 0);
+			err = walk_journal(fs, i, ji, buf, 0);
 
 		if (err) {
 			ji->ji_replay = 0;
@@ -591,13 +565,9 @@
 		if (!ji->ji_replay)
 			continue;
 
-		if (!prompt(ost, PY, 1, "Node %d's journal needs to be "
-			    "replayed. Do so?", i)) {
-			journal_trouble = 1;
-			continue;
-		}
+		printf("Replaying node %d's journal.\n", i);
 
-		err = walk_journal(ost, i, ji, buf, 1);
+		err = walk_journal(fs, i, ji, buf, 1);
 		if (err) {
 			journal_trouble = 1;
 			continue;
@@ -614,16 +584,18 @@
 		 * have to also include having recovered the orphan dir.  we
 		 * updated s_start, though, so we won't replay the journal
 		 * again. */
-		err = ocfs2_write_journal_superblock(ost->ost_fs,
+		err = ocfs2_write_journal_superblock(fs,
 						     ji->ji_jsb_block,
 						     (char *)ji->ji_jsb);
 		if (err) {
 			com_err(whoami, err, "while writing node %d's journal "
 				"super block", i);
 			journal_trouble = 1;
+		} else {
+			printf("Node %d's journal replayed successfully.\n",
+			       i);
+			*replayed = 1;
 		}
-
-		printf("Node %d's journal replayed successfully.\n", i);
 	}
 
 	/* this is awkward, but we want fsck -n to tell us as much as it
@@ -641,7 +613,7 @@
 			if (ji->ji_jsb)
 				ocfs2_free(&ji->ji_jsb);
 			if (ji->ji_cinode)
-				ocfs2_free_cached_inode(ost->ost_fs, 
+				ocfs2_free_cached_inode(fs, 
 							ji->ji_cinode);
 			revoke_free_all(&ji->ji_revoke);
 		}
@@ -650,8 +622,6 @@
 
 	if (buf)
 		ocfs2_free(&buf);
-	if (dlm_buf)
-		ocfs2_free(&dlm_buf);
 	if (used_blocks)
 		ocfs2_bitmap_free(used_blocks);
 

Modified: trunk/fsck.ocfs2/pass0.c
===================================================================
--- trunk/fsck.ocfs2/pass0.c	2004-12-04 03:00:43 UTC (rev 461)
+++ trunk/fsck.ocfs2/pass0.c	2004-12-04 20:38:44 UTC (rev 462)
@@ -165,7 +165,7 @@
 				"descriptor to block %"PRIu64" somewhere in "
 				"chain %d in group allocator inode %"PRIu64, 
 				bg->bg_blkno, cs->cs_chain_no, di->i_blkno);
-			ost->ost_write_error = 1;
+			ost->ost_saw_error = 1;
 		}
 	}
 
@@ -354,7 +354,7 @@
 				"descriptor to block %"PRIu64" somewhere in "
 				"chain %d in group allocator inode %"PRIu64, 
 				bg1->bg_blkno, cs->cs_chain_no, di->i_blkno);
-			ost->ost_write_error = 1;
+			ost->ost_saw_error = 1;
 		}
 	}
 
@@ -513,7 +513,7 @@
 		if (ret) {
 			com_err(whoami, ret, "while writing inode alloc inode "
 				    "%"PRIu64, di->i_blkno);
-			ost->ost_write_error = 1;
+			ost->ost_saw_error = 1;
 			ret = 0;
 		}
 	}
@@ -600,7 +600,7 @@
 				com_err(whoami, ret, "while writing a cluster "
 					"group descriptor at block %"PRIu64,
 					blkno);
-				ost->ost_write_error = 1;
+				ost->ost_saw_error = 1;
 				continue;
 			}
 		}
@@ -646,7 +646,7 @@
 		if (ret) {
 			com_err(whoami, ret, "while writing a cluster group "
 				"descriptor at block %"PRIu64, blkno);
-			ost->ost_write_error = 1;
+			ost->ost_saw_error = 1;
 			continue;
 		}
 
@@ -669,7 +669,7 @@
 		if (ret) {
 			com_err(whoami, ret, "while writing inode alloc inode "
 				    "%"PRIu64, di->i_blkno);
-			ost->ost_write_error = 1;
+			ost->ost_saw_error = 1;
 			ret = 0;
 		}
 	}

Modified: trunk/fsck.ocfs2/pass1.c
===================================================================
--- trunk/fsck.ocfs2/pass1.c	2004-12-04 03:00:43 UTC (rev 461)
+++ trunk/fsck.ocfs2/pass1.c	2004-12-04 20:38:44 UTC (rev 462)
@@ -821,6 +821,35 @@
 	return;
 }
 
+/* XXX we really need to get the latch stuff straight */
+static errcode_t force_cluster_bit(o2fsck_state *ost, 
+				   ocfs2_cached_inode *ci,
+				   uint64_t bit,
+				   int val)
+{
+	errcode_t ret;
+	char *reason;
+
+	if (!val) {
+		reason = "Cluster %u is marked in the global cluster "
+			 "bitmap but it isn't in use.  Clear its bit "
+			 "in the bitmap?";
+	} else {
+		reason = "Cluster %u is in use but isn't set in the "
+			 "global cluster bitmap.  Set its bit in the "
+			 "bitmap?";
+	}
+
+	if (!prompt(ost, PY, 0, reason, bit))
+		return 0;
+
+	ret = ocfs2_chain_force_val(ost->ost_fs, ci, bit, !!val, NULL);
+	if (ret)
+		com_err(whoami, ret, "while trying to %s bit %"PRIu64" in the "
+			"cluster bitmap", val ? "set" : "clear", bit);
+	return ret;
+}
+
 /* once we've iterated all the inodes we should have the current working
  * set of which blocks we think are in use.  we use this to derive the set
  * of clusters that should be allocated in the cluster chain allocators.  we
@@ -893,27 +922,9 @@
 		if (cbit_found == cbit)
 			continue;
 
-		if (!ost->ost_write_cluster_alloc_asked) {
-			int yn;
-			yn = prompt(ost, PY, 0, "The cluster bitmap doesn't "
-				    "match what fsck thinks should be in use "
-				    "and freed.  Update the bitmap on disk?");
-			ost->ost_write_cluster_alloc_asked = 1;
-			ost->ost_write_cluster_alloc = !!yn;
-			if (!ost->ost_write_cluster_alloc)
-				goto out;
-		}
-
 		/* clear set bits that should have been clear up to cbit */
 		while (cbit_found < cbit) {
-			ret = ocfs2_chain_force_val(ost->ost_fs, ci,
-						    cbit_found, 0, NULL);
-			if (ret) {
-				com_err(whoami, ret, "while trying to clear "
-					"bit %"PRIu64" in the cluster bitmap.",
-					cbit_found);
-				goto out;
-			}
+			force_cluster_bit(ost, ci, cbit_found, 0);
 			cbit_found++;
 			ret = ocfs2_bitmap_find_next_set(ci->ci_chains, cbit, 
 							 &cbit_found);
@@ -922,16 +933,8 @@
 		}
 
 		/* make sure cbit is set before moving on */
-		if (cbit_found != cbit && cbit != ost->ost_fs->fs_clusters) {
-			ret = ocfs2_chain_force_val(ost->ost_fs, ci, cbit, 1,
-						    NULL);
-			if (ret) {
-				com_err(whoami, ret, "while trying to set bit "
-					"%"PRIu64" in the cluster bitmap.",
-					cbit);
-				goto out;
-			}
-		}
+		if (cbit_found != cbit && cbit != ost->ost_fs->fs_clusters)
+			force_cluster_bit(ost, ci, cbit, 1);
 	}
 
 	ret = ocfs2_write_chain_allocator(ost->ost_fs, ci);

Modified: trunk/fsck.ocfs2/problem.c
===================================================================
--- trunk/fsck.ocfs2/problem.c	2004-12-04 03:00:43 UTC (rev 461)
+++ trunk/fsck.ocfs2/problem.c	2004-12-04 20:38:44 UTC (rev 462)
@@ -146,7 +146,7 @@
  * and have a notion of grouping, as well.  The caller is expected to provide
  * a fully formed question that isn't terminated with a newline.
  */
-int prompt(o2fsck_state *ost, unsigned flags, unsigned code, const char *fmt,
+int prompt(o2fsck_state *ost, unsigned flags, uint16_t code, const char *fmt,
 	   ...)
 {
 	va_list ap;

Modified: trunk/fsck.ocfs2/util.c
===================================================================
--- trunk/fsck.ocfs2/util.c	2004-12-04 03:00:43 UTC (rev 461)
+++ trunk/fsck.ocfs2/util.c	2004-12-04 20:38:44 UTC (rev 462)
@@ -26,6 +26,7 @@
  *
  */
 #include <inttypes.h>
+#include <string.h>
 #include "ocfs2.h"
 
 #include "util.h"
@@ -39,7 +40,6 @@
 		com_err(whoami, OCFS2_ET_INTERNAL_FAILURE, "when asked to "
 			"write an inode with an i_blkno of %"PRIu64" to block "
 			"%"PRIu64, di->i_blkno, blkno);
-		ost->ost_write_error = 1;
 		return;
 	}
 
@@ -47,7 +47,7 @@
 	if (ret) {
 		com_err(whoami, ret, "while writing inode %"PRIu64, 
 		        di->i_blkno);
-		ost->ost_write_error = 1;
+		ost->ost_saw_error = 1;
 	}
 }
 
@@ -101,3 +101,56 @@
 		ocfs2_free(&buf);
 	return ret;
 }
+
+errcode_t o2fsck_read_publish(o2fsck_state *ost)
+{
+	uint16_t i, max_nodes;
+	char *dlm_buf = NULL;
+	uint64_t dlm_ino;
+	errcode_t ret;
+	int buflen;
+	char *whoami = "read_publish";
+
+	if (ost->ost_publish)
+		ocfs2_free(&ost->ost_publish);
+
+	max_nodes = OCFS2_RAW_SB(ost->ost_fs->fs_super)->s_max_nodes;
+
+	ret = ocfs2_malloc0(sizeof(ocfs_publish) * max_nodes,
+			    &ost->ost_publish);
+	if (ret) {
+		com_err(whoami, ret, "while allocating an array to store each "
+			"node's publish block");
+		goto out;
+	}
+
+	ret = ocfs2_lookup_system_inode(ost->ost_fs, DLM_SYSTEM_INODE,
+					0, &dlm_ino);
+	if (ret) {
+		com_err(whoami, ret, "while looking up the dlm system inode");
+		goto out;
+	}
+
+	ret = ocfs2_read_whole_file(ost->ost_fs, dlm_ino, &dlm_buf, &buflen);
+	if (ret) {
+		com_err(whoami, ret, "while reading dlm file");
+		goto out;
+	}
+
+	/* I have no idea what that magic math is really doing. */
+	for (i = 0; i < max_nodes; i++) {
+		int b_bits = OCFS2_RAW_SB(ost->ost_fs->fs_super)->s_blocksize_bits;
+
+		memcpy(&ost->ost_publish[i],
+		       dlm_buf + ((2 + 4 + max_nodes + i) << b_bits),
+		       sizeof(ocfs_publish));
+		if (ost->ost_publish[i].mounted)
+			ost->ost_stale_mounts = 1;
+	}
+out:
+	if (ret && ost->ost_publish)
+		ocfs2_free(&ost->ost_publish);
+	if (dlm_buf)
+		ocfs2_free(&dlm_buf);
+	return ret;
+}

Modified: trunk/libocfs2/include/ocfs2_fs.h
===================================================================
--- trunk/libocfs2/include/ocfs2_fs.h	2004-12-04 03:00:43 UTC (rev 461)
+++ trunk/libocfs2/include/ocfs2_fs.h	2004-12-04 20:38:44 UTC (rev 462)
@@ -96,6 +96,11 @@
 #define OCFS2_DLM_FL		(0x00000200)	/* DLM area */
 #define OCFS2_CHAIN_FL		(0x00000400)	/* Chain allocator */
 
+/*
+ * superblock s_state flags
+ */
+#define OCFS2_ERROR_FS		(0x00000001)	/* FS saw errors */
+
 /* Limit of space in ocfs2_dir_entry */
 #define OCFS2_MAX_FILENAME_LENGTH       255