[Ocfs2-commits] mfasheh commits r2564 - branches/readonly-operation/fs/ocfs2

svn-commits at oss.oracle.com svn-commits at oss.oracle.com
Tue Sep 13 20:48:00 CDT 2005


Author: mfasheh
Date: 2005-09-13 20:47:53 -0500 (Tue, 13 Sep 2005)
New Revision: 2564

Modified:
   branches/readonly-operation/fs/ocfs2/alloc.c
   branches/readonly-operation/fs/ocfs2/dlmglue.c
   branches/readonly-operation/fs/ocfs2/extent_map.c
   branches/readonly-operation/fs/ocfs2/file.c
   branches/readonly-operation/fs/ocfs2/journal.c
   branches/readonly-operation/fs/ocfs2/ocfs2.h
   branches/readonly-operation/fs/ocfs2/slot_map.c
   branches/readonly-operation/fs/ocfs2/suballoc.c
   branches/readonly-operation/fs/ocfs2/super.c
   branches/readonly-operation/fs/ocfs2/super.h
Log:
* Initial support for going read-only on disk errors:
        - Add ocfs2_error() and ocfs2_abort() to take the file system
          read-only (or panic if errors=panic is specified).
        - Flag the OSB as having errors and refuse an RDWR remount if
          that flag is set.
  
* Go through and change all uses of OCFS2_BUG_ON_INVALID_DINODE() to set
  the file system read-only instead of triggering a BUG.

* Convert a few uses of OCFS2_BUG_ON_RO() to call ocfs2_error()
  instead. There are plenty more of those still to do.



Modified: branches/readonly-operation/fs/ocfs2/alloc.c
===================================================================
--- branches/readonly-operation/fs/ocfs2/alloc.c	2005-09-13 02:45:42 UTC (rev 2563)
+++ branches/readonly-operation/fs/ocfs2/alloc.c	2005-09-14 01:47:53 UTC (rev 2564)
@@ -115,7 +115,11 @@
 
 	mlog_entry_void();
 
-	OCFS2_BUG_ON_INVALID_DINODE(fe);
+	if (!OCFS2_IS_VALID_DINODE(fe)) {
+		OCFS2_RO_ON_INVALID_DINODE(inode->i_sb, fe);
+		retval = -EIO;
+		goto bail;
+	}
 
 	if (fe->i_last_eb_blk) {
 		retval = ocfs2_read_block(osb, le64_to_cpu(fe->i_last_eb_blk),
@@ -913,7 +917,12 @@
 
 	di = (ocfs2_dinode *) tl_bh->b_data;
 	tl = &di->id2.i_dealloc;
-	OCFS2_BUG_ON_INVALID_DINODE(di);
+	if (!OCFS2_IS_VALID_DINODE(di)) {
+		OCFS2_RO_ON_INVALID_DINODE(osb->sb, di);
+		status = -EIO;
+		goto bail;
+	}
+
 	tl_count = le16_to_cpu(tl->tl_count);
 	mlog_bug_on_msg(tl_count > ocfs2_truncate_recs_per_inode(osb->sb) ||
 			tl_count == 0,
@@ -1063,7 +1072,11 @@
 
 	di = (ocfs2_dinode *) tl_bh->b_data;
 	tl = &di->id2.i_dealloc;
-	OCFS2_BUG_ON_INVALID_DINODE(di);
+	if (!OCFS2_IS_VALID_DINODE(di)) {
+		OCFS2_RO_ON_INVALID_DINODE(osb->sb, di);
+		status = -EIO;
+		goto bail;
+	}
 
 	num_to_flush = le16_to_cpu(tl->tl_used);
 	mlog(0, "Flush %u records from truncate log #%"MLFu64"\n",
@@ -1224,7 +1237,11 @@
 
 	di = (ocfs2_dinode *) tl_bh->b_data;
 	tl = &di->id2.i_dealloc;
-	OCFS2_BUG_ON_INVALID_DINODE(di);
+	if (!OCFS2_IS_VALID_DINODE(di)) {
+		OCFS2_RO_ON_INVALID_DINODE(tl_inode->i_sb, di);
+		status = -EIO;
+		goto bail;
+	}
 
 	if (le16_to_cpu(tl->tl_used)) {
 		mlog(0, "We'll have %u logs to recover\n",
@@ -1393,10 +1410,14 @@
 	ocfs2_extent_list *el;
 	struct buffer_head *bh = NULL;
 
-	OCFS2_BUG_ON_INVALID_DINODE(fe);
-
 	*new_last_eb = NULL;
 
+	if (!OCFS2_IS_VALID_DINODE(fe)) {
+		OCFS2_RO_ON_INVALID_DINODE(inode->i_sb, fe);
+		status = -EIO;
+		goto bail;
+	}
+
 	/* we have no tree, so of course, no last_eb. */
 	if (!fe->id2.i_list.l_tree_depth)
 		goto bail;

Modified: branches/readonly-operation/fs/ocfs2/dlmglue.c
===================================================================
--- branches/readonly-operation/fs/ocfs2/dlmglue.c	2005-09-13 02:45:42 UTC (rev 2563)
+++ branches/readonly-operation/fs/ocfs2/dlmglue.c	2005-09-14 01:47:53 UTC (rev 2564)
@@ -49,6 +49,7 @@
 #include "inode.h"
 #include "journal.h"
 #include "slot_map.h"
+#include "super.h"
 #include "uptodate.h"
 #include "vote.h"
 
@@ -1520,7 +1521,11 @@
 		 * node provides a guarantee that we receive that
 		 * message and can mark the inode before dropping any
 		 * locks associated with it. */
-		OCFS2_BUG_ON_INVALID_DINODE(fe);
+		if (!OCFS2_IS_VALID_DINODE(fe)) {
+			OCFS2_RO_ON_INVALID_DINODE(inode->i_sb, fe);
+			status = -EIO;
+			goto bail_refresh;
+		}
 		mlog_bug_on_msg(inode->i_generation !=
 				le32_to_cpu(fe->i_generation),
 				"Invalid dinode %"MLFu64" disk generation: %u "

Modified: branches/readonly-operation/fs/ocfs2/extent_map.c
===================================================================
--- branches/readonly-operation/fs/ocfs2/extent_map.c	2005-09-13 02:45:42 UTC (rev 2563)
+++ branches/readonly-operation/fs/ocfs2/extent_map.c	2005-09-14 01:47:53 UTC (rev 2564)
@@ -36,6 +36,7 @@
 
 #include "extent_map.h"
 #include "inode.h"
+#include "super.h"
 
 #include "buffer_head_io.h"
 
@@ -312,7 +313,11 @@
 			return ret;
 		}
 		di = (ocfs2_dinode *)bh->b_data;
-		OCFS2_BUG_ON_INVALID_DINODE(di);
+		if (!OCFS2_IS_VALID_DINODE(di)) {
+			brelse(bh);
+			OCFS2_RO_ON_INVALID_DINODE(inode->i_sb, di);
+			return -EIO;
+		}
 		el = &di->id2.i_list;
 	}
 

Modified: branches/readonly-operation/fs/ocfs2/file.c
===================================================================
--- branches/readonly-operation/fs/ocfs2/file.c	2005-09-13 02:45:42 UTC (rev 2563)
+++ branches/readonly-operation/fs/ocfs2/file.c	2005-09-14 01:47:53 UTC (rev 2564)
@@ -46,6 +46,7 @@
 #include "journal.h"
 #include "mmap.h"
 #include "suballoc.h"
+#include "super.h"
 
 #include "buffer_head_io.h"
 
@@ -484,7 +485,11 @@
 	}
 
 	fe = (ocfs2_dinode *) fe_bh->b_data;
-	OCFS2_BUG_ON_INVALID_DINODE(fe);
+	if (!OCFS2_IS_VALID_DINODE(fe)) {
+		OCFS2_RO_ON_INVALID_DINODE(inode->i_sb, fe);
+		status = -EIO;
+		goto bail;
+	}
 	mlog_bug_on_msg(le64_to_cpu(fe->i_size) != i_size_read(inode),
 			"Inode %"MLFu64", inode i_size = %lld != di "
 			"i_size = %"MLFu64", i_flags = 0x%x\n",
@@ -771,7 +776,11 @@
 	}
 
 	fe = (ocfs2_dinode *) bh->b_data;
-	OCFS2_BUG_ON_INVALID_DINODE(fe);
+	if (!OCFS2_IS_VALID_DINODE(fe)) {
+		OCFS2_RO_ON_INVALID_DINODE(inode->i_sb, fe);
+		status = -EIO;
+		goto leave;
+	}
 	BUG_ON(i_size_read(inode) !=
 	       (le64_to_cpu(fe->i_size) - *bytes_extended));
 	BUG_ON(new_i_size < i_size_read(inode));

Modified: branches/readonly-operation/fs/ocfs2/journal.c
===================================================================
--- branches/readonly-operation/fs/ocfs2/journal.c	2005-09-13 02:45:42 UTC (rev 2563)
+++ branches/readonly-operation/fs/ocfs2/journal.c	2005-09-14 01:47:53 UTC (rev 2564)
@@ -610,7 +610,15 @@
 	mlog_entry_void();
 
 	fe = (ocfs2_dinode *)bh->b_data;
-	OCFS2_BUG_ON_INVALID_DINODE(fe);
+	if (!OCFS2_IS_VALID_DINODE(fe)) {
+		/* This is called from startup/shutdown which will
+		 * handle the errors in a specific manner, so no need
+		 * to call ocfs2_error() here. */
+		mlog(ML_ERROR, "Journal dinode %"MLFu64"  has invalid "
+		     "signature: %.*s\n", fe->i_blkno, 7, fe->i_signature);
+		status = -EIO;
+		goto out;
+	}
 
 	flags = le32_to_cpu(fe->id1.journal1.ij_flags);
 	if (dirty)
@@ -623,6 +631,7 @@
 	if (status < 0)
 		mlog_errno(status);
 
+out:
 	mlog_exit(status);
 	return status;
 }

Modified: branches/readonly-operation/fs/ocfs2/ocfs2.h
===================================================================
--- branches/readonly-operation/fs/ocfs2/ocfs2.h	2005-09-13 02:45:42 UTC (rev 2563)
+++ branches/readonly-operation/fs/ocfs2/ocfs2.h	2005-09-14 01:47:53 UTC (rev 2564)
@@ -303,6 +303,14 @@
  * in parallel so we want the transitions to be atomic. this also
  * means that any future flags osb_flags must be protected by spinlock
  * too! */
+static inline void ocfs2_set_osb_flag(ocfs2_super *osb,
+				      unsigned long flag)
+{
+	spin_lock(&osb->osb_lock);
+	osb->osb_flags |= flag;
+	spin_unlock(&osb->osb_lock);
+}
+
 static inline void ocfs2_set_ro_flag(ocfs2_super *osb,
 				     int hard)
 {
@@ -344,11 +352,12 @@
 #define OCFS2_IS_VALID_DINODE(ptr)					\
 	(!strcmp((ptr)->i_signature, OCFS2_INODE_SIGNATURE))
 
-#define OCFS2_BUG_ON_INVALID_DINODE(__di)	do {			\
-	mlog_bug_on_msg(!OCFS2_IS_VALID_DINODE((__di)),			\
+#define OCFS2_RO_ON_INVALID_DINODE(__sb, __di)	do {			\
+	typeof(__di) ____di = (__di);					\
+	ocfs2_error((__sb), 						\
 		"Dinode # %"MLFu64" has bad signature %.*s\n",		\
-		(__di)->i_blkno, 7,					\
-		(__di)->i_signature);					\
+		(____di)->i_blkno, 7,					\
+		(____di)->i_signature);					\
 } while (0);
 
 #define OCFS2_IS_VALID_EXTENT_BLOCK(ptr)				\

Modified: branches/readonly-operation/fs/ocfs2/slot_map.c
===================================================================
--- branches/readonly-operation/fs/ocfs2/slot_map.c	2005-09-13 02:45:42 UTC (rev 2563)
+++ branches/readonly-operation/fs/ocfs2/slot_map.c	2005-09-14 01:47:53 UTC (rev 2564)
@@ -38,6 +38,7 @@
 #include "heartbeat.h"
 #include "inode.h"
 #include "slot_map.h"
+#include "super.h"
 #include "sysfile.h"
 
 #include "buffer_head_io.h"

Modified: branches/readonly-operation/fs/ocfs2/suballoc.c
===================================================================
--- branches/readonly-operation/fs/ocfs2/suballoc.c	2005-09-13 02:45:42 UTC (rev 2563)
+++ branches/readonly-operation/fs/ocfs2/suballoc.c	2005-09-14 01:47:53 UTC (rev 2564)
@@ -40,6 +40,7 @@
 #include "journal.h"
 #include "localalloc.h"
 #include "suballoc.h"
+#include "super.h"
 #include "sysfile.h"
 #include "uptodate.h"
 
@@ -368,8 +369,17 @@
 	}
 
 	fe = (ocfs2_dinode *) bh->b_data;
-	OCFS2_BUG_ON_INVALID_DINODE(fe);
-	OCFS2_BUG_ON_RO(!(fe->i_flags & cpu_to_le32(OCFS2_CHAIN_FL)));
+	if (!OCFS2_IS_VALID_DINODE(fe)) {
+		OCFS2_RO_ON_INVALID_DINODE(alloc_inode->i_sb, fe);
+		status = -EIO;
+		goto bail;
+	}
+	if (!(fe->i_flags & cpu_to_le32(OCFS2_CHAIN_FL))) {
+		ocfs2_error(alloc_inode->i_sb, "Invalid chain allocator "
+			    "# %"MLFu64"\n", le64_to_cpu(fe->i_blkno));
+		status = -EIO;
+		goto bail;
+	}
 
 	free_bits = le32_to_cpu(fe->id1.bitmap1.i_total) -
 		le32_to_cpu(fe->id1.bitmap1.i_used);
@@ -789,7 +799,11 @@
 	ocfs2_group_desc *bg = (ocfs2_group_desc *) bg_bh->b_data;
 	ocfs2_group_desc *prev_bg = (ocfs2_group_desc *) prev_bg_bh->b_data;
 
-	OCFS2_BUG_ON_INVALID_DINODE(fe);
+	if (!OCFS2_IS_VALID_DINODE(fe)) {
+		OCFS2_RO_ON_INVALID_DINODE(alloc_inode->i_sb, fe);
+		status = -EIO;
+		goto out;
+	}
 	OCFS2_BUG_ON_INVALID_GROUP_DESC(bg);
 	OCFS2_BUG_ON_INVALID_GROUP_DESC(prev_bg);
 
@@ -805,7 +819,7 @@
 				      OCFS2_JOURNAL_ACCESS_WRITE);
 	if (status < 0) {
 		mlog_errno(status);
-		goto bail;
+		goto out_rollback;
 	}
 
 	prev_bg->bg_next_group = bg->bg_next_group;
@@ -813,14 +827,14 @@
 	status = ocfs2_journal_dirty(handle, prev_bg_bh);
 	if (status < 0) {
 		mlog_errno(status);
-		goto bail;
+		goto out_rollback;
 	}
 
 	status = ocfs2_journal_access(handle, alloc_inode, bg_bh,
 				      OCFS2_JOURNAL_ACCESS_WRITE);
 	if (status < 0) {
 		mlog_errno(status);
-		goto bail;
+		goto out_rollback;
 	}
 
 	bg->bg_next_group = fe->id2.i_chain.cl_recs[chain].c_blkno;
@@ -828,14 +842,14 @@
 	status = ocfs2_journal_dirty(handle, bg_bh);
 	if (status < 0) {
 		mlog_errno(status);
-		goto bail;
+		goto out_rollback;
 	}
 
 	status = ocfs2_journal_access(handle, alloc_inode, fe_bh,
 				      OCFS2_JOURNAL_ACCESS_WRITE);
 	if (status < 0) {
 		mlog_errno(status);
-		goto bail;
+		goto out_rollback;
 	}
 
 	fe->id2.i_chain.cl_recs[chain].c_blkno = bg->bg_blkno;
@@ -843,17 +857,17 @@
 	status = ocfs2_journal_dirty(handle, fe_bh);
 	if (status < 0) {
 		mlog_errno(status);
-		goto bail;
+		goto out_rollback;
 	}
 
 	status = 0;
-bail:
+out_rollback:
 	if (status < 0) {
 		fe->id2.i_chain.cl_recs[chain].c_blkno = cpu_to_le64(fe_ptr);
 		bg->bg_next_group = cpu_to_le64(bg_ptr);
 		prev_bg->bg_next_group = cpu_to_le64(prev_bg_ptr);
 	}
-
+out:
 	mlog_exit(status);
 	return status;
 }
@@ -1078,9 +1092,21 @@
 	BUG_ON(!ac->ac_bh);
 
 	fe = (ocfs2_dinode *) ac->ac_bh->b_data;
-	OCFS2_BUG_ON_INVALID_DINODE(fe);
-	OCFS2_BUG_ON_RO(le32_to_cpu(fe->id1.bitmap1.i_used) >=
-			le32_to_cpu(fe->id1.bitmap1.i_total));
+	if (!OCFS2_IS_VALID_DINODE(fe)) {
+		OCFS2_RO_ON_INVALID_DINODE(osb->sb, fe);
+		status = -EIO;
+		goto bail;
+	}
+	if (le32_to_cpu(fe->id1.bitmap1.i_used) >=
+	    le32_to_cpu(fe->id1.bitmap1.i_total)) {
+		ocfs2_error(osb->sb, "Chain allocator dinode %"MLFu64" has %u"
+			    "used bits but only %u total.\n",
+			    le64_to_cpu(fe->i_blkno),
+			    le32_to_cpu(fe->id1.bitmap1.i_used),
+			    le32_to_cpu(fe->id1.bitmap1.i_total));
+		status = -EIO;
+		goto bail;
+	}
 
 	cl = (ocfs2_chain_list *) &fe->id2.i_chain;
 
@@ -1407,11 +1433,15 @@
 
 	mlog_entry_void();
 
-	OCFS2_BUG_ON_INVALID_DINODE(fe);
+	if (!OCFS2_IS_VALID_DINODE(fe)) {
+		OCFS2_RO_ON_INVALID_DINODE(alloc_inode->i_sb, fe);
+		status = -EIO;
+		goto bail;
+	}
 	BUG_ON((count + start_bit) > ocfs2_bits_per_group(cl));
 
 	mlog(0, "suballocator %"MLFu64": freeing %u bits from group %"MLFu64
-	        ", starting at %u\n",
+	     ", starting at %u\n",
 	     OCFS2_I(alloc_inode)->ip_blkno, count, bg_blkno,
 	     start_bit);
 

Modified: branches/readonly-operation/fs/ocfs2/super.c
===================================================================
--- branches/readonly-operation/fs/ocfs2/super.c	2005-09-13 02:45:42 UTC (rev 2563)
+++ branches/readonly-operation/fs/ocfs2/super.c	2005-09-14 01:47:53 UTC (rev 2564)
@@ -389,9 +389,16 @@
 			osb->osb_flags |= OCFS2_OSB_SOFT_RO;
 		} else {
 			mlog(0, "Making ro filesystem writeable.\n");
+
+			if (osb->osb_flags & OCFS2_ERROR_FS) {
+				mlog(ML_ERROR, "Cannot remount RDWR "
+				     "filesystem due to previous errors.\n");
+				ret = -EROFS;
+				goto unlock_osb;
+			}
 			incompat_features = OCFS2_HAS_RO_COMPAT_FEATURE(sb, ~OCFS2_FEATURE_RO_COMPAT_SUPP);
 			if (incompat_features) {
-				mlog(ML_ERROR, "couldn't mount RDWR because "
+				mlog(ML_ERROR, "Cannot remount RDWR because "
 				     "of unsupported optional features "
 				     "(%x).\n", incompat_features);
 				ret = -EINVAL;
@@ -412,7 +419,6 @@
 		 * remount. */
 		osb->s_mount_opt = parsed_options;
 	}
-
 out:
 	return ret;
 }
@@ -1628,5 +1634,79 @@
 	mlog_exit_void();
 }
 
+/* Put OCFS2 into a readonly state, or (if the user specifies it),
+ * panic(). We do not support continue-on-error operation. */
+static void ocfs2_handle_error(struct super_block *sb)
+{
+	ocfs2_super *osb = OCFS2_SB(sb);
+
+	if (osb->s_mount_opt & OCFS2_MOUNT_ERRORS_PANIC)
+		panic("OCFS2: (device %s): panic forced after error\n",
+		      sb->s_id);
+
+	ocfs2_set_osb_flag(osb, OCFS2_OSB_ERROR_FS);
+
+	if (sb->s_flags & MS_RDONLY &&
+	    (ocfs2_is_soft_readonly(osb) ||
+	     ocfs2_is_hard_readonly(osb)))
+		return;
+
+	printk(KERN_CRIT "Remounting filesystem read-only, "
+	       "please run fsck.ocfs2\n");
+	sb->s_flags |= MS_RDONLY;
+	ocfs2_set_ro_flag(osb, 0);
+}
+
+static char error_buf[1024];
+
+void __ocfs2_error(struct super_block *sb,
+		   const char *function,
+		   const char *fmt, ...)
+{
+	va_list args;
+
+	va_start(args, fmt);
+	vsprintf(error_buf, fmt, args);
+	va_end(args);
+
+	/* Not using mlog here because we want to show the actual
+	 * function the error came from. */
+	printk(KERN_CRIT "OCFS2: ERROR (device %s): %s: %s\n",
+	       sb->s_id, function, error_buf);
+
+	ocfs2_handle_error(sb);
+}
+
+/* Handle critical errors. This is intentionally more drastic than
+ * ocfs2_handle_error, so we only use for things like journal errors,
+ * etc. */
+void __ocfs2_abort(struct super_block* sb,
+		   const char *function,
+		   const char *fmt, ...)
+{
+	va_list args;
+
+	va_start(args, fmt);
+	vsprintf(error_buf, fmt, args);
+	va_end(args);
+
+	printk(KERN_CRIT "OCFS2: abort (device %s): %s: %s\n",
+	       sb->s_id, function, error_buf);
+
+	/* We don't have the cluster support yet to go straight to
+	 * hard readonly in here. Until then, we want to keep
+	 * ocfs2_abort() so that we can at least mark critical
+	 * errors.
+	 *
+	 * TODO: This should abort the journal and alert other nodes
+	 * that our slot needs recovery. */
+
+	/* Force a panic(). This stinks, but it's better than letting
+	 * things continue without having a proper hard readonly
+	 * here. */
+	OCFS2_SB(sb)->s_mount_opt |= OCFS2_MOUNT_ERRORS_PANIC;
+	ocfs2_handle_error(sb);
+}
+
 module_init(ocfs2_init);
 module_exit(ocfs2_exit);

Modified: branches/readonly-operation/fs/ocfs2/super.h
===================================================================
--- branches/readonly-operation/fs/ocfs2/super.h	2005-09-13 02:45:42 UTC (rev 2563)
+++ branches/readonly-operation/fs/ocfs2/super.h	2005-09-14 01:47:53 UTC (rev 2564)
@@ -31,4 +31,14 @@
 int ocfs2_publish_get_mount_state(ocfs2_super *osb,
 				  int node_num);
 
+void __ocfs2_error(struct super_block *sb,
+		   const char *function,
+		   const char *fmt, ...);
+#define ocfs2_error(sb, fmt, args...) __ocfs2_error(sb, __PRETTY_FUNCTION__, fmt, args)
+
+void __ocfs2_abort(struct super_block *sb,
+		   const char *function,
+		   const char *fmt, ...);
+#define ocfs2_abort(sb, fmt, args...) __ocfs2_abort(sb, __PRETTY_FUNCTION__, fmt, args)
+
 #endif /* OCFS2_SUPER_H */



More information about the Ocfs2-commits mailing list