[Ocfs2-commits] mfasheh commits r2409 - trunk/fs/ocfs2

svn-commits at oss.oracle.com svn-commits at oss.oracle.com
Sat Jun 18 19:55:11 CDT 2005


Author: mfasheh
Signed-off-by: manish
Date: 2005-06-18 19:55:10 -0500 (Sat, 18 Jun 2005)
New Revision: 2409

Added:
   trunk/fs/ocfs2/seqnum.c
   trunk/fs/ocfs2/seqnum.h
Modified:
   trunk/fs/ocfs2/Makefile
   trunk/fs/ocfs2/alloc.c
   trunk/fs/ocfs2/buffer_head_io.c
   trunk/fs/ocfs2/buffer_head_io.h
   trunk/fs/ocfs2/dir.c
   trunk/fs/ocfs2/dlmglue.c
   trunk/fs/ocfs2/dlmglue.h
   trunk/fs/ocfs2/file.c
   trunk/fs/ocfs2/file.h
   trunk/fs/ocfs2/inode.c
   trunk/fs/ocfs2/inode.h
   trunk/fs/ocfs2/namei.c
   trunk/fs/ocfs2/ocfs2.h
   trunk/fs/ocfs2/suballoc.c
   trunk/fs/ocfs2/super.c
Log:
* Increase the number of bits used in our buffer sequence numbers. The 
  previous value was too small and we were wrapping too quickly, causing old
  buffers to be considered clean.
        - This is, at best, another stopgap. Eventually we'll wrap again - a
          permanent solution is required.

* The sequence number code was a mess, which made finding this bug take too
  long. Clean things up by moving the code to its own file,
  commenting things properly and avoiding redundant hard-coded values. This also
  makes getting sequence numbers from another source much simpler to code - an
  approach which may be taken soon.

* We are more careful now to avoid incrementing the global sequence on newly
  created inodes.

* We weren't setting caching information and iops on char and block device  
  inodes.        

Signed-off-by: manish



Modified: trunk/fs/ocfs2/Makefile
===================================================================
--- trunk/fs/ocfs2/Makefile	2005-06-18 00:45:11 UTC (rev 2408)
+++ trunk/fs/ocfs2/Makefile	2005-06-19 00:55:10 UTC (rev 2409)
@@ -53,6 +53,7 @@
 	mmap.c 			\
 	namei.c 		\
 	proc.c 			\
+	seqnum.c 		\
 	slot_map.c 		\
 	suballoc.c 		\
 	super.c 		\
@@ -81,6 +82,7 @@
 	mmap.h			\
 	namei.h			\
 	proc.h			\
+	seqnum.h 		\
 	slot_map.h		\
 	suballoc.h		\
 	super.h			\

Modified: trunk/fs/ocfs2/alloc.c
===================================================================
--- trunk/fs/ocfs2/alloc.c	2005-06-18 00:45:11 UTC (rev 2408)
+++ trunk/fs/ocfs2/alloc.c	2005-06-19 00:55:10 UTC (rev 2409)
@@ -39,6 +39,7 @@
 #include "inode.h"
 #include "journal.h"
 #include "localalloc.h"
+#include "seqnum.h"
 #include "suballoc.h"
 #include "sysfile.h"
 #include "file.h"
@@ -180,7 +181,7 @@
 				goto bail;
 			}
 			set_buffer_uptodate(bhs[i]);
-			ocfs2_set_bh_seqnum(inode, bhs[i]);
+			ocfs2_set_bh_sequence(inode, bhs[i]);
 
 			status = ocfs2_journal_access(handle, inode, bhs[i],
 						      OCFS2_JOURNAL_ACCESS_CREATE);

Modified: trunk/fs/ocfs2/buffer_head_io.c
===================================================================
--- trunk/fs/ocfs2/buffer_head_io.c	2005-06-18 00:45:11 UTC (rev 2408)
+++ trunk/fs/ocfs2/buffer_head_io.c	2005-06-19 00:55:10 UTC (rev 2409)
@@ -35,6 +35,7 @@
 #include "alloc.h"
 #include "inode.h"
 #include "journal.h"
+#include "seqnum.h"
 
 #include "buffer_head_io.h"
 
@@ -114,9 +115,9 @@
 		wait_on_buffer(bh);
 
 		if (inode)
-			ocfs2_set_bh_seqnum(inode, bh);
+			ocfs2_set_bh_sequence(inode, bh);
 		else
-			ocfs2_clear_bh_seqnum(bh);
+			ocfs2_clear_bh_sequence(bh);
 	}
 	if (inode)
 		up(&OCFS2_I(inode)->ip_io_sem);
@@ -177,12 +178,11 @@
 		ignore_cache = 0;
 
 		if (flags & OCFS2_BH_CACHED && inode && 
-		    !ocfs2_test_bh_seqnum(inode, bh)) {
-			mlog(ML_SEQNUM, "(read) bh (%llu) seqnum (%lu) does "
-					"not match inode (%u)\n",
-			     (unsigned long long)bh->b_blocknr, 
-			     (bh->b_state & STATE_BIT_MASK) >> 19,
-			     ocfs2_get_inode_seq(inode));
+		    !ocfs2_test_bh_sequence(inode, bh)) {
+			mlog(ML_SEQNUM, "bh (%llu) seqnum does not match "
+			     "inode %"MLFu64"\n",
+			     (unsigned long long)bh->b_blocknr,
+			     OCFS2_I(inode)->ip_blkno);
 			ignore_cache = 1;
 		}
 
@@ -243,9 +243,9 @@
 		wait_on_buffer(bh);
 
 		if (inode)
-			ocfs2_set_bh_seqnum(inode, bh);
+			ocfs2_set_bh_sequence(inode, bh);
 		else
-			ocfs2_clear_bh_seqnum(bh);
+			ocfs2_clear_bh_sequence(bh);
 	}
 	if (inode)
 		up(&OCFS2_I(inode)->ip_io_sem);

Modified: trunk/fs/ocfs2/buffer_head_io.h
===================================================================
--- trunk/fs/ocfs2/buffer_head_io.h	2005-06-18 00:45:11 UTC (rev 2408)
+++ trunk/fs/ocfs2/buffer_head_io.h	2005-06-19 00:55:10 UTC (rev 2409)
@@ -56,46 +56,6 @@
 #define OCFS2_BH_CACHED            1
 #define OCFS2_BH_READAHEAD         8	/* use this to pass READA down to submit_bh */
 
-#define STATE_BH_BITS	8
-#define USED_BH_BITS	23 /* Number of BH bits used up through JBD */
-
-#define STATE_BIT_MAX           (1 << STATE_BH_BITS)
-#define STATE_BIT_MAX_MASK      ((1 << STATE_BH_BITS)-1)
-#define STATE_BIT_MASK		((~0UL) << USED_BH_BITS)
-
-
-static inline unsigned int ocfs2_get_inode_seq(struct inode *inode)
-{
-	return atomic_read(&OCFS2_I(inode)->ip_clean_buffer_seq);
-}
-
-static inline void ocfs2_clear_bh_seqnum(struct buffer_head *bh)
-{
-	unsigned int prev = bh->b_state & STATE_BIT_MASK;
-	bh->b_state &= ~prev;
-}
-
-static inline void ocfs2_set_bh_seqnum(struct inode *inode,
-				       struct buffer_head *bh)
-{
-	unsigned int seq = (ocfs2_get_inode_seq(inode) & 
-			    STATE_BIT_MAX_MASK) << USED_BH_BITS;
-
-	ocfs2_clear_bh_seqnum(bh);
-	bh->b_state |= seq;
-}
-
-static inline int ocfs2_test_bh_seqnum(struct inode *inode,
-				       struct buffer_head *bh)
-{
-	int ret;
-	unsigned int seq =
-		(bh->b_state & STATE_BIT_MASK) >> USED_BH_BITS;
-
-	ret = (seq == ocfs2_get_inode_seq(inode));
-	return ret;
-}
-
 static inline int ocfs2_write_block(ocfs2_super * osb, struct buffer_head *bh, 
 				    struct inode *inode)
 {

Modified: trunk/fs/ocfs2/dir.c
===================================================================
--- trunk/fs/ocfs2/dir.c	2005-06-18 00:45:11 UTC (rev 2408)
+++ trunk/fs/ocfs2/dir.c	2005-06-19 00:55:10 UTC (rev 2409)
@@ -54,6 +54,7 @@
 #include "inode.h"
 #include "journal.h"
 #include "namei.h"
+#include "seqnum.h"
 #include "suballoc.h"
 
 #include "buffer_head_io.h"
@@ -464,7 +465,7 @@
 	}
 
 	set_buffer_uptodate(new_bh);
-	ocfs2_set_bh_seqnum(dir, new_bh);
+	ocfs2_set_bh_sequence(dir, new_bh);
 	status = ocfs2_journal_access(handle, dir, new_bh, 
 				      OCFS2_JOURNAL_ACCESS_CREATE);
 	if (status < 0) {

Modified: trunk/fs/ocfs2/dlmglue.c
===================================================================
--- trunk/fs/ocfs2/dlmglue.c	2005-06-18 00:45:11 UTC (rev 2408)
+++ trunk/fs/ocfs2/dlmglue.c	2005-06-19 00:55:10 UTC (rev 2409)
@@ -49,13 +49,12 @@
 #include "heartbeat.h"
 #include "inode.h"
 #include "journal.h"
+#include "seqnum.h"
 #include "slot_map.h"
 #include "vote.h"
 
 #include "buffer_head_io.h"
 
-static spinlock_t clean_buffer_lock = SPIN_LOCK_UNLOCKED;
-
 static int ocfs2_build_lock_name(enum ocfs2_lock_type type,
 				 u64 blkno,
 				 u32 generation,
@@ -178,8 +177,6 @@
 static int ocfs2_generic_handle_bast(struct ocfs2_lock_res *lockres, int level);
 static inline void ocfs2_handle_meta_convert_action(struct inode *inode,
 						    struct ocfs2_lock_res *lockres);
-static void ocfs2_inc_inode_seq(ocfs2_super *osb,
-				struct inode *inode);
 static void ocfs2_schedule_blocked_lock(ocfs2_super *osb,
 					struct ocfs2_lock_res *lockres);
 static void ocfs2_schedule_blocked_inode_lock(struct inode *inode,
@@ -565,37 +562,6 @@
 	mlog_exit_void();
 }
 
-void ocfs2_init_inode_seq(struct inode *inode)
-{
-	atomic_set(&OCFS2_I(inode)->ip_clean_buffer_seq,
-		   atomic_read(&OCFS2_SB(inode->i_sb)->clean_buffer_seq));
-}
-
-static void ocfs2_inc_inode_seq(ocfs2_super *osb,
-				struct inode *inode)
-{
-	mlog_entry_void();
-
-	mlog(0, "incrementing inode seq... current is %d\n", ocfs2_get_inode_seq(inode));
-
-	/* wrap to ONE after 13 bits, will need a spinlock */
-	spin_lock(&clean_buffer_lock);
-	if ((atomic_read(&osb->clean_buffer_seq) + 1) % STATE_BIT_MAX == 0)
-		atomic_set(&osb->clean_buffer_seq, 1);
-	else
-		atomic_inc(&osb->clean_buffer_seq);
-	spin_unlock(&clean_buffer_lock);
-
-	/* doesn't matter if this another process */
-	/* has already incremented the global seq */
-	ocfs2_init_inode_seq(inode);
-
-	mlog(0, "done incrementing inode seq... new is %d\n",
-	     ocfs2_get_inode_seq(inode));
-
-	mlog_exit_void();
-}
-
 static inline void ocfs2_generic_handle_convert_action(struct ocfs2_lock_res *lockres)
 {
 	mlog_entry_void();
@@ -618,13 +584,11 @@
 static inline void ocfs2_handle_meta_convert_action(struct inode *inode,
 						    struct ocfs2_lock_res *lockres)
 {
-	ocfs2_super *osb = OCFS2_SB(inode->i_sb);
-
 	mlog_entry_void();
 
 	/* generic_handle_convert_action will set the refresh flag for us. */
 	if (lockres->l_level == LKM_NLMODE)
-		ocfs2_inc_inode_seq(osb, inode);
+		ocfs2_inc_inode_sequence(inode);
 	ocfs2_generic_handle_convert_action(lockres);
 
 	mlog_exit_void();
@@ -652,13 +616,11 @@
 {
 	struct ocfs2_lock_res *lockres = opaque;
 	struct inode *inode;
-	ocfs2_super *osb;
 	dlm_lockstatus *lksb;
 
 	mlog_entry_void();
 
 	inode = ocfs2_lock_res_inode(lockres);
-	osb = OCFS2_SB(inode->i_sb);
 
 	mlog(0, "AST fired for inode %"MLFu64", l_action = %u, type = %s\n",
 	     OCFS2_I(inode)->ip_blkno, lockres->l_action,
@@ -684,7 +646,7 @@
 		if (lockres->l_type == OCFS2_LOCK_TYPE_META &&
 		    lockres->l_requested > LKM_NLMODE &&
 		    !(lockres->l_flags & OCFS2_LOCK_LOCAL))
-			ocfs2_inc_inode_seq(osb, inode);
+			ocfs2_inc_inode_sequence(inode);
 
 		ocfs2_generic_handle_attach_action(lockres);
 		lockres_clear_flags(lockres, OCFS2_LOCK_LOCAL);

Modified: trunk/fs/ocfs2/dlmglue.h
===================================================================
--- trunk/fs/ocfs2/dlmglue.h	2005-06-18 00:45:11 UTC (rev 2408)
+++ trunk/fs/ocfs2/dlmglue.h	2005-06-19 00:55:10 UTC (rev 2409)
@@ -121,6 +121,5 @@
 
 void ocfs2_meta_lvb_set_trunc_clusters(struct inode *inode,
 				       unsigned int trunc_clusters);
-void ocfs2_init_inode_seq(struct inode *inode);
 
 #endif	/* DLMGLUE_H */

Modified: trunk/fs/ocfs2/file.c
===================================================================
--- trunk/fs/ocfs2/file.c	2005-06-18 00:45:11 UTC (rev 2408)
+++ trunk/fs/ocfs2/file.c	2005-06-19 00:55:10 UTC (rev 2409)
@@ -993,7 +993,8 @@
 	}
 	unlock = 1;
 
-	if (attr->ia_valid & ATTR_SIZE &&
+	if (S_ISREG(inode->i_mode) &&
+	    attr->ia_valid & ATTR_SIZE &&
 	    newsize != i_size_read(inode)) {
 		bytes_added = 0;
 
@@ -1108,3 +1109,8 @@
 	.setattr	= ocfs2_setattr,
 	.getattr	= ocfs2_getattr,
 };
+
+struct inode_operations ocfs2_special_file_iops = {
+	.setattr	= ocfs2_setattr,
+	.getattr	= ocfs2_getattr,
+};

Modified: trunk/fs/ocfs2/file.h
===================================================================
--- trunk/fs/ocfs2/file.h	2005-06-18 00:45:11 UTC (rev 2408)
+++ trunk/fs/ocfs2/file.h	2005-06-19 00:55:10 UTC (rev 2409)
@@ -29,6 +29,7 @@
 extern struct file_operations ocfs2_fops;
 extern struct file_operations ocfs2_dops;
 extern struct inode_operations ocfs2_file_iops;
+extern struct inode_operations ocfs2_special_file_iops;
 struct _ocfs2_alloc_context;
 
 enum ocfs2_alloc_restarted {

Modified: trunk/fs/ocfs2/inode.c
===================================================================
--- trunk/fs/ocfs2/inode.c	2005-06-18 00:45:11 UTC (rev 2408)
+++ trunk/fs/ocfs2/inode.c	2005-06-19 00:55:10 UTC (rev 2409)
@@ -44,6 +44,7 @@
 #include "inode.h"
 #include "journal.h"
 #include "namei.h"
+#include "seqnum.h"
 #include "suballoc.h"
 #include "super.h"
 #include "symlink.h"
@@ -274,23 +275,20 @@
 		 * handle superblocks today ;-) */
 		BUG();
 	}
-	
+
 	switch (inode->i_mode & S_IFMT) {
 	    case S_IFREG:
-		    ocfs2_init_inode_seq(inode);
 		    inode->i_fop = &ocfs2_fops;
 		    inode->i_op = &ocfs2_file_iops;
 		    i_size_write(inode, fe->i_size);
 		    OCFS2_I(inode)->ip_mmu_private = inode->i_size;
 		    break;
 	    case S_IFDIR:
-		    ocfs2_init_inode_seq(inode);
 		    inode->i_op = &ocfs2_dir_iops;
 		    inode->i_fop = &ocfs2_dops;
 		    i_size_write(inode, fe->i_size);
 		    break;
 	    case S_IFLNK:
-		    ocfs2_init_inode_seq(inode);
 		    if (ocfs2_inode_is_fast_symlink(inode))
 			inode->i_op = &ocfs2_fast_symlink_inode_operations;
 		    else
@@ -298,6 +296,7 @@
 		    i_size_write(inode, fe->i_size);
 		    break;
 	    default:
+		    inode->i_op = &ocfs2_special_file_iops;
 		    init_special_inode(inode, inode->i_mode,
 				       inode->i_rdev);
 		    break;
@@ -768,7 +767,7 @@
 			"Clear inode of %"MLFu64" has non empty handle pointer\n",
 			oi->ip_blkno);
 
-	atomic_set(&oi->ip_clean_buffer_seq, 0);
+	oi->ip_clean_buffer_seq = OCFS2_CLEAN_SEQ_CLEAR;
 	oi->ip_flags = 0;
 	oi->ip_created_trans = 0;
 	oi->ip_last_trans = 0;

Modified: trunk/fs/ocfs2/inode.h
===================================================================
--- trunk/fs/ocfs2/inode.h	2005-06-18 00:45:11 UTC (rev 2408)
+++ trunk/fs/ocfs2/inode.h	2005-06-19 00:55:10 UTC (rev 2409)
@@ -55,7 +55,7 @@
 	struct list_head	ip_handle_list;
 	ocfs2_journal_handle	*ip_handle;
 
-	atomic_t		ip_clean_buffer_seq;
+	unsigned int		ip_clean_buffer_seq;
 	u32			ip_flags; /* see below */
 
 	/* protected by recovery_lock. */

Modified: trunk/fs/ocfs2/namei.c
===================================================================
--- trunk/fs/ocfs2/namei.c	2005-06-18 00:45:11 UTC (rev 2408)
+++ trunk/fs/ocfs2/namei.c	2005-06-19 00:55:10 UTC (rev 2409)
@@ -55,6 +55,7 @@
 #include "inode.h"
 #include "journal.h"
 #include "namei.h"
+#include "seqnum.h"
 #include "suballoc.h"
 #include "symlink.h"
 #include "sysfile.h"
@@ -280,7 +281,7 @@
 	}
 
 	set_buffer_uptodate(new_bh);
-	ocfs2_set_bh_seqnum(inode, new_bh);
+	ocfs2_set_bh_sequence(inode, new_bh);
 	status = ocfs2_journal_access(handle, inode, new_bh, 
 				      OCFS2_JOURNAL_ACCESS_CREATE);
 	if (status < 0) {
@@ -437,8 +438,6 @@
 	}
 
 	fe = (ocfs2_dinode *)new_fe_bh->b_data;
-
-	OCFS2_I(inode)->ip_blkno = fe->i_blkno;
 	if (ocfs2_populate_inode(inode, fe, 1) < 0) {
 		mlog(ML_ERROR, "populate inode failed! bh->b_blocknr=%llu, "
 			       "i_blkno=%"MLFu64", i_ino=%lu\n",
@@ -545,6 +544,10 @@
 		goto leave;
 	}
 
+	/* Set block number as early as possible - many of the
+	 * functions here want to print it during debugging. */
+	OCFS2_I(inode)->ip_blkno = fe_blkno;
+
 	*new_fe_bh = sb_getblk(osb->sb, fe_blkno);
 	if (!*new_fe_bh) {
 		status = -EIO;
@@ -552,8 +555,10 @@
 		goto leave;
 	}
 	set_buffer_uptodate(*new_fe_bh);
-	ocfs2_set_bh_seqnum(inode, *new_fe_bh);
 
+	ocfs2_new_inode_sequence(inode);
+	ocfs2_set_bh_sequence(inode, *new_fe_bh);
+
 	status = ocfs2_journal_access(handle, inode, *new_fe_bh,
 				      OCFS2_JOURNAL_ACCESS_CREATE);
 	if (status < 0) {
@@ -584,9 +589,6 @@
 	if (S_ISCHR(mode) || S_ISBLK(mode))
 		fe->id1.dev1.i_rdev = cpu_to_le64(huge_encode_dev(dev));
 
-	if (S_ISLNK(mode) || S_ISDIR(mode) || S_ISREG(mode)) 
-		ocfs2_init_inode_seq(inode);
-
 	if (S_ISDIR(mode))
 		fe->i_links_count = 2;
 	else
@@ -1519,7 +1521,7 @@
 		}
 
 		set_buffer_uptodate(bhs[virtual]);
-		ocfs2_set_bh_seqnum(inode, bhs[virtual]);
+		ocfs2_set_bh_sequence(inode, bhs[virtual]);
 		status = ocfs2_journal_access(handle, inode, bhs[virtual], 
 					      OCFS2_JOURNAL_ACCESS_CREATE);
 		if (status < 0) {
@@ -1663,8 +1665,6 @@
 	}
 
 	fe = (ocfs2_dinode *) new_fe_bh->b_data;
-
-	OCFS2_I(inode)->ip_blkno = fe->i_blkno;
 	if (ocfs2_populate_inode(inode, fe, 1) < 0) {
 		mlog(ML_ERROR, "populate inode failed! bh->b_blocknr=%llu, "
 		     "i_blkno=%"MLFu64", i_ino=%lu\n",

Modified: trunk/fs/ocfs2/ocfs2.h
===================================================================
--- trunk/fs/ocfs2/ocfs2.h	2005-06-18 00:45:11 UTC (rev 2408)
+++ trunk/fs/ocfs2/ocfs2.h	2005-06-19 00:55:10 UTC (rev 2409)
@@ -211,7 +211,7 @@
 	wait_queue_head_t checkpoint_event;
 	atomic_t needs_checkpoint;
 	struct _ocfs2_journal *journal;
-	atomic_t clean_buffer_seq;
+	unsigned int osb_clean_buffer_seq;
 
 	enum ocfs2_local_alloc_state local_alloc_state;
 	struct buffer_head *local_alloc_bh;

Added: trunk/fs/ocfs2/seqnum.c
===================================================================
--- trunk/fs/ocfs2/seqnum.c	2005-06-18 00:45:11 UTC (rev 2408)
+++ trunk/fs/ocfs2/seqnum.c	2005-06-19 00:55:10 UTC (rev 2409)
@@ -0,0 +1,184 @@
+/* -*- mode: c; c-basic-offset: 8; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
+ * seqnum.c
+ *
+ * Caching sequence number handling
+ *
+ * Standard buffer head caching flags (uptodate, etc) are insufficient
+ * in a clustered environment - a buffer may be marked up to date on
+ * our local node but could have been modified by another cluster
+ * member. As a result an additional (and performant) caching scheme
+ * is required. OCFS2 uses sequence numbers, stored on the inode and
+ * buffer heads to test whether a buffer needs to be read from disk -
+ * when a new cluster lock is aquired on an inode, it's sequence
+ * number is incremented. Additionally, a buffer undergoing
+ * modification is automatically marked by jbd and we can make liberal
+ * use of buffer_jbd to shorcut many checks -- if it's in the journal,
+ * then it *must* be up to date as we do not allow a metadata block to
+ * be modified by multiple nodes at a time.
+ *
+ * Copyright (C) 2002, 2004, 2005 Oracle.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ * 
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+
+#include <linux/fs.h>
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/highmem.h>
+
+#define MLOG_MASK_PREFIX ML_SEQNUM
+
+#include <cluster/masklog.h>
+
+#include "ocfs2.h"
+
+#include "inode.h"
+#include "seqnum.h"
+
+/* Sequence numbers are stored in 3 places: 
+ *
+ * bh->b_state - Set from inode sequence. If it differs from the inode
+ * sequence, then the buffer is considered out of date.
+ *
+ * ip_clean_buffer_seq - The "master" number, used to compare against
+ * buffer heads.
+ *
+ * osb_clean_buffer_seq - A globally incrementing value, we set inode
+ * sequence numbers to it. 
+ */
+
+/* Number of BH bits used up through JBD. The number of unused bits
+ * determine the maximum size of our sequence numbers. */
+#define OCFS2_USED_BH_BITS	22
+#define OCFS2_MAX_BH_BITS	(8 * sizeof(((struct buffer_head *) 0)->b_state))
+#define OCFS2_STATE_BH_BITS	(OCFS2_MAX_BH_BITS - OCFS2_USED_BH_BITS)
+
+#define OCFS2_MAX_SEQUENCE	(1 << OCFS2_STATE_BH_BITS)
+#define OCFS2_MAX_SEQUENCE_MASK	((1 << OCFS2_STATE_BH_BITS) - 1)
+#define OCFS2_SEQUENCE_MASK	((~0UL) << OCFS2_USED_BH_BITS)
+
+static spinlock_t ocfs2_clean_buffer_lock = SPIN_LOCK_UNLOCKED;
+
+static void __ocfs2_inc_inode_sequence(struct inode *inode,
+				       int inc_global)
+{
+	ocfs2_super *osb = OCFS2_SB(inode->i_sb);
+	struct ocfs2_inode_info *oi = OCFS2_I(inode);
+
+	assert_spin_locked(&ocfs2_clean_buffer_lock);
+
+	/* Though it's not currently likely, we want to handle the
+	 * case where the first inode to come through here is a newly
+	 * created one - hence the check for OCFS2_CLEAN_SEQ_CLEAR */
+	if (inc_global ||
+	    osb->osb_clean_buffer_seq == OCFS2_CLEAN_SEQ_CLEAR) {
+		osb->osb_clean_buffer_seq++;
+
+		/* We are careful not to wrap the osb sequence */
+		if (osb->osb_clean_buffer_seq >= OCFS2_MAX_SEQUENCE)
+			osb->osb_clean_buffer_seq = OCFS2_CLEAN_SEQ_START;
+	}
+
+	/* Set this from the current osb sequence number. This helps
+	 * prevent inode sequence numbers from repeating themselves
+	 * too soon. */
+	oi->ip_clean_buffer_seq = osb->osb_clean_buffer_seq;
+
+	mlog(0, "(%u) Inode %"MLFu64", seq: %u\n", current->pid,
+	     OCFS2_I(inode)->ip_blkno, oi->ip_clean_buffer_seq);
+}
+
+void ocfs2_inc_inode_sequence(struct inode *inode)
+{
+	spin_lock(&ocfs2_clean_buffer_lock);
+	__ocfs2_inc_inode_sequence(inode, 1);
+	spin_unlock(&ocfs2_clean_buffer_lock);
+}
+
+/* Called only on newly created inodes. */
+void ocfs2_new_inode_sequence(struct inode *inode)
+{
+	BUG_ON(OCFS2_I(inode)->ip_clean_buffer_seq);
+
+	spin_lock(&ocfs2_clean_buffer_lock);
+	/* We don't increment the global sequence number on brand new
+	 * inodes -- they won't have any existing metadata buffers
+	 * which might be old. */
+	__ocfs2_inc_inode_sequence(inode, 0);
+	spin_unlock(&ocfs2_clean_buffer_lock);
+}
+
+/* Completely point in time, which is fine for buffer_head comparsions
+ * as they should be done under cluster lock in which case the
+ * sequence won't be incrememnted. */
+static inline unsigned int ocfs2_get_inode_sequence(struct inode *inode)
+{
+	unsigned int ret;
+	struct ocfs2_inode_info *oi = OCFS2_I(inode);
+
+	spin_lock(&ocfs2_clean_buffer_lock);
+	if (oi->ip_clean_buffer_seq >= OCFS2_MAX_SEQUENCE)
+		mlog(ML_ERROR, "Inode %"MLFu64" has bad sequence: %u\n",
+		     oi->ip_blkno, oi->ip_clean_buffer_seq);
+
+	ret = oi->ip_clean_buffer_seq & OCFS2_MAX_SEQUENCE_MASK;
+	spin_unlock(&ocfs2_clean_buffer_lock);
+
+	return ret;
+}
+
+/* The sequence numbers on buffer_head are protected by inodes
+ * ip_io_sem - this also guards access to that buffer by the journal
+ * (via ocfs2_journal_access), so we don't change the buffer state
+ * bits at the same time as JBD, which will cause many hard to find
+ * problems. Once the buffer has been passed to the journal, we can
+ * test that condition via buffer_jbd and avoid changing state
+ * bits.
+ */
+
+void ocfs2_clear_bh_sequence(struct buffer_head *bh)
+{
+	unsigned int prev = bh->b_state & OCFS2_SEQUENCE_MASK;
+	bh->b_state &= ~prev;
+}
+
+/* For a newly created inode (e.g., one that's in read_locked_inode)
+ * it's sequence number will be zero (uninitialized), until a cluster
+ * lock is later acquired. That's ok though because we never want to
+ * trust buffers for unlocked inodes. 
+ *
+ * This function is also called against newly allocated inode
+ * metadata */
+void ocfs2_set_bh_sequence(struct inode *inode,
+			   struct buffer_head *bh)
+{
+	unsigned int seq =
+		ocfs2_get_inode_sequence(inode) << OCFS2_USED_BH_BITS;
+
+	ocfs2_clear_bh_sequence(bh);
+	bh->b_state |= seq;
+}
+
+int ocfs2_test_bh_sequence(struct inode *inode,
+			   struct buffer_head *bh)
+{
+	unsigned int seq =
+		(bh->b_state & OCFS2_SEQUENCE_MASK) >> OCFS2_USED_BH_BITS;
+
+	return seq == ocfs2_get_inode_sequence(inode);
+}

Added: trunk/fs/ocfs2/seqnum.h
===================================================================
--- trunk/fs/ocfs2/seqnum.h	2005-06-18 00:45:11 UTC (rev 2408)
+++ trunk/fs/ocfs2/seqnum.h	2005-06-19 00:55:10 UTC (rev 2409)
@@ -0,0 +1,41 @@
+/* -*- mode: c; c-basic-offset: 8; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
+ * seqnum.h
+ *
+ * Caching sequence number handling
+ *
+ * Copyright (C) 2002, 2004, 2005 Oracle.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ * 
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+
+#ifndef OCFS2_SEQNUM_H
+#define OCFS2_SEQNUM_H
+
+#define OCFS2_CLEAN_SEQ_CLEAR 0
+#define OCFS2_CLEAN_SEQ_START 1
+
+void ocfs2_inc_inode_sequence(struct inode *inode);
+void ocfs2_new_inode_sequence(struct inode *inode);
+
+void ocfs2_clear_bh_sequence(struct buffer_head *bh);
+void ocfs2_set_bh_sequence(struct inode *inode,
+			   struct buffer_head *bh);
+int ocfs2_test_bh_sequence(struct inode *inode,
+			   struct buffer_head *bh);
+
+#endif /* OCFS2_SEQNUM_H */

Modified: trunk/fs/ocfs2/suballoc.c
===================================================================
--- trunk/fs/ocfs2/suballoc.c	2005-06-18 00:45:11 UTC (rev 2408)
+++ trunk/fs/ocfs2/suballoc.c	2005-06-19 00:55:10 UTC (rev 2409)
@@ -39,6 +39,7 @@
 #include "inode.h"
 #include "journal.h"
 #include "localalloc.h"
+#include "seqnum.h"
 #include "suballoc.h"
 #include "sysfile.h"
 
@@ -274,7 +275,7 @@
 		goto bail;
 	}
 	set_buffer_uptodate(bg_bh);
-	ocfs2_set_bh_seqnum(alloc_inode, bg_bh);
+	ocfs2_set_bh_sequence(alloc_inode, bg_bh);
 
 	status = ocfs2_block_group_fill(handle, 
 					alloc_inode, 

Modified: trunk/fs/ocfs2/super.c
===================================================================
--- trunk/fs/ocfs2/super.c	2005-06-18 00:45:11 UTC (rev 2408)
+++ trunk/fs/ocfs2/super.c	2005-06-19 00:55:10 UTC (rev 2409)
@@ -57,6 +57,7 @@
 #include "journal.h"
 #include "localalloc.h"
 #include "proc.h"
+#include "seqnum.h"
 #include "slot_map.h"
 #include "super.h"
 #include "sysfile.h"
@@ -624,7 +625,7 @@
 	if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) ==
 	    SLAB_CTOR_CONSTRUCTOR) {
 		oi->ip_flags = 0;
-		atomic_set(&oi->ip_clean_buffer_seq, 0);
+		oi->ip_clean_buffer_seq = OCFS2_CLEAN_SEQ_CLEAR;
 		oi->ip_open_count = 0;
 		spin_lock_init(&oi->ip_lock);
 		ocfs2_extent_map_init(&oi->vfs_inode);
@@ -1092,7 +1093,7 @@
 
 	init_waitqueue_head(&osb->checkpoint_event);
 	atomic_set(&osb->needs_checkpoint, 0);
-	atomic_set(&osb->clean_buffer_seq, 1);
+	osb->osb_clean_buffer_seq = OCFS2_CLEAN_SEQ_CLEAR;
 
 	osb->node_num = O2NM_INVALID_NODE_NUM;
 	osb->slot_num = OCFS2_INVALID_SLOT;



More information about the Ocfs2-commits mailing list