[Ocfs2-commits] mfasheh commits r2644 - trunk/fs/ocfs2

svn-commits at oss.oracle.com svn-commits at oss.oracle.com
Tue Oct 11 15:41:57 CDT 2005


Author: mfasheh
Signed-off-by: jlbec
Signed-off-by: zab
Date: 2005-10-11 15:41:55 -0500 (Tue, 11 Oct 2005)
New Revision: 2644

Modified:
   trunk/fs/ocfs2/alloc.c
   trunk/fs/ocfs2/dlmglue.c
   trunk/fs/ocfs2/dlmglue.h
   trunk/fs/ocfs2/ocfs2.h
Log:
* Fix a pair of LVB issues:
  - i_size_off was prone to overflow. Just stuff a full-blown i_size
    instead.
  - lvb sequence numbers could wrap back to a previous value after a node
    death (as the sequence is reset to zero then), leading to a bad value
    for trustable_clusters. Don't use the sequence numbers anymore and dump
    trustable_clusters.

This changes the lvb contents in a backwards compatible way (old versions
won't trust the new lvb and new versions won't trust the old lvb), so
rolling upgrades should be possible.

Signed-off-by: jlbec
Signed-off-by: zab



Modified: trunk/fs/ocfs2/alloc.c
===================================================================
--- trunk/fs/ocfs2/alloc.c	2005-10-07 22:06:05 UTC (rev 2643)
+++ trunk/fs/ocfs2/alloc.c	2005-10-11 20:41:55 UTC (rev 2644)
@@ -1777,9 +1777,6 @@
 	target_i_clusters = ocfs2_clusters_for_bytes(osb->sb,
 						     i_size_read(inode));
 
-	/* the extent map gets truncated in ocfs2_do_truncate */
-	ocfs2_meta_lvb_set_trunc_clusters(inode, target_i_clusters);
-
 	last_eb_bh = tc->tc_last_eb_bh;
 	tc->tc_last_eb_bh = NULL;
 

Modified: trunk/fs/ocfs2/dlmglue.c
===================================================================
--- trunk/fs/ocfs2/dlmglue.c	2005-10-07 22:06:05 UTC (rev 2643)
+++ trunk/fs/ocfs2/dlmglue.c	2005-10-11 20:41:55 UTC (rev 2644)
@@ -181,14 +181,6 @@
 } while (0)
 static void ocfs2_vote_on_unlock(ocfs2_super *osb,
 				 struct ocfs2_lock_res *lockres);
-/* Called after we refresh our inode, only has any effect if we have
- * an EX lock. This populates the LVB with the initial values for our
- * change set. */
-static void ocfs2_reset_meta_lvb_values(struct inode *inode);
-static void __ocfs2_stuff_meta_lvb(struct inode *inode);
-static void ocfs2_refresh_inode_from_lvb(struct inode *inode);
-static void __ocfs2_lvb_on_downconvert(struct ocfs2_lock_res *lockres,
-				       int new_level);
 static int ocfs2_meta_lock_update(struct inode *inode,
 				  struct buffer_head **bh);
 static void ocfs2_drop_osb_locks(ocfs2_super *osb);
@@ -203,67 +195,6 @@
 						  struct ocfs2_lock_res *lockres,
 						  int new_level);
 
-static inline int ocfs2_lvb_is_trustable(struct ocfs2_lock_res *lockres)
-{
-	struct ocfs2_lvb *lvb = (struct ocfs2_lvb *) lockres->l_lksb.lvb;
-	unsigned int lvb_seq;
-	int ret = 0;
-
-	mlog_entry_void();
-
-	spin_lock(&lockres->l_lock);
-
-	lvb_seq = be32_to_cpu(lvb->lvb_seq);
-	if (lvb_seq &&
-	    lockres->l_local_seq == lvb_seq)
-		ret = 1;
-
-	spin_unlock(&lockres->l_lock);
-
-	mlog_exit(ret);
-	return ret;
-}
-
-static inline void ocfs2_set_local_seq_from_lvb(struct ocfs2_lock_res *lockres)
-{
-	struct ocfs2_lvb *lvb = (struct ocfs2_lvb *) lockres->l_lksb.lvb;
-	unsigned int lvb_seq;
-
-	mlog_entry_void();
-
-	spin_lock(&lockres->l_lock);
-
-	lvb_seq = be32_to_cpu(lvb->lvb_seq);
-	if (lvb_seq)
-		lockres->l_local_seq = lvb_seq;
-
-	spin_unlock(&lockres->l_lock);
-
-	mlog_exit_void();
-}
-
-/* fill in new values as we add them to the lvb. */
-static inline void ocfs2_meta_lvb_get_trunc_clusters(struct ocfs2_lock_res *lockres,
-						     unsigned int *trunc_clusters)
-{
-	struct ocfs2_meta_lvb *lvb;
-
-	mlog_entry_void();
-
-	BUG_ON(lockres->l_type != OCFS2_LOCK_TYPE_META);
-
-	spin_lock(&lockres->l_lock);
-	BUG_ON(lockres->l_level <= LKM_NLMODE);
-
-	lvb = (struct ocfs2_meta_lvb *) lockres->l_lksb.lvb;
-	if (trunc_clusters)
-		*trunc_clusters = be32_to_cpu(lvb->lvb_trunc_clusters);
-
-	spin_unlock(&lockres->l_lock);
-
-	mlog_exit_void();
-}
-
 static void ocfs2_build_lock_name(enum ocfs2_lock_type type,
 				  u64 blkno,
 				  u32 generation,
@@ -380,7 +311,6 @@
 			"Lockres %s has %u ex holders\n",
 			res->l_name, res->l_ex_holders);
 
-	res->l_local_seq = 0;
 	/* Need to clear out the lock status block for the dlm */
 	memset(&res->l_lksb, 0, sizeof(res->l_lksb));
 
@@ -1267,12 +1197,9 @@
 
 /* Call this with the lockres locked. I am reasonably sure we don't
  * need ip_lock in this function as anyone who would be changing those
- * values is supposed to be blocked in ocfs2_meta_lock right now.
- * This doesn't stuff trunc_clusters as that should already be up to
- * date from the truncate code. */
+ * values is supposed to be blocked in ocfs2_meta_lock right now. */
 static void __ocfs2_stuff_meta_lvb(struct inode *inode)
 {
-	unsigned int isize_off;
 	struct ocfs2_inode_info *oi = OCFS2_I(inode);
 	struct ocfs2_lock_res *lockres = &oi->ip_meta_lockres;
 	struct ocfs2_meta_lvb *lvb;
@@ -1281,13 +1208,12 @@
 
 	lvb = (struct ocfs2_meta_lvb *) lockres->l_lksb.lvb;
 
-	/* take advantage of the fact that we don't support holes --
-	 * we only need the byte offset from i_clusters which nicely
-	 * cuts our lvb storage requirement to a u32 */
-	isize_off = ocfs2_clusters_to_bytes(inode->i_sb, oi->ip_clusters)
-		- i_size_read(inode);
+	/* Setting this to zero will ensure that old versions of the
+	 * LVB code don't trust our information. */
+	lvb->lvb_old_seq   = cpu_to_be32(0);
+	lvb->lvb_version   = cpu_to_be32(OCFS2_LVB_VERSION);
 
-	lvb->lvb_isize_off = cpu_to_be32(isize_off);
+	lvb->lvb_isize     = cpu_to_be64(i_size_read(inode));
 	lvb->lvb_iclusters = cpu_to_be32(oi->ip_clusters);
 	lvb->lvb_iuid      = cpu_to_be32(inode->i_uid);
 	lvb->lvb_igid      = cpu_to_be32(inode->i_gid);
@@ -1314,7 +1240,6 @@
 
 static void ocfs2_refresh_inode_from_lvb(struct inode *inode)
 {
-	loff_t isize;
 	struct ocfs2_inode_info *oi = OCFS2_I(inode);
 	struct ocfs2_lock_res *lockres = &oi->ip_meta_lockres;
 	struct ocfs2_meta_lvb *lvb;
@@ -1328,11 +1253,8 @@
 	/* We're safe here without the lockres lock... */
 	spin_lock(&oi->ip_lock);
 	oi->ip_clusters = be32_to_cpu(lvb->lvb_iclusters);
+	i_size_write(inode, be64_to_cpu(lvb->lvb_isize));
 
-	isize = ocfs2_clusters_to_bytes(inode->i_sb, oi->ip_clusters)
-		- be32_to_cpu(lvb->lvb_isize_off);
-	i_size_write(inode, isize);
-
 	/* fast-symlinks are a special case */
 	if (S_ISLNK(inode->i_mode) && !oi->ip_clusters)
 		inode->i_blocks = 0;
@@ -1355,53 +1277,20 @@
 	mlog_exit_void();
 }
 
-static void ocfs2_reset_meta_lvb_values(struct inode *inode)
+static inline int ocfs2_meta_lvb_is_trustable(struct ocfs2_lock_res *lockres)
 {
-	struct ocfs2_lock_res *lockres = &OCFS2_I(inode)->ip_meta_lockres;
-	struct ocfs2_meta_lvb *lvb;
-	u32 i_clusters;
+	struct ocfs2_meta_lvb *lvb = (struct ocfs2_meta_lvb *) lockres->l_lksb.lvb;
 
-	mlog_entry_void();
-
-	lvb = (struct ocfs2_meta_lvb *) lockres->l_lksb.lvb;
-
-	spin_lock(&OCFS2_I(inode)->ip_lock);
-	i_clusters = OCFS2_I(inode)->ip_clusters;
-	spin_unlock(&OCFS2_I(inode)->ip_lock);
-
-	spin_lock(&lockres->l_lock);
-	if (lockres->l_level == LKM_EXMODE)
-		lvb->lvb_trunc_clusters = cpu_to_be32(i_clusters);
-	spin_unlock(&lockres->l_lock);
-
-	mlog_exit_void();
+	/* Old OCFS2 versions stored a "sequence" in the lvb to
+	 * determine whether the information could be trusted. We
+	 * don't want to use an lvb populated from a node running the
+	 * old code, so check that sequence is not set. */
+	if (!lvb->lvb_old_seq &&
+	    be32_to_cpu(lvb->lvb_version) == OCFS2_LVB_VERSION)
+		return 1;
+	return 0;
 }
 
-static void __ocfs2_lvb_on_downconvert(struct ocfs2_lock_res *lockres,
-				       int new_level)
-{
-	struct ocfs2_lvb *lvb = (struct ocfs2_lvb *) lockres->l_lksb.lvb;
-	u32 lvb_seq = be32_to_cpu(lvb->lvb_seq);
-
-	mlog_entry_void();
-
-	if (lockres->l_level == LKM_EXMODE) {
-		lvb_seq++;
-		/* Overflow? */
-		if (!lvb_seq)
-			lvb_seq = 1;
-		lockres->l_local_seq = lvb_seq;
-		if (new_level == LKM_NLMODE)
-			lockres->l_local_seq++;
-		lvb->lvb_seq = cpu_to_be32(lvb_seq);
-	} else if (lockres->l_level == LKM_PRMODE) {
-		if (lvb_seq)
-			lockres->l_local_seq++;
-	}
-
-	mlog_exit_void();
-}
-
 /* Determine whether a lock resource needs to be refreshed, and
  * arbitrate who gets to refresh it.
  *
@@ -1462,7 +1351,6 @@
 				  struct buffer_head **bh)
 {
 	int status = 0;
-	u32 trustable_clusters = 0;
 	struct ocfs2_inode_info *oi = OCFS2_I(inode);
 	struct ocfs2_lock_res *lockres;
 	ocfs2_dinode *fe;
@@ -1489,10 +1377,13 @@
 	 * for the inode metadata. */
 	ocfs2_metadata_cache_purge(inode);
 
-	if (ocfs2_lvb_is_trustable(lockres)) {
-		/* yay, fastpath! */
-		ocfs2_meta_lvb_get_trunc_clusters(lockres,
-						  &trustable_clusters);
+	/* will do nothing for inode types that don't use the extent
+	 * map (directories, bitmap files, etc) */
+	ocfs2_extent_map_trunc(inode, 0);
+
+	if (ocfs2_meta_lvb_is_trustable(lockres)) {
+		mlog(0, "Trusting LVB on inode %"MLFu64"\n",
+		     oi->ip_blkno);
 		ocfs2_refresh_inode_from_lvb(inode);
 	} else {
 		/* Boo, we have to go to disk. */
@@ -1534,16 +1425,6 @@
 		ocfs2_refresh_inode(inode, fe);
 	}
 
-	mlog(0, "inode %"MLFu64", I can only trust %u clusters\n",
-	     OCFS2_I(inode)->ip_blkno, trustable_clusters);
-
-	/* will do nothing for inode types that don't use the extent
-	 * map (directories, bitmap files, etc) */
-	ocfs2_extent_map_trunc(inode, trustable_clusters);
-
-	ocfs2_set_local_seq_from_lvb(lockres);
-	ocfs2_reset_meta_lvb_values(inode);
-
 #ifdef OCFS2_DELETE_INODE_WORKAROUND
 	/* We might as well check this here - since the inode is now
 	 * locked, an up to date view will indicate whether this was
@@ -2072,12 +1953,10 @@
 
 	/* the metadata lock requires a bit more work as we have an
 	 * LVB to worry about. */
-	if (lockres->l_flags & OCFS2_LOCK_ATTACHED) {
-		if (lockres->l_level == LKM_EXMODE)
-			__ocfs2_stuff_meta_lvb(inode);
-		/* fake an NLMODE downconvert for the lvb code. */
-		__ocfs2_lvb_on_downconvert(lockres, LKM_NLMODE);
-	}
+	if (lockres->l_flags & OCFS2_LOCK_ATTACHED &&
+	    lockres->l_level == LKM_EXMODE &&
+	    !(lockres->l_flags & OCFS2_LOCK_NEEDS_REFRESH))
+		__ocfs2_stuff_meta_lvb(inode);
 }
 
 int ocfs2_drop_inode_locks(struct inode *inode)
@@ -2284,14 +2163,13 @@
 		if (lockres->l_level == LKM_EXMODE)
 			set_lvb = 1;
 
-		/* If the lock hasn't been refreshed yet (rare), then our
-		 * memory inode values are old and we skip stuffing
-		 * the lvb. Additionally we cannot incrememnt the lvb
-		 * sequence numbers */
+		/* If the lock hasn't been refreshed yet (rare), then
+		 * our memory inode values are old and we skip
+		 * stuffing the lvb. There's no need to actually clear
+		 * out the lvb here as its value is still valid. */
 		if (!(lockres->l_flags & OCFS2_LOCK_NEEDS_REFRESH)) {
 			if (set_lvb)
 				__ocfs2_stuff_meta_lvb(inode);
-			__ocfs2_lvb_on_downconvert(lockres, new_level);
 		} else
 			mlog(0, "lockres %s: downconverting stale lock!\n",
 			     lockres->l_name);
@@ -2573,21 +2451,6 @@
 	mlog_exit_void();
 }
 
-void ocfs2_meta_lvb_set_trunc_clusters(struct inode *inode,
-				       unsigned int trunc_clusters)
-{
-	struct ocfs2_lock_res *lockres = &OCFS2_I(inode)->ip_meta_lockres;
-	struct ocfs2_meta_lvb *lvb;
-
-	spin_lock(&lockres->l_lock);
-	BUG_ON(lockres->l_level != LKM_EXMODE);
-
-	lvb = (struct ocfs2_meta_lvb *) lockres->l_lksb.lvb;
-	if (be32_to_cpu(lvb->lvb_trunc_clusters) > trunc_clusters)
-		lvb->lvb_trunc_clusters = cpu_to_be32(trunc_clusters);
-	spin_unlock(&lockres->l_lock);
-}
-
 /* This aids in debugging situations where a bad LVB might be involved. */
 void ocfs2_dump_meta_lvb_info(u64 level,
 			      const char *function,
@@ -2598,12 +2461,11 @@
 
 	mlog(level, "LVB information for %s (called from %s:%u):\n",
 	     lockres->l_name, function, line);
-	mlog(level, "Lockres local seq: %u, seq: %u, size_off: %u, "
-	     "clusters: %u\n",
-	     lockres->l_local_seq, be32_to_cpu(lvb->lvb.lvb_seq),
-	     be32_to_cpu(lvb->lvb_isize_off),  be32_to_cpu(lvb->lvb_iclusters));
-	mlog(level, "trunc clusters %u, uid %u, gid %u, mode 0x%x\n",
-	     be32_to_cpu(lvb->lvb_trunc_clusters), be32_to_cpu(lvb->lvb_iuid),
+	mlog(level, "old_seq: %u, version: %u, clusters: %u\n",
+	     be32_to_cpu(lvb->lvb_old_seq), be32_to_cpu(lvb->lvb_version),
+	     be32_to_cpu(lvb->lvb_iclusters));
+	mlog(level, "size: %"MLFu64", uid %u, gid %u, mode 0x%x\n",
+	     be64_to_cpu(lvb->lvb_isize), be32_to_cpu(lvb->lvb_iuid),
 	     be32_to_cpu(lvb->lvb_igid), be16_to_cpu(lvb->lvb_imode));
 	mlog(level, "nlink %u, atime_packed 0x%"MLFx64", "
 	     "ctime_packed 0x%"MLFx64", mtime_packed 0x%"MLFx64"\n",

Modified: trunk/fs/ocfs2/dlmglue.h
===================================================================
--- trunk/fs/ocfs2/dlmglue.h	2005-10-07 22:06:05 UTC (rev 2643)
+++ trunk/fs/ocfs2/dlmglue.h	2005-10-11 20:41:55 UTC (rev 2644)
@@ -27,45 +27,11 @@
 #ifndef DLMGLUE_H
 #define DLMGLUE_H
 
-/*
- * LVB Sequence number rules:
- * local seq and lvb seq are initialized to zero.
- *
- * Note that the lvb is basically invalid until the 1st EX downconvert
- * as he's the only guy that can set it valid. This is ok though as PR
- * holders would have to do an I/O under lock anyway.
- *
- * NL->PR:
- * NL->EX:
- * If LVB is valid:
- *   if local seq == lvb seq, then we are up to date with the contents.
- *   otherwise, we take the slow path to get up to date and then set our
- *   local seq to the lvb seq.
- *
- * PR->NL:
- * If LVB is valid:
- *   We increment our local seq. -- this allows up to
- *   one set of changes to the lvb before we considers ourselves
- *   invalid.
- *
- * PR->EX:
- *   Do nothing.
- *
- * EX->NL:
- * EX->PR:
- * Set the LVB as valid.
- * Populate the LVB contents (this is lock type specific)
- * Increment the LVB seq.
- * Set my local seq to the LVB seq.
- * if (EX->NL)
- *   do an additional increment of my local seq.
- */
-struct ocfs2_lvb {
-	__be32 lvb_seq;
-};
+#define OCFS2_LVB_VERSION 1
+
 struct ocfs2_meta_lvb {
-	struct ocfs2_lvb lvb;
-	__be32       lvb_trunc_clusters;
+	__be32       lvb_old_seq;
+	__be32       lvb_version;
 	__be32       lvb_iclusters;
 	__be32       lvb_iuid;
 	__be32       lvb_igid;
@@ -74,8 +40,8 @@
 	__be64       lvb_iatime_packed;
 	__be64       lvb_ictime_packed;
 	__be64       lvb_imtime_packed;
-	__be32       lvb_isize_off;
-	__be32       lvb_reserved[3];
+	__be64       lvb_isize;
+	__be32       lvb_reserved[2];
 };
 
 int ocfs2_dlm_init(ocfs2_super *osb);
@@ -119,8 +85,6 @@
 void ocfs2_process_blocked_lock(ocfs2_super *osb,
 				struct ocfs2_lock_res *lockres);
 
-void ocfs2_meta_lvb_set_trunc_clusters(struct inode *inode,
-				       unsigned int trunc_clusters);
 /* aids in debugging and tracking lvbs */
 void ocfs2_dump_meta_lvb_info(u64 level,
 			      const char *function,

Modified: trunk/fs/ocfs2/ocfs2.h
===================================================================
--- trunk/fs/ocfs2/ocfs2.h	2005-10-07 22:06:05 UTC (rev 2643)
+++ trunk/fs/ocfs2/ocfs2.h	2005-10-11 20:41:55 UTC (rev 2644)
@@ -134,7 +134,6 @@
 	unsigned int             l_ro_holders;
 	unsigned int             l_ex_holders;
 	struct dlm_lockstatus    l_lksb;
-	u32                      l_local_seq;
 
 	/* used from AST/BAST funcs. */
 	enum ocfs2_ast_action    l_action;



More information about the Ocfs2-commits mailing list