[Ocfs2-commits] mfasheh commits r1438 - trunk/src

svn-commits at oss.oracle.com svn-commits at oss.oracle.com
Wed Sep 8 19:32:28 CDT 2004


Author: mfasheh
Date: 2004-09-08 19:32:27 -0500 (Wed, 08 Sep 2004)
New Revision: 1438

Modified:
   trunk/src/buffer_head_io.c
   trunk/src/buffer_head_io.h
   trunk/src/dlm.c
   trunk/src/dlm.h
   trunk/src/nm.c
Log:
* Don't need io_sem in process_vote anymore. It's a serious bug for us
  to change master when there are transactions against a buffer, and we
  protect against dirty readers blowing away our lock state by changing
  it manually under lock_buffer.



Modified: trunk/src/buffer_head_io.c
===================================================================
--- trunk/src/buffer_head_io.c	2004-09-08 23:53:47 UTC (rev 1437)
+++ trunk/src/buffer_head_io.c	2004-09-09 00:32:27 UTC (rev 1438)
@@ -44,8 +44,8 @@
 
 #define OCFS_DEBUG_CONTEXT    OCFS_DEBUG_CONTEXT_IO
 
-static void ocfs_end_buffer_io_sync(struct buffer_head *bh,
-				    int uptodate)
+void ocfs_end_buffer_io_sync(struct buffer_head *bh,
+			     int uptodate)
 {
 //	LOG_ENTRY_ARGS("(bh->b_blocknr = %u, uptodate = %d)\n", bh->b_blocknr,
 //		       uptodate);

Modified: trunk/src/buffer_head_io.h
===================================================================
--- trunk/src/buffer_head_io.h	2004-09-08 23:53:47 UTC (rev 1437)
+++ trunk/src/buffer_head_io.h	2004-09-09 00:32:27 UTC (rev 1438)
@@ -38,6 +38,9 @@
 # include <linux/buffer_head.h>
 #endif
 
+void ocfs_end_buffer_io_sync(struct buffer_head *bh,
+			     int uptodate);
+
 /* Yosh made me do it. */
 static inline int ocfs_write_bh  (ocfs_super          *osb, 
 				  struct buffer_head  *bh, 

Modified: trunk/src/dlm.c
===================================================================
--- trunk/src/dlm.c	2004-09-08 23:53:47 UTC (rev 1437)
+++ trunk/src/dlm.c	2004-09-09 00:32:27 UTC (rev 1438)
@@ -54,52 +54,67 @@
 /* Tracing */
 #define OCFS_DEBUG_CONTEXT OCFS_DEBUG_CONTEXT_DLM
 
-
 static inline int ocfs_wait_for_readonly_drop(ocfs_super *osb, struct inode *inode);
-static int ocfs_update_disk_lock (ocfs_super * osb, struct buffer_head *bh, struct inode *inode);
 
 /*
  * ocfs_update_disk_lock()
  * inode is definitely non NULL
  */
-static int ocfs_update_disk_lock (ocfs_super * osb, struct buffer_head *bh, struct inode *inode)
+void ocfs_update_disk_lock (ocfs_super * osb, 
+			    struct buffer_head *bh, 
+			    struct inode *inode)
 {
-	int status = 0;
 	ocfs2_dinode *fe = NULL;
-	int changed = 0;
 	ocfs_lock_res *lockres = GET_INODE_LOCKRES(inode);
 
 	LOG_ENTRY ();
 
-	status = ocfs_read_bh(osb,
-			      OCFS_I(inode)->ip_blkno << inode->i_sb->s_blocksize_bits,
-			      &bh, 0, inode);
-	if (status < 0) {
-		LOG_ERROR_STATUS (status);
-		goto finally;
-	}
-
 	fe = (ocfs2_dinode *) bh->b_data;
 
-	if (DISK_LOCK(fe)->dl_master != lockres->master_node_num) {
-		changed = 1;
-		DISK_LOCK(fe)->dl_master = lockres->master_node_num;
+	/* We do our own I/O here to lock out dirty readers from
+	 * refreshing the bh when we're in the middle of changing
+	 * it. We shouldn't ever get here if it's a journalled buffer
+	 * so io_sem is not necessary. */
+	if (buffer_jbd(bh)) {
+		printk("Ugh, block %llu has the JBD bit set!\n",
+		       (unsigned long long)bh->b_blocknr);
+		BUG();
 	}
 
-	if (DISK_LOCK(fe)->dl_level != lockres->lock_type) {
-		changed = 1;
-		DISK_LOCK(fe)->dl_level = lockres->lock_type;
-	}
+	if ((DISK_LOCK(fe)->dl_master == lockres->master_node_num)
+	    && (DISK_LOCK(fe)->dl_level == lockres->lock_type))
+		goto skip_write;
 
-	if (changed) {
-		status = ocfs_write_bh (osb, bh, inode);
-		if (status < 0) 
-			LOG_ERROR_STATUS (status);
+	lock_buffer(bh);
+
+	if (buffer_jbd(bh)) {
+		printk("Ugh, block %llu has the JBD bit set!\n",
+		       (unsigned long long)bh->b_blocknr);
+		BUG();
 	}
 
-finally:
-	LOG_EXIT_STATUS (status);
-	return status;
+	DISK_LOCK(fe)->dl_master = lockres->master_node_num;
+	DISK_LOCK(fe)->dl_level = lockres->lock_type;
+
+	set_buffer_uptodate(bh);
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)	
+	/*
+	 * mark_buffer_clean() doesn't exist in 2.6.x kernels.
+	 * Not many places actually used mark_buffer_clean, but
+	 * at least reiserfs uses clear_buffer_dirty() as
+	 * a replacment.
+	 */
+	clear_buffer_dirty(bh);
+#else
+	mark_buffer_clean(bh);
+#endif
+	bh->b_end_io = ocfs_end_buffer_io_sync;
+	submit_bh(WRITE, bh);
+	wait_on_buffer(bh);
+	SET_BH_SEQNUM(inode, bh);
+
+skip_write:
+	LOG_EXIT ();
 }				/* ocfs_update_disk_lock */
 
 int ocfs_notify_cluster(ocfs_super *osb, 
@@ -654,9 +669,15 @@
 			       OCFS_I(inode)->ip_blkno, flags);
 			BUG();
 		}
-		status = ocfs_update_disk_lock (osb, bh, inode);
+
+		/* want to refresh the lock from the latest on disk
+		 * state before writing it back out. */
+		status = ocfs_read_bh(osb, lock_id, &bh, 0, inode);
+		if (!status)
+			ocfs_update_disk_lock(osb, bh, inode);
+
 		if (status < 0)
-			LOG_ERROR_STATUS (status);
+			LOG_ERROR_STATUS(status);
 
 		atomic_set(&OCFS_I(inode)->ip_needs_verification, 1);
 	}

Modified: trunk/src/dlm.h
===================================================================
--- trunk/src/dlm.h	2004-09-08 23:53:47 UTC (rev 1437)
+++ trunk/src/dlm.h	2004-09-09 00:32:27 UTC (rev 1438)
@@ -61,4 +61,7 @@
 				   inode, 
 				   FLAG_TRUNCATE_PAGES));
 }
+void ocfs_update_disk_lock (ocfs_super * osb, 
+			    struct buffer_head *bh, 
+			    struct inode *inode);
 #endif /* OCFS2_DLM_H */

Modified: trunk/src/nm.c
===================================================================
--- trunk/src/nm.c	2004-09-08 23:53:47 UTC (rev 1437)
+++ trunk/src/nm.c	2004-09-09 00:32:27 UTC (rev 1438)
@@ -498,41 +498,6 @@
 #define PROCESS_VOTE_TRYLOCK
 
 #ifdef PROCESS_VOTE_TRYLOCK
-static int ocfs_io_sem_write_trylock(struct inode *inode, u32 timeout)
-{
-	unsigned long jif;
-	int status;
-
-	OCFS_ASSERT(inode);
-
-	LOG_ENTRY_ARGS ("(%llu, %u)\n", OCFS_I(inode)->ip_blkno, timeout);
-
-	if (!timeout) {
-		down_write(&OCFS_I(inode)->ip_io_sem);
-		status = 0;
-		goto bail;
-	}
-
-	jif = jiffies + (timeout * HZ / 1000);
-
-	while(1) {
-		if (down_write_trylock(&OCFS_I(inode)->ip_io_sem)) {
-			status = 0;
-			break;
-		}
-
-		if (jif < jiffies) {
-			status = -ETIMEDOUT;
-			break;
-		}
-
-		ocfs_sleep (OCFS_NM_HEARTBEAT_TIME / 10);
-	}
-
-bail:
-	LOG_EXIT_STATUS (status);
-	return status;
-}
 static int ocfs_io_sem_read_trylock(struct inode *inode, u32 timeout)
 {
 	unsigned long jif;
@@ -657,12 +622,10 @@
 	ocfs_lock_res *lockres = NULL;
 	__u32 flags, num_ident;
 	__u16 num_nodes;
-	ocfs2_dinode *fe = NULL;
 	struct buffer_head *fe_bh = NULL;
 	int vote_type = INVALID_REQUEST, vote_response = 0;
 	struct inode *inode = NULL;
 	int master_alive = 1;
-	int lockflags = 0;
 	int inc_inode_seq = 0;
 	int have_io_sem = 0;
 	int change_master_succeeded = 0;
@@ -735,25 +698,6 @@
 		 * Please see the note in ocfs_delete_inode. */
 		osb->voting_ino = inode->i_ino;
 
-#ifdef PROCESS_VOTE_TRYLOCK
-		if (flags & FLAG_CHANGE_MASTER) {
-			if (ocfs_io_sem_write_trylock(inode, 
-						      (OCFS_NM_HEARTBEAT_TIME/2))) {
-#ifdef VERBOSE_PROCESS_VOTE
-				printk("process_vote: Could not get i/o lock "
-				       "on inode %llu, flags 0x%x\n",
-				       OCFS_I(inode)->ip_blkno, flags);
-#endif
-			} else 
-				have_io_sem = 1;
-		}
-#else
-		if (flags & FLAG_CHANGE_MASTER) {
-			down_write(&OCFS_I(inode)->ip_io_sem);
-			have_io_sem = 1;
-		}
-#endif
-
 		lockres = GET_INODE_LOCKRES(inode);
 
 		status = ocfs_acquire_lockres (lockres, (OCFS_NM_HEARTBEAT_TIME/2)); // ocfs_process_vote
@@ -872,17 +816,6 @@
 			goto vote;
 		}
 
-		if (!have_io_sem) {
-#ifdef VERBOSE_PROCESS_VOTE
-			printk("process_vote: We haven't got io_sem on"
-			       " Lock id (%llu)\n",
-			       OCFS_I(inode)->ip_blkno << inode->i_sb->s_blocksize_bits);
-#endif
-			vote_response = FLAG_VOTE_UPDATE_RETRY;
-			status = 0;
-			goto vote;
-		}
-
 		/* this is currently a readonly cache lock.
 		 * need to communicate to all the nodes in the 
 		 * map that lock will be changing to RW before we
@@ -914,28 +847,21 @@
 		sync_mapping_buffers(inode->i_mapping);
 
 #warning do we need a truncate_inode_pages here?
-		lockflags = lockres->master_node_num == osb->node_num ? 
-			OCFS_BH_CACHED : 0;
-		status = ocfs_read_bh(osb, lock_id, &fe_bh, lockflags, inode);
-		if (status < 0) {
-			LOG_ERROR_STATUS (status);
-			goto leave;
+		if (lockres->master_node_num != osb->node_num) {
+			printk("are we giving away a lock we don't own!?! "
+			       "inode %llu\n", OCFS_I(inode)->ip_blkno);
+			BUG();
 		}
 
-		fe = (ocfs2_dinode *) fe_bh->b_data;
-		DISK_LOCK(fe)->dl_master = node_num;
-		DISK_LOCK(fe)->dl_level = OCFS_LKM_NLMODE;
-
-		/* need to do the write only if fe lock values
-		 * need to change */
-		status = ocfs_write_bh(osb, fe_bh, inode);
+		status = ocfs_read_bh(osb, lock_id, &fe_bh, OCFS_BH_CACHED, 
+				      inode);
 		if (status < 0) {
 			LOG_ERROR_STATUS (status);
-			brelse(fe_bh);
 			goto leave;
 		}
 		lockres->master_node_num = node_num;
 		lockres->lock_type = OCFS_LKM_NLMODE;
+		ocfs_update_disk_lock(osb, fe_bh, inode);
 
 		brelse(fe_bh);
 		vote_response = FLAG_VOTE_NODE;
@@ -1093,10 +1019,6 @@
 	    ((vote_response == 0) || (vote_response > FLAG_VOTE_OIN_UPDATED)))
 	    BUG();
 
-	if(have_io_sem && inode)
-		up_write(&OCFS_I(inode)->ip_io_sem);
-	have_io_sem = 0;
-
 	if (inode && (flags & (FLAG_FILE_EXTEND|FLAG_FILE_TRUNCATE)) && 
 	    ((flags & FLAG_ACQUIRE_LOCK && vote_response==FLAG_VOTE_NODE) ||
 	    (flags & FLAG_RELEASE_LOCK))) {
@@ -1141,11 +1063,6 @@
 	if (!inode)
 		goto no_inode_leave;
 
-	if (have_io_sem) {
-		up_write(&OCFS_I(inode)->ip_io_sem);
-		have_io_sem = 0;
-	}
-
 	if (atomic_read(&OCFS_I(inode)->ip_needs_verification) 
 	    && vote_type == UPDATE_OIN_INODE) {
 		have_io_sem = 1;



More information about the Ocfs2-commits mailing list