[Ocfs2-commits] khackel commits r1441 - trunk/src

svn-commits at oss.oracle.com svn-commits at oss.oracle.com
Thu Sep 9 03:03:28 CDT 2004


Author: khackel
Date: 2004-09-09 03:03:26 -0500 (Thu, 09 Sep 2004)
New Revision: 1441

Modified:
   trunk/src/dlm.c
   trunk/src/inode.c
   trunk/src/journal.c
   trunk/src/lockres.c
   trunk/src/lockres.h
   trunk/src/nm.c
   trunk/src/nm.h
   trunk/src/ocfs.h
   trunk/src/ocfs_log.h
   trunk/src/vote.c
Log:
Changes the lockres lock from a mutex to an rwsem. Any use of the lockres that does not modify it should be converted to take the read lock, for performance.

Modified: trunk/src/dlm.c
===================================================================
--- trunk/src/dlm.c	2004-09-09 01:45:20 UTC (rev 1440)
+++ trunk/src/dlm.c	2004-09-09 08:03:26 UTC (rev 1441)
@@ -129,14 +129,14 @@
 		       OCFS_I(inode)->ip_blkno, message_flags);
 
 	while (status == -EAGAIN) {
-		ocfs_acquire_lockres(lockres, 0); // ocfs_file_open 
+		ocfs_acquire_lockres_write(inode);
 
 		flags = message_flags;
 		if (ocfs_inode_is_new(osb, inode))
 			flags |= FLAG_FAST_PATH_LOCK;
 
 		if (ocfs_task_interruptible ()) {
-			ocfs_release_lockres (lockres);
+			ocfs_release_lockres_write (inode);
 			LOG_TRACE_ARGS("interrupted... inode = %llu\n",
 				       OCFS_I(inode)->ip_blkno);
 			status = -EINTR;
@@ -148,7 +148,7 @@
 		if (status < 0) {
 			if (status != -EAGAIN)
 				LOG_ERROR_STATUS (status);
-			ocfs_release_lockres (lockres); // ocfs_file_open ocfs_symlink
+			ocfs_release_lockres_write (inode); // ocfs_file_open ocfs_symlink
 			if (status == -EAGAIN || status == -ETIMEDOUT) {
 				ocfs_sleep (50);
 				status = -EAGAIN;
@@ -157,7 +157,7 @@
 
 			goto bail;
 		}
-		ocfs_release_lockres (lockres); // ocfs_file_open 
+		ocfs_release_lockres_write (inode); // ocfs_file_open 
 	}
 bail:
 	LOG_EXIT_STATUS (status);
@@ -222,7 +222,7 @@
 	}
 
 	updated = 0;
-	ocfs_acquire_lockres (lockres, 0);  // ocfs_acquire_lock
+	ocfs_acquire_lockres_write (inode);
 
 again:
 	LOG_TRACE_ARGS("attempting to get lock, pass: %d\n", ++k);
@@ -230,7 +230,7 @@
 	if (!updated) {
 		status = ocfs_update_lockres(osb, *bh, inode, 1);
 		if (status < 0) {
-			ocfs_release_lockres (lockres);
+			ocfs_release_lockres_write (inode);
 			LOG_ERROR_STATUS (status);
 			goto finally;
 		}
@@ -248,7 +248,7 @@
 			       lockres->master_node_num, no_owner?"no":"yes",
 			       OCFS_I(inode)->ip_blkno << inode->i_sb->s_blocksize_bits);
 		LOG_ERROR_STATUS (status = -EINVAL);
-		ocfs_release_lockres (lockres); // ocfs_acquire_lock
+		ocfs_release_lockres_write (inode); // ocfs_acquire_lock
 		goto finally;
 	}
 
@@ -258,27 +258,32 @@
 		       ocfs_node_is_alive(&osb->publ_map, 
 				 	  lockres->master_node_num));
 	if ((owner_dead || wait_on_recovery) && 
-	    lockres->readonly_node == lockres->master_node_num) {
+	    test_bit(LOCK_STATE_READONLY, &lockres->readonly_state)) {
 		// if owner is dead or in recovery and the lockres 
-		// has the readonly owner set, clear it
-		lockres->readonly_node = OCFS_INVALID_NODE_NUM;
+		// has the readonly flag set, clear it
+		clear_bit(LOCK_STATE_READONLY, &lockres->readonly_state);
 	}
 
 	status = 0;
 	extra_lock_flags = 0;
 
 	if (flags & FLAG_READONLY) {
-		if (lockres->readonly_node != OCFS_INVALID_NODE_NUM)
-			goto skip_lock_write;
-		if (lockres->master_node_num == osb->node_num &&
-		    lockres->lock_type == OCFS_LKM_EXMODE) {
-			/* local node is master */
+		if (test_bit(LOCK_STATE_READONLY, &lockres->readonly_state) ||
+		    (lockres->master_node_num == osb->node_num &&
+		     lockres->lock_type == OCFS_LKM_EXMODE)) {
+			/* already readonly or local node is master */
+			/* THIS node will see it as readonly, but OTHER
+			 * nodes will have to wait until lock_holders drops 
+			 * to 0 (to finish journal flush on this inode) */
+			if (!test_bit(LOCK_STATE_READONLY, &lockres->readonly_state)) {
 #ifdef VERBOSE_LOCKING_TRACE
-			printk("acquire_lock: lockid %llu, setting ronode, was=%d, now=%d, master=%d\n", lock_id, 
-			       lockres->readonly_node, osb->node_num, lockres->master_node_num);
+				printk("acquire_lock: lockid %llu, setting readonly\n",
+				       lock_id);
 #endif
-			lockres->readonly_node = osb->node_num;
-			goto skip_lock_write;
+				set_bit(LOCK_STATE_READONLY, &lockres->readonly_state);
+			}
+			ocfs_release_lockres_write (inode);
+			goto finally;
 		}
 
 		if (lockres->master_node_num == OCFS_INVALID_NODE_NUM ||
@@ -295,17 +300,17 @@
 		goto do_lock;
 	} 
 
+#warning NEED MORE HANDLING HERE NOW FOR DROPPING LOCAL READONLY!!!
 	// anything else is NOT a readonly request
-	if (lockres->readonly_node != osb->node_num)
-		lockres->readonly_node = OCFS_INVALID_NODE_NUM; // clear any owner
+	clear_bit(LOCK_STATE_READONLY, &lockres->readonly_state);
 
 	status = ocfs_wait_for_readonly_drop(osb, inode);
 	if (status < 0) {
 		if (status == -EAGAIN) {
 			// the rodrop thread is already running and needs the lockres
-			ocfs_release_lockres(lockres); // ocfs_acquire_lock
+			ocfs_release_lockres_write(inode);
 			ocfs_sleep (OCFS_NM_HEARTBEAT_TIME / 10);
-			ocfs_acquire_lockres(lockres, 0);  // ocfs_acquire_lock
+			ocfs_acquire_lockres_write(inode);
 			goto reevaluate;
 		}
 		LOG_ERROR_STATUS(status);
@@ -342,10 +347,11 @@
 
 #ifdef VERBOSE_LOCKING_TRACE
 	printk("acquire_lock: lockid=%llu, this=%d, master=%d, locktype=%d, "
-	       "flags=%08x, ronode=%d\n",
+	       "flags=%08x, readonly=%s\n",
 	       OCFS_I(inode)->ip_blkno << inode->i_sb->s_blocksize_bits, 
 	       osb->node_num, lockres->master_node_num, lockres->lock_type, 
-	       flags|extra_lock_flags, lockres->readonly_node);
+	       flags|extra_lock_flags,
+	       test_bit(LOCK_STATE_READONLY, &lockres->readonly_state) ? "yes" : "no");
 #endif	
 	if (wait_on_recovery 
 	    && !((flags|extra_lock_flags) & FLAG_FILE_RECOVERY)) {
@@ -362,7 +368,7 @@
 	}
 
 	if (ocfs_task_interruptible ()) {
-		ocfs_release_lockres (lockres);
+		ocfs_release_lockres_write (inode);
 		LOG_TRACE_ARGS("interrupted... inode %llu\n",
 			       OCFS_I(inode)->ip_blkno);
 		status = -EINTR;
@@ -371,7 +377,7 @@
 	status = new_lock_function(osb, lock_type, flags|extra_lock_flags, *bh, inode);
 
 	if (status < 0) {
-		ocfs_release_lockres (lockres); // ocfs_acquire_lock
+		ocfs_release_lockres_write (inode); // ocfs_acquire_lock
 		if (status == -EAGAIN || status == -ETIMEDOUT) {
 			if (status == -ETIMEDOUT)
 				LOG_ERROR_ARGS("Timed out acquiring lock for "
@@ -380,7 +386,7 @@
 					       OCFS_I(inode)->ip_blkno, 
 					       lock_id);
 			ocfs_sleep (50);
-			ocfs_acquire_lockres(lockres, 0);
+			ocfs_acquire_lockres_write(inode);
 			/* if we're going to jump back up, we want to update
 			 * if we're not the master... */
 			if (lockres->master_node_num != osb->node_num)
@@ -402,7 +408,7 @@
 	LOG_TRACE_ARGS("lockres->lock_holders = %u\n", lockres->lock_holders);
 	LOG_TRACE_ARGS("lockres->uncommitted_holders = %u\n", 
 		       lockres->uncommitted_holders);
-	ocfs_release_lockres (lockres); // ocfs_acquire_lock
+	ocfs_release_lockres_write (inode); // ocfs_acquire_lock
 
 	if (atomic_read(&OCFS_I(inode)->ip_needs_verification)) {
 		ocfs2_dinode *fe = (ocfs2_dinode *) (*bh)->b_data;
@@ -430,6 +436,7 @@
 	__u64 lock_id;
 	__u32 num_to_send;
 	ocfs_node_map votemap;
+	int lockres_lock_held = NO_LOCK;
 
 	LOG_ENTRY_ARGS ("(0x%p, %u, %u, 0x%p)\n",
 			osb, lock_type, flags, lockres);
@@ -442,25 +449,33 @@
 
 	flags |= FLAG_RELEASE_LOCK;
 
-	ocfs_acquire_lockres (lockres, 0); // ocfs_release_lock
+	ocfs_acquire_lockres_read (inode);
+	lockres_lock_held = READ_LOCK;
 
 	if ((lockres->lock_type == OCFS_LKM_EXMODE) &&
 	    (lockres->master_node_num == osb->node_num) &&
-	    !(flags & FLAG_FILE_EXTEND) && !(FLAG_FILE_TRUNCATE)) {
+	    !(flags & (FLAG_FILE_EXTEND|FLAG_FILE_TRUNCATE))) {
 		status = 0;
 		goto finally;
 	}
 
 	if (flags & FLAG_READONLY) {
 		if (lockres->lock_type != OCFS_LKM_EXMODE ||
-		    lockres->master_node_num != lockres->readonly_node ||
-		    lockres->master_node_num == OCFS_INVALID_NODE_NUM)
-			LOG_ERROR_ARGS("READONLY release has issues! type=%d, master=%d, ronode=%d\n",
-				       lockres->lock_type, lockres->master_node_num, lockres->readonly_node);
+		    lockres->master_node_num == OCFS_INVALID_NODE_NUM ||
+		    !(test_bit(LOCK_STATE_READONLY, &lockres->readonly_state))) {
+			LOG_ERROR_ARGS("READONLY release has issues! type=%d, master=%d, readonly=%s\n",
+				       lockres->lock_type, lockres->master_node_num, 
+				       test_bit(LOCK_STATE_READONLY, &lockres->readonly_state) ?  "yes" : "no");
+			BUG();
+		}
 		status = 0;
-		goto finally;
+		goto unlock;
 	}
 
+	ocfs_release_lockres_read(inode);
+	ocfs_acquire_lockres_write(inode);
+	lockres_lock_held = WRITE_LOCK;
+
 	OCFS_ASSERT(lockres->uncommitted_holders <= lockres->lock_holders);
 
 	num_to_send = num_ident;
@@ -518,9 +533,9 @@
 			status = -EAGAIN;
 
 			LOG_ERROR_ARGS("Timed out releasing lock for inode %llu, retrying...\n", OCFS_I(inode)->ip_blkno);
-			ocfs_release_lockres(lockres);
+			ocfs_release_lockres_write(inode);
 			ocfs_sleep(200);
-			ocfs_acquire_lockres(lockres, 0);
+			ocfs_acquire_lockres_write(inode);
 			continue;
 		} else 
 			LOG_ERROR_STATUS (status);
@@ -535,8 +550,13 @@
 #warning "is this wise, or shouldn't we be retrying the lock release later?"
 	lockres->lock_holders -= num_ident;
 	LOG_TRACE_ARGS("lockres->lock_holders = %u\n", lockres->lock_holders);
-	ocfs_release_lockres (lockres); // ocfs_release_lock
-	LOG_EXIT_STATUS (status);
+
+unlock:
+	if (lockres_lock_held == READ_LOCK)
+		ocfs_release_lockres_read (inode);
+	else if (lockres_lock_held == WRITE_LOCK)
+		ocfs_release_lockres_write (inode);
+  	LOG_EXIT_STATUS (status);
 	return (status);
 }				/* ocfs_release_lock_full */
 
@@ -558,15 +578,14 @@
 
 	if (flags & FLAG_READONLY) {
 		if (flags & (FLAG_CHANGE_MASTER | FLAG_REMASTER)) {
-			/* there is no readonly_node.  treat like normal change master. */
+			/* not currently readonly.  treat like normal change master. */
 			flags &= ~FLAG_READONLY;
 		}
 	} else if (flags & FLAG_CHANGE_MASTER) {
-		/* non-readonly with CHANGE_MASTER should have no readonly_node */
-		if (lockres->readonly_node != OCFS_INVALID_NODE_NUM) {
-			LOG_ERROR_ARGS("change_master but readonly_node was %d\n", 
-				       lockres->readonly_node);
-			lockres->readonly_node = OCFS_INVALID_NODE_NUM;
+		/* non-readonly with CHANGE_MASTER should have no readonly flag */
+		if (test_bit(LOCK_STATE_READONLY, &lockres->readonly_state)) {
+			LOG_ERROR_ARGS("change_master but currently readonly\n");
+			clear_bit(LOCK_STATE_READONLY, &lockres->readonly_state);
 		}
 	}
 	if (flags & (FLAG_CHANGE_MASTER | FLAG_REMASTER)) {
@@ -646,6 +665,10 @@
 	/* just alerting owner on open */
 	if (flags & FLAG_TRUNCATE_PAGES)
 		goto bail;
+	
+	/* converted EX to readonly EX */
+	if (flags & FLAG_READONLY)
+		goto bail;
 
 	/* drop readonly should remove anyone who has responded */
        	if (flags & FLAG_DROP_READONLY) {
@@ -653,12 +676,6 @@
 		goto bail;
 	}
 
-	/* converted cachelock to readonly cachelock */
-	if (flags & FLAG_READONLY) {
-		lockres->readonly_node = lockres->master_node_num;
-		goto bail;
-	}
-
 	/* update the disk lock */
 	if (need_lock_write) {
 		lockres->lock_type = requested_lock;
@@ -683,9 +700,10 @@
 	}
 
 bail:
-	/* if we removed FLAG_READONLY above, set the readonly_node now */
-	if (is_readonly && !(flags & FLAG_READONLY)) {
-		lockres->readonly_node = lockres->master_node_num;
+	/* if we removed FLAG_READONLY above, or converted an
+	 * EX to readonly, set the readonly state now */
+	if (status >= 0 && (is_readonly || flags & FLAG_READONLY)) {
+		set_bit(LOCK_STATE_READONLY, &lockres->readonly_state);
 	}
 
 	LOG_EXIT_STATUS (status);

Modified: trunk/src/inode.c
===================================================================
--- trunk/src/inode.c	2004-09-09 01:45:20 UTC (rev 1440)
+++ trunk/src/inode.c	2004-09-09 08:03:26 UTC (rev 1441)
@@ -881,6 +881,7 @@
 	struct inode *inode = dentry->d_inode;
 	int status = 0;
 	ocfs_super *osb;
+	ocfs_lock_res *lockres;
 
 	LOG_SET_CONTEXT(REVALIDATE);
 
@@ -910,14 +911,15 @@
 		goto bail;
 	}
 
+	lockres = GET_INODE_LOCKRES(inode);
 	/* if I hold cache lock, no revalidate needed */
-	ocfs_acquire_lockres(GET_INODE_LOCKRES(inode), 0);
+	ocfs_acquire_lockres_read(inode);
 	if (ocfs_is_local_cache_lock(osb, inode)) {
-		ocfs_release_lockres(GET_INODE_LOCKRES(inode));
+		ocfs_release_lockres_read(inode);
 		LOG_TRACE_STR("local cache lock\n");
 		goto bail;
 	}
-	ocfs_release_lockres(GET_INODE_LOCKRES(inode));
+	ocfs_release_lockres_read(inode);
 
 	atomic_set(&OCFS_I(inode)->ip_needs_verification, 1);
 	status = ocfs_verify_update_inode(osb, inode);
@@ -1106,9 +1108,9 @@
 		goto leave;
 	}
 
-	ocfs_acquire_lockres(lockres, 0);
+	ocfs_acquire_lockres_write(inode);
 	status = ocfs_update_lockres (osb, fe_bh, inode, 0);
-	ocfs_release_lockres(lockres);
+	ocfs_release_lockres_write(inode);
 
 	status = 0;
 leave:

Modified: trunk/src/journal.c
===================================================================
--- trunk/src/journal.c	2004-09-09 01:45:20 UTC (rev 1440)
+++ trunk/src/journal.c	2004-09-09 08:03:26 UTC (rev 1441)
@@ -279,10 +279,10 @@
 
 		if (lock->drop_holders) {
 			lockres = GET_INODE_LOCKRES(lock->inode);
-			ocfs_acquire_lockres(lockres, 0);
+			ocfs_acquire_lockres_write(lock->inode);
 			OCFS_ASSERT(lockres->lock_holders >= lock->drop_holders);
 			lockres->lock_holders -= lock->drop_holders;
-			ocfs_release_lockres(lockres);
+			ocfs_release_lockres_write(lock->inode);
 		}
 
 		iput(lock->inode);

Modified: trunk/src/lockres.c
===================================================================
--- trunk/src/lockres.c	2004-09-09 01:45:20 UTC (rev 1440)
+++ trunk/src/lockres.c	2004-09-09 08:03:26 UTC (rev 1441)
@@ -101,14 +101,6 @@
 	lockres->lock_type = DISK_LOCK(fe)->dl_level;
 	lockres->master_node_num = DISK_LOCK(fe)->dl_master;
 
-	if (lockres->readonly_node != OCFS_INVALID_NODE_NUM &&
-	    lockres->readonly_node != lockres->master_node_num) {
-		LOG_ERROR_ARGS("no longer readonly! ronode=%d, master=%d, lockid=%llu\n",
-			       lockres->readonly_node, lockres->master_node_num,
-			       OCFS_I(inode)->ip_blkno << inode->i_sb->s_blocksize_bits);
-		lockres->readonly_node = OCFS_INVALID_NODE_NUM;
-	}
-
 out:
 	if (bh && drop_bh)
 		brelse(bh);
@@ -117,66 +109,8 @@
 	return status;
 }				/* ocfs_update_lockres */
 
-/*
- * ocfs_acquire_lockres()
- *
- * @lockres: lockres to acquire
- * @timeout: timeout in ms, 0 == no timeout
- */
-int ocfs_acquire_lockres (ocfs_lock_res * lockres, __u32 timeout)
-{
-	unsigned long jif;
-	int status;
 
-	LOG_ENTRY_ARGS ("(0x%p, %u)\n", lockres, timeout);
-
-	OCFS_ASSERT(lockres);
-
-	if (!timeout) {
-		down(&lockres->lock_mutex);
-		status = 0;
-		goto bail;
-	}
-
-	jif = jiffies + (timeout * HZ / 1000);
-
-	while(1) {
-		if (!down_trylock(&lockres->lock_mutex)) {
-			status = 0;
-			break;
-		}
-
-		if (jif < jiffies) {
-			status = -ETIMEDOUT;
-			break;
-		}
-
-		ocfs_sleep (OCFS_NM_HEARTBEAT_TIME / 10);
-	}
-
-bail:
-	LOG_EXIT_STATUS (status);
-	return status;
-}				/* ocfs_acquire_lockres */
-
 /*
- * ocfs_release_lockres()
- *
- */
-void ocfs_release_lockres (ocfs_lock_res * lockres)
-{
-	LOG_ENTRY_ARGS ("(0x%p)\n", lockres);
-
-	OCFS_ASSERT(lockres);
-
-	up(&lockres->lock_mutex);
-
-	LOG_EXIT ();
-	return;
-}				/* ocfs_release_lockres */
-
-
-/*
  * ocfs_init_lockres()
  *
  */
@@ -187,12 +121,11 @@
 	LOG_ENTRY_ARGS ("(0x%p, 0x%p)\n", osb, lockres);
 
 	lockres->master_node_num = OCFS_INVALID_NODE_NUM;
-	lockres->lock_state = 0;
 	lockres->lock_holders = 0;
+	lockres->readonly_state = 0;
 	lockres->uncommitted_holders = 0;
 	lockres->lock_type = OCFS_LKM_NLMODE;
-	init_MUTEX(&lockres->lock_mutex);
-	lockres->readonly_node = OCFS_INVALID_NODE_NUM;
+	init_rwsem(&lockres->lock);
 	ocfs_node_map_init(osb, &lockres->readonly_map);
 
 	LOG_EXIT ();

Modified: trunk/src/lockres.h
===================================================================
--- trunk/src/lockres.h	2004-09-09 01:45:20 UTC (rev 1440)
+++ trunk/src/lockres.h	2004-09-09 08:03:26 UTC (rev 1441)
@@ -29,11 +29,93 @@
 #ifndef OCFS2_LOCKRES_H
 #define OCFS2_LOCKRES_H
 
+#include "util.h"
+
 /* lockres.c */
-int ocfs_acquire_lockres(ocfs_lock_res *lockres, __u32 timeout);
+
+
+/*
+ * ocfs_acquire_lockres_write_timeout()
+ *
+ * @inode: inode whose lockres is to be acquired
+ * @timeout: timeout in ms; 0 does not block indefinitely (use ocfs_acquire_lockres_write for that)
+ */
+static inline int ocfs_acquire_lockres_write_timeout (struct inode *inode, __u32 timeout)
+{
+	unsigned long jif = jiffies + (timeout * HZ / 1000);
+	ocfs_lock_res * lockres = GET_INODE_LOCKRES(inode);
+
+	while(1) {
+		if (down_write_trylock(&lockres->lock))
+			return 0;
+
+		if (jif < jiffies)
+			return -ETIMEDOUT;
+
+		ocfs_sleep (OCFS_NM_HEARTBEAT_TIME / 10);
+	}
+
+	return 0;
+}
+
+/*
+ * ocfs_acquire_lockres_write()
+ */
+static inline int ocfs_acquire_lockres_write (struct inode *inode)
+{
+	ocfs_lock_res *lockres = GET_INODE_LOCKRES(inode);
+	down_write(&lockres->lock);
+	return 0;
+}
+
+/*
+ * ocfs_acquire_lockres_read_timeout()
+ *
+ * @lockres: lockres to acquire
+ * @timeout: timeout in ms, 0 == no timeout
+ */
+static inline int ocfs_acquire_lockres_read_timeout (struct inode *inode, __u32 timeout)
+{
+	ocfs_lock_res *lockres = GET_INODE_LOCKRES(inode);
+	unsigned long jif = jiffies + (timeout * HZ / 1000);
+	
+	while(1) {
+		if (down_read_trylock(&lockres->lock))
+			return 0;
+
+		if (jif < jiffies)
+			return -ETIMEDOUT;
+
+		ocfs_sleep (OCFS_NM_HEARTBEAT_TIME / 10);
+	}
+
+	return 0;
+}
+
+/*
+ * ocfs_acquire_lockres_read()
+ */
+static inline int ocfs_acquire_lockres_read (struct inode *inode)
+{
+	ocfs_lock_res *lockres = GET_INODE_LOCKRES(inode);
+	down_read(&lockres->lock);
+	return 0;
+}
+
+static inline void ocfs_release_lockres_write(struct inode *inode)
+{
+	ocfs_lock_res *lockres = GET_INODE_LOCKRES(inode);
+	up_write(&lockres->lock);
+}
+static inline void ocfs_release_lockres_read(struct inode *inode)
+{
+	ocfs_lock_res *lockres = GET_INODE_LOCKRES(inode);
+	up_read(&lockres->lock);
+}
+
 void ocfs_init_lockres(ocfs_super *osb, struct inode *inode);
-void ocfs_release_lockres(ocfs_lock_res *lockres);
 int ocfs_update_lockres(ocfs_super *osb, struct buffer_head *bh, 
 			struct inode *inode, int reread);
 
+
 #endif /* OCFS2_LOCKRES_H */

Modified: trunk/src/nm.c
===================================================================
--- trunk/src/nm.c	2004-09-09 01:45:20 UTC (rev 1440)
+++ trunk/src/nm.c	2004-09-09 08:03:26 UTC (rev 1441)
@@ -60,8 +60,16 @@
 static spinlock_t oin_num_ext_lock = SPIN_LOCK_UNLOCKED;
 struct semaphore recovery_list_sem;
 
-static inline int get_process_vote_action(ocfs_super * osb, ocfs_lock_res *lockres, __u32 node_num, __u32 flags, int status, int *master_alive, struct inode *inode);
+static inline int need_write_lock(ocfs_super *osb, ocfs_lock_res *lockres, __u32 flags);
+static inline int get_process_vote_action(ocfs_super * osb, ocfs_lock_res *lockres, __u32 node_num, 
+					  __u32 flags, int *master_alive, int *write_lock, 
+					  int *change_master, struct inode *inode);
 
+static int ocfs_process_vote_pre_change_master(int vote_type, int flags, int *vote_response, int *status, struct inode *inode);
+static int ocfs_process_vote_change_master(ocfs_super *osb, int *vote_response, int *status, struct inode *inode, ocfs_lock_res *lockres, __s16 node_num, __u64 lock_id);
+static int ocfs_process_vote_post_change_master(ocfs_super *osb, int vote_type, int flags, int *vote_response, struct inode *inode, ocfs_lock_res *lockres, int *status, __s16 node_num, int *inc_seq);
+static int ocfs_lock_busy(ocfs_super *osb, struct inode *inode, ocfs_lock_res *lockres);
+
 static int _ocfs_drop_readonly_cache_lock(void *arg);
 
 typedef struct _ocfs_ro_cache_drop_ctxt
@@ -85,7 +93,6 @@
 	"INVALID_REQUEST",      // reply with a NO vote
 	"UPDATE_OIN_INODE",     // update both oin and inode
 	"DELETE_ACQUIRE",// delete or rename request
-	"DELETE_RELEASE",// delete or rename release request
 	"CHANGE_MASTER",        // request to change master to requestor
 	"NOT_MASTER",           // I am not master, retry
 	"REMASTER_THIS",        // remaster lock to me
@@ -290,42 +297,67 @@
 	return 0;
 }				/* ocfs_volume_thread */
 
+
+// gets a best guess (based on dirty read of lockres)
+// of whether down_read or down_write should be used on lockres
+// NOTE: always RECHECK after getting the lock and follow what
+// get_process_vote_action says
+static inline int need_write_lock(ocfs_super *osb, ocfs_lock_res *lockres, __u32 flags)
+{
+	// always need write access to lockres if not master
+	if (lockres->master_node_num != osb->node_num)
+		return 1;
+	// usually need write access for these so just get it
+	if (flags & (FLAG_CHANGE_MASTER|FLAG_DROP_READONLY|FLAG_READONLY))
+		return 1;
+	// nothing else will need it, assuming it didnt just change under us
+	return 0;
+}
+
 static inline int get_process_vote_action(ocfs_super * osb, ocfs_lock_res *lockres, __u32 node_num, 
-					  __u32 flags, int status, int *master_alive, struct inode *inode)
+					  __u32 flags, int *master_alive, int *write_lock, 
+					  int *change_master, struct inode *inode)
 {
 	int vote_type = INVALID_REQUEST;
 	int my_node_wins = 0;
+	int this_node_master = 0;
 	__u64 lockid = 0;
 	ocfs_vote_obj_lookup_data data;
 
-	LOG_ENTRY_ARGS("(status=%d, lockid=%llu, node_num=%d, flags=%08x)\n", status,
+	LOG_ENTRY_ARGS("(lockid=%llu, node_num=%d, flags=%08x)\n", 
 		       lockid, node_num, flags);
 
-	if (inode)
-		lockid = OCFS_I(inode)->ip_blkno << inode->i_sb->s_blocksize_bits;
+	OCFS_ASSERT(inode);
+	OCFS_ASSERT(lockres);
 
-	*master_alive = 1;
+	lockid = OCFS_I(inode)->ip_blkno << inode->i_sb->s_blocksize_bits;
 
-	if (status < 0) {
-		if (status == -ETIMEDOUT) {
-			LOG_TRACE_STR("(INVALID_REQUEST) status == -ETIMEDOUT");
-			return INVALID_REQUEST;
-		}
-	} else if (lockres) {
-		*master_alive =
-			(lockres->master_node_num !=
-			 OCFS_INVALID_NODE_NUM) &&
-			ocfs_node_is_alive(&osb->publ_map, 
-					   lockres->master_node_num);
+	*change_master = 0;
+	*write_lock = 0;
+	this_node_master = (lockres->master_node_num == osb->node_num);
+	*master_alive = (lockres->master_node_num != OCFS_INVALID_NODE_NUM) &&
+			ocfs_node_is_alive(&osb->publ_map, lockres->master_node_num);
 
-		// if an outstanding vote request is found on this lockid
-		// and this node number is higher, this node wins
-		data.func = ocfs_lookup_obj_by_lockid;
-		data.u.s.lock_id = lockid;
-		data.ret = NULL;
-		if (ocfs_lookup_vote_request_obj(osb, &data) == 0)
-			my_node_wins = (node_num < osb->node_num);
+	// if an outstanding vote request is found on this lockid
+	// and this node number is higher, this node wins
+	data.func = ocfs_lookup_obj_by_lockid;
+	data.u.s.lock_id = lockid;
+	data.ret = NULL;
+	if (ocfs_lookup_vote_request_obj(osb, &data) == 0)
+		my_node_wins = (node_num < osb->node_num);
+
+	/* NOTE: FLAG_CHANGE_MASTER may be combined with
+	 * other flags and result in a process_vote action
+	 * other than CHANGE_MASTER.  the change_master
+	 * value returned here is independent of this action */
+	if (this_node_master && flags & FLAG_CHANGE_MASTER) {
+		*write_lock = 1;
+		*change_master = 1;
 	}
+	
+	// if this node is not master, we will need to update the lockres
+	if (!this_node_master)
+		*write_lock = 1;
 
 	if (flags & (FLAG_RELEASE_DENTRY | FLAG_FILE_RENAME)) {
 		vote_type = RELEASE_DENTRY;
@@ -334,11 +366,12 @@
 
 	if (flags & FLAG_DROP_READONLY) {
 		vote_type = DROP_READONLY;
+		*write_lock = 1;
 		goto done;
 	} else if (flags & FLAG_READONLY) {
-		if (lockres->master_node_num == osb->node_num &&
-		    lockres->lock_type == OCFS_LKM_EXMODE) {
+		if (this_node_master && lockres->lock_type == OCFS_LKM_EXMODE) {
 			vote_type = READONLY;
+			*write_lock = 1;
 		} else 
 			vote_type = INVALID_REQUEST;
 		goto done;
@@ -346,7 +379,7 @@
 
 	if (flags & FLAG_FILE_DELETE) {
 		if (flags & FLAG_RELEASE_LOCK)
-			vote_type = DELETE_RELEASE;
+			vote_type = INVALID_REQUEST;
 		else if (flags & FLAG_ACQUIRE_LOCK)
 			vote_type = DELETE_ACQUIRE;
 		else
@@ -359,7 +392,7 @@
 			vote_type = UPDATE_OIN_INODE;
 	} else if (flags & FLAG_TRUNCATE_PAGES) {
 		vote_type = TRUNCATE_PAGES;
-	} else if (lockres->master_node_num == osb->node_num) {
+	} else if (this_node_master) {
 		if (flags & FLAG_CHANGE_MASTER)
 			vote_type = CHANGE_MASTER;
 		else {
@@ -375,12 +408,6 @@
 			vote_type = REMASTER_REQUESTOR;
 	}
 	
-	if (inode == NULL && 
-	    (vote_type != DELETE_RELEASE && vote_type != TRUNCATE_PAGES)) {
-		printk("inode is null and it's not a delete release or a truncate pages!\n");
-		vote_type = INVALID_REQUEST;
-	}
-
 done:
 	LOG_EXIT_STATUS(vote_type);
 	return vote_type;
@@ -435,10 +462,10 @@
 	OCFS_I(inode)->ip_num_extends++;
 	list_add_tail(&OCFS_I(inode)->ip_recovery_list, 
 		      &osb->lock_recovery_lists[node_num]);
-#ifdef VERBOSE_PROCESS_VOTE
-	printk("ocfs_mark_inode_for_extend: inode %llu, num = %d\n",
+
+	LOG_TRACE_PROCESS_VOTE("inode %llu, num = %d\n",
 	       OCFS_I(inode)->ip_blkno, OCFS_I(inode)->ip_num_extends);
-#endif
+	
 	spin_unlock(&oin_num_ext_lock);
 	up(&recovery_list_sem);
 
@@ -459,22 +486,18 @@
 		 * to get a release with a count > what we've had if
 		 * we mount after the acquires have been sent. */
 
-#ifdef VERBOSE_PROCESS_VOTE
-		printk("ocfs_clear_inode_for_extend: inode %llu, num_rel of "
+		LOG_TRACE_PROCESS_VOTE("inode %llu, num_rel of "
 		       "%d would result in negative count (ip_num_extends "
 		       "= %d)\n", 
 		       OCFS_I(inode)->ip_blkno, num_rel,
 		       OCFS_I(inode)->ip_num_extends);
-#endif
 		OCFS_I(inode)->ip_num_extends = 0;
 	} else {
 		OCFS_I(inode)->ip_num_extends -= num_rel;
 	}
 
-#ifdef VERBOSE_PROCESS_VOTE
-	printk("ocfs_clear_inode_for_extend: inode %llu, num = %d\n",
+	LOG_TRACE_PROCESS_VOTE("inode %llu, num = %d\n",
 	       OCFS_I(inode)->ip_blkno, OCFS_I(inode)->ip_num_extends);
-#endif
 
 	if (!OCFS_I(inode)->ip_num_extends) {
 		list_del(&OCFS_I(inode)->ip_recovery_list);
@@ -495,9 +518,7 @@
 	return;
 }
 
-#define PROCESS_VOTE_TRYLOCK
 
-#ifdef PROCESS_VOTE_TRYLOCK
 static int ocfs_io_sem_read_trylock(struct inode *inode, u32 timeout)
 {
 	unsigned long jif;
@@ -534,7 +555,6 @@
 	return status;
 }
 
-#endif
 
 static int ocfs_process_inode_delete(struct inode *inode)
 {
@@ -550,10 +570,8 @@
 	down (&(OCFS_I(inode)->ip_sem));
 	/* vote no if the file is still open. */
 	if (OCFS_I(inode)->ip_open_cnt > 0) {
-#ifdef VERBOSE_PROCESS_VOTE
-		printk("process_vote: (delete) open count = %u\n", 
+		LOG_TRACE_PROCESS_VOTE("open count = %u\n", 
 		       OCFS_I(inode)->ip_open_cnt);
-#endif
 		up(&(OCFS_I(inode)->ip_sem));
 		status = 0;
 		goto done;
@@ -564,9 +582,7 @@
 	spin_lock(&oin_num_ext_lock);
 	if (OCFS_I(inode)->ip_num_extends) {
 		spin_unlock(&oin_num_ext_lock);
-#ifdef VERBOSE_PROCESS_VOTE
-		printk("process_vote: (delete) extends pending\n");
-#endif
+		LOG_TRACE_PROCESS_VOTE("extends pending\n");
 		status = 0;
 		goto done;
 	}
@@ -576,10 +592,8 @@
 	 * it? We want to make sure the inode is removed completely as
 	 * a result of the iput in process_vote. */
 	if (S_ISDIR(inode->i_mode) && (atomic_read(&inode->i_count) != 1)) {
-#ifdef VERBOSE_PROCESS_VOTE
-		printk("process_vote: (delete) i_count = %u\n", 
+		LOG_TRACE_PROCESS_VOTE("i_count = %u\n", 
 		       atomic_read(&inode->i_count));
-#endif
 		status = 0;
 		goto done;
 	}
@@ -607,6 +621,7 @@
 	up(&OCFS_I(inode)->ip_sem);
 }
 
+
 /*
  * ocfs_process_vote()
  *
@@ -615,37 +630,28 @@
  * @node_num: node asking for the vote
  *
  */
-int ocfs_process_vote (ocfs_super * osb, ocfs_vote_request_ctxt *ctxt)
+int ocfs_process_vote (ocfs_super * osb, ocfs_dlm_msg *dlm_msg)
 {
 	int status = 0;
 	int tmpstat = 0;
 	ocfs_lock_res *lockres = NULL;
 	__u32 flags, num_ident;
 	__u16 num_nodes;
-	struct buffer_head *fe_bh = NULL;
 	int vote_type = INVALID_REQUEST, vote_response = 0;
 	struct inode *inode = NULL;
-	int master_alive = 1;
+	int master_alive = 1, change_master = 0, write_lock = 0;
 	int inc_inode_seq = 0;
 	int have_io_sem = 0;
 	int change_master_succeeded = 0;
-	ocfs_dlm_msg *dlm_msg = ctxt->dlm_msg;
-	__s16 node_num = ctxt->node_num;
+	__s16 node_num = dlm_msg->src_node;
 	__u64 lock_id, seq_num;
 	ocfs_dlm_req_master *req_master = NULL;
+	int lockres_lock_held = NO_LOCK;
 
-	LOG_ENTRY_ARGS ("(0x%p, 0x%p)\n", osb, ctxt);
+	LOG_ENTRY_ARGS ("(0x%p, 0x%p)\n", osb, dlm_msg);
 
 	down(&osb->vote_sem);
 
-	if (!dlm_msg) {
-		status = -EINVAL;
-		LOG_ERROR_STR("invalid vote reply context!");
-		LOG_ERROR_STATUS (status);
-		LOG_EXIT_STATUS (status);
-		return status;
-	}
-
 	num_nodes = osb->max_nodes;
 	
 	req_master = (ocfs_dlm_req_master *)dlm_msg->msg_buf;
@@ -667,50 +673,70 @@
 	 * instead of letting the other guy's network timeout. */
 	vote_response = FLAG_VOTE_UPDATE_RETRY;
 
-	/* delete / rename is slightly different -- we don't want to
-	 * look up the inode in the release case -- it should already
-	 * be gone. Eventually what we'll really want to do is get it
-	 * via the old offsets and set the new ones. */
-	if ((flags & FLAG_FILE_DELETE) && (flags & FLAG_RELEASE_LOCK))
-		inode = NULL;
-	else {
-		if(flags & FLAG_TRUNCATE_PAGES) {
-			inode = ocfs_ilookup(osb, 
-					lock_id >> osb->sb->s_blocksize_bits);
-			if(!inode)
-				goto no_inode_ok;
-		} else {
-			inode = ocfs_iget(osb, 
-					lock_id >> osb->sb->s_blocksize_bits);
+	if (flags & FLAG_TRUNCATE_PAGES) {
+		inode = ocfs_ilookup(osb, 
+				lock_id >> osb->sb->s_blocksize_bits);
+		if(!inode) {
+			vote_type = TRUNCATE_PAGES;
+			goto got_vote_type;
 		}
+	} else {
+		inode = ocfs_iget(osb, 
+				lock_id >> osb->sb->s_blocksize_bits);
+	}
 
-		if (!inode) {
-			status = -EFAIL;
-			LOG_ERROR_ARGS("Could not find inode: lock_id = %llu, "
-				       "node=%u, seq=%llu, flags=0x%x\n",
-				       lock_id, node_num, seq_num, flags);
-			LOG_ERROR_STATUS(status);
-			goto vote;
-		}
+	if (!inode) {
+		status = -EFAIL;
+		LOG_ERROR_ARGS("Could not find inode: lock_id = %llu, "
+			       "node=%u, seq=%llu, flags=0x%x\n",
+			       lock_id, node_num, seq_num, flags);
+		LOG_ERROR_STATUS(status);
+		goto vote;
+	}
 
-		/* ahh, so you find yourself asking "what the
-		 * heck is this?"
-		 * Please see the note in ocfs_delete_inode. */
-		osb->voting_ino = inode->i_ino;
+	/* ahh, so you find yourself asking "what the
+	 * heck is this?"
+	 * Please see the note in ocfs_delete_inode. */
+	osb->voting_ino = inode->i_ino;
 
-		lockres = GET_INODE_LOCKRES(inode);
+	lockres = GET_INODE_LOCKRES(inode);
 
-		status = ocfs_acquire_lockres (lockres, (OCFS_NM_HEARTBEAT_TIME/2)); // ocfs_process_vote
-		if (status < 0) {
-			lockres = NULL;
-			LOG_TRACE_ARGS("Timedout locking lockres for id: %llu\n",
-				       OCFS_I(inode)->ip_blkno << inode->i_sb->s_blocksize_bits);
-			goto vote;
-		}
+	// take a good guess...
+	// at worst, we will take 2 passes through
+	write_lock = need_write_lock(osb, lockres, flags);
 
+retake_lock:
+	OCFS_ASSERT(lockres_lock_held == NO_LOCK);
+	if (write_lock)
+		status = ocfs_acquire_lockres_write_timeout (inode, (OCFS_NM_HEARTBEAT_TIME/2));
+	else
+		status = ocfs_acquire_lockres_read_timeout (inode, (OCFS_NM_HEARTBEAT_TIME/2));
+
+	if (status < 0) {
+		LOG_TRACE_ARGS("Timedout locking lockres for id: %llu\n",
+			       OCFS_I(inode)->ip_blkno << inode->i_sb->s_blocksize_bits);
+		goto vote;
+	} else
+		lockres_lock_held = (write_lock ? WRITE_LOCK : READ_LOCK);
+
+	// find out everything now that a lock is held
+	vote_type = get_process_vote_action(osb, lockres, node_num, flags, 
+					    &master_alive, &write_lock, 
+					    &change_master, inode);
+
+	// bummer. we got the wrong lock. get the write lock and start over.
+	if (write_lock && lockres_lock_held == READ_LOCK) {
+		ocfs_release_lockres_read(inode);
+		lockres_lock_held = NO_LOCK;
+		goto retake_lock;
+	}
+
+	if (lockres->master_node_num != osb->node_num) {
 		/* since we pass a NULL bh, this'll only do a read if
-		 * we're not the master. */
+	 	* we're not the master. */
+		OCFS_ASSERT(lockres_lock_held == WRITE_LOCK);
 		status = ocfs_update_lockres (osb, NULL, inode, 1);
+
 		if (status < 0) {
 			if (status != -ETIMEDOUT)
 				LOG_ERROR_STATUS (status);
@@ -718,50 +744,152 @@
 		}
 	}
 
-no_inode_ok:
-	/* fail here if no inode, unless this is a delete/rename release */
-	vote_type = get_process_vote_action(osb, lockres, node_num, flags, 
-					    status, &master_alive, inode);
-
-#ifdef VERBOSE_PROCESS_VOTE
-	printk("(%u) ocfs_process_vote: %s request for lockid: %llu, action: (%u) %s, num_ident = %u\n", current->pid,
+got_vote_type:
+	
+	LOG_TRACE_PROCESS_VOTE("type: %s, lockid: %llu, action: (%u) %s, num_ident: %u, "
+	       "alive: %d, write: %d, change: %d, held: %d\n", 
 	       flags & FLAG_RELEASE_LOCK ? "RELEASE" : 
 	       (flags & FLAG_ACQUIRE_LOCK ? "ACQUIRE" : "MODIFY"), lock_id,
- 	       vote_type, process_vote_strings[vote_type], num_ident);
+ 	       vote_type, process_vote_strings[vote_type], num_ident,
+	       master_alive, write_lock, change_master, lockres_lock_held);
+	
 	if (vote_type == INVALID_REQUEST)
-		printk("Invalid request! flags = 0x%x master=%d, level=%d\n", 
-		       flags, lockres->master_node_num, lockres->lock_state);
+		printk("Invalid request! flags = 0x%x master=%d, readonly=%s\n", 
+		       flags, lockres->master_node_num, 
+		       test_bit(LOCK_STATE_READONLY, &lockres->readonly_state) ? "yes" : "no");
 
-#endif
-	/* get_process_vote_action will only allow CHANGE_MASTER and
-	 * RELEASE_CACHE, on a CACHE lock held by this node.  the
-	 * CHANGE_MASTER/RELEASE_CACHE path needs to check the
-	 * readonly map to see if any nodes need to be updated. */
+	/* get_process_vote_action will only allow CHANGE_MASTER on a CACHE lock 
+	 * held by this node.  the CHANGE_MASTER path needs to check the readonly 
+	 * map to see if any nodes need to be updated. */
 
-	vote_response = 0;
+	vote_response = FLAG_VOTE_NODE;
 
-	/* some lock requests need to be processed before a possible
-	 * change master. Beware however that the change_master might
-	 * very well send a no vote, so you can't do things here that
-	 * cannot be rolled back. */
-	switch (vote_type) {
-		case DELETE_ACQUIRE:
-			LOG_TRACE_STR("DELETE_ACQUIRE (part one)");
-			if (!ocfs_process_inode_delete(inode))
-				vote_response = FLAG_VOTE_OIN_ALREADY_INUSE;
-			else 
-				vote_response = FLAG_VOTE_NODE;
-			break;
-		case TRUNCATE_PAGES:
-			LOG_TRACE_STR("TRUNCATE_PAGES");
-			if(inode) {
-				status = ocfs_sync_inode(inode);
-				if (status < 0) {
-					LOG_ERROR_STATUS(status);
-					vote_response = FLAG_VOTE_UPDATE_RETRY;
-					goto vote;
-				}
+	if (ocfs_process_vote_pre_change_master(vote_type, flags, &vote_response, &status, inode))
+		goto vote;
 
+	if (change_master) {
+		tmpstat = ocfs_process_vote_change_master(osb, &vote_response, &status, 
+							  inode, lockres, node_num, lock_id);
+		if (tmpstat < 0)
+			goto leave;
+		else if (tmpstat == 1)
+			goto vote;
+		change_master_succeeded = 1;
+		inc_inode_seq = 1;
+	}
+
+	tmpstat = ocfs_process_vote_post_change_master(osb, vote_type, flags, &vote_response, inode, 
+					     lockres, &status, node_num, &inc_inode_seq);
+
+	/* if we made it this far, and change_master, then it had better be voting yes */
+	if (change_master && vote_response != FLAG_VOTE_NODE)
+		BUG();
+
+	if (have_io_sem && inode)
+		up_write(&OCFS_I(inode)->ip_io_sem);
+	have_io_sem = 0;
+
+	if (inode && (flags & (FLAG_FILE_EXTEND|FLAG_FILE_TRUNCATE)) && 
+	    ((flags & FLAG_ACQUIRE_LOCK && vote_response==FLAG_VOTE_NODE) ||
+	    (flags & FLAG_RELEASE_LOCK))) {
+		LOG_TRACE_ARGS("responding YES to %s %s request, inode=%p, node=%u\n", flags & FLAG_FILE_EXTEND ?
+			      "extend" : "truncate", flags & FLAG_RELEASE_LOCK ? 
+			      "release" : "acquire", inode, node_num);
+
+		if (flags & FLAG_ACQUIRE_LOCK)
+			ocfs_mark_inode_for_extend(osb, inode, node_num);
+		else if (flags & FLAG_RELEASE_LOCK)
+			ocfs_clear_inode_for_extend(osb, inode, node_num, 
+						    num_ident);
+		else {
+			printk("uhoh, bad vote flags! 0x%x\n", flags);
+			BUG();
+		}
+	}
+
+vote:
+	status = ocfs_send_vote_reply(osb, dlm_msg, vote_response);
+
+	LOG_TRACE_PROCESS_VOTE("vote: lockid=%llu, node=%d, seqnum=%llu, response=%d\n",
+			       lock_id, node_num, seq_num, vote_response);
+		       
+	if (status < 0)
+		LOG_ERROR_STATUS (status);
+	else {
+		ocfs_compute_dlm_stats (0, vote_response,
+					&(OcfsGlobalCtxt.net_reply_stats));
+		ocfs_compute_dlm_stats (0, vote_response,
+				       	&(osb->net_reply_stats));
+	}
+
+leave:
+	if (lockres_lock_held == READ_LOCK)
+		ocfs_release_lockres_read (inode);
+	else if (lockres_lock_held == WRITE_LOCK)
+		ocfs_release_lockres_write (inode);
+	lockres_lock_held = NO_LOCK;
+
+	if (!inode)
+		goto no_inode_leave;
+
+	if (have_io_sem) {
+		up_write(&OCFS_I(inode)->ip_io_sem);
+		have_io_sem = 0;
+	}
+
+	if (atomic_read(&OCFS_I(inode)->ip_needs_verification) 
+	    && vote_type == UPDATE_OIN_INODE) {
+		if (ocfs_io_sem_read_trylock(inode, (OCFS_NM_HEARTBEAT_TIME/2))) {
+			LOG_ERROR_ARGS("Could not verify_update on %llu\n",
+				       OCFS_I(inode)->ip_blkno);
+		} else {
+			tmpstat = ocfs_verify_update_inode(osb, inode);
+			if (tmpstat < 0)
+				LOG_ERROR_STATUS(tmpstat);
+			up_read(&OCFS_I(inode)->ip_io_sem);
+		}
+	}
+
+	if (inc_inode_seq) {
+		ocfs_inc_inode_seq(osb, inode);
+		sync_mapping_buffers(inode->i_mapping);
+	}
+	iput(inode);
+
+no_inode_leave:
+	osb->voting_ino = 0;
+
+	up(&osb->vote_sem);
+
+	LOG_EXIT_STATUS (status);
+	return status;
+}				/* ocfs_process_vote */
+
+
+/* some lock requests need to be processed before a possible
+ * change master. Beware however that the change_master might
+ * very well send a no vote, so you can't do things here that
+ * cannot be rolled back. */
+
+/* Returns: 1 if process_vote should vote immediately, 0 otherwise */
+
+static int ocfs_process_vote_pre_change_master(int vote_type, int flags, int *vote_response, int *status, struct inode *inode)
+{
+	if (vote_type == DELETE_ACQUIRE) {
+		LOG_TRACE_STR("DELETE_ACQUIRE (part one)");
+		if (!ocfs_process_inode_delete(inode)) {
+			*vote_response = FLAG_VOTE_OIN_ALREADY_INUSE;
+			return 1;
+		}
+		*vote_response = FLAG_VOTE_NODE;
+		return 0;
+	} 
+	if (vote_type == TRUNCATE_PAGES) {
+		LOG_TRACE_STR("TRUNCATE_PAGES");
+		*vote_response = FLAG_VOTE_NODE;
+		if (inode) {
+			*status = ocfs_sync_inode(inode);
+			if (*status >= 0) {
 				ocfs_truncate_inode_pages(inode, 0);
 				down(&OCFS_I(inode)->ip_sem);
 				ocfs_extent_map_destroy(&OCFS_I(inode)->ip_ext_map);
@@ -770,119 +898,145 @@
 				if (flags & FLAG_FILE_UPDATE_OIN)
 					atomic_set(&OCFS_I(inode)->ip_needs_verification, 1);
 				up(&OCFS_I(inode)->ip_sem);
+			} else {
+				// if this fails, it's an EIO
+				// should probably BUG
+				LOG_ERROR_STATUS(*status);
+				*vote_response = FLAG_VOTE_UPDATE_RETRY;
+				return 1;
 			}
-			vote_response = FLAG_VOTE_NODE;
-			break;
-		case INVALID_REQUEST:
-			/* we catch INVALID_REQUEST up here now as we
-			 * don't want to do a change_master on a
-			 * messed up vote... */
-			LOG_TRACE_STR("INVALID_REQUEST");
-			goto vote;
-		default:
-			break;
+		}
+		return 0;
 	}
+		
+	*vote_response = 0;
+	if (vote_type == INVALID_REQUEST) {
+		/* we catch INVALID_REQUEST up here now as we
+		 * don't want to do a change_master on a
+		 * messed up vote... */
+		LOG_TRACE_STR("INVALID_REQUEST");
+		return 1;
+	}
+	return 0;
+}
 
-	if (vote_response > FLAG_VOTE_NODE) {
-		/* we shouldn't even get to the other cases. */
-		goto vote;
+
+
+
+static int ocfs_lock_busy(ocfs_super *osb, struct inode *inode, ocfs_lock_res *lockres)
+{
+	/* requestor will need to retry if anyone is using the lockres */
+	if (lockres->lock_holders > 0) {
+		LOG_TRACE_PROCESS_VOTE("Lock id (%llu) has %u holders\n",
+		       OCFS_I(inode)->ip_blkno << inode->i_sb->s_blocksize_bits,
+		       lockres->lock_holders);
+		// kick the commit thread
+		atomic_set(&osb->flush_event_woken, 1);
+		wake_up(&osb->flush_event);
+
+		return 1;
 	}
+	return 0;
+}
 
-	if (lockres && (lockres->master_node_num == osb->node_num)
-	    && (flags & FLAG_CHANGE_MASTER)) {
-		LOG_TRACE_STR("CHANGE_MASTER");
 
-		status = -EFAIL;
-#ifdef VERBOSE_PROCESS_VOTE
-		printk("process_vote: doing CHANGE_MASTER for this request\n");
-#endif
-		/* requestor will need to retry if anyone is using the
-		 * lockres */
-		if (lockres->lock_holders > 0) {
-#ifdef VERBOSE_PROCESS_VOTE
-			printk("process_vote: Lock id (%llu) has %u "
-			       "holders\n",
-			       OCFS_I(inode)->ip_blkno << inode->i_sb->s_blocksize_bits,
-			       lockres->lock_holders);
-#endif	
-			// kick the commit thread
-			atomic_set(&osb->flush_event_woken, 1);
-			wake_up(&osb->flush_event);
+/* Returns: <0 if an I/O error occurred, 
+ *           1 if process_vote should vote immediately, 
+ *           0 if change master succeeded */
 
-			vote_response = FLAG_VOTE_UPDATE_RETRY;
-			status = 0;
-			goto vote;
-		}
+static int ocfs_process_vote_change_master(ocfs_super *osb, int *vote_response, int *status, struct inode *inode, 
+				ocfs_lock_res *lockres, __s16 node_num, __u64 lock_id)
+{
+	struct buffer_head *fe_bh = NULL;
 
-		/* this is currently a readonly cache lock.
-		 * need to communicate to all the nodes in the 
-		 * map that lock will be changing to RW before we
-		 * continue.  RETRY this request while we spawn 
-		 * off a thread to collect up the communication */
+	/* lockres is held with down_write throughout this call */ 
+	
+	LOG_TRACE_STR("CHANGE_MASTER");
+	LOG_TRACE_PROCESS_VOTE("doing CHANGE_MASTER for this request\n");
+
+	if (ocfs_lock_busy(osb, inode, lockres)) {
+		*vote_response = FLAG_VOTE_UPDATE_RETRY;
+		*status = 0;
+		return 1;
+	}
+
+	/* this is currently a readonly EX lock.
+	 * need to communicate to all the nodes in the 
+	 * map that lock will be changing to RW before we
+	 * continue.  RETRY this request while we spawn 
+	 * off a thread to collect up the communication */
+	if (!ocfs_node_map_is_empty(&lockres->readonly_map)) {
+		// assumption: node asking for vote has already dropped readonly
+		ocfs_node_map_clear_bit(&lockres->readonly_map, node_num);
+		// should not be in there, but...
+		ocfs_node_map_clear_bit(&lockres->readonly_map, osb->node_num);
 		if (!ocfs_node_map_is_empty(&lockres->readonly_map)) {
-			// assumption: node asking for vote has already dropped readonly_node
-			ocfs_node_map_clear_bit(&lockres->readonly_map, node_num);
-			// should not be in there, but...
-			ocfs_node_map_clear_bit(&lockres->readonly_map, osb->node_num);
-			if (!ocfs_node_map_is_empty(&lockres->readonly_map)) {
-				OCFS_ASSERT(lockres->readonly_node == osb->node_num);
-				OCFS_ASSERT(inode);
-				status = ocfs_drop_readonly_cache_lock(osb, inode, 1);
-				if (status < 0)
-					LOG_ERROR_STATUS(status);
-#ifdef VERBOSE_PROCESS_VOTE
-				printk("process_vote: node map is not "
-				       "empty on readonly drop "
-				       "request\n");
-#endif
-				vote_response = FLAG_VOTE_UPDATE_RETRY;
-				goto vote;
-			}
-			// noone left in map, so continue
-			lockres->readonly_node = OCFS_INVALID_NODE_NUM;
+			OCFS_ASSERT(test_bit(LOCK_STATE_READONLY, &lockres->readonly_state) &&
+				    lockres->master_node_num == osb->node_num);
+			OCFS_ASSERT(inode);
+			*status = ocfs_drop_readonly_cache_lock(osb, inode, 1);
+			if (*status < 0)
+				LOG_ERROR_STATUS(*status);
+			LOG_TRACE_PROCESS_VOTE("node map not empty on RO drop request\n");
+			*vote_response = FLAG_VOTE_UPDATE_RETRY;
+			// did not change master, send response
+			return 1;
 		}
+		// noone left in map, so continue
+		clear_bit(LOCK_STATE_READONLY, &lockres->readonly_state);
+	}
 
-		sync_mapping_buffers(inode->i_mapping);
+	sync_mapping_buffers(inode->i_mapping);
 
 #warning do we need a truncate_inode_pages here?
-		if (lockres->master_node_num != osb->node_num) {
-			printk("are we giving away a lock we don't own!?! "
-			       "inode %llu\n", OCFS_I(inode)->ip_blkno);
-			BUG();
-		}
+	if (lockres->master_node_num != osb->node_num) {
+		printk("are we giving away a lock we don't own!?! "
+		       "inode %llu\n", OCFS_I(inode)->ip_blkno);
+		BUG();
+	}
 
-		status = ocfs_read_bh(osb, lock_id, &fe_bh, OCFS_BH_CACHED, 
-				      inode);
-		if (status < 0) {
-			LOG_ERROR_STATUS (status);
-			goto leave;
-		}
-		lockres->master_node_num = node_num;
-		lockres->lock_type = OCFS_LKM_NLMODE;
-		ocfs_update_disk_lock(osb, fe_bh, inode);
-
-		brelse(fe_bh);
-		vote_response = FLAG_VOTE_NODE;
-		inc_inode_seq = 1;
-		status = 0;
-		change_master_succeeded = 1;
+	*status = ocfs_read_bh(osb, lock_id, &fe_bh, OCFS_BH_CACHED, 
+			      inode);
+	if (*status < 0) {	/* status is a pointer: test the value it points to */
+		LOG_ERROR_STATUS ((*status));
+		return *status;
 	}
+	lockres->master_node_num = node_num;
+	lockres->lock_type = OCFS_LKM_NLMODE;
+	ocfs_update_disk_lock(osb, fe_bh, inode);
+	brelse(fe_bh);
+	*vote_response = FLAG_VOTE_NODE;
+	*status = 0;
 
-	/* Below here, we can't have any of these cases failing if
-	 * there was a successfull change master request. */
+	// master successfully changed
+	return 0;
+}
+
+
+
+/* Returns:  1 if process_vote should vote immediately, 
+ *           0 on success */
+
+/* we can't have any of these cases failing if the change master already succeeded */
+static int ocfs_process_vote_post_change_master(ocfs_super *osb, int vote_type, int flags, int *vote_response, struct inode *inode, ocfs_lock_res *lockres, int *status, __s16 node_num, int *inc_seq)
+{
 	switch (vote_type) {
+		case TRUNCATE_PAGES:
+		case CHANGE_MASTER:
+			/* we dealt with this all above. */
+			break;
+
 		case UPDATE_OIN_INODE:
 			LOG_TRACE_STR("UPDATE_OIN_INODE");
 			atomic_set(&OCFS_I(inode)->ip_needs_verification, 1);
-			vote_response = FLAG_VOTE_OIN_UPDATED;
+			*vote_response = FLAG_VOTE_OIN_UPDATED;
 			break;
 
 		case RELEASE_DENTRY:
-			if (!inode)
-				BUG();
+			OCFS_ASSERT(inode);
 
 			/* we always vote yes on this one. */
-			vote_response = FLAG_VOTE_NODE;
+			*vote_response = FLAG_VOTE_NODE;
 
 			/* do nothing in the release case... hmm,
 			 * perhaps we should just do a verify_update
@@ -900,21 +1054,16 @@
 					inode->i_nlink--;
 			}
 
-			LOG_TRACE_ARGS("pruned dentries for inode %lu, nlink "
-				       "= %u\n", inode->i_ino, inode->i_nlink);
+			LOG_TRACE_ARGS("pruned dentries for inode %lu, nlink = %u\n", 
+				       inode->i_ino, inode->i_nlink);
 			break;
 
-		case DELETE_RELEASE:
-			/* ACK and done */
-			vote_response = FLAG_VOTE_NODE;
-			break;
-
 		case DELETE_ACQUIRE:
 			LOG_TRACE_STR("DELETE_ACQUIRE (part two)");
 			/* If we got this far, then we assume we've
 			 * done the 1st part of the DELETE_ACQUIRE
 			 * case and we just have to commit it. */
-			if (vote_response != FLAG_VOTE_NODE)
+			if (*vote_response != FLAG_VOTE_NODE)
 				BUG();
 
 			ocfs_commit_inode_delete(inode);
@@ -922,23 +1071,14 @@
 
 		case READONLY:
 			LOG_TRACE_STR("READONLY");
-			OCFS_ASSERT(lockres->readonly_node==osb->node_num ||
-				    lockres->readonly_node==OCFS_INVALID_NODE_NUM);
+			// WRITELOCK
+			OCFS_ASSERT(!(test_bit(LOCK_STATE_READONLY, &lockres->readonly_state)) ||
+				    lockres->master_node_num == osb->node_num);
 
-			if (lockres->lock_holders > 0) {
-#ifdef VERBOSE_PROCESS_VOTE
-				printk("process_vote: (readonly) Lock id (%llu) has %u "
-				       "holders\n",
-				       OCFS_I(inode)->ip_blkno << inode->i_sb->s_blocksize_bits,
-				       lockres->lock_holders);
-#endif
-				// kick the commit thread
-				atomic_set(&osb->flush_event_woken, 1);
-				wake_up(&osb->flush_event);
-
-				vote_response = FLAG_VOTE_UPDATE_RETRY;
-				status = 0;
-				break;
+			if (ocfs_lock_busy(osb, inode, lockres)) {
+				*vote_response = FLAG_VOTE_UPDATE_RETRY;
+				*status = 0;
+				return 1;
 			}
 
 			// if the requestor just wants to do readonly, we 
@@ -946,9 +1086,9 @@
 			sync_mapping_buffers(inode->i_mapping);
 
 			ocfs_node_map_set_bit(&lockres->readonly_map, node_num);
-			lockres->readonly_node = osb->node_num;
-			vote_response = FLAG_VOTE_NODE;
-			status = 0;
+			set_bit(LOCK_STATE_READONLY, &lockres->readonly_state);
+			*vote_response = FLAG_VOTE_NODE;
+			*status = 0;
 			break;
 
 		case DROP_READONLY:
@@ -967,137 +1107,52 @@
 
 			/* this path should always succeed on the vote *
 			 * even in the error case.  do nothing for error. */	
+			
+			// WRITELOCK
 			if (lockres->master_node_num != node_num ||
 			    lockres->lock_type != OCFS_LKM_EXMODE ||
 			    !ocfs_node_map_is_empty(&lockres->readonly_map))
-				LOG_ERROR_ARGS("(drop-ro) master=%d node_num=%d locktype=%d ronode=%d\n",
+				LOG_ERROR_ARGS("(drop-ro) master=%d node_num=%d locktype=%d readonly=%s\n",
 				       lockres->master_node_num, node_num, lockres->lock_type, 
-				       lockres->readonly_node);
+				       test_bit(LOCK_STATE_READONLY, &lockres->readonly_state) ? "yes" : "no");
 			else
-				lockres->readonly_node = OCFS_INVALID_NODE_NUM;
+				clear_bit(LOCK_STATE_READONLY, &lockres->readonly_state);	/* RO dropped */
 
-			status = 0;
-			vote_response = FLAG_VOTE_NODE;
-			inc_inode_seq = 1;
+			*status = 0;
+			*vote_response = FLAG_VOTE_NODE;
+			*inc_seq = 1;
 			break;
 			
 		case NOT_MASTER:
 			LOG_TRACE_STR("NOT_MASTER");
-			vote_response = FLAG_VOTE_UPDATE_RETRY;
+			*vote_response = FLAG_VOTE_UPDATE_RETRY;
 			if (flags & (FLAG_FILE_EXTEND|FLAG_FILE_TRUNCATE) &&
 			    lockres->master_node_num == node_num) {
 				LOG_TRACE_STR("owner is requesting extend/truncate");
-				vote_response = FLAG_VOTE_NODE;
+				*vote_response = FLAG_VOTE_NODE;
 			}
 			break;
 
 		case REMASTER_THIS:
 			LOG_TRACE_STR("REMASTER_THIS");
-			vote_response = FLAG_VOTE_UPDATE_RETRY;
+			*vote_response = FLAG_VOTE_UPDATE_RETRY;
 			break;
 
 		case REMASTER_REQUESTOR:
 			LOG_TRACE_STR("REMASTER_REQUESTOR");
-			vote_response = FLAG_VOTE_NODE;
+			*vote_response = FLAG_VOTE_NODE;
 			break;
 
-		case TRUNCATE_PAGES:
-		case CHANGE_MASTER:
-			/* we dealt with this all above. */
-			break;
-
 		case INVALID_REQUEST:
 		default:
 			LOG_TRACE_STR("INVALID_REQUEST");
-			vote_response = 0;
+			*vote_response = 0;
 			break;
 	}
+	return 0;
+}
 
-	if (change_master_succeeded && 
-	    ((vote_response == 0) || (vote_response > FLAG_VOTE_OIN_UPDATED)))
-	    BUG();
 
-	if (inode && (flags & (FLAG_FILE_EXTEND|FLAG_FILE_TRUNCATE)) && 
-	    ((flags & FLAG_ACQUIRE_LOCK && vote_response==FLAG_VOTE_NODE) ||
-	    (flags & FLAG_RELEASE_LOCK))) {
-
-		LOG_TRACE_ARGS("responding YES to %s %s request, inode=%p, node=%u\n", flags & FLAG_FILE_EXTEND ?
-				      "extend" : "truncate", flags & FLAG_RELEASE_LOCK ? 
-				      "release" : "acquire", inode, node_num);
-
-		if (flags & FLAG_ACQUIRE_LOCK)
-			ocfs_mark_inode_for_extend(osb, inode, node_num);
-		else if (flags & FLAG_RELEASE_LOCK)
-			ocfs_clear_inode_for_extend(osb, inode, node_num, 
-						    num_ident);
-		else {
-			printk("uhoh, bad vote flags! 0x%x\n", flags);
-			BUG();
-		}
-	}
-
-vote:
-	status = ocfs_send_vote_reply(osb, dlm_msg, vote_response);
-
-#ifdef VERBOSE_PROCESS_VOTE
-	printk("(%u) vote: lockid=%llu, node=%d, seqnum=%llu, response=%d\n",current->pid, lock_id, node_num, seq_num, vote_response);
-#endif
-		       
-	if (status < 0)
-		LOG_ERROR_STATUS (status);
-	else {
-		ocfs_compute_dlm_stats (0, vote_response,
-					&(OcfsGlobalCtxt.net_reply_stats));
-		ocfs_compute_dlm_stats (0, vote_response,
-				       	&(osb->net_reply_stats));
-	}
-
-leave:
-	if (lockres) {
-		ocfs_release_lockres (lockres); // ocfs_process_vote
-		lockres = NULL;
-	}
-
-	if (!inode)
-		goto no_inode_leave;
-
-	if (atomic_read(&OCFS_I(inode)->ip_needs_verification) 
-	    && vote_type == UPDATE_OIN_INODE) {
-		have_io_sem = 1;
-#ifdef PROCESS_VOTE_TRYLOCK
-		if (ocfs_io_sem_read_trylock(inode, (OCFS_NM_HEARTBEAT_TIME/2))) {
-			LOG_ERROR_ARGS("Could not verify_update on %llu\n",
-				       OCFS_I(inode)->ip_blkno);
-			have_io_sem = 0;
-		}
-#else
-		down_read(&OCFS_I(inode)->ip_io_sem);
-#endif
-		if (have_io_sem) {
-			tmpstat = ocfs_verify_update_inode(osb, inode);
-			if (tmpstat < 0)
-				LOG_ERROR_STATUS(tmpstat);
-			up_read(&OCFS_I(inode)->ip_io_sem);
-			have_io_sem = 0;
-		}
-	}
-
-	if (inc_inode_seq) {
-		ocfs_inc_inode_seq(osb, inode);
-		sync_mapping_buffers(inode->i_mapping);
-	}
-	iput(inode);
-
-no_inode_leave:
-	osb->voting_ino = 0;
-
-	up(&osb->vote_sem);
-
-	LOG_EXIT_STATUS (status);
-	return status;
-}				/* ocfs_process_vote */
-
-
 /* inode is definitely non NULL */
 void ocfs_inc_inode_seq(ocfs_super *osb, struct inode *inode)
 {
@@ -1169,7 +1224,7 @@
 	int status = 0;
 	ocfs_lock_res *lockres = GET_INODE_LOCKRES(inode);
 					
-	if (lockres->lock_state & FLAG_READONLY_DROPPING) {
+	if (test_bit(LOCK_STATE_READONLY_DROPPING, &lockres->readonly_state)) {
 		// if coming from process_vote, go about our merry way
 		if (yield)
 			return 0;
@@ -1209,25 +1264,25 @@
 
 	/* this will wait until process_vote gets to the release */
 	if (yield)
-		ocfs_acquire_lockres(lockres, 0); // ocfs_process_vote ocfs_acquire_lock
-
+		ocfs_acquire_lockres_write(inode);
 	/* check these under the lock */	
-	if (lockres->readonly_node != osb->node_num ||
+	if (!(test_bit(LOCK_STATE_READONLY, &lockres->readonly_state)) ||
 	    lockres->master_node_num != osb->node_num ||
 	    lockres->lock_type != OCFS_LKM_EXMODE) {
-		LOG_ERROR_ARGS("bad RO lockres!  this=%d, ro_node=%d, master=%d, locktype=%u\n",
-			       osb->node_num, lockres->readonly_node, 
+		LOG_ERROR_ARGS("bad RO lockres!  this=%d, readonly=%s, master=%d, locktype=%u\n",
+			       osb->node_num, 
+			       test_bit(LOCK_STATE_READONLY, &lockres->readonly_state) ? "yes" : "no", 
 			       lockres->master_node_num, lockres->lock_type);
 		status = -EINVAL;
 		goto leave;
 	}
 
-	if (lockres->lock_state & FLAG_READONLY_DROPPING) {	
+	if (test_bit(LOCK_STATE_READONLY_DROPPING, &lockres->readonly_state)) {
 		status = 0;
 		goto leave;
 	}
 
-	lockres->lock_state |= FLAG_READONLY_DROPPING;
+	set_bit(LOCK_STATE_READONLY_DROPPING, &lockres->readonly_state);
 	/* remove this node */
 	ocfs_node_map_clear_bit(&lockres->readonly_map, osb->node_num);
 	
@@ -1241,9 +1296,9 @@
 			status = 0;
 			if (yield) {
 				/* from nm thread, give some time to waiters */
-				ocfs_release_lockres(lockres); // ocfs_process_vote ocfs_acquire_lock
+				ocfs_release_lockres_write(inode);
 				ocfs_sleep(50);
-				ocfs_acquire_lockres(lockres, 0); // ocfs_process_vote ocfs_acquire_lock
+				ocfs_acquire_lockres_write(inode);
 			}
 			continue;
 		}
@@ -1253,14 +1308,15 @@
 	}
 			
 	if (ocfs_node_map_is_empty(&lockres->readonly_map) &&	
-	    lockres->readonly_node == osb->node_num)
-		lockres->readonly_node = OCFS_INVALID_NODE_NUM;
+	    test_bit(LOCK_STATE_READONLY, &lockres->readonly_state) &&
+	    lockres->master_node_num == osb->node_num)
+		clear_bit(LOCK_STATE_READONLY, &lockres->readonly_state);
 
-	lockres->lock_state &= ~FLAG_READONLY_DROPPING;
+	clear_bit(LOCK_STATE_READONLY_DROPPING, &lockres->readonly_state);
 
 leave:
 	if (yield)
-		ocfs_release_lockres(lockres); // ocfs_process_vote ocfs_acquire_lock
+		ocfs_release_lockres_write(inode); // ocfs_process_vote ocfs_acquire_lock
 
 	if (inode)
 		iput(inode);

Modified: trunk/src/nm.h
===================================================================
--- trunk/src/nm.h	2004-09-09 01:45:20 UTC (rev 1440)
+++ trunk/src/nm.h	2004-09-09 08:03:26 UTC (rev 1441)
@@ -32,7 +32,7 @@
 int ocfs_drop_readonly_cache_lock(ocfs_super *osb, struct inode *inode,
 				  int yield);
 void ocfs_inc_inode_seq(ocfs_super *osb, struct inode *inode);
-int ocfs_process_vote(ocfs_super *osb, ocfs_vote_request_ctxt *ctxt);
+int ocfs_process_vote (ocfs_super * osb, ocfs_dlm_msg *dlm_msg);
 int ocfs_recv_thread(void *unused);
 void ocfs_recover_oin_locks(ocfs_super *osb, __u32 node_num);
 int ocfs_volume_thread(void *arg);

Modified: trunk/src/ocfs.h
===================================================================
--- trunk/src/ocfs.h	2004-09-09 01:45:20 UTC (rev 1440)
+++ trunk/src/ocfs.h	2004-09-09 08:03:26 UTC (rev 1441)
@@ -192,9 +192,22 @@
 
 #define  OCFS_INVALID_NODE_NUM         -1
 
-/* lockres->lock_state flags */
-#define  FLAG_READONLY_DROPPING        0x00000008
+/* lockres->lock_state bits */
+enum {
+	LOCK_STATE_READONLY,
+	LOCK_STATE_READONLY_DROPPING,
+	LOCK_STATE_BLOCK_EXCLUSIVE,
+	LOCK_STATE_BLOCK_READONLY
+};
 
+enum {
+	NO_LOCK=0,
+	READ_LOCK,
+	WRITE_LOCK
+};
+
+
+
 /* osb->osb_flags flags */
 #define  OCFS_OSB_FLAGS_BEING_DISMOUNTED  (0x00000004)
 #define  OCFS_OSB_FLAGS_SHUTDOWN          (0x00000008)
@@ -343,12 +356,11 @@
 struct _ocfs_lock_res
 {
 	__s16 master_node_num;	/* Master Node */
-	__u32 lock_state;
 	__u32 lock_holders;
 	__u32 uncommitted_holders;
 	__u8 lock_type;
-	struct semaphore lock_mutex;
-	__s16 readonly_node;
+	struct rw_semaphore lock;
+	int readonly_state;
 	ocfs_node_map readonly_map;
 };
 

Modified: trunk/src/ocfs_log.h
===================================================================
--- trunk/src/ocfs_log.h	2004-09-09 01:45:20 UTC (rev 1440)
+++ trunk/src/ocfs_log.h	2004-09-09 08:03:26 UTC (rev 1441)
@@ -269,6 +269,7 @@
 # define  LOG_TRACE_ARGS(fmt, arg...)
 # define  LOG_PID_PRINTK(fmt, arg...)
 # define  LOG_PID_STR(str)
+# define  LOG_TRACE_PROCESS_VOTE(fmt, arg...) 
 #endif                          /* !defined(TRACE) */
 
 	
@@ -354,6 +355,12 @@
 #define LOG_TRACE_STR(str)     LOG_TRACE_ARGS("%s\n", str)
 #define LOG_TRACE_STATUS(val)  LOG_TRACE_ARGS("%d\n", val);
 
+#ifdef VERBOSE_PROCESS_VOTE
+#define LOG_TRACE_PROCESS_VOTE  LOG_TRACE_ARGS
+#else
+#define LOG_TRACE_PROCESS_VOTE(fmt, arg...) 
+#endif
+
 #endif				/* TRACE */
 
 

Modified: trunk/src/vote.c
===================================================================
--- trunk/src/vote.c	2004-09-09 01:45:20 UTC (rev 1440)
+++ trunk/src/vote.c	2004-09-09 08:03:26 UTC (rev 1441)
@@ -655,7 +655,6 @@
 	ocfs_dlm_msg *dlm_msg;
 	ocfs_dlm_req_master *req_master;
 	struct list_head *iter_osb, *temp_iter;
-	ocfs_vote_request_ctxt ctxt;
 	__s16 src_node;
 
 	LOG_ENTRY ();
@@ -690,10 +689,7 @@
 
 	switch (dlm_msg->msg_type) {
 	case OCFS_VOTE_REQUEST:
-		ctxt.dlm_msg = dlm_msg;
-		ctxt.node_num = dlm_msg->src_node;
-		ctxt.status = 0;
-		ocfs_process_vote (osb, &ctxt);
+		status = ocfs_process_vote (osb, dlm_msg);
 		break;
 
 	case OCFS_VOTE_REPLY:
@@ -893,7 +889,7 @@
 			       OCFS_I(inode)->ip_blkno, obj->vote_status, 
 			       obj->vote_state, lock_id, flags, lock_type, 
 			       GET_INODE_LOCKRES(inode)->master_node_num, 
-			       GET_INODE_LOCKRES(inode)->lock_state, 
+			       GET_INODE_LOCKRES(inode)->readonly_state, 
 			       GET_INODE_LOCKRES(inode)->lock_type);
 	}
 	*vote_status = obj->vote_status;



More information about the Ocfs2-commits mailing list