[Ocfs2-commits] mfasheh commits r1364 - trunk/src

svn-commits at oss.oracle.com svn-commits at oss.oracle.com
Thu Aug 19 18:18:02 CDT 2004


Author: mfasheh
Date: 2004-08-19 17:18:00 -0500 (Thu, 19 Aug 2004)
New Revision: 1364

Modified:
   trunk/src/alloc.c
   trunk/src/aops.c
   trunk/src/dlm.c
   trunk/src/file.c
   trunk/src/inode.c
   trunk/src/journal.c
   trunk/src/lockres.c
   trunk/src/lockres.h
   trunk/src/namei.c
   trunk/src/nm.c
   trunk/src/ocfs.h
   trunk/src/ocfs_journal.h
   trunk/src/vote.c
Log:
* get rid of most unused lock flags (FLAG_DIR is still there), and
  re-arrange them so they're easier to read. All nodes in a cluster will have
  to be simultaneously upgraded after this revision as a result though (the
  values have changed).

* Fix ocfs_inode_is_new to have a nicer return value. Also, fix a bug where
  it wasn't dealing with system files properly.

* We were printing out an error on -EBUSY in vote.c and dlm.c even though
  it's an entirely valid return vote (can't delete because file is still
  open).

* ocfs_notify_on_open() didn't retry its loop properly on -ETIMEDOUT

* fix up ocfs_update_lockres() to do things correctly, and to only read when
  it's absolutely necessary. As a result it's safe to call this from
  process_vote now (so I removed that #warning)

* we had a nasty bug in which certain lock requests (like a release_dentry  
  for instance) skipped the whole CHANGE_MASTER stuff on the other node. We
  now make acquire_lock put that in there for those types of requests and
  process_vote can handle a change master along with most other requests now
  too.

* clean up the flag computation in acquire_lock and new_lock_function.
  - we now avoid doing lock writes (and cache zapping) unless it's actually
    necessary.
  - new_lock_function() takes on the full task of figuring out who to vote
    with, as opposed to before where it was split between that and
    acquire_lock.
  - stuff is actually readable now.

* fix a bug in new_lock_function() where we were setting
  lockres->master_node_num unconditionally. It's now set only when we're going
  to write out a new lock state.

* update and fill out the lockres in symlink and in read_inode, two critical
  places where we were leaving the lockres with invalid data.

* as a result of these last two cleanups, I managed to get rid of a case in
  process_vote where we had a stale lockres which was causing voting issues.

* BUG() in find_inode/find_actor if we have no generic_ip.



Modified: trunk/src/alloc.c
===================================================================
--- trunk/src/alloc.c	2004-08-19 20:38:04 UTC (rev 1363)
+++ trunk/src/alloc.c	2004-08-19 22:18:00 UTC (rev 1364)
@@ -362,7 +362,7 @@
 
 			status = ocfs_acquire_lock (osb, 
 						    OCFS_LKM_EXMODE,
-						    FLAG_FILE_CREATE,
+						    0,
 						    NULL,
 						    inode_alloc_inode[i]);
 			if (status < 0) {
@@ -373,7 +373,7 @@
 				goto abort;
 			}
 			ocfs_handle_add_lock(handle, OCFS_LKM_EXMODE, 
-					     FLAG_FILE_CREATE, 
+					     0, 
 					     inode_alloc_inode[i]);
 		}
 	}
@@ -391,7 +391,7 @@
 
 			status = ocfs_acquire_lock (osb, 
 						    OCFS_LKM_EXMODE,
-						    FLAG_FILE_CREATE,
+						    0,
 						    NULL,
 						    ext_alloc_inode[i]);
 			if (status < 0) {
@@ -402,7 +402,7 @@
 				goto abort;
 			}
 			ocfs_handle_add_lock(handle, OCFS_LKM_EXMODE, 
-					     FLAG_FILE_CREATE,  
+					     0,  
 					     ext_alloc_inode[i]);
 		}
 	}
@@ -417,7 +417,7 @@
 		ocfs_handle_add_inode(handle, vol_inode);
 
 		status = ocfs_acquire_lock(osb, OCFS_LKM_EXMODE,
-					   FLAG_FILE_CREATE,
+					   0,
 					   &globalbh, vol_inode);
 		if (status < 0) {
 			iput(vol_inode);
@@ -428,7 +428,7 @@
 			goto abort;
 		}
 		ocfs_handle_add_lock(handle, OCFS_LKM_EXMODE, 
-				     FLAG_FILE_CREATE, vol_inode);
+				     0, vol_inode);
 	}
 
 	/* free vol block */
@@ -3140,14 +3140,14 @@
 	OCFS_ASSERT (blockSize);
 
 	status = ocfs_acquire_lock (osb, OCFS_LKM_EXMODE,
-			     FLAG_FILE_CREATE, &bh, inode);
+			     0, &bh, inode);
 	if (status < 0) {
 		LOG_ERROR_STATUS (status);
 		goto leave;
 	}
 
 	ocfs_handle_add_lock(handle, OCFS_LKM_EXMODE, 
-			     FLAG_FILE_CREATE, inode);
+			     0, inode);
 
 	numBits = ((bytes_wanted + (blockSize-1)) >> blockSizeBits);
 	numBytes = (u64)numBits << blockSizeBits;

Modified: trunk/src/aops.c
===================================================================
--- trunk/src/aops.c	2004-08-19 20:38:04 UTC (rev 1363)
+++ trunk/src/aops.c	2004-08-19 22:18:00 UTC (rev 1364)
@@ -87,7 +87,7 @@
 
 	/* We don't use the page cache to create symlink data, so if
 	 * need be, copy it over from the buffer cache. */
-	if (!buffer_uptodate(bh_result) && !ocfs_inode_is_new(osb, inode)) {
+	if (!buffer_uptodate(bh_result) && ocfs_inode_is_new(osb, inode)) {
 		buffer_cache_bh = sb_getblk(osb->sb, 
 					    fe->id2.i_list.l_recs[0].e_blkno + iblock);
 		if (!buffer_cache_bh) {
@@ -100,7 +100,7 @@
 		 * the bh, even if it commits while we're doing the
 		 * copy, the data is still good. */
 		if (buffer_jbd(buffer_cache_bh) 
-		    && !ocfs_inode_is_new(osb, inode)) {
+		    && ocfs_inode_is_new(osb, inode)) {
 			kaddr = kmap_atomic(bh_result->b_page, KM_USER0);
 			if (!kaddr) {
 				LOG_ERROR_ARGS("couldn't kmap!\n");

Modified: trunk/src/dlm.c
===================================================================
--- trunk/src/dlm.c	2004-08-19 20:38:04 UTC (rev 1363)
+++ trunk/src/dlm.c	2004-08-19 22:18:00 UTC (rev 1364)
@@ -451,7 +451,7 @@
 		ocfs_acquire_lockres(lockres, 0); // ocfs_file_open 
 
 		flags = FLAG_TRUNCATE_PAGES;
-		if (!ocfs_inode_is_new(osb, inode))
+		if (ocfs_inode_is_new(osb, inode))
 			flags |= FLAG_FAST_PATH_LOCK;
 
 		status = new_lock_function(osb, lockres->lock_type, 
@@ -460,7 +460,7 @@
 			if (status != -EAGAIN)
 				LOG_ERROR_STATUS (status);
 			ocfs_release_lockres (lockres); // ocfs_file_open ocfs_symlink
-			if (status == -EAGAIN) {
+			if (status == -EAGAIN || status == -ETIMEDOUT) {
 				if (ocfs_task_interruptible ()) {
 					LOG_TRACE_ARGS("interrupted... lockid=%llu\n",
 					OCFS_I(inode)->ip_blkno << inode->i_sb->s_blocksize_bits);
@@ -468,6 +468,7 @@
 					goto bail;
 				}
 				ocfs_sleep (50);
+				status = -EAGAIN;
 				continue;
 			}
 
@@ -512,8 +513,7 @@
 		   __u32 flags, struct buffer_head **bh, struct inode *inode)
 {
 	int status = -EFAIL;
-	ocfs2_dinode *disklock = NULL;
-	__u32 updated = 0;
+	__u32 updated;
 	ocfs_lock_res *lockres = GET_INODE_LOCKRES(inode);
 	int k = 0;
 	struct buffer_head *tmpbh = NULL, **b = NULL;
@@ -526,7 +526,7 @@
 
 	LOG_ENTRY_ARGS ("(0x%p, %u, %u, 0x%p)\n",
 			osb, lock_type, flags, bh);
-	
+
 	OCFS_ASSERT(lock_type != OCFS_LKM_NLMODE);
 	OCFS_ASSERT(inode);
 
@@ -540,41 +540,31 @@
 
 	flags |= FLAG_ACQUIRE_LOCK;
 
-	status = ocfs_update_lockres (osb, b, &updated, 0, inode, 
-				      1, 0);
-	if (status < 0) {
-		LOG_ERROR_STATUS (status);
-		goto bail;
+	*b = sb_getblk(osb->sb, OCFS_I(inode)->ip_blkno);
+	if (*b == NULL) {
+		status = -EIO;
+		LOG_ERROR_STATUS(status);
+		goto finally;
 	}
 
+	updated = 0;
 again:
 	ocfs_acquire_lockres (lockres, 0);  // ocfs_acquire_lock
 
 	LOG_TRACE_ARGS("attempting to get lock, pass: %d\n", ++k);
 
-	if (lockres->master_node_num == osb->node_num)
-		updated = 1;
-
 	if (!updated) {
-		status = ocfs_read_bh (osb, lock_id, b, 0, inode);
+		status = ocfs_update_lockres(osb, *b, 0, inode, 1, 1);
 		if (status < 0) {
-			ocfs_release_lockres (lockres); // ocfs_acquire_lock
+			ocfs_release_lockres (lockres);
 			LOG_ERROR_STATUS (status);
 			goto finally;
 		}
-		disklock = (ocfs2_dinode *) (*b)->b_data;
-
-		if (!IS_VALID_FILE_ENTRY(disklock))
-			BUG();
-
-		if (lockres->master_node_num != osb->node_num || 
-		    lockres->master_node_num != DISK_LOCK(disklock)->dl_master) {
-			lockres->master_node_num = DISK_LOCK(disklock)->dl_master;
-			lockres->lock_type = DISK_LOCK(disklock)->dl_level;
-		}
-		updated = 1;
 	}
 
+	/* alright, if we own it then no more updates are necessary. */
+	if (lockres->master_node_num == osb->node_num)
+		updated = 1;
 reevaluate:
 	no_owner = (lockres->master_node_num == OCFS_INVALID_NODE_NUM);
 
@@ -648,46 +638,50 @@
 		goto finally;
 	}
 
-	if (lockres->master_node_num != osb->node_num &&
-	    (wait_on_recovery || no_owner || owner_dead)) {
-		extra_lock_flags = FLAG_REMASTER;
-	} else if (flags & (FLAG_FILE_DELETE | FLAG_FILE_RENAME 
-			    | FLAG_RELEASE_DENTRY)) {
-		if (ocfs_inode_is_new(osb, inode)!=0) {
-			extra_lock_flags = 0;
-		} else if (lockres->lock_type == OCFS_LKM_EXMODE)
-			extra_lock_flags = FLAG_FAST_PATH_LOCK;
-		else 
-			extra_lock_flags = FLAG_CHANGE_MASTER;
-	} else if (lockres->master_node_num == osb->node_num) {
-		if (flags & (FLAG_FILE_EXTEND | FLAG_FILE_TRUNCATE) && 
-		    ocfs_inode_is_new(osb, inode)!=0)
-			extra_lock_flags = FLAG_REMASTER;
-		else if (lockres->lock_type == OCFS_LKM_EXMODE)
-			extra_lock_flags = FLAG_FAST_PATH_LOCK;
-		else 
-			extra_lock_flags = FLAG_CHANGE_MASTER;
-	} else {
-		extra_lock_flags = FLAG_CHANGE_MASTER;
+	if (ocfs_inode_is_new(osb, inode)) {
+	    if (lockres->master_node_num != osb->node_num) {
+		printk("inode is new, but lockres is out of date! "
+			"owner = %d, type = %d\n", 
+		       lockres->master_node_num, lockres->lock_type);
+
+		BUG();
+	    }
+	    extra_lock_flags |= FLAG_FAST_PATH_LOCK;
 	}
-			
 
+	/* some lock requests are simple messages and don't require a
+	 * master change. */
+	if ((flags & FLAG_TRUNCATE_PAGES) || (flags & FLAG_FILE_RENAME))
+		goto do_lock;
+
+	if ((lockres->master_node_num != osb->node_num)
+	    && (wait_on_recovery || no_owner || owner_dead)) {
+		extra_lock_flags |= FLAG_REMASTER;
+	} else if (lockres->master_node_num != osb->node_num) {
+		extra_lock_flags |= FLAG_CHANGE_MASTER;
+	}
+
 do_lock:
 	LOG_TRACE_ARGS("lockres: master=%d, locktype=%d, flags: %08x\n",
-		       lockres->master_node_num, lockres->lock_type, flags|extra_lock_flags);
-	
+		       lockres->master_node_num, lockres->lock_type, 
+		       flags|extra_lock_flags);
+
 #ifdef VERBOSE_LOCKING_TRACE
-	printk("lockres: lockid=%llu, this=%d, master=%d, locktype=%d, flags=%08x, ronode=%d\n",
-		       OCFS_I(inode)->ip_blkno << inode->i_sb->s_blocksize_bits, osb->node_num, lockres->master_node_num, lockres->lock_type, 
-		       flags|extra_lock_flags, lockres->readonly_node);
+	printk("lockres: lockid=%llu, this=%d, master=%d, locktype=%d, "
+	       "flags=%08x, ronode=%d\n",
+	       OCFS_I(inode)->ip_blkno << inode->i_sb->s_blocksize_bits, 
+	       osb->node_num, lockres->master_node_num, lockres->lock_type, 
+	       flags|extra_lock_flags, lockres->readonly_node);
 #endif	
-	if (wait_on_recovery && !((flags|extra_lock_flags) & FLAG_FILE_RECOVERY)) {
+	if (wait_on_recovery 
+	    && !((flags|extra_lock_flags) & FLAG_FILE_RECOVERY)) {
 		int waitcnt = 0;
 		LOG_TRACE_ARGS("Waiting on node %u to be recovered\n",
 			       	lockres->master_node_num);
 		while (1) {
 			LOG_TRACE_ARGS("waitcnt = %d\n", waitcnt);
-			if (!ocfs_node_is_recovering(osb, lockres->master_node_num))
+			if (!ocfs_node_is_recovering(osb, 
+						     lockres->master_node_num))
 				break;
 			ocfs_sleep(500);
 		}
@@ -696,10 +690,18 @@
 	status = new_lock_function(osb, lock_type, flags|extra_lock_flags, 
 				   *b, &disk_vote, inode);
 	if (status < 0) {
+		/* if we're going to jump back up, we want to update
+		 * if we're not the master... */
+		if (lockres->master_node_num != osb->node_num)
+			updated = 0;
 		ocfs_release_lockres (lockres); // ocfs_acquire_lock
 		if (status == -EAGAIN || status == -ETIMEDOUT) {
 			if (status == -ETIMEDOUT)
-				LOG_ERROR_ARGS("Timed out acquiring lock for inode %llu, (lockid = %llu) retrying...\n", OCFS_I(inode)->ip_blkno, lock_id);
+				LOG_ERROR_ARGS("Timed out acquiring lock for "
+					       "inode %llu, (lockid = %llu) "
+					       "retrying...\n", 
+					       OCFS_I(inode)->ip_blkno, 
+					       lock_id);
 
 			if (ocfs_task_interruptible ()) {
 				LOG_TRACE_ARGS("interrupted... lockid=%llu\n",
@@ -708,7 +710,6 @@
 				goto finally;
 			}
 			ocfs_sleep (50);
-			updated = 0;
 			goto again;
 		}
 		goto finally;
@@ -729,7 +730,6 @@
 	ocfs_release_lockres (lockres); // ocfs_acquire_lock
 
 finally:
-bail:
 	if (tmpbh)
 		brelse(tmpbh);
 
@@ -885,9 +885,6 @@
 		goto finally;
 	}
 
-	if (flags & FLAG_FILE_RELEASE_MASTER)
-		lockres->master_node_num = OCFS_INVALID_NODE_NUM;
-
 	OCFS_ASSERT(lockres->uncommitted_holders <= num_ident);
 
 	/* we don't want to send over a count for any size change
@@ -959,16 +956,20 @@
 	} else if (flags & (FLAG_DROP_READONLY | FLAG_TRUNCATE_PAGES)) {
 		need_lock_write = 0;
 		need_to_zap_buffers = 0;
-	} else if (flags & (FLAG_READDIR | FLAG_FILE_RELEASE_CACHE)) {
+	} else if (flags & FLAG_READDIR) {
 		need_lock_write = 0;
 		need_to_zap_buffers = 1;
+	} else if (!bh) {
+		need_lock_write = 0;
+		need_to_zap_buffers = 0;
 	} else {
-		if (bh) {
-			fe = (ocfs2_dinode *) bh->b_data;
-			/* may not need to rewrite the lock later if we already have a cachelock */
-			need_lock_write = (DISK_LOCK(fe)->dl_master != osb->node_num ||
-					DISK_LOCK(fe)->dl_level != OCFS_LKM_EXMODE);
-		} else
+		fe = (ocfs2_dinode *) bh->b_data;
+		/* may not need to rewrite the lock later if
+		 * we already have a cachelock */
+		if ((DISK_LOCK(fe)->dl_master == osb->node_num)
+		    && (DISK_LOCK(fe)->dl_level != requested_lock))
+			need_lock_write = 1;
+		else
 			need_lock_write = 0;
 		need_to_zap_buffers = 0; 
 	}
@@ -978,11 +979,10 @@
 		goto vote_success;
 
 
+#define BROADCAST_FLAGS (FLAG_FILE_DELETE | FLAG_FILE_RENAME | FLAG_RELEASE_DENTRY | FLAG_FILE_EXTEND | FLAG_FILE_TRUNCATE | FLAG_FILE_UPDATE_OIN | FLAG_TRUNCATE_PAGES | FLAG_DROP_READONLY | FLAG_REMASTER)
 
 	/* figure out who to vote with */
-	if (flags & (FLAG_REMASTER | FLAG_FILE_DELETE |
-		     FLAG_FILE_RENAME | FLAG_DROP_READONLY |
-		     FLAG_RELEASE_DENTRY | FLAG_TRUNCATE_PAGES)) {
+	if (flags & BROADCAST_FLAGS) {
 		ocfs_node_map_dup(osb, &vote_map, &osb->publ_map); /* broadcast */
 		/* only nodes that see this is readonly */
 		if (flags & FLAG_DROP_READONLY)
@@ -1013,7 +1013,8 @@
 	if (status < 0) {
 		if (status != -EAGAIN &&
 		    status != -ETIMEDOUT &&
-		    status != -EINTR)
+		    status != -EINTR &&
+		    status != -EBUSY)
 			LOG_ERROR_STATUS(status);
 		goto bail;
 	}
@@ -1037,14 +1038,11 @@
 		lockres->readonly_node = lockres->master_node_num;
 		goto bail;
 	}
-	
-	/* update the lockres */
-	if (!(flags & FLAG_FILE_RELEASE_CACHE))
-		lockres->master_node_num = osb->node_num;
-	lockres->lock_type = requested_lock;
 
 	/* update the disk lock */
 	if (need_lock_write) {
+		lockres->lock_type = requested_lock;
+		lockres->master_node_num = osb->node_num;
 		status = ocfs_update_disk_lock (osb, lock_write_flags, &bh, inode);
 		if (status < 0)
 			LOG_ERROR_STATUS (status);

Modified: trunk/src/file.c
===================================================================
--- trunk/src/file.c	2004-08-19 20:38:04 UTC (rev 1363)
+++ trunk/src/file.c	2004-08-19 22:18:00 UTC (rev 1364)
@@ -488,7 +488,7 @@
 {
 	int status = 0;
 	ocfs2_dinode *fileEntry = NULL;
-	u32 lock_flags = FLAG_FILE_UPDATE;
+	u32 lock_flags = 0;
 	struct buffer_head *bh = NULL;
 	ocfs_journal_handle *handle = NULL;
 

Modified: trunk/src/inode.c
===================================================================
--- trunk/src/inode.c	2004-08-19 20:38:04 UTC (rev 1363)
+++ trunk/src/inode.c	2004-08-19 22:18:00 UTC (rev 1364)
@@ -178,6 +178,12 @@
 
 	LOG_ENTRY_ARGS ("(0x%p, %lu, %llu, 0x%p)\n", inode, inode->i_ino, OCFS_I(inode)->ip_blkno, opaque);
 
+	if (!inode->u.generic_ip) {
+		LOG_ERROR_ARGS("inode %lu has no generic_ip (is_bad_inode = "
+			       "%d)!\n", inode->i_ino, is_bad_inode(inode));
+		BUG();
+	}
+
 	if (inode == NULL)
 		goto bail;
 
@@ -231,6 +237,12 @@
 
 	LOG_ENTRY_ARGS ("(0x%p, %lu, 0x%p)\n", inode, ino, opaque);
 	
+	if (!inode->u.generic_ip) {
+		LOG_ERROR_ARGS("inode %lu has no generic_ip (is_bad_inode = "
+			       "%d)!\n", inode->i_ino, is_bad_inode(inode));
+		BUG();
+	}
+
 	/* We have unique inode numbers so these are just sanity
 	 * checks at this point. */
 	if (opaque == NULL || inode == NULL)
@@ -494,6 +506,11 @@
 		OCFS_SET_FLAG(OCFS_I(inode)->ip_flags, OCFS_INODE_SYSTEM_FILE);
 
 	ocfs_init_lockres (osb, inode);
+	status = ocfs_update_lockres(osb, bh, 0, inode, 0, 1);
+	if (status < 0) {
+		make_bad_inode(inode);
+		goto bail;
+	}
 	status = 0;
 
 bail:
@@ -599,15 +616,13 @@
 
 	ocfs_handle_add_inode(handle, orphan_dir_inode);
 
-	status = ocfs_acquire_lock(osb, OCFS_LKM_EXMODE, 
-				   FLAG_FILE_CREATE | FLAG_DIR,
+	status = ocfs_acquire_lock(osb, OCFS_LKM_EXMODE, FLAG_DIR,
 				   &orphan_dir_bh, orphan_dir_inode);
 	if (status < 0) {
 		LOG_ERROR_STATUS(status);
 		goto bail_locked;
 	}
-	ocfs_handle_add_lock(handle, OCFS_LKM_EXMODE, 
-			     FLAG_FILE_CREATE | FLAG_DIR,
+	ocfs_handle_add_lock(handle, OCFS_LKM_EXMODE, FLAG_DIR,
 			     orphan_dir_inode);
 
 	if (S_ISDIR(inode->i_mode))
@@ -1074,7 +1089,7 @@
 	up(&OCFS_I(inode)->ip_sem);
 	drop_ip_sem = 0;
 
-	status = ocfs_update_lockres (osb, &fe_bh, NULL, 0, inode, 0, 
+	status = ocfs_update_lockres (osb, fe_bh, 0, inode, 0, 
 				      lockres_locked);
 
 	status = 0;

Modified: trunk/src/journal.c
===================================================================
--- trunk/src/journal.c	2004-08-19 20:38:04 UTC (rev 1363)
+++ trunk/src/journal.c	2004-08-19 22:18:00 UTC (rev 1364)
@@ -1023,7 +1023,7 @@
 	SET_INODE_JOURNAL(inode);
 
 	status = ocfs_acquire_lock (osb, OCFS_LKM_EXMODE,
-				    FLAG_FILE_CREATE, &bh, inode);
+				    0, &bh, inode);
 	if (status < 0) {
 		up_write(&OCFS_I(inode)->ip_io_sem);
 
@@ -1170,8 +1170,7 @@
 	OCFS_I(inode)->ip_open_cnt--;
 
 	/* unlock our journal */
-	status = ocfs_release_lock (osb, OCFS_LKM_EXMODE,
-				    FLAG_FILE_CREATE, inode);
+	status = ocfs_release_lock (osb, OCFS_LKM_EXMODE, 0, inode);
 	if (status < 0)
 		LOG_ERROR_STATUS (status);
 
@@ -1460,8 +1459,7 @@
 	if (osb->node_num == node_num)
 		BUG();
 
-	status = ocfs_acquire_lock (osb, OCFS_LKM_EXMODE,
-				    FLAG_FILE_CREATE|FLAG_FILE_RECOVERY, 
+	status = ocfs_acquire_lock (osb, OCFS_LKM_EXMODE, FLAG_FILE_RECOVERY, 
 				    &bh, inode);
 
 	up_write(&OCFS_I(inode)->ip_io_sem);
@@ -1554,8 +1552,8 @@
 	/* drop the lock on this nodes journal */
 	if (got_lock) {
 		tmpstat = ocfs_release_lock(osb, OCFS_LKM_EXMODE, 
-					   FLAG_FILE_CREATE|FLAG_FILE_RECOVERY,
-					   inode);
+					    FLAG_FILE_RECOVERY,
+					    inode);
 		if (tmpstat < 0)
 			LOG_ERROR_STATUS(tmpstat);
 	}

Modified: trunk/src/lockres.c
===================================================================
--- trunk/src/lockres.c	2004-08-19 20:38:04 UTC (rev 1363)
+++ trunk/src/lockres.c	2004-08-19 22:18:00 UTC (rev 1364)
@@ -47,90 +47,93 @@
 #define OCFS_DEBUG_CONTEXT      OCFS_DEBUG_CONTEXT_LOCKRES
 
 
-
-
 /*
  * ocfs_update_lockres()
  *
  * @osb: ocfs super block for the volume
  * @fe: corresponding file entry
- * @updated: set to 1 if lockres is refreshed from disk
  *
  * the lockres is refreshed from the disk.
  *
  * Returns 0 if success, < 0 if error.
  */
-int ocfs_update_lockres(ocfs_super *osb, struct buffer_head **bh,
-			__u32 *updated, __u32 timeout,
-			struct inode *inode, int reread, int locked)
+int ocfs_update_lockres(ocfs_super *osb, struct buffer_head *bh,
+			__u32 timeout, struct inode *inode, int reread, 
+			int locked)
 {
 	int status = 0;
-	struct buffer_head *tmpbh = NULL, **b = NULL;
 	ocfs2_dinode *fe;
 	int flags;
+	int drop_bh = 0;
 	ocfs_lock_res *lockres = GET_INODE_LOCKRES(inode);;
 
-	LOG_ENTRY_ARGS("(0x%p, %llu, 0x%p, 0x%p, 0x%p)\n", osb,
+	LOG_ENTRY_ARGS("(0x%p, %llu, 0x%p, 0x%p)\n", osb,
 		       OCFS_I(inode)->ip_blkno << inode->i_sb->s_blocksize_bits,
-		       lockres, bh, updated);
+		       lockres, bh);
 
-	b = (bh == NULL) ? &tmpbh : bh;
+	/* hey, you can't do that!  ;) */
+	if ((!bh) && !reread)
+		BUG();
 
+	if (!bh) 
+		drop_bh = 1;
+
+	if (!locked)
+		status = ocfs_acquire_lockres (lockres, timeout);
+	if (status < 0) {
+		LOG_TRACE_ARGS ("Timedout locking lockres for inode: %llu\n",
+				OCFS_I(inode)->ip_blkno);
+		goto out;
+	}
+
+	/* Behavior for process_vote: if you don't pass a buffer, then
+	 * we'll only read if you're not he master. */
+	if ((bh == NULL) && (lockres->master_node_num == osb->node_num))
+		goto out_unlock;
+
 	if (reread) {
 		flags = lockres->master_node_num == osb->node_num ? 
 		    	OCFS_BH_CACHED : 0;
 		status = ocfs_read_bh(osb,
 				      OCFS_I(inode)->ip_blkno << inode->i_sb->s_blocksize_bits,
-				      b, flags, inode);
+				      &bh, flags, inode);
 		if (status < 0) {
 			LOG_ERROR_STATUS (status);
-			goto finally;
+			goto out_unlock;
 		}
 	}
 
-	if (lockres->master_node_num != osb->node_num || !reread) {
-		if (!locked)
-			status = ocfs_acquire_lockres (lockres, timeout); // ocfs_file_open ocfs_symlink ocfs_acquire_lock ocfs_process_vote ocfs_dentry_revalidate ocfs_file_write ocfs_file_read ocfs_setattr ocfs_getattr ocfs_direct_IO_get_blocks ocfs_rename
-		if (status < 0) {
-			LOG_TRACE_ARGS ("Timedout locking lockres for id: %llu\n",
-					OCFS_I(inode)->ip_blkno << inode->i_sb->s_blocksize_bits);
-			goto finally;
-		}
+	fe = (ocfs2_dinode *) bh->b_data;
+	if (!IS_VALID_FILE_ENTRY(fe))
+		BUG();
 
-		fe = (ocfs2_dinode *) (*b)->b_data;
-		if (!IS_VALID_FILE_ENTRY(fe))
-			BUG();
+	lockres->lock_type = DISK_LOCK(fe)->dl_level;
+	lockres->master_node_num = DISK_LOCK(fe)->dl_master;
 
-		lockres->lock_type = DISK_LOCK(fe)->dl_level;
-		lockres->master_node_num = DISK_LOCK(fe)->dl_master;
+	if (lockres->readonly_node != OCFS_INVALID_NODE_NUM &&
+	    lockres->readonly_node != lockres->master_node_num) {
+		LOG_ERROR_ARGS("no longer readonly! ronode=%d, master=%d, lockid=%llu\n",
+			       lockres->readonly_node, lockres->master_node_num,
+			       OCFS_I(inode)->ip_blkno << inode->i_sb->s_blocksize_bits);
+		lockres->readonly_node = OCFS_INVALID_NODE_NUM;
+	}
 
-		if (lockres->readonly_node != OCFS_INVALID_NODE_NUM &&
-    	    	    lockres->readonly_node != lockres->master_node_num) {
-			LOG_ERROR_ARGS("no longer readonly! ronode=%d, master=%d, lockid=%llu\n",
-					lockres->readonly_node, lockres->master_node_num,
-					OCFS_I(inode)->ip_blkno << inode->i_sb->s_blocksize_bits);
-			lockres->readonly_node = OCFS_INVALID_NODE_NUM;
-		}
+out_unlock:
+	if (!locked)
+		ocfs_release_lockres (lockres);
 
-		if (!locked)
-			ocfs_release_lockres (lockres); // ocfs_file_open ocfs_symlink ocfs_acquire_lock ocfs_process_vote ocfs_dentry_revalidate ocfs_file_write ocfs_file_read ocfs_setattr ocfs_getattr ocfs_direct_IO_get_blocks ocfs_rename
-
-		if (updated)
-			*updated = 1;
-	}
-
-finally:
+out:
 	if (status < 0) {
 		if (status != -ETIMEDOUT)
 			LOG_ERROR_STATUS (status);
 	}
 
-	if (tmpbh)
-		brelse(tmpbh);
+	if (bh && drop_bh)
+		brelse(bh);
 
 	LOG_EXIT_STATUS (status);
 	return status;
-}				/* ocfs_find_update_res */
+}				/* ocfs_update_lockres */
 
 /*
  * ocfs_acquire_lockres()
@@ -201,18 +204,15 @@
 
 	LOG_ENTRY_ARGS ("(0x%p, 0x%p)\n", osb, lockres);
 
-	lockres->lock_type = OCFS_LKM_NLMODE;
 	lockres->master_node_num = OCFS_INVALID_NODE_NUM;
 	lockres->lock_state = 0;
-
+	lockres->lock_holders = 0;
+	lockres->uncommitted_holders = 0;
+	lockres->lock_type = OCFS_LKM_NLMODE;
 	init_MUTEX(&lockres->lock_mutex);
-
+	lockres->readonly_node = OCFS_INVALID_NODE_NUM;
 	ocfs_node_map_init(osb, &lockres->readonly_map);
-	lockres->readonly_node = OCFS_INVALID_NODE_NUM;
 
-	lockres->lock_holders = 0;
-	lockres->uncommitted_holders = 0;
-
 	LOG_EXIT ();
 	return;
 }				/* ocfs_init_lockres */

Modified: trunk/src/lockres.h
===================================================================
--- trunk/src/lockres.h	2004-08-19 20:38:04 UTC (rev 1363)
+++ trunk/src/lockres.h	2004-08-19 22:18:00 UTC (rev 1364)
@@ -33,8 +33,8 @@
 int ocfs_acquire_lockres(ocfs_lock_res *lockres, __u32 timeout);
 void ocfs_init_lockres(ocfs_super *osb, struct inode *inode);
 void ocfs_release_lockres(ocfs_lock_res *lockres);
-int ocfs_update_lockres(ocfs_super *osb, struct buffer_head **bh, 
-			__u32 *updated, __u32 timeout, struct inode *inode, 
+int ocfs_update_lockres(ocfs_super *osb, struct buffer_head *bh, 
+			__u32 timeout, struct inode *inode, 
 			int reread, int locked);
 
 #endif /* OCFS2_LOCKRES_H */

Modified: trunk/src/namei.c
===================================================================
--- trunk/src/namei.c	2004-08-19 20:38:04 UTC (rev 1363)
+++ trunk/src/namei.c	2004-08-19 22:18:00 UTC (rev 1364)
@@ -230,8 +230,7 @@
 	}
 
 	/* lock the parent directory */
-	status = ocfs_acquire_lock (osb, OCFS_LKM_EXMODE,
-				    FLAG_FILE_CREATE | FLAG_DIR, 
+	status = ocfs_acquire_lock (osb, OCFS_LKM_EXMODE, FLAG_DIR, 
 				    &parent_fe_bh, dir);
 	if (status < 0) {
 		if (status != -EINTR)
@@ -240,8 +239,7 @@
 	}
 
 	/* Ok, we got the lock -- we'd better add it to our transaction */
-	ocfs_handle_add_lock(handle, OCFS_LKM_EXMODE, 
-			     FLAG_FILE_CREATE | FLAG_DIR, dir);
+	ocfs_handle_add_lock(handle, OCFS_LKM_EXMODE, FLAG_DIR, dir);
 
 	/* do the real work now. */
 	status = ocfs_mknod_locked(osb, dir, dentry, mode, dev,
@@ -267,7 +265,7 @@
 
 	ocfs_init_lockres(osb, inode);
 
-	status = ocfs_update_lockres(osb, &new_fe_bh, NULL, 0, inode, 0, 0);
+	status = ocfs_update_lockres(osb, new_fe_bh, 0, inode, 0, 0);
 	if (S_ISDIR (mode)) {
 		struct buffer_head *newdirbh = NULL;
 		int retval = 0;
@@ -574,29 +572,25 @@
 	down_write(&OCFS_I(dir)->ip_io_sem);
 	drop_dir_sem = 1;
 	/* lock the parent directory */
-	err = ocfs_acquire_lock (osb, OCFS_LKM_EXMODE,
-				    FLAG_FILE_CREATE | FLAG_DIR, 
+	err = ocfs_acquire_lock (osb, OCFS_LKM_EXMODE, FLAG_DIR, 
 				    &parent_fe_bh, dir);
 	if (err < 0) {
 		if (err != -EINTR)
 			LOG_ERROR_STATUS (err);
 		goto bail;
 	}
-	ocfs_handle_add_lock(handle, OCFS_LKM_EXMODE, 
-			     FLAG_FILE_CREATE|FLAG_DIR, dir);
+	ocfs_handle_add_lock(handle, OCFS_LKM_EXMODE, FLAG_DIR, dir);
 
 	down_write(&OCFS_I(inode)->ip_io_sem);
 	drop_inode_sem = 1;
-	err = ocfs_acquire_lock (osb, OCFS_LKM_EXMODE,
-				    FLAG_FILE_CREATE, 
-				    &fe_bh, inode);
+	err = ocfs_acquire_lock (osb, OCFS_LKM_EXMODE, 0, &fe_bh, inode);
 	if (err < 0) {
 		if (err != -EINTR)
 			LOG_ERROR_STATUS (err);
 		goto bail;
 	}
-	ocfs_handle_add_lock(handle, OCFS_LKM_EXMODE, 
-			     FLAG_FILE_CREATE|FLAG_FILE_UPDATE_OIN, inode);
+	ocfs_handle_add_lock(handle, OCFS_LKM_EXMODE, FLAG_FILE_UPDATE_OIN, 
+			     inode);
 
 	fe = (ocfs2_dinode *) fe_bh->b_data;
 	if (fe->i_links_count >= OCFS2_LINK_MAX) {
@@ -699,17 +693,13 @@
 		goto leave;
 	}
 
-	/* lock parent directory, yes we use FLAG_FILE_CREATE even
-	 * though we're deleting ;) */
-	status = ocfs_acquire_lock(osb, OCFS_LKM_EXMODE,
-				   FLAG_FILE_CREATE|FLAG_DIR, &parent_node_bh, 
-				   parentInode);
+	status = ocfs_acquire_lock(osb, OCFS_LKM_EXMODE, FLAG_DIR, 
+				   &parent_node_bh, parentInode);
 	if (status < 0) {
 		LOG_ERROR_STATUS(status);
 		goto leave;
 	}
-	ocfs_handle_add_lock(handle, OCFS_LKM_EXMODE, 
-			     FLAG_FILE_CREATE|FLAG_DIR, parentInode);
+	ocfs_handle_add_lock(handle, OCFS_LKM_EXMODE, FLAG_DIR, parentInode);
 
 	/* this will re-read the directory now with the EXCLUSIVE */
 	/* lock already held; it will also return the blkno to us */
@@ -961,7 +951,6 @@
 	__u64 oldfe_lockid;
 	u64 newfe_blkno;
 	ocfs_journal_handle *handle = NULL;
-	__u32 dir_lock_flags = FLAG_FILE_CREATE | FLAG_DIR;
 	struct buffer_head *old_dir_bh = NULL;
 	struct buffer_head *new_dir_bh = NULL;
 	__u32 oldfe_flags = FLAG_RELEASE_DENTRY | FLAG_FILE_RENAME;
@@ -1019,10 +1008,10 @@
 	/* if old and new are the same, this'll just do one lock. */
 	status = ocfs_double_lock(osb, handle, 
 				  OCFS_LKM_EXMODE, 
-				  dir_lock_flags,
+				  FLAG_DIR,
 				  &old_dir_bh, old_dir,
 				  OCFS_LKM_EXMODE, 
-				  dir_lock_flags,
+				  FLAG_DIR,
 				  &new_dir_bh, new_dir);
 	if (status < 0) {
 		LOG_ERROR_STATUS(status);
@@ -1483,8 +1472,7 @@
 	}
 
 	/* lock the parent directory */
-	status = ocfs_acquire_lock(osb, OCFS_LKM_EXMODE,
-				   FLAG_FILE_CREATE | FLAG_DIR, 
+	status = ocfs_acquire_lock(osb, OCFS_LKM_EXMODE, FLAG_DIR, 
 				   &parent_fe_bh, dir);
 	if (status < 0) {
 		if (status != -EINTR)
@@ -1492,8 +1480,7 @@
 		goto abort_trans;
 	}
 
-	ocfs_handle_add_lock(handle, OCFS_LKM_EXMODE, 
-		FLAG_FILE_CREATE|FLAG_DIR, dir);
+	ocfs_handle_add_lock(handle, OCFS_LKM_EXMODE, FLAG_DIR, dir);
 
 	status = ocfs_mknod_locked(osb, dir, dentry, 
  				   S_IFLNK | S_IRWXUGO, 0,
@@ -1516,6 +1503,9 @@
 
 	ocfs_inode_set_new(osb, inode);
 	ocfs_init_lockres(osb, inode);
+	status = ocfs_update_lockres(osb, new_fe_bh, 0, inode, 0, 0);
+	if (status < 0)
+		LOG_ERROR_STATUS(status);
 
 	status = ocfs_extend_file(osb, newsize, handle, inode, NULL, 0,
 				  new_fe_bh);
@@ -1951,15 +1941,14 @@
 	ocfs_handle_add_inode(handle, orphan_dir_inode);
 
 	/* disk lock orphan dir here. */
-	status = ocfs_acquire_lock(osb, OCFS_LKM_EXMODE,
-				   FLAG_FILE_CREATE | FLAG_DIR,
+	status = ocfs_acquire_lock(osb, OCFS_LKM_EXMODE, FLAG_DIR,
 				   &orphan_dir_bh, orphan_dir_inode);
 	if (status < 0) {
 		LOG_ERROR_STATUS(status);
 		goto leave;
 	}
-	ocfs_handle_add_lock(handle, OCFS_LKM_EXMODE, 
-			     FLAG_FILE_CREATE | FLAG_DIR, orphan_dir_inode);
+	ocfs_handle_add_lock(handle, OCFS_LKM_EXMODE, FLAG_DIR, 
+			     orphan_dir_inode);
 
 	status = __ocfs_add_entry(handle, orphan_dir_inode, name, namelen, 
 				  inode, OCFS_I(inode)->ip_blkno, 

Modified: trunk/src/nm.c
===================================================================
--- trunk/src/nm.c	2004-08-19 20:38:04 UTC (rev 1363)
+++ trunk/src/nm.c	2004-08-19 22:18:00 UTC (rev 1364)
@@ -79,6 +79,8 @@
 					__u32 node_num, u32 num_rel);
 
 static void ocfs_process_vote_worker(void *val);
+static int ocfs_process_inode_delete(struct inode *inode);
+static void ocfs_commit_inode_delete(struct inode *inode);
 
 #ifdef VERBOSE_PROCESS_VOTE
 static const char *process_vote_strings[] = {
@@ -86,7 +88,6 @@
 	"UPDATE_OIN_INODE",     // update both oin and inode
 	"DELETE_ACQUIRE",// delete or rename request
 	"DELETE_RELEASE",// delete or rename release request
-	"RELEASE_CACHE",        // release a cache lock I hold
 	"CHANGE_MASTER",        // request to change master to requestor
 	"NOT_MASTER",           // I am not master, retry
 	"REMASTER_THIS",        // remaster lock to me
@@ -451,7 +452,6 @@
 	return 0;
 }				/* ocfs_volume_thread */
 
-
 static inline int get_process_vote_action(ocfs_super * osb, ocfs_lock_res *lockres, __u32 node_num, 
 					  __u32 flags, int status, int *master_alive, struct inode *inode)
 {
@@ -513,9 +513,7 @@
 			vote_type = DELETE_ACQUIRE;
 		else
 			vote_type = INVALID_REQUEST;
-	} else if (flags & FLAG_FILE_RELEASE_CACHE)
-		vote_type = RELEASE_CACHE;
-	else if (flags & FLAG_FILE_UPDATE_OIN) {
+	} else if (flags & FLAG_FILE_UPDATE_OIN) {
 		if ((flags & FLAG_FILE_TRUNCATE) &&
 		    (flags & FLAG_ACQUIRE_LOCK))
 			vote_type = TRUNCATE_PAGES;
@@ -540,8 +538,11 @@
 	}
 	
 	if (inode == NULL && 
-		(vote_type != DELETE_RELEASE && vote_type != TRUNCATE_PAGES))
+	    (vote_type != DELETE_RELEASE && vote_type != TRUNCATE_PAGES)) {
+		printk("inode is null and it's not a delete release or a truncate pages!\n");
 		vote_type = INVALID_REQUEST;
+	}
+
 done:
 	LOG_EXIT_STATUS(vote_type);
 	return vote_type;
@@ -732,6 +733,61 @@
 
 #endif
 
+static int ocfs_process_inode_delete(struct inode *inode)
+{
+	int status;
+
+	LOG_TRACE_ARGS("DELETE vote on inode %lu, read "
+		       "lnk_cnt = %u\n", inode->i_ino, 
+		       inode->i_nlink);
+
+	/* force this as ours may be out of date. */
+	inode->i_nlink = 0;
+
+	down (&(OCFS_I(inode)->ip_sem));
+	/* vote no if the file is still open. */
+	if (OCFS_I(inode)->ip_open_cnt > 0) {
+#ifdef VERBOSE_PROCESS_VOTE
+		printk("process_vote: open count = %u\n", 
+		       OCFS_I(inode)->ip_open_cnt);
+#endif
+		up(&(OCFS_I(inode)->ip_sem));
+		status = 0;
+		goto done;
+	}
+	up(&(OCFS_I(inode)->ip_sem));
+
+	/* vote no if someone's extending it. */
+	spin_lock(&oin_num_ext_lock);
+	if (OCFS_I(inode)->ip_num_extends) {
+		spin_unlock(&oin_num_ext_lock);
+#ifdef VERBOSE_PROCESS_VOTE
+		printk("process_vote: extends pending\n");
+#endif
+		status = 0;
+		goto done;
+	}
+	spin_unlock(&oin_num_ext_lock);
+
+	status = 1;
+done:
+	return(status);
+}
+
+static void ocfs_commit_inode_delete(struct inode *inode)
+{
+	SET_INODE_DELETED(inode);
+	/* if we vote yes, then we set the SKIP_DELETE
+	 * flag on the inode so we don't try to delete
+	 * it in delete_inode ourselves. */
+	OCFS_SET_FLAG(OCFS_I(inode)->ip_flags, 
+		      OCFS_INODE_SKIP_DELETE);
+
+	d_prune_aliases (inode);
+	sync_mapping_buffers(inode->i_mapping);
+	ocfs_truncate_inode_pages(inode, 0);
+}
+
 /*
  * ocfs_process_vote()
  *
@@ -752,13 +808,14 @@
 	struct buffer_head *fe_bh = NULL, *vote_bh = NULL;
 	int vote_type = INVALID_REQUEST, vote_response = 0;
 	struct inode *inode = NULL;
-	int master_alive = 1, is_dir = 0;
-	int is_locked, open_handle;
+	int master_alive = 1;
+	int open_handle = 0;
 	int lockflags = 0;
 	int inc_inode_seq = 0;
 	int disk_vote = (ctxt->request_method == DISK_VOTE);
 	int comm_vote = (ctxt->request_method == COMM_VOTE);
 	int have_io_sem = 0;
+	int change_master_succeeded = 0;
 	ocfs_publish *publish = (disk_vote ? ctxt->u.publish : NULL);
 	ocfs_dlm_msg *dlm_msg = (comm_vote ? ctxt->u.dlm_msg : NULL);
 	__s16 node_num = ctxt->node_num;
@@ -809,6 +866,10 @@
 		}
 	}
 
+	/* if we time out on any of the locks, we want to send a retry
+	 * instead of letting the other guy's network timeout. */
+	vote_response = FLAG_VOTE_UPDATE_RETRY;
+
 	/* delete / rename is slightly different -- we don't want to
 	 * look up the inode in the release case -- it should already
 	 * be gone. Eventually what we'll really want to do is get it
@@ -832,7 +893,7 @@
 				       "node=%u, seq=%llu, flags=0x%x\n",
 				       lock_id, node_num, seq_num, flags);
 			LOG_ERROR_STATUS(status);
-			goto leave;
+			goto vote;
 		}
 
 		/* ahh, so you find yourself asking "what the
@@ -841,7 +902,7 @@
 		osb->voting_ino = inode->i_ino;
 
 #ifdef PROCESS_VOTE_TRYLOCK
-		if (flags & (FLAG_FILE_RELEASE_CACHE|FLAG_CHANGE_MASTER)) {
+		if (flags & FLAG_CHANGE_MASTER) {
 			if (ocfs_io_sem_write_trylock(inode, 
 						      (OCFS_NM_HEARTBEAT_TIME/2))) {
 				LOG_ERROR_ARGS("Could not get i/o lock on "
@@ -851,33 +912,34 @@
 				have_io_sem = 1;
 		}
 #else
-		    if (flags & (FLAG_FILE_RELEASE_CACHE|FLAG_CHANGE_MASTER)) {
+		if (flags & FLAG_CHANGE_MASTER) {
 			down_write(&OCFS_I(inode)->ip_io_sem);
 			have_io_sem = 1;
 		}
 #endif
 
 		lockres = GET_INODE_LOCKRES(inode);
-		status = ocfs_update_lockres (osb, NULL, NULL,
-					      (OCFS_NM_HEARTBEAT_TIME/2), 
-					      inode, 1, 0);
-		if (status < 0) {
-			if (status != -ETIMEDOUT)
-				LOG_ERROR_STATUS (status);
-			goto leave;
-		}
 
 		status = ocfs_acquire_lockres (lockres, (OCFS_NM_HEARTBEAT_TIME/2)); // ocfs_process_vote
 		if (status < 0) {
+			lockres = NULL;
 			LOG_TRACE_ARGS("Timedout locking lockres for id: %llu\n",
 				       OCFS_I(inode)->ip_blkno << inode->i_sb->s_blocksize_bits);
+			goto vote;
+		}
+
+		/* since we pass a NULL bh, this'll only do a read if
+		 * we're not the master. */
+		status = ocfs_update_lockres (osb, NULL, 
+					      (OCFS_NM_HEARTBEAT_TIME/2), 
+					      inode, 1, 1);
+		if (status < 0) {
+			if (status != -ETIMEDOUT)
+				LOG_ERROR_STATUS (status);
 			goto leave;
 		}
 	}
 
-	lockflags = (OCFS_I(inode)->ip_flags & OCFS_INODE_SYSTEM_FILE)
-	       	? 0 : OCFS_BH_CACHED;
-
 no_inode_ok:
 	/* fail here if no inode, unless this is a delete/rename release */
 	vote_type = get_process_vote_action(osb, lockres, node_num, flags, 
@@ -889,7 +951,8 @@
 	       (flags & FLAG_ACQUIRE_LOCK ? "ACQUIRE" : "MODIFY"), lock_id,
  	       vote_type, process_vote_strings[vote_type], disk_vote ? "disk vote" : "net vote", num_ident);
 	if (vote_type == INVALID_REQUEST)
-		printk("Invalid request! flags = 0x%x\n", flags);
+		printk("Invalid request! flags = 0x%x master=%d, level=%d\n", 
+		       flags, lockres->master_node_num, lockres->lock_state);
 
 #endif
 	/* get_process_vote_action will only allow CHANGE_MASTER and
@@ -912,6 +975,134 @@
 	}
 
 	vote_response = 0;
+
+	/* some lock requests need to be processed before a possible
+	 * change master. Beware however that the change_master might
+	 * very well send a no vote, so you can't do things here that
+	 * cannot be rolled back. */
+	switch (vote_type) {
+		case DELETE_ACQUIRE:
+			LOG_TRACE_STR("DELETE_ACQUIRE (part one)");
+			if (!ocfs_process_inode_delete(inode))
+				vote_response = FLAG_VOTE_OIN_ALREADY_INUSE;
+			else 
+				vote_response = FLAG_VOTE_NODE;
+			break;
+		case INVALID_REQUEST:
+			/* we catch INVALID_REQUEST up here now as we
+			 * don't want to do a change_master on a
+			 * messed up vote... */
+			LOG_TRACE_STR("INVALID_REQUEST");
+			goto vote;
+		default:
+			break;
+	}
+
+	if (vote_response > FLAG_VOTE_NODE) {
+		/* we shouldn't even get to the other cases. */
+		goto vote;
+	}
+
+	if ((lockres->master_node_num == osb->node_num)
+	    && (flags & FLAG_CHANGE_MASTER)) {
+		LOG_TRACE_STR("CHANGE_MASTER");
+
+		status = -EFAIL;
+#ifdef VERBOSE_PROCESS_VOTE
+		printk("process_vote: doing CHANGE_MASTER for this request\n");
+#endif
+		/* requestor will need to retry if anyone is using the
+		 * lockres */
+		if (lockres->lock_holders > 0) {
+#ifdef VERBOSE_PROCESS_VOTE
+			printk("process_vote: Lock id (%llu) has %u "
+			       "holders\n",
+			       OCFS_I(inode)->ip_blkno << inode->i_sb->s_blocksize_bits,
+			       lockres->lock_holders);
+#endif	
+			// kick the commit thread
+			atomic_set(&osb->flush_event_woken, 1);
+			wake_up(&osb->flush_event);
+
+			vote_response = FLAG_VOTE_UPDATE_RETRY;
+			status = 0;
+			goto vote;
+		}
+
+		if (!have_io_sem) {
+#ifdef VERBOSE_PROCESS_VOTE
+			printk("process_vote: We haven't got io_sem on"
+			       " Lock id (%llu)\n",
+			       OCFS_I(inode)->ip_blkno << inode->i_sb->s_blocksize_bits);
+#endif
+			vote_response = FLAG_VOTE_UPDATE_RETRY;
+			status = 0;
+			goto vote;
+		}
+
+		/* this is currently a readonly cache lock.
+		 * need to communicate to all the nodes in the 
+		 * map that lock will be changing to RW before we
+		 * continue.  RETRY this request while we spawn 
+		 * off a thread to collect up the communication */
+		if (!ocfs_node_map_is_empty(&lockres->readonly_map)) {
+			// assumption: node asking for vote has already dropped readonly_node
+			ocfs_node_map_clear_bit(&lockres->readonly_map, node_num);
+			// should not be in there, but...
+			ocfs_node_map_clear_bit(&lockres->readonly_map, osb->node_num);
+			if (!ocfs_node_map_is_empty(&lockres->readonly_map)) {
+				OCFS_ASSERT(lockres->readonly_node == osb->node_num);
+				OCFS_ASSERT(inode);
+				status = ocfs_drop_readonly_cache_lock(osb, inode, 1);
+				if (status < 0)
+					LOG_ERROR_STATUS(status);
+#ifdef VERBOSE_PROCESS_VOTE
+				printk("process_vote: node map is not "
+				       "empty on readonly drop "
+				       "request\n");
+#endif
+				vote_response = FLAG_VOTE_UPDATE_RETRY;
+				goto vote;
+			}
+			// no one left in map, so continue
+			lockres->readonly_node = OCFS_INVALID_NODE_NUM;
+		}
+
+		sync_mapping_buffers(inode->i_mapping);
+
+#warning do we need a truncate_inode_pages here?
+		lockflags = lockres->master_node_num == osb->node_num ? 
+			OCFS_BH_CACHED : 0;
+		status = ocfs_read_bh(osb, lock_id, &fe_bh, lockflags, inode);
+		if (status < 0) {
+			LOG_ERROR_STATUS (status);
+			goto leave;
+		}
+
+		fe = (ocfs2_dinode *) fe_bh->b_data;
+		DISK_LOCK(fe)->dl_master = node_num;
+		DISK_LOCK(fe)->dl_level = OCFS_LKM_NLMODE;
+
+		/* need to do the write only if fe lock values
+		 * need to change */
+		status = ocfs_write_bh(osb, fe_bh, 0, inode);
+		if (status < 0) {
+			LOG_ERROR_STATUS (status);
+			brelse(fe_bh);
+			goto leave;
+		}
+		lockres->master_node_num = node_num;
+		lockres->lock_type = OCFS_LKM_NLMODE;
+
+		brelse(fe_bh);
+		vote_response = FLAG_VOTE_NODE;
+		inc_inode_seq = 1;
+		status = 0;
+		change_master_succeeded = 1;
+	}
+
+	/* Below here, we can't have any of these cases failing if
+	 * there was a successful change master request. */
 	open_handle = 0;
 	switch (vote_type) {
 		case UPDATE_OIN_INODE:
@@ -953,54 +1144,14 @@
 			break;
 
 		case DELETE_ACQUIRE:
-			LOG_TRACE_STR("DELETE_ACQUIRE");
+			LOG_TRACE_STR("DELETE_ACQUIRE (part two)");
+			/* If we got this far, then we assume we've
+			 * done the 1st part of the DELETE_ACQUIRE
+			 * case and we just have to commit it. */
+			if (vote_response != FLAG_VOTE_NODE)
+				BUG();
 
-			LOG_TRACE_ARGS("DELETE vote on inode %lu, read "
-				       "lnk_cnt = %u\n", inode->i_ino, 
-				       inode->i_nlink);
-
-			/* force this as ours may be out of date. */
-			inode->i_nlink = 0;
-
-			down (&(OCFS_I(inode)->ip_sem));
-			/* vote no if the file is still open. */
-			if (OCFS_I(inode)->ip_open_cnt > 0) {
-#ifdef VERBOSE_PROCESS_VOTE
-				printk("process_vote: open count = %u\n", 
-				       OCFS_I(inode)->ip_open_cnt);
-#endif
-				vote_response = FLAG_VOTE_OIN_ALREADY_INUSE;
-				up(&(OCFS_I(inode)->ip_sem));
-				break;
-			}
-			up(&(OCFS_I(inode)->ip_sem));
-
-			/* vote no if someone's extending it. */
-			spin_lock(&oin_num_ext_lock);
-			if (OCFS_I(inode)->ip_num_extends) {
-				spin_unlock(&oin_num_ext_lock);
-#ifdef VERBOSE_PROCESS_VOTE
-				printk("process_vote: extends pending\n");
-#endif
-				vote_response = FLAG_VOTE_OIN_ALREADY_INUSE;
-				break;
-			}
-			spin_unlock(&oin_num_ext_lock);
-
-			vote_response = FLAG_VOTE_NODE;
-
-			SET_INODE_DELETED(inode);
-			/* if we vote yes, then we set the SKIP_DELETE
-			 * flag on the inode so we don't try to delete
-			 * it in delete_inode ourselves. */
-			OCFS_SET_FLAG(OCFS_I(inode)->ip_flags, 
-				      OCFS_INODE_SKIP_DELETE);
-
-			inode->i_nlink = 0;
-			d_prune_aliases (inode);
-			sync_mapping_buffers(inode->i_mapping);
-			ocfs_truncate_inode_pages(inode, 0);
-			/* Alright, for the YES case, we're done here. */
+			ocfs_commit_inode_delete(inode);
 			break;
 
 		case READONLY:
@@ -1034,126 +1185,10 @@
 			status = 0;
 			break;
 
-		case RELEASE_CACHE:
 		case CHANGE_MASTER:
-#warning nm thread could get this too early
-			if (vote_type == RELEASE_CACHE)
-				LOG_TRACE_STR("RELEASE_CACHE");
-			else
-				LOG_TRACE_STR("CHANGE_MASTER");
+			/* we dealt with this all above. */
+			break;
 
-			status = -EFAIL;
-
-			/* requestor will need to retry if anyone is using the lockres */
-			if (lockres->lock_holders > 0) {
-#ifdef VERBOSE_PROCESS_VOTE
-				printk("process_vote: Lock id (%llu) has %u "
-				       "holders\n",
-				       OCFS_I(inode)->ip_blkno << inode->i_sb->s_blocksize_bits,
-				       lockres->lock_holders);
-#endif
-				// kick the commit thread
-				atomic_set(&osb->flush_event_woken, 1);
-				wake_up(&osb->flush_event);
-
-				vote_response = FLAG_VOTE_UPDATE_RETRY;
-				status = 0;
-				break;
-			}
-
-			if (!have_io_sem) {
-#ifdef VERBOSE_PROCESS_VOTE
-				printk("process_vote: We haven't got io_sem on"
-				       " Lock id (%llu)\n",
-				       OCFS_I(inode)->ip_blkno << inode->i_sb->s_blocksize_bits);
-#endif
-				vote_response = FLAG_VOTE_UPDATE_RETRY;
-				status = 0;
-				break;
-			}
-
-			/* this is currently a readonly cache lock.
-			 * need to communicate to all the nodes in the 
-			 * map that lock will be changing to RW before we
-			 * continue.  RETRY this request while we spawn 
-			 * off a thread to collect up the communication */
-			if (!ocfs_node_map_is_empty(&lockres->readonly_map)) {
-				// assumption: node asking for vote has already dropped readonly_node
-				ocfs_node_map_clear_bit(&lockres->readonly_map, node_num);
-				// should not be in there, but...
-				ocfs_node_map_clear_bit(&lockres->readonly_map, osb->node_num);
-				if (!ocfs_node_map_is_empty(&lockres->readonly_map)) {
-					OCFS_ASSERT(lockres->readonly_node == osb->node_num);
-					OCFS_ASSERT(inode);
-					status = ocfs_drop_readonly_cache_lock(osb, inode, 1);
-					if (status < 0)
-						LOG_ERROR_STATUS(status);
-#ifdef VERBOSE_PROCESS_VOTE
-					printk("process_vote: node map is not "
-					       "empty on readonly drop "
-					       "request\n");
-#endif
-					vote_response = FLAG_VOTE_UPDATE_RETRY;
-					break;
-				}
-				// noone left in map, so continue
-				lockres->readonly_node = OCFS_INVALID_NODE_NUM;
-			}
-
-			sync_mapping_buffers(inode->i_mapping);
-
-#warning do we need a truncate_inode_pages here?
-
-			/* nobody currently owns the lock so fastpath it */
-			if (vote_type == CHANGE_MASTER)
-				lockres->master_node_num = node_num;
-
-//			if (oin != NULL) {
-				lockres->lock_type = OCFS_LKM_NLMODE;
-//			}
-
-			status = ocfs_read_bh(osb, lock_id, &fe_bh, lockflags, inode);
-			if (status < 0) {
-				LOG_ERROR_STATUS (status);
-				break;
-			}
-	
-			fe = (ocfs2_dinode *) fe_bh->b_data;
-			is_dir = S_ISDIR(fe->i_mode);
-			is_locked = DISK_LOCK(fe)->dl_level > OCFS_LKM_NLMODE;
-			if (vote_type == CHANGE_MASTER) {
-				fe = (ocfs2_dinode *) fe_bh->b_data;
-				DISK_LOCK(fe)->dl_master = node_num;
-			}
-
-			if (is_dir && vote_type == RELEASE_CACHE)
-				ocfs_inc_inode_seq(osb, inode);
-
-			/* need to do the write only if fe lock values need to change */
-			if (is_locked || vote_type == CHANGE_MASTER) {
-				if (vote_type == RELEASE_CACHE) {
-					LOG_TRACE_STR("release cache vote, setting to NO_LOCK");
-					fe = (ocfs2_dinode *) fe_bh->b_data;
-					DISK_LOCK(fe)->dl_level = OCFS_LKM_NLMODE;
-				}
-				status = ocfs_write_bh(osb, fe_bh, 0, inode);
-				if (status < 0) {
-					LOG_ERROR_STATUS (status);
-					brelse(fe_bh);
-					break;
-				}
-				if (vote_type == RELEASE_CACHE) {
-					lockres->lock_type = OCFS_LKM_NLMODE;
-				}
-				else // CHANGE_MASTER
-					lockres->master_node_num = node_num;
-			} 
-			brelse(fe_bh);
-			vote_response = FLAG_VOTE_NODE;
-			inc_inode_seq = 1;
-			status = 0;
-			break;
-		
 		case DROP_READONLY:
 			/* TODO: may need locking in here to lock out 
 			 * the actual IO that a readdir may have in 
@@ -1203,7 +1238,7 @@
 			LOG_TRACE_STR("REMASTER_REQUESTOR");
 			vote_response = FLAG_VOTE_NODE;
 			if (inode && OCFS_I(inode)->ip_open_cnt 
-			    && flags & (FLAG_FILE_EXTEND|FLAG_FILE_UPDATE) 
+			    && flags & FLAG_FILE_EXTEND 
 			    && !(flags & FLAG_DIR))
 				open_handle = 1;
 			break;
@@ -1232,6 +1267,10 @@
 			break;
 	}
 
+	if (change_master_succeeded && 
+	    ((vote_response == 0) || (vote_response > FLAG_VOTE_OIN_UPDATED)))
+	    BUG();
+
 	if(have_io_sem && inode)
 		up_write(&OCFS_I(inode)->ip_io_sem);
 	have_io_sem = 0;
@@ -1255,6 +1294,7 @@
 		}
 	}
 
+vote:
 	if (disk_vote) {
 		vote = (ocfs_vote *) vote_bh->b_data;
 		vote->lock_id = lock_id;
@@ -1288,10 +1328,12 @@
 		}
 	}
 
-	if (lockres)
+leave:
+	if (lockres) {
 		ocfs_release_lockres (lockres); // ocfs_process_vote
+		lockres = NULL;
+	}
 
-leave:
 	if (!inode)
 		goto no_inode_leave;
 

Modified: trunk/src/ocfs.h
===================================================================
--- trunk/src/ocfs.h	2004-08-19 20:38:04 UTC (rev 1363)
+++ trunk/src/ocfs.h	2004-08-19 22:18:00 UTC (rev 1364)
@@ -81,7 +81,6 @@
 	UPDATE_OIN_INODE,     // update both oin and inode
 	DELETE_ACQUIRE,// delete or rename acquire request
 	DELETE_RELEASE,// delete or rename release request
-	RELEASE_CACHE,        // release a cache lock I hold
 	CHANGE_MASTER,        // request to change master to requestor
 	NOT_MASTER,           // I am not master, retry
 	REMASTER_THIS,        // remaster lock to me
@@ -120,41 +119,39 @@
 **   by ocfs_create_modify_file
 */
 // FILEFLAG MASK
-#define  FLAG_FILE_CREATE         0x00000001
-#define  FLAG_FILE_EXTEND         0x00000002
-#define  FLAG_FILE_DELETE         0x00000004
-#define  FLAG_FILE_RENAME         0x00000008
-#define  FLAG_FILE_UPDATE         0x00000010
+#define  FLAG_ACQUIRE_LOCK        0x00000001
+#define  FLAG_RELEASE_LOCK        0x00000002
+#define  FLAG_FILE_EXTEND         0x00000004
+#define  FLAG_FILE_DELETE         0x00000008
+#define  FLAG_FILE_RENAME         0x00000010
 #define  FLAG_FILE_RECOVERY       0x00000020
-#define  FLAG_FILE_CREATE_DIR     0x00000040
-#define  FLAG_FILE_UPDATE_OIN     0x00000080
-#define  FLAG_FILE_RELEASE_MASTER 0x00000100
-#define  FLAG_RELEASE_DENTRY      0x00000200
-#define  FLAG_CHANGE_MASTER       0x00000400
-#define  FLAG_FILE_UNUSED5        0x00000800
-#define  FLAG_DIR                 0x00001000
-#define  FLAG_REMASTER            0x00002000
-#define  FLAG_FAST_PATH_LOCK      0x00004000
-#define  FLAG_TRUNCATE_PAGES      0x00008000
-#define  FLAG_FILE_UNUSED6        0x00010000
-//#define  FLAG_DEL_NAME            0x00020000
-//#define  FLAG_DEL_INODE           0x00040000
-#define  FLAG_FILE_UNUSED7        0x00080000
-#define  FLAG_FILE_UNUSED8        0x00100000
-#define  FLAG_FILE_UNUSED9        0x00200000
-#define  FLAG_FILE_RELEASE_CACHE  0x00400000
-#define  FLAG_FILE_UNUSED10       0x00800000
-#define  FLAG_FILE_UNUSED11       0x01000000
-#define  FLAG_FILE_UNUSED12       0x02000000
-#define  FLAG_FILE_UNUSED13       0x04000000
-#define  FLAG_FILE_TRUNCATE       0x08000000
-#define  FLAG_DROP_READONLY       0x10000000 
-#define  FLAG_READDIR             0x20000000 
-#define  FLAG_ACQUIRE_LOCK        0x40000000 
-#define  FLAG_RELEASE_LOCK        0x80000000 
-									    
+#define  FLAG_FILE_UPDATE_OIN     0x00000040
+#define  FLAG_RELEASE_DENTRY      0x00000080
+#define  FLAG_CHANGE_MASTER       0x00000100
+#define  FLAG_DIR                 0x00000200
+#define  FLAG_REMASTER            0x00000400
+#define  FLAG_FAST_PATH_LOCK      0x00000800
+#define  FLAG_TRUNCATE_PAGES      0x00001000
+#define  FLAG_FILE_TRUNCATE       0x00002000
+#define  FLAG_DROP_READONLY       0x00004000
+#define  FLAG_READDIR             0x00008000
+#define  FLAG_FILE_UNUSED01       0x00010000
+#define  FLAG_FILE_UNUSED02       0x00020000
+#define  FLAG_FILE_UNUSED03       0x00040000
+#define  FLAG_FILE_UNUSED04       0x00080000
+#define  FLAG_FILE_UNUSED05       0x00100000
+#define  FLAG_FILE_UNUSED06       0x00200000
+#define  FLAG_FILE_UNUSED07       0x00400000
+#define  FLAG_FILE_UNUSED08       0x00800000
+#define  FLAG_FILE_UNUSED09       0x01000000
+#define  FLAG_FILE_UNUSED10       0x02000000
+#define  FLAG_FILE_UNUSED11       0x04000000
+#define  FLAG_FILE_UNUSED12       0x08000000
+#define  FLAG_FILE_UNUSED13       0x10000000
+#define  FLAG_FILE_UNUSED14       0x20000000
+#define  FLAG_FILE_UNUSED15       0x40000000
+#define  FLAG_FILE_UNUSED16       0x80000000
 
-
 #define  OCFS_MAX_OSB_ID             65536
 
 

Modified: trunk/src/ocfs_journal.h
===================================================================
--- trunk/src/ocfs_journal.h	2004-08-19 20:38:04 UTC (rev 1363)
+++ trunk/src/ocfs_journal.h	2004-08-19 22:18:00 UTC (rev 1364)
@@ -119,16 +119,23 @@
 /* convenience function to check if an inode has been checkpointed
  * yet. Replaces ocfs_journal_new_file_search. Will do you a favor and
  * set created_trans = 0 when you've been checkpointed. 
- * returns '0' if the inode hasn't been checkpointed yet, 1 otherwise
+ * returns '1' if the inode hasn't been checkpointed yet.
+ *
  */
 static inline int ocfs_inode_is_new(ocfs_super *osb, 
 				    struct inode *inode)
 {
 	int ret;
+
+	/* System files are never "new" as they're written out by
+	 * mkfs. This helps us early during mount, before we have the
+	 * journal open and trans_id could be junk. */
+	if (OCFS_I(inode)->ip_flags & OCFS_INODE_SYSTEM_FILE)
+		return(0);
 	spin_lock(&trans_inc_lock);
-	ret = time_after(osb->journal->trans_id, 
-			 OCFS_I(inode)->ip_created_trans);
-	if (ret)
+	ret = !(time_after(osb->journal->trans_id, 
+			 OCFS_I(inode)->ip_created_trans));
+	if (!ret)
 		OCFS_I(inode)->ip_created_trans = 0;
 	spin_unlock(&trans_inc_lock);
 	return ret;

Modified: trunk/src/vote.c
===================================================================
--- trunk/src/vote.c	2004-08-19 20:38:04 UTC (rev 1363)
+++ trunk/src/vote.c	2004-08-19 22:18:00 UTC (rev 1364)
@@ -898,7 +898,7 @@
 	spin_lock (&obj->lock);
 	if (obj->vote_status >= 0 && obj->vote_state == VOTE_OBJ_STATE_FULL_REPLY) {
 		LOG_TRACE_ARGS ("OK vote, lockid=%llu\n", lock_id);
-	} else if (obj->vote_status != -EAGAIN || obj->vote_state != VOTE_OBJ_STATE_FULL_REPLY) {
+	} else if ((obj->vote_status != -EAGAIN && obj->vote_status != -EBUSY) || obj->vote_state != VOTE_OBJ_STATE_FULL_REPLY) {
 #warning "should we even be erroring here at all!"
 		LOG_ERROR_ARGS("inode %llu, vote_status=%d, vote_state=%d, "
 			       "lockid=%llu, flags = 0x%x, asked type = %u "



More information about the Ocfs2-commits mailing list