[Ocfs2-commits] mfasheh commits r1351 - trunk/src

svn-commits at oss.oracle.com svn-commits at oss.oracle.com
Thu Aug 12 15:39:11 CDT 2004


Author: mfasheh
Date: 2004-08-12 14:39:10 -0500 (Thu, 12 Aug 2004)
New Revision: 1351

Modified:
   trunk/src/dlm.c
   trunk/src/dlm.h
   trunk/src/file.c
   trunk/src/file.h
   trunk/src/journal.c
   trunk/src/lockres.c
   trunk/src/namei.c
   trunk/src/nm.c
   trunk/src/ocfs.h
   trunk/src/ocfs2_fs.h
   trunk/src/super.c
   trunk/src/vote.c
   trunk/src/vote.h
Log:
* We no longer have an open_map in the on disk lock structure. This
  takes the number of used fields in that structure down to basically
  owner and type.

* During 1st open, we also no longer force a release_cache message
  which was way too heavy as it required a lock level change, rather
  than just syncing the inode. Instead we simply send the
  TRUNCATE_INODE_PAGES message which does a sync anyway. The really
  good thing about this is that there's no metadata I/O on the
  receiving nodes now, and no locks need to be taken either.

* As a result, some changes had to be made in the extend counting - we
  always count now, not just when we have the file open, as we no
  longer force the owning node to finish all its extends before we can
  open it.

* The open_map changes fix the deadlock described in bugzilla #118

* In ocfs_notify_on_open we were doing an ocfs_sleep somewhat
  prematurely.

* There was an erroneous up_write in the error handling in
  ocfs_file_open.

IMPORTANT NOTE: 
  We will be incompatible with older revisions as they
  still think there's an open map, so you'd better
  upgrade all your nodes :)



Modified: trunk/src/dlm.c
===================================================================
--- trunk/src/dlm.c	2004-08-11 18:27:57 UTC (rev 1350)
+++ trunk/src/dlm.c	2004-08-12 19:39:10 UTC (rev 1351)
@@ -57,10 +57,10 @@
 
 static inline int ocfs_wait_for_readonly_drop(ocfs_super *osb, struct inode *inode);
 
-static int ocfs_disk_request_vote (ocfs_super * osb, __u64 lock_id, __u32 lock_type, __u32 flags, ocfs_node_map * vote_map, __u64 * lock_seq_num, __u32 time_to_wait, struct inode *inode, __u32 num_ident, ocfs_node_map *open_map);
-static int ocfs_wait_for_vote (ocfs_super * osb, __u64 lock_id, __u32 lock_type, __u32 flags, ocfs_node_map *vote_map, __u32 time_to_wait, __u64 lock_seq_num, ocfs_node_map *open_map);
+static int ocfs_disk_request_vote (ocfs_super * osb, __u64 lock_id, __u32 lock_type, __u32 flags, ocfs_node_map * vote_map, __u64 * lock_seq_num, __u32 time_to_wait, struct inode *inode, __u32 num_ident);
+static int ocfs_wait_for_vote (ocfs_super * osb, __u64 lock_id, __u32 lock_type, __u32 flags, ocfs_node_map *vote_map, __u32 time_to_wait, __u64 lock_seq_num);
 static int ocfs_reset_voting (ocfs_super * osb);
-static int ocfs_get_vote_on_disk (ocfs_super * osb, __u64 lock_id, __u32 lock_type, __u32 flags, ocfs_node_map * got_vote_map, ocfs_node_map * vote_map, __u64 lock_seq_num, ocfs_node_map * oin_open_map);
+static int ocfs_get_vote_on_disk (ocfs_super * osb, __u64 lock_id, __u32 lock_type, __u32 flags, ocfs_node_map * got_vote_map, ocfs_node_map * vote_map, __u64 lock_seq_num);
 static int ocfs_disk_release_lock (ocfs_super * osb, __u64 lock_id, __u32 lock_type, __u32 flags, struct inode *inode, __u32 num_ident);
 
 
@@ -75,22 +75,7 @@
 	ocfs_node_map_set_from_disk(vote_map, publish->vote_map);
 }
 
-void ocfs_set_disk_lock_open_map(ocfs_super *osb,
-				 ocfs2_disk_lock *lock,
-				 ocfs_node_map *open_map)
-{
-	ocfs_node_map_set_to_disk(lock->dl_node_map, open_map);
-}
 
-void ocfs_get_disk_lock_open_map(ocfs_super *osb,
-				 ocfs2_disk_lock *lock,
-				 ocfs_node_map *open_map)
-{
-	ocfs_node_map_init(osb, open_map);
-	ocfs_node_map_set_from_disk(open_map, lock->dl_node_map);
-}
-	
-
 /* inode is definitely non NULL */
 static int ocfs_disk_request_vote(ocfs_super *osb, __u64 lock_id,
 				  __u32 lock_type, __u32 flags,
@@ -98,8 +83,7 @@
 				  __u64 *lock_seq_num,
 				  __u32 time_to_wait,
 				  struct inode *inode,
-				  __u32 num_ident,
-				  ocfs_node_map *open_map)
+				  __u32 num_ident)
 {
 	int status = 0;
 	__u64 pub_off;
@@ -203,7 +187,7 @@
 
 	if (status >= 0) {
 		status = ocfs_wait_for_vote (osb, lock_id, lock_type, flags, vote_map, 
-					     time_to_wait, *lock_seq_num, open_map);
+					     time_to_wait, *lock_seq_num);
 		if (status < 0 && status != -EAGAIN)
 			LOG_ERROR_STATUS (status);
 	}
@@ -213,165 +197,6 @@
 }				/* ocfs_disk_request_vote */
 
 /*
- * ocfs_wait_for_lock_release()
- * inode is definitely non NULL
- */
-int ocfs_wait_for_lock_release (ocfs_super * osb, __u64 offset, __u32 time_to_wait, __u32 lock_type, struct inode *inode)
-{
-	int status = -ETIMEDOUT;
-	int tmpstat = -ETIMEDOUT;
-	__u32 timewaited = 0;
-	ocfs2_dinode *fe = NULL;
-	struct buffer_head *bh = NULL;
-	__s16 curr_master;
-	__u8 lock_level;
-	int disk_vote = 0;
-	ocfs_lock_res *lockres = GET_INODE_LOCKRES(inode);
-
-	LOG_ENTRY_ARGS ("(0x%p, %llu, %u, 0x%p, %u)\n",
-			osb, offset, time_to_wait,
-			lockres, lock_type);
-
-	while (time_to_wait > timewaited) {
-		bh = NULL;
-		fe = NULL;
-
-		tmpstat = ocfs_read_bh (osb, offset, &bh, 0, inode);
-		if (tmpstat < 0) {
-			LOG_ERROR_STATUS (status = tmpstat);
-			goto finally;
-		}
-		fe = (ocfs2_dinode *) bh->b_data;
-		curr_master = DISK_LOCK(fe)->dl_master;
-		lock_level = DISK_LOCK(fe)->dl_level;
-
-		if ((curr_master == OCFS_INVALID_NODE_NUM) ||
-		    (curr_master == osb->node_num)) {
-			goto got_it;
-		}
-		
-		if ((!ocfs_node_is_alive(&osb->publ_map, curr_master)) && 
-		    (!ocfs_node_is_recovering(osb, curr_master))) {
-			/* Reset the lock as not owned and return success?? */
-			/* This needs to be under some sort of cluster wide lock, */
-			DISK_LOCK(fe)->dl_master = OCFS_INVALID_NODE_NUM;
-			DISK_LOCK(fe)->dl_level = OCFS_LKM_NLMODE;
-			tmpstat = ocfs_write_bh (osb, bh, 0, NULL);
-			if (tmpstat < 0) {
-				LOG_ERROR_STATUS (status = tmpstat);
-			}
-			goto got_it;
-		}
-
-		/* The local node is not the master */
-		if (lock_level == OCFS_LKM_EXMODE) {
-			ocfs_acquire_lockres(lockres, 0); // ocfs_file_open ocfs_symlink
-			lockres->lock_type = lock_level;
-			lockres->master_node_num = curr_master;
-				
-			if (S_ISDIR(fe->i_mode)) {
-				if (lockres->readonly_node != OCFS_INVALID_NODE_NUM) {
-					if (lockres->readonly_node == curr_master) {
-						// readonly cachelock already on this dir
-#ifdef VERBOSE_LOCKING_TRACE
-						printk("ocfs_wait_for_lock_release: ronode=master=%d\n", curr_master);
-#endif
-						ocfs_release_lockres(lockres); // ocfs_file_open ocfs_symlink
-						goto got_it;
-					} else {
-						LOG_ERROR_ARGS("(1) readonly node changed! was %d, now master is %d\n",
-						       	lockres->readonly_node, curr_master);
-						lockres->readonly_node = OCFS_INVALID_NODE_NUM;
-					}
-				}
-
-				// no readonly node, need to alert owner to get readonly access
-				status = new_lock_function(osb, lockres->lock_type, 
-							   FLAG_DIR | FLAG_READDIR | FLAG_ACQUIRE_LOCK,
-							   bh, &disk_vote, inode);
-				if (status < 0) {
-					ocfs_release_lockres(lockres); // ocfs_file_open ocfs_symlink
-					if (status == -EAGAIN) {
-						if (ocfs_task_interruptible ()) {
-							LOG_TRACE_ARGS("interrupted... lockid=%llu\n",
-								offset);
-							status = -EINTR;
-							goto finally;
-						}
-						goto again;
-					}
-					goto finally;
-				}
-
-#ifdef VERBOSE_LOCKING_TRACE
-				printk("waitforlockrelease: setting ronode, was=%d, now=%d\n", lockres->readonly_node, lockres->master_node_num);
-				printk("ocfs_wait_for_lock_release: cache->readonly ronode=master=%d\n", curr_master);
-#endif
-				lockres->readonly_node = lockres->master_node_num;	
-				ocfs_release_lockres(lockres); // ocfs_file_open ocfs_symlink
-				goto got_it;
-			} else {
-				for (status = -EAGAIN; status==-EAGAIN; ) {
-					status = new_lock_function(osb, OCFS_LKM_NLMODE, 
-						   FLAG_ACQUIRE_LOCK|FLAG_FILE_RELEASE_CACHE,
-						   NULL, &disk_vote, inode);
-					if (status == -EAGAIN)
-						ocfs_sleep(500);
-				}
-				ocfs_release_lockres(lockres); // ocfs_file_open ocfs_symlink
-				if (status < 0) {
-					if (status != -EINTR)
-						LOG_ERROR_STATUS (status);
-					goto finally;
-				}
-				tmpstat = ocfs_read_bh (osb, offset, &bh, 0, inode);
-				if (tmpstat < 0) {
-					LOG_ERROR_STATUS (tmpstat);
-					status = tmpstat;
-					goto finally;
-				}
-#warning "isn't this broken!?!"
-				LOG_TRACE_ARGS("broke cache lock, setting to NO_LOCK\n");
-				fe = (ocfs2_dinode *) bh->b_data;
-				DISK_LOCK(fe)->dl_level = OCFS_LKM_NLMODE;
-				lock_level = OCFS_LKM_NLMODE;
-				tmpstat = ocfs_write_bh (osb, bh, 0, inode);
-				if (tmpstat < 0) {
-					LOG_ERROR_STATUS (tmpstat);
-					status = tmpstat;
-					goto finally;
-				}
-			}
-		}
-
-
-		if (lock_level <= lock_type)
-			goto got_it;
-
-again:	
-		brelse(bh);
-		ocfs_sleep (WAIT_FOR_VOTE_INCREMENT);
-		timewaited += WAIT_FOR_VOTE_INCREMENT;
-		continue;
-got_it:
-		status = 0;
-		break;
-	}
-
-finally:
-	if (lockres && status >= 0)
-		status = ocfs_update_lockres(osb, &bh, NULL, 0, inode,
-					     0, 0);
-
-	if (bh)
-		brelse(bh);
-
-	LOG_EXIT_STATUS (status);
-	return status;
-}				/* ocfs_wait_for_lock_release */
-
-
-/*
  * ocfs_get_vote_on_disk()
  *
  */
@@ -379,8 +204,7 @@
 				 __u32 lock_type, __u32 flags,
 				 ocfs_node_map *got_vote_map,
 				 ocfs_node_map *vote_map,
-				 __u64 lock_seq_num,
-				 ocfs_node_map *oin_open_map)
+				 __u64 lock_seq_num)
 {
 	int status = 0;
 	__u32 i;
@@ -426,7 +250,6 @@
 			ocfs_vote_reply_ctxt ctxt;
 			ctxt.reply_method = DISK_VOTE;
 			ctxt.got_vote_map = got_vote_map;
-			ctxt.open_map = oin_open_map;
 			ctxt.status = &status;
 			ctxt.flags = flags;
 			ctxt.u.vote = vote;
@@ -455,7 +278,7 @@
  * ocfs_wait_for_vote()
  *
  */
-static int ocfs_wait_for_vote (ocfs_super * osb, __u64 lock_id, __u32 lock_type, __u32 flags, ocfs_node_map *vote_map, __u32 time_to_wait, __u64 lock_seq_num, ocfs_node_map *open_map)
+static int ocfs_wait_for_vote (ocfs_super * osb, __u64 lock_id, __u32 lock_type, __u32 flags, ocfs_node_map *vote_map, __u32 time_to_wait, __u64 lock_seq_num)
 {
 	int status = -EAGAIN;
 	__u32 timewaited = 0;
@@ -478,7 +301,7 @@
 		}
 
 		status = ocfs_get_vote_on_disk (osb, lock_id, lock_type, flags,
-				&gotvotemap, vote_map, lock_seq_num, open_map);
+				&gotvotemap, vote_map, lock_seq_num);
 		if (status < 0) {
 			if (status != -EAGAIN)
 				LOG_ERROR_STATUS (status);
@@ -611,24 +434,6 @@
 		DISK_LOCK(fe)->dl_level = lockres->lock_type;
 	}
 
-	if (flags & DLOCK_FLAG_OPEN_MAP) {
-		ocfs_node_map disk_map;
-		
-		ocfs_node_map_init(osb, &disk_map);
-		ocfs_get_disk_lock_open_map(osb, DISK_LOCK(fe), &disk_map);
-		
-		if (flags & DLOCK_FLAG_ADD_SELF)
-			ocfs_node_map_set_bit(&lockres->oin_openmap, osb->node_num);
-
-		if (!ocfs_node_map_is_equal(&disk_map, &lockres->oin_openmap)) {
-			if (!changed) {
-				fe = (ocfs2_dinode *) (*bh)->b_data;
-			}
-			changed = 1;
-			ocfs_set_disk_lock_open_map(osb, DISK_LOCK(fe), &lockres->oin_openmap);
-	       }
-	}
-
 	if (flags & DLOCK_FLAG_SEQ_NUM &&
 	    DISK_LOCK(fe)->dl_seq_num != lockres->last_upd_seq_num) {
 		if (!changed) {
@@ -656,89 +461,17 @@
 }				/* ocfs_update_disk_lock */
 
 /*
- * ocfs_update_master_on_open()
- * inode is definitely non NULL
+ * ocfs_notify_on_open()
+ *
+ * TODO: if we're not opening for write then lets send an additional
+ * flag over to tell the other node it's not necessary to do the
+ * truncate_inode_pages (he just has to sync).
  */
-int ocfs_update_master_on_open (ocfs_super * osb, struct inode *inode, ocfs_journal_handle *handle)
+int ocfs_notify_on_open (ocfs_super * osb, struct inode *inode)
 {
 	int status = -EAGAIN;
 	int disk_vote = 0;
-	struct buffer_head *bh = NULL;
 	ocfs_lock_res *lockres = GET_INODE_LOCKRES(inode);
-
-	LOG_ENTRY ();
-
-	/* no need to alert master for these cases */
-	if (S_ISDIR (inode->i_mode) ||
-	    lockres->master_node_num == OCFS_INVALID_NODE_NUM ||
-	     ocfs_node_is_alive(&lockres->oin_openmap,
-				osb->node_num)) {
-		status = 0;
-		goto bail;
-	}
-
-	while (status == -EAGAIN) {
-		if (!ocfs_node_is_alive(&osb->publ_map,
-					lockres->master_node_num)) {
-			LOG_TRACE_ARGS ("Master (%u) dead, lockid %llu\n",
-				lockres->master_node_num,
-				OCFS_I(inode)->ip_blkno << inode->i_sb->s_blocksize_bits);
-			status = 0;
-			goto bail;
-		}
-
-		ocfs_acquire_lockres(lockres, 0); // ocfs_file_open ocfs_symlink
-
-		if (lockres->master_node_num == osb->node_num) {
-			ocfs_node_map_set_bit(&lockres->oin_openmap, osb->node_num);
-			status = ocfs_update_disk_lock (osb, DLOCK_FLAG_OPEN_MAP, 
-							&bh, inode, handle);
-			if (bh)
-				brelse(bh);
-
-			if (status < 0) {
-				LOG_ERROR_STATUS (status);
-				goto bail;
-			}
-			ocfs_release_lockres (lockres); // ocfs_file_open ocfs_symlink
-		} else {
-			status = new_lock_function(osb, lockres->lock_type, FLAG_ADD_OIN_MAP, 
-						   NULL, &disk_vote, inode);
-			if (status < 0) {
-				if (status != -EAGAIN)
-					LOG_ERROR_STATUS (status);
-				ocfs_release_lockres (lockres); // ocfs_file_open ocfs_symlink
-				if (status == -EAGAIN) {
-					ocfs_sleep (500);
-					if (ocfs_task_interruptible ()) {
-						LOG_TRACE_ARGS("interrupted... lockid=%llu\n",
-							OCFS_I(inode)->ip_blkno << inode->i_sb->s_blocksize_bits);
-						status = -EINTR;
-						goto bail;
-					}
-					continue;
-				}
-
-				goto bail;
-			}
-			ocfs_release_lockres (lockres); // ocfs_file_open ocfs_symlink
-		}
-	}
-
-bail:
-	LOG_EXIT_STATUS (status);
-	return status;
-}				/* ocfs_update_master_on_open */
-
-/*
- * ocfs_notify_truncate_on_wopen()
- * inode is definitely non NULL
- */
-int ocfs_notify_truncate_on_wopen (ocfs_super * osb, struct inode *inode)
-{
-	int status = -EAGAIN;
-	int disk_vote = 0;
-	ocfs_lock_res *lockres = GET_INODE_LOCKRES(inode);
 	u32 flags;
 
 	LOG_ENTRY ();
@@ -757,13 +490,13 @@
 				LOG_ERROR_STATUS (status);
 			ocfs_release_lockres (lockres); // ocfs_file_open ocfs_symlink
 			if (status == -EAGAIN) {
-				ocfs_sleep (50);
 				if (ocfs_task_interruptible ()) {
 					LOG_TRACE_ARGS("interrupted... lockid=%llu\n",
 					OCFS_I(inode)->ip_blkno << inode->i_sb->s_blocksize_bits);
 					status = -EINTR;
 					goto bail;
 				}
+				ocfs_sleep (50);
 				continue;
 			}
 
@@ -775,7 +508,7 @@
 bail:
 	LOG_EXIT_STATUS (status);
 	return status;
-}				/* ocfs_notify_truncate_on_wopen */
+}				/* ocfs_notify_on_open */
 
 enum {
 	invalid_path = 0,
@@ -867,8 +600,6 @@
 		    lockres->master_node_num != DISK_LOCK(disklock)->dl_master) {
 			lockres->master_node_num = DISK_LOCK(disklock)->dl_master;
 			lockres->lock_type = DISK_LOCK(disklock)->dl_level;
-			ocfs_get_disk_lock_open_map(osb, DISK_LOCK(disklock), 
-						    &lockres->oin_openmap);
 		}
 		updated = 1;
 	}
@@ -1011,14 +742,19 @@
 		}
 		goto finally;
 	} 
-			
+
 	/* We got the lock */
 	status = 0;
 
 skip_lock_write:
 	OCFS_ASSERT(status == 0);
 	lockres->lock_holders++;
+	if (extra_lock_flags == FLAG_FAST_PATH_LOCK 
+	    && ((flags & FLAG_FILE_EXTEND) || (flags & FLAG_FILE_TRUNCATE)))
+		lockres->uncommitted_holders++;
 	LOG_TRACE_ARGS("lockres->lock_holders = %u\n", lockres->lock_holders);
+	LOG_TRACE_ARGS("lockres->uncommitted_holders = %u\n", 
+		       lockres->uncommitted_holders);
 	ocfs_release_lockres (lockres); // ocfs_acquire_lock
 
 finally:
@@ -1044,32 +780,11 @@
 	int disk_vote = 0;
 	ocfs_lock_res *lockres = GET_INODE_LOCKRES(inode);
 
-	/* TODO: the ifdefed out code here used to be used
-	 *       right now, we *broadcast* on lock releases
-	 *       because of the issues surrounding lookup_file_allocation
-	 *       need to restrict this to only the openmap
-	 *       at some later date */
-#if 0
-	ocfs_node_map openmap;
-#endif
-
 	LOG_ENTRY_ARGS ("(0x%p, %llu, %u, %u, 0x%p)\n",
 			osb, lock_id, lock_type, flags, lockres);	
 	
 	/* TODO: figure out how to properly handle inode updates w/no oin */
-	ocfs_node_map_dup(osb, &votemap, &osb->publ_map);  // temporary hack, forces broadcast
-
-#if 0
-	ocfs_node_map_init(osb, &openmap);
-	ocfs_get_disk_lock_open_map(osb, DISK_LOCK(fe), &openmap);
-
-	/* publish_map & openmap for operations that dont need broadcast */
-	if (!(flags & (FLAG_FILE_RENAME | FLAG_FILE_DELETE | 
-		       FLAG_FILE_RELEASE_DENTRY | FLAG_FILE_TRUNCATE |
-		       FLAG_FILE_EXTEND)))
-		ocfs_node_map_and(&votemap, &openmap);
-#endif
-
+	ocfs_node_map_dup(osb, &votemap, &osb->publ_map);
 	ocfs_node_map_clear_bit(&votemap, osb->node_num);
 	if (ocfs_node_map_is_empty(&votemap))
 		goto finally;
@@ -1089,7 +804,7 @@
 		if (!disk_vote) {
 			LOG_TRACE_STR ("Network vote");
 			status = ocfs_send_dlm_request_msg (osb, lock_id, lock_type, flags, 
-							    &votemap, NULL, inode, num_ident, &vote_status);
+							    &votemap, inode, num_ident, &vote_status);
 			if (status >= 0 || status == -EAGAIN) {
 				if (status != -EAGAIN)
 					status = vote_status;
@@ -1133,7 +848,7 @@
 		LOG_TRACE_STR ("Disk vote");
 		disk_vote = 1;
 
-		status = ocfs_disk_request_vote (osb, lock_id, lock_type, flags, &votemap, &lockseqno, 5000, inode, num_ident, NULL);
+		status = ocfs_disk_request_vote (osb, lock_id, lock_type, flags, &votemap, &lockseqno, 5000, inode, num_ident);
 		tmpstat = ocfs_reset_voting (osb);
 
 		if (status >=0) {
@@ -1167,6 +882,7 @@
 	int status = 0;
 	ocfs_lock_res *lockres = GET_INODE_LOCKRES(inode);
 	__u64 lock_id;
+	__u32 num_to_send = num_ident;
 
 	LOG_ENTRY_ARGS ("(0x%p, %u, %u, 0x%p)\n",
 			osb, lock_type, flags, lockres);
@@ -1201,14 +917,28 @@
 	if (flags & FLAG_FILE_RELEASE_MASTER)
 		lockres->master_node_num = OCFS_INVALID_NODE_NUM;
 
-	status = ocfs_disk_release_lock (osb, lock_id, lock_type, flags, inode, num_ident);
+	OCFS_ASSERT(lockres->uncommitted_holders <= num_ident);
+
+	/* we don't want to send over a count for any size change
+	 * which includes acquires which we also didn't broadcast. */
+	if ((flags & FLAG_FILE_EXTEND) || (flags & FLAG_FILE_TRUNCATE)) {
+		num_to_send -= lockres->uncommitted_holders;
+		lockres->uncommitted_holders = 0;
+		if (!num_to_send) {
+			/* it breaks the rules to send zero lock releases! */
+			status = 0;
+			goto finally;
+		}
+	}
+
+	status = ocfs_disk_release_lock (osb, lock_id, lock_type, flags, inode, num_to_send);
 	if (status < 0) {
 		/* Disable the vol */
 		LOG_ERROR_STATUS (status);
 	}
 
 finally:
-	if (!lockres->lock_holders) {
+	if (lockres->lock_holders - num_ident < 0) {
 		printk("About to decrement lock_holders one too many! lockid "
 		       "= %llu\n", lock_id);
 		BUG();
@@ -1225,10 +955,9 @@
 int new_lock_function(ocfs_super * osb, __u32 requested_lock, __u32 flags, struct buffer_head *bh, int *disk_vote, struct inode *inode)	
 {
 	ocfs_node_map vote_map;
-	ocfs_node_map open_map;
 	ocfs2_dinode *fe = NULL;
 	__u64 lock_id;
-	__u32 lock_write_flags = DLOCK_FLAG_MASTER | DLOCK_FLAG_LOCK | DLOCK_FLAG_OPEN_MAP;
+	__u32 lock_write_flags = DLOCK_FLAG_MASTER | DLOCK_FLAG_LOCK;
 	__u32 lock_type = requested_lock;
 	int need_to_zap_buffers = 0, need_lock_write = 1;
 	int is_readdir = (flags & FLAG_READDIR) ? 1 : 0;
@@ -1256,8 +985,7 @@
 		/* on a master change... */
 		need_to_zap_buffers = 1; /* need to dump local buffers */
 		need_lock_write = 1;     /* and rewrite the lock */
-	} else if (flags & (FLAG_ADD_OIN_MAP | FLAG_DROP_READONLY |
-			FLAG_TRUNCATE_PAGES)) {
+	} else if (flags & (FLAG_DROP_READONLY | FLAG_TRUNCATE_PAGES)) {
 		need_lock_write = 0;
 		need_to_zap_buffers = 0;
 	} else if (flags & (FLAG_READDIR | FLAG_FILE_RELEASE_CACHE)) {
@@ -1274,8 +1002,6 @@
 		need_to_zap_buffers = 0; 
 	}
 
-	ocfs_node_map_init(osb, &open_map);
-
 	/* that's why it's called fast path */
 	if (flags & FLAG_FAST_PATH_LOCK)
 		goto vote_success;
@@ -1302,14 +1028,12 @@
 
 	if (ocfs_node_map_is_empty(&vote_map)) {
 		/* As this is the only node alive, make it master of the lock */
-		/* no need to update open map */
-		lock_write_flags &= ~(DLOCK_FLAG_OPEN_MAP);
 		goto vote_success;
 	}
 
 	status = ocfs_send_dlm_request_msg (osb, lock_id, lock_type, 
 					    flags, &vote_map, 
-					    &open_map, inode, 1, 
+					    inode, 1, 
 					    &vote_status);
 	if (status >= 0) {
 		status = vote_status;
@@ -1324,14 +1048,11 @@
 	}
 
 vote_success:
-	if (status >= 0 && flags & (FLAG_FILE_EXTEND | FLAG_FILE_UPDATE))
-		ocfs_node_map_set(&lockres->oin_openmap, &open_map);
-
 	if (need_to_zap_buffers)
 		ocfs_inc_inode_seq(osb, inode);
 	
 	/* just alerting owner on open */
-	if (flags & (FLAG_ADD_OIN_MAP | FLAG_TRUNCATE_PAGES))
+	if (flags & FLAG_TRUNCATE_PAGES)
 		goto bail;
 
 	/* drop readonly should remove anyone who has responded */
@@ -1358,12 +1079,6 @@
 			LOG_ERROR_STATUS (status);
 	}
 
-	/* pick up any change to the open map */
-	if (bh) {
-		fe = (ocfs2_dinode *) bh->b_data;
-		ocfs_get_disk_lock_open_map(osb, DISK_LOCK(fe), 
-					    &lockres->oin_openmap);
-	}
 bail:
 	/* if we removed FLAG_READDIR above, set the readonly_node now */
 	if (is_readdir && !(flags & FLAG_READDIR)) {

Modified: trunk/src/dlm.h
===================================================================
--- trunk/src/dlm.h	2004-08-11 18:27:57 UTC (rev 1350)
+++ trunk/src/dlm.h	2004-08-12 19:39:10 UTC (rev 1351)
@@ -44,17 +44,8 @@
 int ocfs_update_disk_lock(ocfs_super *osb, __u32 flags,
 			  struct buffer_head **bh, struct inode *inode,
 			  ocfs_journal_handle *handle);
-int ocfs_update_master_on_open(ocfs_super *osb, struct inode *inode,
-			       ocfs_journal_handle *handle);
-int ocfs_notify_truncate_on_wopen(ocfs_super *osb, struct inode *inode);
-int ocfs_wait_for_lock_release(ocfs_super *osb, __u64 offset,
-			       __u32 time_to_wait, __u32 lock_type,
-			       struct inode *inode);
+int ocfs_notify_on_open(ocfs_super *osb, struct inode *inode);
 void ocfs_get_publish_vote_map(ocfs_super *osb, ocfs_publish *publish,
 			       ocfs_node_map *vote_map);
-void ocfs_set_disk_lock_open_map(ocfs_super *osb, ocfs2_disk_lock *lock,
-				 ocfs_node_map *open_map);
-void ocfs_get_disk_lock_open_map(ocfs_super *osb, ocfs2_disk_lock *lock,
-				 ocfs_node_map *open_map);
 
 #endif /* OCFS2_DLM_H */

Modified: trunk/src/file.c
===================================================================
--- trunk/src/file.c	2004-08-11 18:27:57 UTC (rev 1350)
+++ trunk/src/file.c	2004-08-12 19:39:10 UTC (rev 1351)
@@ -60,7 +60,6 @@
 static int ocfs_truncate_file(ocfs_super *osb, __u64 file_size,
 			      struct inode *inode);
 
-
 static void ocfs_fe_set_attributes(ocfs2_dinode *fe, struct iattr *attr)
 {
 	if (attr->ia_valid & ATTR_SIZE)
@@ -89,92 +88,91 @@
 	return sync_mapping_buffers(inode->i_mapping);
 }
 
-/* 
- * ocfs_inode_notify_open()
- * 
- * you should be holding io_sem and ip_sem in this function. 
- * If needed add ourselves to the open map. Only call this 
- * on 1st open of a file. Marks the oin as "in use"
- */
-int ocfs_inode_notify_open(ocfs_super *osb, struct buffer_head *fe_bh, 
-			   ocfs_journal_handle *handle, struct inode *inode)
+static void ocfs_block_sigs(sigset_t *oldsigs)
 {
-	int status = 0;
-	ocfs2_dinode *fe = NULL;
-	int local_handle = 1;
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
+	sigset_t tmpsig;
 
-	LOG_ENTRY ();
+	siginitsetinv(&tmpsig, SHUTDOWN_SIGS);
+	sigprocmask(SIG_BLOCK, &tmpsig, oldsigs);
+#else
+#ifdef HAVE_NPTL
+	spin_lock_irq (&current->sighand->siglock);
+	*oldsigs = current->blocked;
+	siginitsetinv (&current->blocked, SHUTDOWN_SIGS);
+	recalc_sigpending ();
+	spin_unlock_irq (&current->sighand->siglock);
+#else
+	spin_lock_irq (&current->sigmask_lock);
+	*oldsigs = current->blocked;
+	siginitsetinv (&current->blocked, SHUTDOWN_SIGS);
+	recalc_sigpending (current);
+	spin_unlock_irq (&current->sigmask_lock);
+#endif
+#endif
+}
 
-	if (handle)
-		local_handle = 0;
+static void ocfs_unblock_sigs(sigset_t newsig)
+{
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
+	sigprocmask(SIG_SETMASK, newsig, NULL);
+#else
+#ifdef HAVE_NPTL
+       	spin_lock_irq (&current->sighand->siglock);
+	current->blocked = newsig;
+	recalc_sigpending ();
+	spin_unlock_irq (&current->sighand->siglock);
+#else
+	spin_lock_irq (&current->sigmask_lock);
+	current->blocked = newsig;
+	recalc_sigpending (current);
+	spin_unlock_irq (&current->sigmask_lock);
+#endif
+#endif
+}
 
-	fe = (ocfs2_dinode *) fe_bh->b_data;
-	if (!IS_VALID_FILE_ENTRY(fe)) {
-		status = -EINVAL;
-		LOG_ERROR_STATUS(status);
-		goto leave;
-	}
+static inline int ocfs_wait_on_first_open(ocfs_super *osb, 
+					   struct inode *inode) 
+{
+	int status = 0;
+	sigset_t tmpsig;
 
-	/* why do we update these here? */
-	OCFS_I(inode)->ip_alloc_size =
-		(u64)fe->i_clusters << osb->s_clustersize_bits;
-	OCFS_I(inode)->ip_chng_seq_num = DISK_LOCK(fe)->dl_seq_num;
-	if (fe->i_blkno == 0) {
-		LOG_ERROR_STR ("this_sector=0");
-		goto leave;
+	ocfs_block_sigs(&tmpsig);
+again:
+	if (signal_pending(current)) {
+		status = -EINTR;
+		goto bail;
 	}
 
-	if (inode == osb->root_inode)
-		goto leave;
+	down(&(OCFS_I(inode)->ip_sem));
+	if (!(OCFS_I(inode)->ip_open_flags & OCFS_IN_FIRST_OPEN))
+		goto bail;
+	up(&(OCFS_I(inode)->ip_sem));
 
-	if (local_handle) {
-		handle = ocfs_start_trans(osb, OCFS_OPEN_CREDITS);
-		if (!handle) {
-			LOG_ERROR_STATUS(status = -ENOMEM);
-			goto leave;
-		}
-		ocfs_handle_set_always_commits(handle, 1);
-	}
+	interruptible_sleep_on(&osb->open_event);
+	goto again;
 
-	/* this will update the lock info from disk while also
-	 * ensuring that the lock level drops to something
-	 * compatible with a first-open of the file */
-	status = ocfs_wait_for_lock_release(osb,
-					    OCFS_I(inode)->ip_blkno << inode->i_sb->s_blocksize_bits, 
-					    30000,
-					    (S_ISDIR (inode->i_mode) ? 
-					     OCFS_LKM_EXMODE : 
-					     OCFS_LKM_NLMODE), inode);
-	if (status < 0) {
-		if (status != -EINTR) {
-			LOG_ERROR_STR ("Lock owner is alive and taking too much time");
-			LOG_ERROR_STATUS(status);
-		}
-		goto leave;
-	}
+bail:
+	up(&(OCFS_I(inode)->ip_sem));
+	ocfs_unblock_sigs(tmpsig);
 
-	status = ocfs_update_master_on_open (osb, inode, handle);
-	if (status < 0 && status != -EINTR)
-		LOG_ERROR_STATUS (status);
+	return(status);
+}
 
-leave:
-	if (local_handle && handle)
-		ocfs_commit_trans(handle);
+static inline void ocfs_notify_openers(ocfs_super *osb) 
+{
+	wake_up(&osb->open_event);
+}
 
-	LOG_EXIT_STATUS (status);
-	return status;
-} /* ocfs_inode_notify_open */
-
 /*
  * ocfs_file_open()
  *
  */
 static int ocfs_file_open(struct inode *inode, struct file *file)
 {
-	int ret =0, err = 0, status = 0;
+	int ret =0, err = 0, status = 0, first_open = 0;
 	int mode = file->f_flags;
 	ocfs_super *osb = NULL;
-	int have_oin_sem = 0;
 	struct buffer_head *fe_bh = NULL;
 
 	LOG_ENTRY_ARGS ("(0x%p, 0x%p, '%*s')\n", inode, file, 
@@ -194,86 +192,71 @@
 		status = ocfs_verify_update_inode (osb, inode, 0);
 		up_read (&(OCFS_I(inode)->ip_io_sem));
 		if (status < 0) {
-			up_write(&OCFS_I(inode)->ip_io_sem);
 			LOG_ERROR_STATUS (status);
 			goto leave;
 		}
 	}
 
-	down_write (&(OCFS_I(inode)->ip_io_sem));
-	down (&(OCFS_I(inode)->ip_sem));
-	have_oin_sem = 1;
-
-	/* first open, we've gotta update the lock state. */
+	down(&(OCFS_I(inode)->ip_sem));
 	if (!OCFS_I(inode)->ip_open_cnt) {
-		status = ocfs_read_bh(osb,
-				      OCFS_I(inode)->ip_blkno << inode->i_sb->s_blocksize_bits,
-				      &fe_bh, OCFS_BH_CACHED, inode);
-		if (status < 0) {
-			up_write(&OCFS_I(inode)->ip_io_sem);
-			LOG_ERROR_STATUS(status);
-			goto leave;
-		}
+		first_open = 1;
+		OCFS_I(inode)->ip_open_flags |= OCFS_IN_FIRST_OPEN;
+	}
+	OCFS_I(inode)->ip_open_cnt++;
+	up(&(OCFS_I(inode)->ip_sem));
 
-		status = ocfs_inode_notify_open(osb, fe_bh, NULL, inode);
-		if (status < 0) {
-			up_write(&OCFS_I(inode)->ip_io_sem);
+	if (!first_open)
+		status = ocfs_wait_on_first_open(osb, inode);
+
+	if (status < 0) {
+		if (status != -EINTR)
 			LOG_ERROR_STATUS(status);
-			if (status != -EINTR) {
-				LOG_ERROR_ARGS("Status returned %d trying to "
-					       "notify open on file '%*s'\n", 
-					       status,
-					       file->f_dentry->d_name.len, 
-					       file->f_dentry->d_name.name);
-				status = -ENOENT;
-			}
-			goto leave;
-		}
+		down(&(OCFS_I(inode)->ip_sem));
+		OCFS_I(inode)->ip_open_cnt--;
+		goto leave_unlock;
+	}
 
-		if(OCFS_I(inode)->ip_ext_map.initialized &&
-			OCFS_I(inode)->ip_ext_map.count == 0) {
-			status = ocfs_inode_fill_ext_map (osb, fe_bh, inode);
-			if (status < 0) {
-				up_write(&OCFS_I(inode)->ip_io_sem);
-				LOG_ERROR_STATUS(status);
-				goto leave;
-			}
-		}
+	status = 0;
+	if (!(mode & O_DIRECT) 
+	    && (first_open || (mode & (O_WRONLY|O_RDWR))))
+		status = ocfs_notify_on_open(osb, inode);
+
+	down (&(OCFS_I(inode)->ip_sem));
+	if (first_open) {
+		OCFS_I(inode)->ip_open_flags &= ~OCFS_IN_FIRST_OPEN;
+		ocfs_notify_openers(osb);
 	}
 
-	if((mode & (O_WRONLY|O_RDWR)) && !(mode & O_DIRECT)) {
-		status = ocfs_notify_truncate_on_wopen(osb, inode);
-		if (status < 0) {
-			up_write(&OCFS_I(inode)->ip_io_sem);
-			LOG_ERROR_STATUS(status);
-			goto leave;
-		}
+	if (status < 0) {
+		OCFS_I(inode)->ip_open_cnt--;
+		LOG_ERROR_STATUS(status);
+		goto leave_unlock;
 	}
 
-	up_write(&OCFS_I(inode)->ip_io_sem);
-	/* yes, hold onto ip_sem. */
+	if (OCFS_I(inode)->ip_open_cnt > 1) {
+		/*  We're not the only person who has it open right
+		 *  now so lets check whether the requested
+		 *  access/share access conflicts with the existing
+		 *  open operations. */
 
-	if (OCFS_I(inode)->ip_open_cnt > 0) {
-		/*  The OIN is currently in use by some thread. */
-		/*  We must check whether the requested access/share access */
-		/*  conflicts with the existing open operations. */
-
 		LOG_TRACE_ARGS ("oin->ip_open_cnt > 0! : %u\n", 
 				OCFS_I(inode)->ip_open_cnt);
 		if (!(mode & O_DIRECT)) {
 			if ((OCFS_I(inode)->ip_open_flags & OCFS_OIN_OPEN_FOR_DIRECTIO) && !(mode & O_RDONLY)) {
+				OCFS_I(inode)->ip_open_cnt--;
 				status = -EACCES;
 				LOG_TRACE_STR("file is already open O_DIRECT, "
 						"cannot open non O_DIRECT");
-				goto leave;
+				goto leave_unlock;
 			}
 		} else if (mode & O_DIRECT) {
 			if (!(OCFS_I(inode)->ip_open_flags & OCFS_OIN_OPEN_FOR_DIRECTIO)) {
+				OCFS_I(inode)->ip_open_cnt--;
 				status = -EACCES;
 				LOG_TRACE_STR("file is already open non "  \
 					      "O_DIRECT, cannot open "     \
 					      "O_DIRECT");
-				goto leave;
+				goto leave_unlock;
 			}
 		}
 		status = 0;
@@ -284,18 +267,9 @@
 			OCFS_CLEAR_FLAG(OCFS_I(inode)->ip_open_flags, OCFS_OIN_OPEN_FOR_DIRECTIO);
 	}
 
-	OCFS_I(inode)->ip_open_cnt++;
-
-	if (have_oin_sem) {
-		up (&(OCFS_I(inode)->ip_sem));
-		have_oin_sem = 0;
-	}
-
+leave_unlock:
+	up (&(OCFS_I(inode)->ip_sem));
 leave:
-	if (have_oin_sem) {
-		up (&(OCFS_I(inode)->ip_sem));
-		have_oin_sem = 0;
-	}
 
 	if (fe_bh)
 		brelse(fe_bh);
@@ -358,29 +332,10 @@
 			OCFS_I(inode)->ip_open_cnt, osb->file_open_cnt,
 			atomic_read(&dentry->d_count)); 
 
-	if (last_close) {
-		if (OCFS_I(inode)->ip_open_flags & OCFS_OIN_OPEN_FOR_DIRECTIO)
-			OCFS_CLEAR_FLAG(OCFS_I(inode)->ip_open_flags, OCFS_OIN_OPEN_FOR_DIRECTIO);
-
-		/* we might still be holding ip_extend_sem on
-		 * behalf of another node, so release it here. */
-		down(&recovery_list_sem);
-		spin_lock(&oin_num_ext_lock);
-		if (OCFS_I(inode)->ip_num_extends) {
-			LOG_TRACE_ARGS("ocfs2: closing inode %llu -- had %u "
-				       "extends pending from another node!\n", 
-				       OCFS_I(inode)->ip_blkno,
-				       OCFS_I(inode)->ip_num_extends);
-			OCFS_I(inode)->ip_num_extends = 0;
-			list_del(&OCFS_I(inode)->ip_recovery_list);
-			INIT_LIST_HEAD(&OCFS_I(inode)->ip_recovery_list);
-			up_write(&OCFS_I(inode)->ip_extend_sem);
-
-			dec = 1;
-		}
-		spin_unlock(&oin_num_ext_lock);
-		up(&recovery_list_sem);
-	}
+	if (last_close 
+	    && (OCFS_I(inode)->ip_open_flags & OCFS_OIN_OPEN_FOR_DIRECTIO))
+		OCFS_CLEAR_FLAG(OCFS_I(inode)->ip_open_flags, 
+				OCFS_OIN_OPEN_FOR_DIRECTIO);
 	up (&(OCFS_I(inode)->ip_sem));
 
 bail:

Modified: trunk/src/file.h
===================================================================
--- trunk/src/file.h	2004-08-11 18:27:57 UTC (rev 1350)
+++ trunk/src/file.h	2004-08-12 19:39:10 UTC (rev 1351)
@@ -39,9 +39,6 @@
 		     int system_file, struct buffer_head *fe_bh);
 int ocfs_inode_fill_ext_map(ocfs_super *osb, struct buffer_head *fe_bh,
 			    struct inode *inode);
-int ocfs_inode_notify_open(ocfs_super *osb, struct buffer_head *fe_bh, 
-			   ocfs_journal_handle *handle,
-			   struct inode *inode);
 int ocfs_setattr(struct dentry *dentry, struct iattr *attr);
 int ocfs_sync_inode(struct inode *inode);
 int ocfs2_zero_extend(struct inode *inode);

Modified: trunk/src/journal.c
===================================================================
--- trunk/src/journal.c	2004-08-11 18:27:57 UTC (rev 1350)
+++ trunk/src/journal.c	2004-08-12 19:39:10 UTC (rev 1351)
@@ -1090,16 +1090,6 @@
 		LOG_ERROR_ARGS("inode and fe i_size's differ! (%llu != %llu", 
 			       inode->i_size, fe->i_size);
 
-	/* add this node to openmap and update disk lock */
-	status = ocfs_update_disk_lock (osb, 
-					DLOCK_FLAG_OPEN_MAP|DLOCK_FLAG_ADD_SELF, 
-					&bh, inode, NULL);
-	if (status < 0) {
-		up_write(&OCFS_I(inode)->ip_io_sem);
-
-		LOG_ERROR_STATUS(status);
-		goto done;
-	}
 	OCFS_I(inode)->ip_open_cnt++;
 
 	up_write(&OCFS_I(inode)->ip_io_sem);
@@ -1532,18 +1522,6 @@
 
 	OCFS_I(inode)->ip_alloc_size = alloc_size;
 
-	/* add this node to openmap and update disk lock */
-	down_write(&OCFS_I(inode)->ip_io_sem);
-
-	status = ocfs_update_disk_lock (osb, 
-					DLOCK_FLAG_OPEN_MAP|DLOCK_FLAG_ADD_SELF, 
-					&bh, inode, NULL);
-	up_write(&OCFS_I(inode)->ip_io_sem);
-	if (status < 0) {
-		LOG_ERROR_STATUS(status);
-		goto done;
-	}
-
 	status = ocfs_force_read_journal(osb, inode->i_size, inode);
 	if (status < 0) {
 		LOG_ERROR_STATUS(status);

Modified: trunk/src/lockres.c
===================================================================
--- trunk/src/lockres.c	2004-08-11 18:27:57 UTC (rev 1350)
+++ trunk/src/lockres.c	2004-08-12 19:39:10 UTC (rev 1351)
@@ -103,7 +103,6 @@
 
 		lockres->lock_type = DISK_LOCK(fe)->dl_level;
 		lockres->master_node_num = DISK_LOCK(fe)->dl_master;
-		ocfs_get_disk_lock_open_map(osb, DISK_LOCK(fe), &lockres->oin_openmap);
 
 		if (lockres->readonly_node != OCFS_INVALID_NODE_NUM &&
     	    	    lockres->readonly_node != lockres->master_node_num) {
@@ -205,7 +204,6 @@
 	lockres->lock_type = OCFS_LKM_NLMODE;
 	lockres->master_node_num = OCFS_INVALID_NODE_NUM;
 	lockres->last_upd_seq_num = 0;
-	ocfs_node_map_init(osb, &lockres->oin_openmap);
 	lockres->lock_state = 0;
 
 	init_MUTEX(&lockres->lock_mutex);
@@ -214,7 +212,7 @@
 	lockres->readonly_node = OCFS_INVALID_NODE_NUM;
 
 	lockres->lock_holders = 0;
-	LOG_TRACE_ARGS("lockres->lock_holders = %u\n", lockres->lock_holders);
+	lockres->uncommitted_holders = 0;
 
 	LOG_EXIT ();
 	return;

Modified: trunk/src/namei.c
===================================================================
--- trunk/src/namei.c	2004-08-11 18:27:57 UTC (rev 1350)
+++ trunk/src/namei.c	2004-08-12 19:39:10 UTC (rev 1351)
@@ -375,7 +375,6 @@
 	u64 fe_blkno = 0;
 	__u64 fileOffset = 0;
 	struct inode *inode_alloc_inode = NULL;
-	ocfs_node_map just_me;
 
 	LOG_ENTRY_ARGS ("(0x%p, 0x%p, %d, %lu, '%*s')\n", dir, dentry, mode,
 			(unsigned long)dev, dentry->d_name.len, dentry->d_name.name);
@@ -383,9 +382,6 @@
 	OCFS_ASSERT(new_fe_bh);
 	*new_fe_bh = NULL;
 	
-	ocfs_node_map_init(osb, &just_me);
-	ocfs_node_map_set_bit(&just_me, osb->node_num);
-
 	inode_alloc_inode = ocfs_get_system_file_inode(osb, INODE_ALLOC_BITMAP_SYSTEM_INODE, osb->node_num);
 	if (!inode_alloc_inode) {
 		status = -EFAIL;
@@ -452,7 +448,6 @@
 	DISK_LOCK(fe)->dl_seq_num = 0;
 	DISK_LOCK(fe)->dl_master = osb->node_num;
 	DISK_LOCK(fe)->dl_level = OCFS_LKM_EXMODE;
-	ocfs_set_disk_lock_open_map(osb, DISK_LOCK(fe), &just_me);
 	fe->i_atime = fe->i_ctime = fe->i_mtime = OCFS_CURRENT_TIME;
 	fe->i_dtime = 0;
 
@@ -1512,12 +1507,6 @@
 	inode->i_size = newsize;
 	inode->i_blocks = (newsize + sb->s_blocksize - 1) >> sb->s_blocksize_bits;
 
-	status = ocfs_inode_notify_open(osb, new_fe_bh, handle, inode);
-	if (status < 0) {
-		LOG_ERROR_STATUS(status);
-		goto abort_trans;
-	}
-
 	status = ocfs_inode_fill_ext_map(osb, new_fe_bh, inode);
 	if (status < 0) {
 		LOG_ERROR_STATUS(status);

Modified: trunk/src/nm.c
===================================================================
--- trunk/src/nm.c	2004-08-11 18:27:57 UTC (rev 1350)
+++ trunk/src/nm.c	2004-08-12 19:39:10 UTC (rev 1351)
@@ -88,7 +88,6 @@
 	"DELETE_RELEASE",// delete or rename release request
 	"RELEASE_CACHE",        // release a cache lock I hold
 	"CHANGE_MASTER",        // request to change master to requestor
-	"ADD_OIN_MAP",          // add requestor into oin map
 	"NOT_MASTER",           // I am not master, retry
 	"REMASTER_THIS",        // remaster lock to me
 	"REMASTER_REQUESTOR",   // remaster lock to requestor
@@ -527,8 +526,6 @@
 	} else if (lockres->master_node_num == osb->node_num) {
 		if (flags & FLAG_CHANGE_MASTER)
 			vote_type = CHANGE_MASTER;
-		else if (flags & FLAG_ADD_OIN_MAP)
-			vote_type = ADD_OIN_MAP;
 		else {
 			LOG_TRACE_STR("(INVALID_REQUEST) am master, but no more types");
 			vote_type = INVALID_REQUEST;
@@ -550,18 +547,11 @@
 	return vote_type;
 }
 
+/* this function requires that callers to it be serialized (isn't
+ * really a problem as vote_sem does that for us). */
 static void ocfs_mark_inode_for_extend(ocfs_super *osb, struct inode *inode,
 				       __u32 node_num)
 {
-
-	down(&OCFS_I(inode)->ip_sem);
-
-	/* if we don't have it open, then don't bother with this. */
-	if (!OCFS_I(inode)->ip_open_cnt) {
-		up(&OCFS_I(inode)->ip_sem);
-		return;
-	}
-
 	spin_lock(&oin_num_ext_lock);
 
 	if (OCFS_I(inode)->ip_num_extends < 0)
@@ -571,8 +561,11 @@
 	 * the counter. */
 	if (OCFS_I(inode)->ip_num_extends > 0) {
 		OCFS_I(inode)->ip_num_extends++;
+
+	printk("ocfs_mark_inode_for_extend: inode %llu, num = %d\n",
+	       OCFS_I(inode)->ip_blkno, OCFS_I(inode)->ip_num_extends);
+
 		spin_unlock(&oin_num_ext_lock);
-		up(&OCFS_I(inode)->ip_sem);
 		return;
 	}
 
@@ -580,7 +573,6 @@
 	 * this holding ip_sem so we drop it and recheck after we've
 	 * got it. */
 	spin_unlock(&oin_num_ext_lock);
-	up(&OCFS_I(inode)->ip_sem);
 
 	/* take the extend_sem on behalf of
 	 * this other node. It won't be
@@ -591,26 +583,26 @@
 	 * inode. */
 	down_write(&OCFS_I(inode)->ip_extend_sem);
 
-	down(&OCFS_I(inode)->ip_sem);
-	if (!OCFS_I(inode)->ip_open_cnt) {
-		up_write(&OCFS_I(inode)->ip_extend_sem);
-		up(&OCFS_I(inode)->ip_sem);
-		return;
-	}
-
 	atomic_inc(&inode->i_count);
 
 	/* Ok, we've still got it open. Put this guy on the recovery
 	 * list in case the extending node dies. */
 	down(&recovery_list_sem);
 	spin_lock(&oin_num_ext_lock);
+
+	if (OCFS_I(inode)->ip_num_extends < 0)
+		BUG();
+
 	OCFS_I(inode)->ip_num_extends++;
 	list_add_tail(&OCFS_I(inode)->ip_recovery_list, 
 		      &osb->lock_recovery_lists[node_num]);
+#ifdef VERBOSE_PROCESS_VOTE
+	printk("ocfs_mark_inode_for_extend: inode %llu, num = %d\n",
+	       OCFS_I(inode)->ip_blkno, OCFS_I(inode)->ip_num_extends);
+#endif
 	spin_unlock(&oin_num_ext_lock);
 	up(&recovery_list_sem);
 
-	up(&OCFS_I(inode)->ip_sem);
 	return;
 }
 
@@ -619,21 +611,32 @@
 {
 	int dec = 0;
 
-	down(&OCFS_I(inode)->ip_sem);
-
-	/* if we no longer have it open, then the close path has dealt
-	 * with this. */
-	if (!OCFS_I(inode)->ip_open_cnt)
-		goto done;
-
 	down(&recovery_list_sem);
 	spin_lock(&oin_num_ext_lock);
 
-	OCFS_I(inode)->ip_num_extends -= num_rel;
+	if ((OCFS_I(inode)->ip_num_extends - (s32) num_rel) < 0) {
+		/* We don't force to zero here in order to cover up a
+		 * bug, but rather because it's perfectly valid for us
+		 * to get a release with a count > what we've had if
+		 * we mount after the acquires have been sent. */
 
-	if (OCFS_I(inode)->ip_num_extends < 0)
-		BUG();
+#ifdef VERBOSE_PROCESS_VOTE
+		printk("ocfs_clear_inode_for_extend: inode %llu, num_rel of "
+		       "%d would result in negative count (ip_num_extends "
+		       "= %d)\n", 
+		       OCFS_I(inode)->ip_blkno, num_rel,
+		       OCFS_I(inode)->ip_num_extends);
+#endif
+		OCFS_I(inode)->ip_num_extends = 0;
+	} else {
+		OCFS_I(inode)->ip_num_extends -= num_rel;
+	}
 
+#ifdef VERBOSE_PROCESS_VOTE
+	printk("ocfs_clear_inode_for_extend: inode %llu, num = %d\n",
+	       OCFS_I(inode)->ip_blkno, OCFS_I(inode)->ip_num_extends);
+#endif
+
 	if (!OCFS_I(inode)->ip_num_extends) {
 		list_del(&OCFS_I(inode)->ip_recovery_list);
 		INIT_LIST_HEAD(&OCFS_I(inode)->ip_recovery_list);
@@ -646,9 +649,6 @@
 	spin_unlock(&oin_num_ext_lock);
 	up(&recovery_list_sem);
 
-done:
-	up(&OCFS_I(inode)->ip_sem);
-
 	/* we want iputs to happen outside of as many locks as possible. */
 	if (dec)
 		iput(inode);
@@ -763,7 +763,6 @@
 	ocfs_dlm_msg *dlm_msg = (comm_vote ? ctxt->u.dlm_msg : NULL);
 	__s16 node_num = ctxt->node_num;
 	__u64 lock_id, seq_num;
-	ocfs_node_map disk_map;
 
 	LOG_ENTRY_ARGS ("(0x%p, 0x%p)\n", osb, ctxt);
 
@@ -841,24 +840,18 @@
 		 * Please see the note in ocfs_delete_inode. */
 		osb->voting_ino = inode->i_ino;
 
-#warning "FLAG_ADD_OIN_MAP shouldn't do io anyways!"
 #ifdef PROCESS_VOTE_TRYLOCK
-		if (flags & (FLAG_FILE_RELEASE_CACHE | FLAG_CHANGE_MASTER 
-			     | FLAG_ADD_OIN_MAP)) {
-
+		if (flags & (FLAG_FILE_RELEASE_CACHE|FLAG_CHANGE_MASTER)) {
 			if (ocfs_io_sem_write_trylock(inode, 
 						      (OCFS_NM_HEARTBEAT_TIME/2))) {
 				LOG_ERROR_ARGS("Could not get i/o lock on "
-					       "inode %llu\n",
-					       OCFS_I(inode)->ip_blkno);
-				if (flags & FLAG_ADD_OIN_MAP)
-					goto leave;
+					       "inode %llu, flags 0x%x\n",
+					       OCFS_I(inode)->ip_blkno, flags);
 			} else 
 				have_io_sem = 1;
 		}
 #else
-		if (flags & (FLAG_FILE_RELEASE_CACHE | FLAG_CHANGE_MASTER 
-			     | FLAG_ADD_OIN_MAP)) {
+		    if (flags & (FLAG_FILE_RELEASE_CACHE|FLAG_CHANGE_MASTER)) {
 			down_write(&OCFS_I(inode)->ip_io_sem);
 			have_io_sem = 1;
 		}
@@ -899,13 +892,10 @@
 		printk("Invalid request! flags = 0x%x\n", flags);
 
 #endif
-	/* get_process_vote_action will only allow CHANGE_MASTER,
-	 * RELEASE_CACHE, and ADD_OIN_MAP on a CACHE lock held by this
-	 * node.  the CHANGE_MASTER/RELEASE_CACHE path needs to check
-	 * the readonly map to see if any nodes need to be updated.
-	 * this is not necessary for the ADD_OIN_MAP path since it
-	 * cannot actually modify any data or metadata under the lock.
-	 */
+	/* get_process_vote_action will only allow CHANGE_MASTER and
+	 * RELEASE_CACHE on a CACHE lock held by this node.  The
+	 * CHANGE_MASTER/RELEASE_CACHE path needs to check the
+	 * readonly map to see if any nodes need to be updated. */
 
 	if (disk_vote) {
 		/* Zero out the vote for everybody, if any already set
@@ -1135,12 +1125,6 @@
 			is_locked = DISK_LOCK(fe)->dl_level > OCFS_LKM_NLMODE;
 			if (vote_type == CHANGE_MASTER) {
 				fe = (ocfs2_dinode *) fe_bh->b_data;
-				if (OCFS_I(inode)->ip_open_cnt) {
-					ocfs_node_map_init(osb, &disk_map);
-					ocfs_get_disk_lock_open_map(osb, DISK_LOCK(fe), &disk_map);
-					ocfs_node_map_set_bit(&disk_map, osb->node_num);
-					ocfs_set_disk_lock_open_map(osb, DISK_LOCK(fe), &disk_map);
-				}
 				DISK_LOCK(fe)->dl_master = node_num;
 			}
 
@@ -1172,39 +1156,6 @@
 			status = 0;
 			break;
 		
-		case ADD_OIN_MAP:
-			LOG_TRACE_STR("ADD_OIN_MAP");
-			status = ocfs_read_bh(osb, lock_id, &fe_bh, lockflags, inode);
-			if (status < 0) {
-				LOG_ERROR_STATUS (status);
-				break;
-			}
-			fe = (ocfs2_dinode *) fe_bh->b_data;
-
-			if (!(fe->i_flags & OCFS2_VALID_FL)) {
-				vote_response = FLAG_VOTE_FILE_DEL;
-			} else {
-				ocfs_node_map_init(osb, &disk_map);
-				fe = (ocfs2_dinode *) fe_bh->b_data;
-				ocfs_get_disk_lock_open_map(osb, DISK_LOCK(fe), &disk_map);
-				ocfs_node_map_set_bit(&disk_map, node_num);
-				ocfs_set_disk_lock_open_map(osb, DISK_LOCK(fe), &disk_map);
-
-				/* Write new map on the disk */
-				status = ocfs_write_bh(osb, fe_bh, 0, inode);
-				if (status < 0) {
-					brelse(fe_bh);
-					LOG_ERROR_STATUS (status);
-					break;
-				}
-
-				/* Add this node to the oin map on the file entry */
-				ocfs_node_map_set(&lockres->oin_openmap, &disk_map);
-				vote_response = FLAG_VOTE_NODE;
-			}
-			brelse(fe_bh);
-			break;
-
 		case DROP_READONLY:
 			/* TODO: may need locking in here to lock out 
 			 * the actual IO that a readdir may have in 

Modified: trunk/src/ocfs.h
===================================================================
--- trunk/src/ocfs.h	2004-08-11 18:27:57 UTC (rev 1350)
+++ trunk/src/ocfs.h	2004-08-12 19:39:10 UTC (rev 1351)
@@ -83,7 +83,6 @@
 	DELETE_RELEASE,// delete or rename release request
 	RELEASE_CACHE,        // release a cache lock I hold
 	CHANGE_MASTER,        // request to change master to requestor
-	ADD_OIN_MAP,          // add requestor into oin map
 	NOT_MASTER,           // I am not master, retry
 	REMASTER_THIS,        // remaster lock to me
 	REMASTER_REQUESTOR,   // remaster lock to requestor
@@ -132,7 +131,7 @@
 #define  FLAG_FILE_RELEASE_MASTER 0x00000100
 #define  FLAG_RELEASE_DENTRY      0x00000200
 #define  FLAG_CHANGE_MASTER       0x00000400
-#define  FLAG_ADD_OIN_MAP         0x00000800
+#define  FLAG_FILE_UNUSED5        0x00000800
 #define  FLAG_DIR                 0x00001000
 #define  FLAG_REMASTER            0x00002000
 #define  FLAG_FAST_PATH_LOCK      0x00004000
@@ -315,13 +314,13 @@
 #define OCFS2_SB(sb)	    ((ocfs_super *)OCFS_GENERIC_SB_MEMBER(sb))
 
 
-#define DLOCK_FLAG_OPEN_MAP    (0x1)
+#define DLOCK_FLAG_UNUSED1     (0x1)
 #define DLOCK_FLAG_LOCK        (0x2)
 #define DLOCK_FLAG_SEQ_NUM     (0x4)
 #define DLOCK_FLAG_MASTER      (0x8)
 #define DLOCK_FLAG_LAST_UPDATE (0x10)
-#define DLOCK_FLAG_ADD_SELF    (0x20)
-#define DLOCK_FLAG_ALL         (DLOCK_FLAG_OPEN_MAP | DLOCK_FLAG_LOCK | \
+#define DLOCK_FLAG_UNUSED2     (0x20)
+#define DLOCK_FLAG_ALL         (DLOCK_FLAG_LOCK | \
 				DLOCK_FLAG_SEQ_NUM | DLOCK_FLAG_MASTER | \
 				DLOCK_FLAG_LAST_UPDATE)
 
@@ -390,11 +389,12 @@
 	__s16 master_node_num;	/* Master Node */
 	__u32 lock_state;
 	__u32 lock_holders;
+	__u32 uncommitted_holders;
 	__u8 lock_type;
 	struct semaphore lock_mutex;
 	__s16 readonly_node;
 	ocfs_node_map readonly_map;
-	ocfs_node_map oin_openmap;
+	//ocfs_node_map oin_openmap;
 	__u64 last_upd_seq_num;
 };
 
@@ -436,7 +436,7 @@
 	struct rw_semaphore  ip_extend_sem;
 
 	struct list_head  ip_recovery_list;/* protected by recovery_list_sem */
-	__u32             ip_num_extends; /* protected by oin_num_ext_lock */
+	__s32             ip_num_extends; /* protected by oin_num_ext_lock */
 
 	atomic_t          ip_clean_buffer_seq;
 	__u32             ip_flags; /* see below */
@@ -472,7 +472,8 @@
 /* Eventually, the 'flags' and 'open_flags' fields need to be
  * merged. */
 /* open flags */
-#define  OCFS_OIN_OPEN_FOR_DIRECTIO              (0x00000008)
+#define  OCFS_OIN_OPEN_FOR_DIRECTIO              (0x00000001)
+#define  OCFS_IN_FIRST_OPEN                      (0x00000002)
 
 /* 'flags' flags. */
 /* has this inode been deleted, either from this node or from another node. */
@@ -655,6 +656,7 @@
 	struct list_head vote_obj_queue;
 	spinlock_t vote_obj_queue_lock;
 	unsigned long voting_ino; /* only safe from the process_vote pid */
+	wait_queue_head_t open_event;
 };
 
 typedef struct _ocfs_comm_info
@@ -773,7 +775,7 @@
 	int vote_status;
 	ocfs_node_map req_vote_map;
 	ocfs_node_map got_vote_map;
-	ocfs_node_map tmp_openmap;
+	//ocfs_node_map tmp_openmap;
 	__u64 seq_num;
 	pid_t pid;
 	ocfs_dlm_msg m;
@@ -853,7 +855,6 @@
 	int reply_method;
 	int *status;
 	ocfs_node_map *got_vote_map;
-	ocfs_node_map *open_map;
 	__u32 flags;
 	union {
 		ocfs_dlm_reply_master *reply;

Modified: trunk/src/ocfs2_fs.h
===================================================================
--- trunk/src/ocfs2_fs.h	2004-08-11 18:27:57 UTC (rev 1350)
+++ trunk/src/ocfs2_fs.h	2004-08-12 19:39:10 UTC (rev 1351)
@@ -277,8 +277,7 @@
 	__u8 dl_level;		/* Lock level */
 	__u8 dl_reserved2[3];	/* Pad to u64 */
 	__u64 dl_seq_num;	/* Lock transaction seqnum */
-/*10*/	__u32 dl_node_map[8];	/* Bitmap of interested nodes,
-				   was __u32 */ 
+/*10*/	__u32 dl_reserved3[8];  /* was dl_node_map */
 /*30*/
 } ocfs2_disk_lock;
 

Modified: trunk/src/super.c
===================================================================
--- trunk/src/super.c	2004-08-11 18:27:57 UTC (rev 1350)
+++ trunk/src/super.c	2004-08-12 19:39:10 UTC (rev 1351)
@@ -1510,6 +1510,7 @@
 	osb->have_local_alloc = 0;
 	osb->local_alloc_bh = NULL;
 
+	init_waitqueue_head (&osb->open_event);
 	/* not using any of these sb fields yet */
 #if 0
 di->i_ctime = cpu_to_le64(format_time); // use this as s_wtime (write time)

Modified: trunk/src/vote.c
===================================================================
--- trunk/src/vote.c	2004-08-11 18:27:57 UTC (rev 1350)
+++ trunk/src/vote.c	2004-08-12 19:39:10 UTC (rev 1351)
@@ -418,7 +418,7 @@
 	int max = data->u.proc.max - *len;
 	char *p = data->u.proc.page + *len;
 	int ret = 0;
-	char *reqstr=NULL, *gotstr=NULL, *openstr=NULL;
+	char *reqstr=NULL, *gotstr=NULL;
 
 	/* just run thru everything to populate /proc */
 	/* return -ENOENT to keep going */
@@ -432,17 +432,14 @@
 				break;
 			if (ocfs_node_map_stringify(&obj->got_vote_map, &gotstr) < 0)
 				break;
-			if (ocfs_node_map_stringify(&obj->tmp_openmap, &openstr) < 0)
-				break;
-			ret = snprintf(p, max, "REQST: %d %c %3d %s %21llu %21llu %08x | %s %s\n",
+			ret = snprintf(p, max, "REQST: %d %c %3d %s %21llu %21llu %08x | %s\n",
 				obj->pid,
 				vote_state_str[obj->vote_state],
 				obj->vote_status,
 				reqstr,
 				request->lock_id,
 				request->lock_seq_num, 
-				request->flags, 
-				gotstr, openstr);
+				request->flags, gotstr);
 			break;
 		case OCFS_VOTE_REPLY:
 			reply = (ocfs_dlm_reply_master *) dlm_msg->msg_buf;
@@ -474,8 +471,6 @@
 		kfree(reqstr);
 	if (gotstr)
 		kfree(gotstr);
-	if (openstr)
-		kfree(openstr);
 	return status;
 }
 
@@ -608,7 +603,6 @@
 	
 	ctxt.reply_method = COMM_VOTE;
 	ctxt.got_vote_map = &(obj->got_vote_map);
-	ctxt.open_map = &(obj->tmp_openmap);
 	ctxt.status = &(obj->vote_status);
 	ctxt.flags = reply_msg->flags;
 	ctxt.u.reply = reply;
@@ -837,7 +831,6 @@
 	else
 		ocfs_node_map_init(osb, &obj->req_vote_map);
 	ocfs_node_map_init(osb, &obj->got_vote_map);
-	ocfs_node_map_init(osb, &obj->tmp_openmap);
 
 	obj->seq_num = 0ULL;
 	obj->req_lock_type = reqlock;
@@ -853,7 +846,7 @@
  * ocfs_send_dlm_request_msg()
  * inode is definitely non NULL
  */
-int ocfs_send_dlm_request_msg (ocfs_super * osb, __u64 lock_id, __u32 lock_type, __u32 flags, ocfs_node_map *votemap, ocfs_node_map *openmap, struct inode *inode, __u32 num_ident, int *vote_status)
+int ocfs_send_dlm_request_msg (ocfs_super * osb, __u64 lock_id, __u32 lock_type, __u32 flags, ocfs_node_map *votemap, struct inode *inode, __u32 num_ident, int *vote_status)
 {
 	int status = 0;
 	ocfs_dlm_msg *dlm_msg = NULL;
@@ -904,10 +897,6 @@
 
 	spin_lock (&obj->lock);
 	if (obj->vote_status >= 0 && obj->vote_state == VOTE_OBJ_STATE_FULL_REPLY) {
-		// extend and update need to update the openmap
- 	    	if (flags & (FLAG_FILE_EXTEND | FLAG_FILE_UPDATE)) 
-			if (openmap)
-				ocfs_node_map_set(openmap, &obj->tmp_openmap);
 		LOG_TRACE_ARGS ("OK vote, lockid=%llu\n", lock_id);
 	} else if (obj->vote_status != -EAGAIN || obj->vote_state != VOTE_OBJ_STATE_FULL_REPLY) {
 #warning "should we even be erroring here at all!"
@@ -966,9 +955,6 @@
 	switch (reply_status) {
 		case FLAG_VOTE_NODE:
 			ocfs_node_map_set_bit(ctxt->got_vote_map, node_num);
-			if (ctxt->flags & (FLAG_FILE_EXTEND|FLAG_FILE_UPDATE) && 
-			    open_handle && ctxt->open_map)
-				ocfs_node_map_set_bit(ctxt->open_map, node_num);
 			break;
 		case FLAG_VOTE_OIN_ALREADY_INUSE:
 			ocfs_node_map_set_bit(ctxt->got_vote_map, node_num);

Modified: trunk/src/vote.h
===================================================================
--- trunk/src/vote.h	2004-08-11 18:27:57 UTC (rev 1350)
+++ trunk/src/vote.h	2004-08-12 19:39:10 UTC (rev 1351)
@@ -44,7 +44,7 @@
 int ocfs_send_dismount_msg(ocfs_super *osb);
 int ocfs_send_dlm_request_msg (ocfs_super * osb, __u64 lock_id, 
 			       __u32 lock_type, __u32 flags, 
-			       ocfs_node_map *votemap, ocfs_node_map *openmap, 
+			       ocfs_node_map *votemap, 
 			       struct inode *inode, __u32 num_ident, 
 			       int *vote_status);
 int ocfs_send_vote_reply(ocfs_super *osb, ocfs_dlm_msg *dlm_msg,



More information about the Ocfs2-commits mailing list