[Ocfs2-commits] jlbec commits r1031 - in branches/format-changes: . src

Wed Jun 9 03:11:08 CDT 2004

Author: jlbec
Date: 2004-06-09 02:11:07 -0500 (Wed, 09 Jun 2004)
New Revision: 1031

Modified:
   branches/format-changes/TODO
   branches/format-changes/src/alloc.c
   branches/format-changes/src/dir.c
   branches/format-changes/src/dir.h
   branches/format-changes/src/dlm.c
   branches/format-changes/src/file.c
   branches/format-changes/src/hash.c
   branches/format-changes/src/inode.c
   branches/format-changes/src/inode.h
   branches/format-changes/src/journal.c
   branches/format-changes/src/lockres.c
   branches/format-changes/src/namei.c
   branches/format-changes/src/nm.c
   branches/format-changes/src/ocfs.h
   branches/format-changes/src/ocfs_buffer_head.h
   branches/format-changes/src/ocfs_journal.h
   branches/format-changes/src/super.c
   branches/format-changes/src/sysfile.c
Log:

o Merged 1002:1029 from trunk:
	- [1024] Fix add-inode-to-handle-bugs
	- [1024] find_files_on_disk does not read fe
	- [1024] No fe_bh to ocfs_iget
	- [1024] Remove trans_lock
	- [1024] Add ip_io_sem
	- [1024] BUGs in bh_sem to catch ip_io_sem problems
	- [1024] BUG in acquire_lockres to catch ip_io_sem problems
	- [1024] Fix journal_{stop,start} race
	- [1024] Orphan inode flags
	- [1024] Fix extent alloc locking
	- [1024] Async local alloc moves
	- [1025] Update TODO
	- [1026] Remove ocfs_checkpoint_handle and ocfs_revoke_handle
	- [1027] Remove ocfs_wait_for_disk_lock_release
	- [1028] double_{down,up} fix
	- [1029] Remove stray printk



Modified: branches/format-changes/TODO
===================================================================

--- branches/format-changes/TODO	2004-06-09 06:06:33 UTC (rev 1030)
+++ branches/format-changes/TODO	2004-06-09 07:11:07 UTC (rev 1031)
@@ -20,16 +20,6 @@
 	DISK_LOCK_SEQNUM(fe) = changeSeqNum;
   code as it is equally useless.
 
-* Make all our 64-bit divides into shifts, this way we won't need 
-  divdi3.c anymore.
-
-* Combine the ocfs_inode_private and ocfs_inode_num structs, keeping the
-  inode hash, but just using a unified struct as the inode private data, and
-  the hash element (there are a bunch of redundant fields)
-
-* move things to use i_sem to lock out inode changes instead of the bh sem
-  hash we have now.
-
 * get rid of as much of lockres as possible.
 
 * make slabs for: ocfs_journal_handle, and ocfs_journal_copyout and maybe
@@ -47,9 +37,6 @@
 * now that acquire_lock and release_lock take inodes, get rid of the lock_id
   and other redundant arguments in the prototype.
 
-* Fix the bug where deleting a directory with no files in it's first
-  dirnode, but lots in it's others succeeds when it should fail.
-
 * Go through the code and remove every spot where we look inside a bh for a
   value which is right there on an inode (like, whether it's a directory or
   not, fe / vote offsets, lock flags, etc). This includes passing redundant

Modified: branches/format-changes/src/alloc.c
===================================================================
--- branches/format-changes/src/alloc.c	2004-06-09 06:06:33 UTC (rev 1030)
+++ branches/format-changes/src/alloc.c	2004-06-09 07:11:07 UTC (rev 1031)
@@ -329,7 +329,7 @@
 	}
 
 	/* start the transaction here to preserve ordering with the
-	 * bitmap i_sems... */
+	 * bitmap io_sems... */
 	handle = ocfs_start_trans(osb, credits);
 	if (!handle) {
 		status = -ENOMEM;
@@ -343,13 +343,13 @@
 		  osb->vol_layout.root_int_off;
 	for (i = 0; i < osb->max_nodes; i++, lock_id += osb->sect_size) {
 		if (free_ext_node[i] != NULL) {
-			extnode_inode[i] = ocfs_iget(osb, lock_id, NULL);
+			extnode_inode[i] = ocfs_iget(osb, lock_id);
 			if (!extnode_inode[i]) {
 				status = -EINVAL;
 				LOG_ERROR_STATUS (status);
 				goto abort;
 			}
-			down(&extnode_inode[i]->i_sem);
+			down(&OCFS_I(extnode_inode[i])->ip_io_sem);
 
 			status = ocfs_acquire_lock (osb, lock_id,
 				 		    OCFS_DLM_EXCLUSIVE_LOCK,
@@ -357,7 +357,7 @@
 						    &ugly_hack_bh, 
 						    extnode_inode[i]);
 			if (status < 0) {
-				up(&extnode_inode[i]->i_sem);
+				up(&OCFS_I(extnode_inode[i])->ip_io_sem);
 				iput(extnode_inode[i]);
 				extnode_inode[i] = NULL;
 				if (status != -EINTR)
@@ -378,14 +378,14 @@
 			LOG_ERROR_STATUS (status);
 			goto abort;
 		}
-		down(&vol_inode->i_sem);
+		down(&OCFS_I(vol_inode)->ip_io_sem);
 
 		status = ocfs_acquire_lock (osb, OCFS_BITMAP_LOCK_OFFSET(osb),
 					    OCFS_DLM_EXCLUSIVE_LOCK,
 					    FLAG_FILE_CREATE,
 					    &globalbh, vol_inode);
 		if (status < 0) {
-			up(&vol_inode->i_sem);
+			up(&OCFS_I(vol_inode)->ip_io_sem);
 			iput(vol_inode);
 			vol_inode = NULL;
 
@@ -472,12 +472,12 @@
 
 	for (i = 0; i < osb->max_nodes; i++) {
 		if (extnode_inode[i]) {
-			up(&extnode_inode[i]->i_sem);
+			up(&OCFS_I(extnode_inode[i])->ip_io_sem);
 			iput(extnode_inode[i]);
 		}
 	}
 	if (vol_inode) {
-		up(&vol_inode->i_sem);
+		up(&OCFS_I(vol_inode)->ip_io_sem);
 		iput(vol_inode);
 	}
 
@@ -2783,6 +2783,9 @@
 
 	OCFS_ASSERT (osb);
 
+	if ((bitmap_inode && !lock_bh) || (lock_bh && !bitmap_inode))
+		BUG();
+
 	if (!bitmap_inode) {
 		bitmap_inode = igrab(osb->system_inodes[GLOBAL_BITMAP_SYSTEM_INODE]);
 		if (!bitmap_inode) {
@@ -2809,8 +2812,8 @@
 				LOG_ERROR_STATUS (status);
 			goto leave;
 		}
-		ocfs_journal_add_lock(handle, OCFS_DLM_EXCLUSIVE_LOCK, 
-				      0, bh, bitmap_inode);
+		ocfs_handle_add_lock(handle, OCFS_DLM_EXCLUSIVE_LOCK, 
+				      0, bh, bitmap_inode, 1);
 	}
 
 	status = ocfs_journal_access(handle, bh, OCFS_JOURNAL_ACCESS_WRITE);
@@ -3009,8 +3012,18 @@
 	/* Allocate a block of size blocksize from the relevant file/bitmap */
 	OCFS_ASSERT (blockSize);
 
+	if (down_trylock(&OCFS_I(inode)->ip_io_sem) == 0) {
+		LOG_TRACE_ARGS("Uhoh, asking me to allocate on an unlocked system file! (type = %u, i_ino = %lu)\n", Type, inode->i_ino);
+		BUG();
+	}
+
 	/* Get a lock on the file */
 	lockId = (bm_file * osb->sect_size) + osb->vol_layout.root_int_off;
+	if (lockId != GET_INODE_FEOFF(inode)) {
+		LOG_TRACE_ARGS("lockId = %llu, offset = %llu\n", lockId, 
+			       GET_INODE_FEOFF(inode));
+		BUG();
+	}
 	status = ocfs_acquire_lock (osb, lockId, OCFS_DLM_EXCLUSIVE_LOCK,
 			     FLAG_FILE_CREATE, &bh, inode);
 	if (status < 0) {
@@ -3018,9 +3031,9 @@
 		goto leave;
 	}
 
-	ocfs_journal_add_lock(handle, OCFS_DLM_EXCLUSIVE_LOCK, 
+	ocfs_handle_add_lock(handle, OCFS_DLM_EXCLUSIVE_LOCK, 
 			      FLAG_FILE_CREATE, 
-			      bh, inode);
+			      bh, inode, 1);
 
 	status = ocfs_ugly_hack(handle, bh);
 	if (status < 0) {
@@ -3590,10 +3603,6 @@
 				ocfs_shutdown_local_alloc(osb, NULL, 0, 
 							  0);
 
-				/* we want to make sure an empty alloc
-				 * hits disk. */
-				ocfs_handle_set_sync(handle, 1);
-
 				/* the bh might not have been dirtied to
 				 * the journal yet. */
 				tmpstat = ocfs_journal_dirty(handle, 
@@ -3644,8 +3653,8 @@
 			goto bail;
 		}
 
-		ocfs_journal_add_lock(handle, OCFS_DLM_EXCLUSIVE_LOCK, 0,
-				      main_bm_bh, main_bm_inode);
+		ocfs_handle_add_lock(handle, OCFS_DLM_EXCLUSIVE_LOCK, 0,
+				      main_bm_bh, main_bm_inode, 1);
 
 		status = ocfs_sync_local_to_main(osb, &(handle->commit_bits),
 						 NULL, 0);
@@ -3752,6 +3761,10 @@
 		use_global = 0;
 
 	if (!use_global) {
+		if (handle->flags & OCFS_HANDLE_LOCAL_ALLOC) {
+			printk("whoa, I already have local alloc sem!?!\n");
+			BUG();
+		}
 		down(&osb->local_alloc_sem);
 		handle->flags |= OCFS_HANDLE_LOCAL_ALLOC;
 		status = ocfs_find_space_from_local(osb, bitswanted, 

Modified: branches/format-changes/src/dir.c
===================================================================
--- branches/format-changes/src/dir.c	2004-06-09 06:06:33 UTC (rev 1030)
+++ branches/format-changes/src/dir.c	2004-06-09 07:11:07 UTC (rev 1031)
@@ -182,10 +182,14 @@
 
 
 /* ocfs_find_files_on_disk()
- * NOTE: this should always be called with inode->i_sem taken!
+ * NOTE: this should always be called with parent dir ip_io_sem taken!
  */
 /* parent off changed to file entry offset of parent! */
-int ocfs_find_files_on_disk (ocfs_super * osb, const char *name, int namelen, struct buffer_head ** fe_bh, struct inode *inode, struct inode *file_inode, int take_lock, struct buffer_head **dirent_bh, struct ocfs2_dir_entry **dirent)
+int ocfs_find_files_on_disk(ocfs_super *osb, const char *name,
+                            int namelen, __u64 *fe_off,
+                            struct inode *inode, int take_lock,
+                            struct buffer_head **dirent_bh,
+                            struct ocfs2_dir_entry **dirent)
 {
 	int status = -ENOENT;
 	int tmpstat;
@@ -194,8 +198,8 @@
 	__u32 lock_type = OCFS_DLM_ENABLE_CACHE_LOCK;
 	__u64 parent_off = GET_INODE_FEOFF(inode);
 	
-	LOG_ENTRY_ARGS ("(osb=%p, parent=%llu, name='%*s', fe_bh=%p, inode=%p)\n", 
-			osb, parent_off, namelen, name, fe_bh, inode);
+	LOG_ENTRY_ARGS ("(osb=%p, parent=%llu, name='%*s', fe_off=%p, inode=%p)\n", 
+			osb, parent_off, namelen, name, fe_off, inode);
 
 	if (take_lock) {
 		/* Get a lock on the directory... */
@@ -215,12 +219,9 @@
 	if (!*dirent_bh || !*dirent)
 		goto leave;
 
-	status = ocfs_read_bh(osb, (*dirent)->inode, fe_bh, OCFS_BH_CACHED, file_inode);
-	if (status < 0) {
-		brelse(*dirent_bh);
-		LOG_ERROR_STATUS(status);
-		status = -ENOENT;
-	}
+	*fe_off = (*dirent)->inode;
+
+	status = 0;
 leave:
 
 	if (take_lock && lock_acq)

Modified: branches/format-changes/src/dir.h
===================================================================
--- branches/format-changes/src/dir.h	2004-06-09 06:06:33 UTC (rev 1030)
+++ branches/format-changes/src/dir.h	2004-06-09 07:11:07 UTC (rev 1031)
@@ -28,10 +28,9 @@
 #define OCFS2_DIR_H
 
 int empty_dir(struct inode *inode);  /* FIXME: to namei.c */
-int ocfs_find_files_on_disk(ocfs_super *osb, const char *name, int namelen,
-			    struct buffer_head **fe_bh,
-			    struct inode *inode,
-			    struct inode *file_inode, int take_lock,
+int ocfs_find_files_on_disk(ocfs_super *osb, const char *name,
+                            int namelen, __u64 *fe_off,
+			    struct inode *inode, int take_lock,
 			    struct buffer_head **dirent_bh,
 			    struct ocfs2_dir_entry **dirent);
 int ocfs_readdir(struct file *filp, void *dirent, filldir_t filldir);

Modified: branches/format-changes/src/dlm.c
===================================================================
--- branches/format-changes/src/dlm.c	2004-06-09 06:06:33 UTC (rev 1030)
+++ branches/format-changes/src/dlm.c	2004-06-09 07:11:07 UTC (rev 1031)
@@ -215,85 +215,6 @@
 }				/* ocfs_disk_request_vote */
 
 /*
- * ocfs_wait_for_disk_lock_release()
- *
- * @osb: ocfs super block for the volume
- * @offset:
- * @time_to_wait:
- * @lock_type: lowest level to which a lock must deprecate for us to break out.
- *
- * Returns 0 of success, < 0 if error.
- */
-static int ocfs_wait_for_disk_lock_release (ocfs_super * osb, __u64 offset, __u32 time_to_wait, __u32 lock_type)
-{
-	int status = -ETIMEDOUT;
-	int tmpstat = -ETIMEDOUT;
-	__u32 timewaited = 0;
-	ocfs2_dinode *fe = NULL;
-	struct buffer_head *bh = NULL;
-	__u32 curr_master;
-	__u8 lock_level;
-
-	LOG_ENTRY ();
-
-	/* Create a sepearate thread which should  set the event of the */
-	/* resource after N retries. */
-
-	while (time_to_wait > timewaited) {
-		bh = NULL;
-		fe = NULL;
-
-		tmpstat = ocfs_read_bh (osb, offset, &bh, 0, NULL);
-		if (tmpstat < 0) {
-			LOG_ERROR_STATUS (status = tmpstat);
-			goto finally;
-		}
-		fe = OCFS_BH_GET_DATA_READ(bh);
-		curr_master = DISK_LOCK(fe)->curr_master;
-		lock_level = DISK_LOCK(fe)->file_lock;
-		OCFS_BH_PUT_DATA(bh);
-
-		/* This will always be zero when the first Node comes up after reboot */
-		/* (for volume lock) */
-		if ((curr_master == OCFS_INVALID_NODE_NUM) ||
-		    (curr_master == osb->node_num)) {
-			goto got_it;
-		}
-
-		if (!ocfs_node_is_alive(&osb->publ_map, curr_master)) {
-			/* Reset the lock as not owned and return success?? */
-			/* This needs to be under some sort of cluster wide lock */
-			fe = OCFS_BH_GET_DATA_WRITE(bh);
-			DISK_LOCK(fe)->curr_master = OCFS_INVALID_NODE_NUM;
-			DISK_LOCK(fe)->file_lock = OCFS_DLM_NO_LOCK;
-			OCFS_BH_PUT_DATA(bh);
-			tmpstat = ocfs_write_bh (osb, bh, 0, NULL);
-			if (tmpstat < 0) {
-				LOG_ERROR_STATUS (status = tmpstat);
-			}
-			goto got_it;
-		}
-
-		/* If we are here in the code it means the local node is not the master */
-		if (lock_level <= lock_type)
-			goto got_it;
-		
-		brelse(bh);
-		ocfs_sleep (WAIT_FOR_VOTE_INCREMENT);
-		timewaited += WAIT_FOR_VOTE_INCREMENT;
-		continue;
-got_it:
-		brelse(bh);
-		status = 0;
-		break;
-	}
-
-finally:
-	LOG_EXIT_STATUS (status);
-	return status;
-}				/* ocfs_wait_for_disk_lock_release */
-
-/*
  * ocfs_wait_for_lock_release()
  * inode is definitely non NULL
  */
@@ -733,8 +654,8 @@
 			status = ocfs_journal_dirty(handle, *bh);
 			lockres->lock_holders++;
 #warning I hope these lock flags are alright.
-			ocfs_journal_add_lock(handle, lockres->lock_type, 0, 
-					      *bh, inode);
+			ocfs_handle_add_lock(handle, lockres->lock_type, 0, 
+					     *bh, inode, 0);
 		} else
 			status = ocfs_write_bh (osb, *bh, 0, inode);
 		if (status < 0) 

Modified: branches/format-changes/src/file.c
===================================================================
--- branches/format-changes/src/file.c	2004-06-09 06:06:33 UTC (rev 1030)
+++ branches/format-changes/src/file.c	2004-06-09 07:11:07 UTC (rev 1031)
@@ -92,7 +92,7 @@
 /* 
  * ocfs_inode_notify_open()
  * 
- * you should be holding i_sem and priv_sem in this function. 
+ * you should be holding io_sem and priv_sem in this function. 
  * If needed add ourselves to the open map. Only call this 
  * on 1st open of a file. Marks the oin as "in use"
  */
@@ -196,7 +196,7 @@
 
 	/* kch - for an open request we are already given the 
 	* inode, and therefore we are given the oin too */
-	down(&inode->i_sem);
+	down (&(OCFS_I(inode)->ip_io_sem));
 	down (&(OCFS_I(inode)->priv_sem));
 	have_oin_sem = 1;
 
@@ -205,14 +205,14 @@
 		status = ocfs_read_bh(osb, GET_INODE_FEOFF(inode), &fe_bh, 
 				      OCFS_BH_CACHED, inode);
 		if (status < 0) {
-			up(&inode->i_sem);
+			up(&OCFS_I(inode)->ip_io_sem);
 			LOG_ERROR_STATUS(status);
 			goto leave;
 		}
 
 		status = ocfs_inode_notify_open(osb, fe_bh, NULL, inode);
 		if (status < 0) {
-			up(&inode->i_sem);
+			up(&OCFS_I(inode)->ip_io_sem);
 			LOG_ERROR_STATUS(status);
 			if (status != -EINTR) {
 				LOG_ERROR_ARGS("Open request made for nonexistent "
@@ -226,7 +226,7 @@
 
 		status = ocfs_inode_fill_ext_map (osb, fe_bh, inode);
 		if (status < 0) {
-			up(&inode->i_sem);
+			up(&OCFS_I(inode)->ip_io_sem);
 			LOG_ERROR_STATUS(status);
 			goto leave;
 		}
@@ -236,13 +236,13 @@
 		status = ocfs_verify_update_inode (osb, inode, &truncate_pages,
 						   0);
 		if (status < 0) {
-			up(&inode->i_sem);
+			up(&OCFS_I(inode)->ip_io_sem);
 			LOG_ERROR_STATUS (status);
 			goto leave;
 		}
 	}
 
-	up(&inode->i_sem);
+	up(&OCFS_I(inode)->ip_io_sem);
 	/* yes, hold onto priv_sem. */
 
 	if (OCFS_I(inode)->open_hndl_cnt > 0) {
@@ -662,8 +662,8 @@
 		if (status < 0) {
 			ocfs_abort_trans(handle);
 		} else {
-			ocfs_journal_add_lock(handle, locktype, lockFlags, 
-					      bh, inode);
+			ocfs_handle_add_lock(handle, locktype, lockFlags, 
+					     bh, inode, 0);
 			have_disk_lock = 0;
 
 			ocfs_commit_trans(handle);
@@ -762,10 +762,12 @@
 
 	if (OCFS_I(inode)->needs_verification) {
 		LOG_TRACE_STR ("OIN_NEEDS_VERIFICATION");
+		down (&(OCFS_I(inode)->ip_io_sem));
 		down (&(OCFS_I(inode)->priv_sem));
 		status = ocfs_verify_update_inode (osb, inode, &needs_trunc, 
 						   0);
 		up (&(OCFS_I(inode)->priv_sem));
+		up (&(OCFS_I(inode)->ip_io_sem));
 		if (needs_trunc)
 			ocfs_truncate_inode_pages(inode, 0);
 		if (status < 0) {
@@ -802,8 +804,9 @@
 		LOG_TRACE_ARGS
 		    ("Will need more allocation: have=%llu, need=%llu\n",
 		     OCFS_I(inode)->alloc_size, newsize);
-
+		down(&OCFS_I(inode)->ip_io_sem);
 		status = ocfs_extend_file (osb, newsize, GET_INODE_FEOFF(inode), NULL, inode, NULL);
+		up(&OCFS_I(inode)->ip_io_sem);
 		if (status < 0) {
 			if (status != -EINTR && status != -ENOSPC) {
 				LOG_ERROR_STATUS (status);
@@ -890,12 +893,12 @@
 
 	if (OCFS_I(inode)->needs_verification) {
 		/* yay, locking hell! */
-		down(&inode->i_sem);
+		down(&OCFS_I(inode)->ip_io_sem);
 		down (&(OCFS_I(inode)->priv_sem));
 		status = ocfs_verify_update_inode (osb, inode, &needs_trunc, 
 						   0);
 		up (&(OCFS_I(inode)->priv_sem));
-		up(&inode->i_sem);
+		up(&OCFS_I(inode)->ip_io_sem);
 		if (needs_trunc)
 			ocfs_truncate_inode_pages(inode, 0);
 		if (status < 0) {
@@ -1128,7 +1131,6 @@
 	OCFS_BH_PUT_DATA(bh);
 	fe = NULL;
 
-
 	if (passed_handle == NULL) {
 		credits = ocfs_calc_extend_credits(((__u32) alloc_size), 
 						   osb->vol_layout.cluster_size);
@@ -1296,8 +1298,8 @@
 			} else {
 				lockFlags |= FLAG_FILE_UPDATE_OIN;
 
-				ocfs_journal_add_lock(handle, locktype,
-						      lockFlags, bh, inode);
+				ocfs_handle_add_lock(handle, locktype,
+						     lockFlags, bh, inode, 0);
 				have_disk_lock = 0;
 
 				ocfs_commit_trans(handle);
@@ -1348,10 +1350,7 @@
 
 	osb = OCFS_SB(inode->i_sb);
 
-	/* NOTE: Other filesystems get away without locking this, but
-	 * we're clustered and this has to hit disk now... */
-	if (!(attr->ia_valid & ATTR_SIZE))
-		down(&inode->i_sem);
+	down(&OCFS_I(inode)->ip_io_sem);
 
 	if (!dentry->d_parent || !dentry->d_parent->d_inode) {
 		LOG_ERROR_STR ("bad inode or root inode");
@@ -1452,8 +1451,7 @@
 	inode_setattr (inode, attr);
 
 bail:
-	if (!(attr->ia_valid & ATTR_SIZE))
-		up(&inode->i_sem);
+	up(&OCFS_I(inode)->ip_io_sem);
 
 #ifndef BH_SEM_LEAK_CHECKING
 	if (error < 0)

Modified: branches/format-changes/src/hash.c
===================================================================
--- branches/format-changes/src/hash.c	2004-06-09 06:06:33 UTC (rev 1030)
+++ branches/format-changes/src/hash.c	2004-06-09 07:11:07 UTC (rev 1031)
@@ -263,6 +263,7 @@
 #ifdef VERBOSE_BH_SEM
 		LOG_TRACE_ARGS("need to wait... modified and pid is %d\n", sem->s_pid);
 #endif
+		LOG_ERROR_ARGS("Uhoh, read lock wanted on modified buffer! (pid=%d, block=%lu)\n", sem->s_pid, bh->b_blocknr);
 		ret = OCFS_BH_SEM_WAIT_ON_MODIFY;
 	} else {
 #ifdef VERBOSE_BH_SEM
@@ -336,8 +337,9 @@
 			/* refcount as if it weren't modified */
 			ocfs_bh_sem_get(sem);
 		} else if (sem->s_pid != current->pid) {
-			LOG_TRACE_ARGS("need to wait... modified and pid is %d\n", sem->s_pid);
+//			LOG_TRACE_ARGS("need to wait... modified and pid is %d\n", sem->s_pid);
 			ret = OCFS_BH_SEM_WAIT_ON_MODIFY;
+			LOG_ERROR_ARGS("Uhoh, write lock wanted on modified buffer! (pid=%d, block=%lu)\n", sem->s_pid, bh->b_blocknr);
 		}
 	} else {
 		//LOG_TRACE_ARGS("buffer NOT modified\n");

Modified: branches/format-changes/src/inode.c
===================================================================
--- branches/format-changes/src/inode.c	2004-06-09 06:06:33 UTC (rev 1030)
+++ branches/format-changes/src/inode.c	2004-06-09 07:11:07 UTC (rev 1031)
@@ -68,6 +68,16 @@
 
 extern struct semaphore recovery_list_sem;
 
+typedef struct _ocfs_find_inode_args
+{
+	__u64 feoff;
+	unsigned long ino;
+	__u32 flags;
+}
+ocfs_find_inode_args;
+
+#define OCFS_FIND_INODE_FLAG_SYSFILE              0x00000002
+
 static int ocfs_readpage (struct file *file, struct page *page);
 static int ocfs_prepare_write (struct file *file, struct page *page, unsigned from, unsigned to);
 static int ocfs_commit_write (struct file *file, struct page *page, unsigned from, unsigned to);
@@ -180,25 +190,16 @@
 
 /* 
  * ocfs_iget()
- *
- * Not all fields are required, pick your poison:
- *   * fe_bh only -- voteoff and feoff should both be zero then.
- *   * voteoff and feoff -- fe_bh can be NULL. 
- *     If AND ONLY IF the inode has no file entry (as in the main bitmap), 
- *     are you allowed to have feoff = 0.
- * 
- *   If you give me both, I'll prefer fe_bh.
+ * feoff is *required*
  */
-struct inode *ocfs_iget(ocfs_super *osb, __u64 feoff, 
-			struct buffer_head *fe_bh)
+struct inode *ocfs_iget(ocfs_super *osb, __u64 feoff)
 {
 	struct inode *inode = NULL;
 	struct super_block *sb = osb->sb;
-	ocfs2_dinode *fe;
 	ocfs_find_inode_args args;
 	__u32 flags = 0;
 
-	LOG_ENTRY_ARGS("(feoff = %llu, fe_bh = %p)\n", feoff, fe_bh);
+	LOG_ENTRY_ARGS("(feoff = %llu)\n", feoff);
 
 	/* Shortcut: if they ask for the root dirnode, just return
 	 * it. */
@@ -228,22 +229,6 @@
 		goto bail;
 	}
 
-	/* Ok, lets try to be smart here. We need a very specific set
-	 * of arguments to get our inode. Figure these out from the
-	 * available data. */
-	if (fe_bh) {
-		/* best case -- we can figure out what we need from
-		 * the file entry! */
-		fe = OCFS_BH_GET_DATA_READ(fe_bh);
-		if (!IS_VALID_FILE_ENTRY(fe)) {
-			OCFS_BH_PUT_DATA(fe_bh);
-			LOG_ERROR_STATUS(-EINVAL);
-			goto bail;
-		}
-		feoff = fe->i_blkno << osb->sb->s_blocksize_bits;
-		OCFS_BH_PUT_DATA(fe_bh);
-	}
-
 	/* Ok. By now we've either got the offsets passed to us by the
 	 * caller, or we just pulled them off the bh. Lets do some
 	 * sanity checks to make sure they're OK. */
@@ -257,7 +242,6 @@
 		flags |= OCFS_FIND_INODE_FLAG_SYSFILE;
 
 	args.feoff = feoff;
-	args.fe_bh = fe_bh;
 	args.flags = flags;
 	args.ino = ino_from_off(sb, feoff);
 
@@ -319,7 +303,6 @@
 {
 	ocfs_find_inode_args *args = NULL;
 	int ret = 0;
-	ocfs2_dinode *fe = NULL;
 
 	LOG_ENTRY_ARGS ("(0x%p, %lu, 0x%p)\n", inode, ino, opaque);
 	
@@ -337,8 +320,6 @@
 
 	ret = 1;
 bail:
-	if (fe)
-		OCFS_BH_PUT_DATA(args->fe_bh);
 	LOG_EXIT_INT (ret);
 	return ret;
 }				/* ocfs_find_inode */
@@ -371,8 +352,11 @@
 	i->open_hndl_cnt = 0;
 	ocfs_extent_map_init (&i->map);
 	INIT_LIST_HEAD(&i->recovery_list);
-	INIT_LIST_HEAD(&i->handle_list);
+	INIT_LIST_HEAD(&i->ip_handle_list);
+	i->ip_handle = NULL;
 
+	init_MUTEX(&i->ip_io_sem);
+
 	/* These should be set in read_inode2. */
 	i->alloc_size = 0ULL;
 	i->feoff = 0ULL;
@@ -546,19 +530,15 @@
 	feoff = args->feoff;
 	sysfile = (args->flags & OCFS_FIND_INODE_FLAG_SYSFILE);
 
-	/* Uhoh, they didn't give us a buffer. Read the FE off
-	 * disk. This is safe because the kernel only does one
-	 * read_inode2 for a new inode, and if it doesn't exist yet
-	 * then nobody can be working on it! */
-	if (!args->fe_bh) {
-		status = ocfs_read_bh(osb, args->feoff, &bh, 0, NULL);
-		if (status < 0) {
-			LOG_ERROR_STATUS(status);
-			make_bad_inode (inode);
-			goto bail;
-		}
-	} else
-		bh = args->fe_bh;
+	/* Read the FE off disk. This is safe because the kernel only
+	 * does one read_inode2 for a new inode, and if it doesn't
+	 * exist yet then nobody can be working on it! */
+	status = ocfs_read_bh(osb, args->feoff, &bh, 0, NULL);
+	if (status < 0) {
+		LOG_ERROR_STATUS(status);
+		make_bad_inode (inode);
+		goto bail;
+	}
 
 	fe = OCFS_BH_GET_DATA_READ(bh);
 
@@ -580,7 +560,7 @@
 	if (fe)
 		OCFS_BH_PUT_DATA(bh);
 
-	if (args && !args->fe_bh && bh)
+	if (args && bh)
 		brelse(bh);
 
 	LOG_EXIT ();
@@ -604,7 +584,6 @@
 {
 	ocfs_find_inode_args *args = NULL;
 	int ret = 0;
-	ocfs2_dinode *fe = NULL;
 
 	LOG_ENTRY_ARGS ("(0x%p, %lu, %llu, 0x%p)\n", inode, inode->i_ino, GET_INODE_FEOFF(inode), opaque);
 
@@ -624,8 +603,6 @@
 
 	ret = 1;
 bail:
-	if (fe)
-		OCFS_BH_PUT_DATA(args->fe_bh);
 	LOG_EXIT_INT (ret);
 	return ret;
 }				/* ocfs_find_actor */
@@ -696,7 +673,7 @@
 	osb = OCFS_SB(inode->i_sb);
 
 	if (!inode->u.generic_ip) {
-		LOG_ERROR_ARGS("inode %llu has no generic_ip!\n", GET_INODE_FEOFF(inode));
+		LOG_ERROR_ARGS("inode %lu has no generic_ip!\n", inode->i_ino);
 		goto bail;
 	}
 
@@ -1733,7 +1710,7 @@
 
 	osb = OCFS_SB(inode->i_sb);
 
-	down (&inode->i_sem);
+	down (&(OCFS_I(inode)->ip_io_sem));
 	down (&(OCFS_I(inode)->priv_sem));
 
 	if (INODE_DELETED(inode)) {
@@ -1762,7 +1739,7 @@
 
 bail:
 	up (&(OCFS_I(inode)->priv_sem));
-	up (&inode->i_sem);
+	up (&(OCFS_I(inode)->ip_io_sem));
 
 	if (needs_trunc)
 		ocfs_truncate_inode_pages(inode, 0);

Modified: branches/format-changes/src/inode.h
===================================================================
--- branches/format-changes/src/inode.h	2004-06-09 06:06:33 UTC (rev 1030)
+++ branches/format-changes/src/inode.h	2004-06-09 07:11:07 UTC (rev 1031)
@@ -33,8 +33,7 @@
 			       struct inode * inode, int block,
 			       int create, int *err, int reada);
 void ocfs_clear_inode(struct inode *inode);
-struct inode *ocfs_iget(ocfs_super *osb, __u64 feoff, 
-			struct buffer_head *fe_bh);
+struct inode *ocfs_iget(ocfs_super *osb, __u64 feoff);
 int ocfs_inode_init_private(struct inode *inode);
 int ocfs_inode_revalidate(struct dentry *dentry);
 void ocfs_populate_inode(struct inode *inode, ocfs2_dinode *fe,

Modified: branches/format-changes/src/journal.c
===================================================================
--- branches/format-changes/src/journal.c	2004-06-09 06:06:33 UTC (rev 1030)
+++ branches/format-changes/src/journal.c	2004-06-09 07:11:07 UTC (rev 1031)
@@ -56,23 +56,6 @@
 	TRANS_CACHE
 } release_locks_action;
 
-/*  
- *  Trans Lock:
- *  Right now OCFS2 only supports a single transaction at a
- *  time. Transactions are locked out by using trans_lock. 
- */
-#define ocfs_take_trans_lock(osb) 					\
-	do {								\
-		down(&osb->trans_lock);					\
-	} while (0)
-
-#define ocfs_release_trans_lock(osb) 					\
-	do {								\
-		up (&osb->trans_lock);					\
-	} while (0)
-
-static int ocfs_checkpoint_handle(ocfs_journal_handle *handle);
-static int ocfs_revoke_handle(ocfs_journal_handle *handle);
 static int ocfs_reset_publish (ocfs_super * osb, __u64 node_num);
 static int ocfs_journal_release_locks(ocfs_journal_handle *handle, release_locks_action action);
 static int ocfs_force_read_journal(ocfs_super *osb, __u64 size, 
@@ -81,6 +64,46 @@
 static int __ocfs_recovery_thread(void *arg);
 static int ocfs_commit_cache (ocfs_super * osb, int data_flush);
 
+/* 
+ * JBD in 2.4 kernels has a bug in that it doesn't do any locking of
+ * the t_updates transaction variable. If we don't serialize calls to
+ * journal_start/journal_stop, then it can get way out of whack,
+ * resulting in either a crash or a lockup. As far as I can tell, they
+ * never hit this bug in ext3 because those calls somehow manage to
+ * get serialized. I wish I didn't have to use lock_kernel here, but
+ * we actually want the "drop on sleep" behavior which we can't get
+ * with any other lock.
+ * 
+ * 2.6 does it the right way by spinlocking around its structures.
+ * 
+ * These two should be moved to compat.h when it exists. 
+ */
+static inline handle_t *ocfs_journal_start(journal_t *journal, int nblocks)
+{
+	handle_t * h;
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)
+	lock_kernel();
+#endif
+	h = journal_start(journal, nblocks);
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)
+	unlock_kernel();
+#endif
+	return(h);
+}
+
+static inline int ocfs_journal_stop(handle_t *handle)
+{
+	int status;
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)
+	lock_kernel();
+#endif
+	status = journal_stop(handle);
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)
+	unlock_kernel();
+#endif
+	return(status);
+}
+
 /* DO NOT EVER CALL THIS FUNCTION WITH A LOCKED BUFFER HEAD! */
 ocfs_journal_handle * ocfs_start_trans(ocfs_super *osb, int max_buffs) 
 {
@@ -92,9 +115,11 @@
 	if (!osb || !osb->journal->k_journal)
 		BUG();
 
-	/* for now, we only do one transaction at a time. Eventually
-	 * this and trans_in_progress need to be replaced. */
-	ocfs_take_trans_lock(osb);
+	/* JBD might support this, but our journalling code doesn't yet. */
+	if (journal_current_handle()) {
+		LOG_ERROR_STR("Recursive transaction attempted!");
+		BUG();
+	}
 
 	retval = ocfs_malloc(sizeof(*retval));
 	if (!retval) {
@@ -130,9 +155,13 @@
 	retval->osb = osb;
 	retval->commit_bits = NULL;
 
+	down_read(&osb->journal->trans_barrier);
+
 	/* actually start the transaction now */
-	retval->k_handle = journal_start(journal, max_buffs);
+	retval->k_handle = ocfs_journal_start(journal, max_buffs);
 	if (IS_ERR(retval->k_handle)) {
+		up_read(&osb->journal->trans_barrier);
+
 		LOG_ERROR_STR("journal_start() failed!");
 		LOG_ERROR_STATUS((int)PTR_ERR(retval->k_handle));
 		retval->k_handle = NULL;
@@ -141,10 +170,6 @@
 
 	atomic_inc(&(osb->journal->num_trans));
 
-	down(&osb->journal->commit_sem);
-	osb->journal->curr = retval;
-	up(&osb->journal->commit_sem);
-
 	/* default handle flags! */
 	ocfs_handle_set_sync(retval, 1);
 	ocfs_handle_set_checkpoint(retval, 1);
@@ -153,7 +178,6 @@
 	return(retval);
 
 done_free:
-	ocfs_release_trans_lock(osb);
 
 	if (retval) {
 		if (retval->buffs)
@@ -164,93 +188,6 @@
 	return(NULL);
 }  /*  ocfs_start_trans  */
 
-#define OCFS_JOURNAL_CHECKPOINT_RETRIES 3
-static int ocfs_checkpoint_handle(ocfs_journal_handle *handle) 
-{
-	int retval = 0;
-	ocfs_super *osb = NULL;
-	int i;
-
-	LOG_ENTRY();
-
-	osb = handle->osb;
-
-	if (!handle->num_buffs)
-		goto done;
-
-	/* Try up to 3 times to checkpoint the handle */
-	for (i = 0; i < OCFS_JOURNAL_CHECKPOINT_RETRIES; i++) {
-		retval = ocfs_write_bhs(osb, handle->buffs, handle->num_buffs, 
-					OCFS_BH_IGNORE_JBD, NULL);
-		if (retval < 0)
-			LOG_ERROR_STATUS(retval);
-		else
-			break;
-	}
-
-done:
-
-	LOG_EXIT_STATUS(retval);
-	return(retval);
-}
-
-/* This function expects the journal handle to have already been
- * commited to disk (and it'd better have been checkpointed too!)
- *
- * Tells JBD to revoke these buffers from the on disk journal so if we
- * crash later, they won't be replayed.
- * 
- * The call to journal_revoke does a brelse. It also winds up removing
- * the journal_head from the buffer, and therefore the JBD bit is no
- * longer set. We do a get_bh before calling journal_revoke so that
- * the count doesn't change.
- */
-static int ocfs_revoke_handle(ocfs_journal_handle *handle) 
-{
-	int retval = 0;
-	struct buffer_head *bh;
-	handle_t *new_handle = NULL;
-	ocfs_journal *journal;
-	int i;
-
-	LOG_ENTRY();
-
-	journal = handle->journal;
-	
-	if (!handle->num_buffs)
-		goto done;
-
-	new_handle = journal_start(journal->k_journal, handle->num_buffs);
-	if (IS_ERR(new_handle)) {
-		retval = PTR_ERR(new_handle);
-		new_handle = NULL;
-		LOG_ERROR_STATUS(retval);
-		LOG_ERROR_STR("Could not start revoke transaction!");
-		goto done;
-	}
-
-	new_handle->h_sync = 1;
-
-	for(i = 0; i < handle->num_buffs; i++) {
-		bh = handle->buffs[i];
-
-		get_bh(bh); /* want to keep this around after the revoke */
-
-		retval = journal_revoke(new_handle, bh->b_blocknr, bh);
-		if (retval < 0) {
-			LOG_ERROR_STR("Could not revoke buffer!");
-			goto done;
-		}
-	}
-
-done:
-	if (new_handle)
-		journal_stop(new_handle);
-
-	LOG_EXIT_STATUS(retval);
-	return(retval);
-}
-
 void ocfs_handle_add_inode(ocfs_journal_handle *handle, struct inode *inode)
 {
 	if (!handle)
@@ -259,22 +196,31 @@
 	if (!inode)
 		BUG();
 
-	if (!list_empty(&OCFS_I(inode)->handle_list)) {
+	if (OCFS_I(inode)->ip_handle == handle) {
+		/* sanity check */
+		if (list_empty(&OCFS_I(inode)->ip_handle_list))
+			BUG();
+
 		/* I think this can happen to the main bitmap inode if
 		 * we extend a regular file and also have to extend a
 		 * system file in the same transaction */
-		LOG_ERROR_ARGS("Inode %lu already has a transaction!\n",
+		LOG_TRACE_ARGS("Inode %lu already added to transaction!\n",
 			       inode->i_ino);
 		return;
 	}
 
 	atomic_inc(&inode->i_count);
 
-	down(&inode->i_sem);
+	down(&OCFS_I(inode)->ip_io_sem);
 
-	list_del(&(OCFS_I(inode)->handle_list));
-	list_add_tail(&(OCFS_I(inode)->handle_list), &(handle->inode_list));
+	/* sanity check */
+	if (OCFS_I(inode)->ip_handle)
+		BUG();
 
+	OCFS_I(inode)->ip_handle = handle;
+	list_del(&(OCFS_I(inode)->ip_handle_list));
+	list_add_tail(&(OCFS_I(inode)->ip_handle_list), &(handle->inode_list));
+
 	return;
 }
 
@@ -285,21 +231,22 @@
 	ocfs_inode_private *ip;
 
 	list_for_each_safe(p, n, &handle->inode_list) {
-		ip = list_entry(p, ocfs_inode_private, handle_list);
+		ip = list_entry(p, ocfs_inode_private, ip_handle_list);
 		inode = ip->inode;
 
-		list_del(&OCFS_I(inode)->handle_list);
-		INIT_LIST_HEAD(&OCFS_I(inode)->handle_list);
+		OCFS_I(inode)->ip_handle = NULL;
+		list_del(&OCFS_I(inode)->ip_handle_list);
+		INIT_LIST_HEAD(&OCFS_I(inode)->ip_handle_list);
 
-		up(&inode->i_sem);
+		up(&OCFS_I(inode)->ip_io_sem);
 		iput(inode);
 	}
 	return;
 }
 
-/* This does no locking of the handle, so make sure that the handle
- * isn't on journal->curr. If the handle is on journal->commited, then
- * you want to be holding the commit_sem before calling this. */
+/* This does no locking of the handle. If the handle is on
+ * journal->commited, then you want to be holding the commit_sem
+ * before calling this. */
 static int ocfs_journal_release_locks(ocfs_journal_handle *handle, 
 				      release_locks_action action)
 {
@@ -313,9 +260,6 @@
 
 	osb = handle->osb;
 
-	if (osb->journal->curr == handle)
-		BUG();
-
 	LOG_TRACE_ARGS("num_locks = %d\n", handle->num_locks);
 
 	list_for_each_safe(p, n, &(handle->locks)) {
@@ -325,13 +269,12 @@
 			BUG();
 
 		/* The cache list holds unlocked inodes */
-		if (action == TRANS_CACHE)
-			down(&lock->inode->i_sem);	
+		if (action == TRANS_CACHE || lock->req_io_sem)
+			down(&OCFS_I(lock->inode)->ip_io_sem);
 
 		/* The file may have been deleted before we got to
 		 * this lock release. If so, just skip it.  */
-		if ((!lock->inode)
-		    || (lock->inode && !INODE_DELETED(lock->inode))) {
+		if (!INODE_DELETED(lock->inode)) {
 
 			tmpstat = ocfs_release_lock(osb, 
 						    GET_INODE_FEOFF(lock->inode),
@@ -349,13 +292,12 @@
 			}
 		}
 
-		if (action == TRANS_CACHE)
-			up(&lock->inode->i_sem);
+		if (action == TRANS_CACHE || lock->req_io_sem)
+			up(&OCFS_I(lock->inode)->ip_io_sem);
 
 		if (lock->bh != NULL)
 			brelse(lock->bh);
-		if (lock->inode)
-			iput(lock->inode);
+		iput(lock->inode);
 		list_del(&(lock->lock_list));
 		handle->num_locks--;
 		kfree(lock);
@@ -410,6 +352,21 @@
 	}								\
 } while (0)
 
+static inline int ocfs_journal_flush(ocfs_journal *journal) 
+{
+	int retval;
+
+	down_write(&journal->trans_barrier);
+	journal_lock_updates(journal->k_journal);
+
+	retval = journal_flush(journal->k_journal);
+
+	journal_unlock_updates(journal->k_journal);
+	up_write(&journal->trans_barrier);
+
+	return(retval);
+}
+
 /*
  * ocfs_commit_trans
  */
@@ -447,15 +404,31 @@
 	else
 		kern_handle->h_sync = 0;
 
+	/* Ok, we're done changing these buffers now... */
+	for(i = 0; i < handle->num_buffs; i++)
+		ocfs_clear_buffer_modified(handle->buffs[i]);
+
+	/* release inode semaphores we took during this transaction */
+	ocfs_handle_unlock_inodes(handle);
+	if (handle->flags & OCFS_HANDLE_LOCAL_ALLOC)
+		up(&osb->local_alloc_sem);
+
 	/* actually stop the transaction. if we've set h_sync,
 	 * it'll have been commited when we return */
-	retval = journal_stop(kern_handle);
+	retval = ocfs_journal_stop(kern_handle);
 	if (retval < 0) {
 		LOG_ERROR_STATUS(retval);
 		LOG_ERROR_STR("Could not commit transaction");
 		BUG();
 	}
 
+	/* in the checkpoint case we decrement num_trans here as there's
+	 * nothing for the commit thread to do on our behalf. */
+	if (checkpoint)
+		atomic_dec(&(osb->journal->num_trans));
+
+	up_read(&journal->trans_barrier);
+
 	handle->k_handle = NULL; /* it's been free'd in journal_stop */
 
 	/* In the future we'll try to queue up as many
@@ -463,41 +436,31 @@
 	 * will checkpoint and revoke everything from that
 	 * transaction. */
 	if (checkpoint) {
-		/* checkpoint from buffer_head list */
-		retval = ocfs_checkpoint_handle(handle);
-		if (retval < 0) {
-			LOG_ERROR_STR("Could not checkpoint transaction!");
-			BUG();
-		}
-
-		/* revoke from buffer_head list, commit revoke records */
-		retval = ocfs_revoke_handle(handle);
-		if (retval < 0) {
-			LOG_ERROR_STR("Could not completely revoke "
-				      "transaction!");
-			BUG();
-		}
-	} else { 
-		/* If we're not checkpointing, we have to be careful
-		 * to also clear the modified bits. */
-		for(i = 0; i < handle->num_buffs; i++)
-			ocfs_clear_buffer_modified(handle->buffs[i]);
+		retval = ocfs_journal_flush(journal);
+		if (retval < 0)
+			LOG_ERROR_STATUS(retval);
 	}
 
-/* done: */
+	/* Do the next few steps before we put the handle on any lists
+	 * where it might be freed! */
 	for(i = 0; i < handle->num_buffs; i++) {
 		brelse(handle->buffs[i]);
 		handle->buffs[i] = NULL;
 	}
 	handle->num_buffs = 0;
+	if (handle->buffs) {
+		kfree(handle->buffs);
+		handle->buffs = NULL;
+	}
 
-	down(&journal->commit_sem);
-	journal->curr = NULL;
+	/* At this point, we don't need the copyout buffers. */
+	ocfs_handle_free_all_copyout(handle);
 
-	if (checkpoint) {
-		up(&journal->commit_sem);
-		atomic_dec(&(osb->journal->num_trans));
+	commit_head = handle->commit_bits;
+	handle->commit_bits = NULL;
 
+/* done: */
+	if (checkpoint) {
 		/* Release locks associated with this handle. */
 		retval = ocfs_journal_release_locks(handle, TRANS_COMMIT);
 		if (retval < 0)
@@ -506,37 +469,17 @@
 		/* If we're not going to checkpoint the handle on
 		 * commit then we need to add it to our journals list
 		 * so it can be done later */
+		down(&journal->commit_sem);
 		list_add_tail(&(handle->h_list), &(journal->commited));
 		osb->needs_flush = 1;
 		up(&journal->commit_sem);
+		/* Ok, any references to the handle after this are
+		 * unsafe as it might be processed (and free'd from
+		 * memory) by the commit thread! */
 	}
 
-	/* At this point, we don't need the copyout buffers. */
-	ocfs_handle_free_all_copyout(handle);
-
-	/* we don't free the kernel handle because jbd has freed it. */
-	if (handle->buffs) {
-		kfree(handle->buffs);
-		handle->buffs = NULL;
-	}
-
-	/* save off while we still have trans lock */
-	commit_head = handle->commit_bits;
-	handle->commit_bits = NULL;
-
-	/* release inode semaphores we took during this transaction */
-	ocfs_handle_unlock_inodes(handle);
-	if (handle->flags | OCFS_HANDLE_LOCAL_ALLOC)
-		up(&osb->local_alloc_sem);
-
-	/* This has to happen after we release the other locks. */
-	ocfs_release_trans_lock(osb);
-
-	if (commit_head && (retval == 0)) {
-		if (!sync)
-			BUG();
+	if (commit_head && (retval == 0))
 		ocfs_process_bitmap_free_head(osb, commit_head);
-	}
 	ocfs_free_bitmap_free_head(commit_head);
 
 	if (checkpoint)
@@ -638,28 +581,33 @@
 		}
 	}
 
+	for(i = 0; i < handle->num_buffs; i++)
+		ocfs_clear_buffer_modified(handle->buffs[i]);
+
+	/* release inode semaphores we took during this transaction */
+	ocfs_handle_unlock_inodes(handle);
+	if (handle->flags & OCFS_HANDLE_LOCAL_ALLOC)
+		up(&osb->local_alloc_sem);
+
 	/* done copying them, free it now. */
 	ocfs_handle_free_all_copyout(handle);
 
 	/* want to force our handle to disk in abort case. */
 	handle->k_handle->h_sync = 1;
 
-	retval = journal_stop(handle->k_handle);
+	retval = ocfs_journal_stop(handle->k_handle);
 	if (retval < 0) {
 		LOG_ERROR_STR("Could not commit aborted transaction!");
 		LOG_ERROR_STATUS(retval);
 	}
+	atomic_dec(&(osb->journal->num_trans));
 
+	up_read(&journal->trans_barrier);
+
 	handle->k_handle = NULL;
 
-	atomic_dec(&(osb->journal->num_trans));
 
 /* done: */
-
-	down(&osb->journal->commit_sem);
-	osb->journal->curr = NULL;
-	up(&osb->journal->commit_sem);
-
 	if (handle->num_buffs) {
 		/* Ok, we now want to fill our buffers with the older (but
 		 * valid) data, instead of leaving them with the aborted
@@ -667,31 +615,19 @@
 		 * transactions in the journal so that we know that disk
 		 * reflects the latest correct blocks. After that, we just
 		 * repopulate the buffers from disk. */
-		journal_lock_updates(journal->k_journal);
-		retval = journal_flush(journal->k_journal);
-		journal_unlock_updates(journal->k_journal);
+		retval = ocfs_journal_flush(journal);
 		if (retval < 0)
 			LOG_ERROR_STATUS(retval);
 	}
 
-	for(i = 0; i < handle->num_buffs; i++) {
-		ocfs_clear_buffer_modified(handle->buffs[i]);
+	for(i = 0; i < handle->num_buffs; i++)
 		brelse(handle->buffs[i]);
-	}
 
 	/* drop locks associated with the handle here. */
 	retval = ocfs_journal_release_locks(handle, TRANS_ABORT);
 	if (retval < 0)
 		LOG_ERROR_STATUS(retval);
 
-	/* release inode semaphores we took during this transaction */
-	ocfs_handle_unlock_inodes(handle);
-	if (handle->flags | OCFS_HANDLE_LOCAL_ALLOC)
-		up(&osb->local_alloc_sem);
-
-	/* This has to happen after we release the other locks. */
-	ocfs_release_trans_lock(osb);
-
 	/* Should only be processed in commit. */
 	ocfs_free_bitmap_free_head(handle->commit_bits);
 
@@ -882,8 +818,9 @@
 /* We are expecting to be run on the current running transaction, so
  * we use the spin_lock here. You really shouldn't be calling this on
  * other transactions anyway... */
-void ocfs_journal_add_lock(ocfs_journal_handle *handle, __u32 type, __u32 flags, 
-			   struct buffer_head *bh, struct inode *inode) 
+void ocfs_handle_add_lock(ocfs_journal_handle *handle, __u32 type, 
+			  __u32 flags, struct buffer_head *bh, 
+			  struct inode *inode, int req_io_sem) 
 {
 	ocfs_journal_lock *lock;
 
@@ -902,13 +839,13 @@
 	lock->flags = flags;
 	lock->bh    = bh;
 	lock->inode = inode;
+	lock->req_io_sem  = req_io_sem;
 
 	if (bh)
 		get_bh(bh);
-	
-	if (inode)
-		atomic_inc(&inode->i_count);
 
+	atomic_inc(&inode->i_count);
+
 	spin_lock(&handle->list_lock);
 	list_add_tail(&(lock->lock_list), &(handle->locks));
 	handle->num_locks++;
@@ -943,7 +880,7 @@
 		  osb->vol_layout.root_int_off;
 
 	/* Ok, look up the inode for our journal */
-	inode = ocfs_iget(osb, lock_id, NULL);
+	inode = ocfs_iget(osb, lock_id);
 	if (inode == NULL) {
 		LOG_ERROR_STR("access error");
 		status = -EACCES;
@@ -956,12 +893,18 @@
 		status = -EACCES;
 		goto done;
 	}
+
+	down(&OCFS_I(inode)->ip_io_sem);
+
+
 	SET_INODE_JOURNAL(inode);
 
 	/* TODO: Use another type of lock. */
 	status = ocfs_acquire_lock (osb, lock_id, OCFS_DLM_EXCLUSIVE_LOCK,
 				    FLAG_FILE_CREATE, &bh, inode);
 	if (status < 0) {
+		up(&OCFS_I(inode)->ip_io_sem);
+
 		if (status != -EINTR)
 			LOG_ERROR_STR("Could not get lock on journal!");
 		goto done;
@@ -979,6 +922,8 @@
 	if (status < 0) {
 		OCFS_BH_PUT_DATA(bh);
 		fe = NULL;
+		up(&OCFS_I(inode)->ip_io_sem);
+
 		goto done;
 	}
 
@@ -1001,6 +946,8 @@
 					DLOCK_FLAG_OPEN_MAP|DLOCK_FLAG_ADD_SELF, 
 					&bh, inode, NULL);
 	if (status < 0) {
+		up(&OCFS_I(inode)->ip_io_sem);
+
 		LOG_ERROR_STATUS(status);
 		goto done;
 	}
@@ -1008,6 +955,8 @@
 	LOG_TRACE_ARGS("inode->alloc_size = %llu\n", 
 		       OCFS_I(inode)->alloc_size);
 
+	up(&OCFS_I(inode)->ip_io_sem);
+
 	/* call the kernels journal init function now */
 	k_journal = journal_init_inode(inode);
 	if (k_journal == NULL) {
@@ -1030,7 +979,9 @@
 	osb->journal->lockbh = bh;
 	osb->journal->lock_id = lock_id;
 	atomic_set(&(osb->journal->num_trans), 0);
+	init_rwsem(&(osb->journal->trans_barrier));
 	osb->journal->state = OCFS_JOURNAL_LOADED;
+
 	status = 0;
 done:
 	if (status < 0) {
@@ -1039,13 +990,15 @@
 				OCFS_BH_PUT_DATA(bh);
 			brelse(bh);
 		}
-		if (inode)
+		if (inode) {
 			OCFS_I(inode)->open_hndl_cnt--;
+			iput(inode);
+		}
 	}
 
 	LOG_EXIT_STATUS(status);
 	return(status);
-}
+} /* ocfs_journal_init */
 
 /*
   if the journal has been ocfs_malloc'd it needs to be freed after this call.
@@ -1083,9 +1036,7 @@
 	 * release any locks that are still held.
 	 * set the SHUTDOWN flag and release the trans lock.
 	 * the commit thread will take the trans lock for us below. */
-	down(&osb->trans_lock);
 	journal->state = OCFS_JOURNAL_IN_SHUTDOWN;
-	up(&osb->trans_lock);
 
 	/* wake the commit thread */
 	atomic_set (&osb->flush_event_woken, 1);
@@ -1108,20 +1059,22 @@
 
 	OCFS_I(inode)->open_hndl_cnt--;
 
+	down(&OCFS_I(inode)->ip_io_sem);
 	/* unlock our journal */
 	status = ocfs_release_lock (osb, journal->lock_id,
 				    OCFS_DLM_EXCLUSIVE_LOCK,
 				    FLAG_FILE_CREATE, 
 				    journal->lockbh, inode);
+	up(&OCFS_I(inode)->ip_io_sem);
 	if (status < 0)
 		LOG_ERROR_STATUS (status);
-	
+
 	brelse (journal->lockbh);
 	journal->lockbh = NULL;
 
 	journal->state = OCFS_JOURNAL_FREE;
 
-	up (&osb->trans_lock);
+//	up_write(&journal->trans_barrier);
 done:
 	if (inode)
 		iput(inode);
@@ -1381,7 +1334,7 @@
 		+ osb->vol_layout.root_int_off;
 
 	/* Ok, look up the inode for our journal */
-	inode = ocfs_iget(osb, lock_id, NULL);
+	inode = ocfs_iget(osb, lock_id);
 	if (inode == NULL) {
 		LOG_ERROR_STR("access error");
 		status = -EACCES;
@@ -1394,6 +1347,9 @@
 		status = -EACCES;
 		goto done;
 	}
+
+	down(&OCFS_I(inode)->ip_io_sem);
+
 	SET_INODE_JOURNAL(inode);
 
 	/* Should not ever be called to recover ourselves -- in that
@@ -1405,6 +1361,8 @@
 				    OCFS_DLM_EXCLUSIVE_LOCK,
 				    FLAG_FILE_CREATE|FLAG_FILE_RECOVERY, 
 				    &bh, inode);
+
+	up(&OCFS_I(inode)->ip_io_sem);
 	if (status < 0) {
 		LOG_TRACE_ARGS("status returned from acquire_lock=%d\n", 
 			       status);
@@ -1433,9 +1391,12 @@
 	OCFS_I(inode)->alloc_size = alloc_size;
 
 	/* add this node to openmap and update disk lock */
+	down(&OCFS_I(inode)->ip_io_sem);
+
 	status = ocfs_update_disk_lock (osb, 
 					DLOCK_FLAG_OPEN_MAP|DLOCK_FLAG_ADD_SELF, 
 					&bh, inode, NULL);
+	up(&OCFS_I(inode)->ip_io_sem);
 	if (status < 0) {
 		LOG_ERROR_STATUS(status);
 		goto done;
@@ -1501,11 +1462,15 @@
 		up(&(osb->recovery_lock));
 
 	/* drop the lock on this nodes journal */
-	if (got_lock)
+	if (got_lock) {
+		down(&OCFS_I(inode)->ip_io_sem);
+
 		status = ocfs_release_lock(osb, lock_id, 
 					   OCFS_DLM_EXCLUSIVE_LOCK, 
 					   FLAG_FILE_CREATE|FLAG_FILE_RECOVERY,
 					   bh, inode);
+		up(&OCFS_I(inode)->ip_io_sem);
+	}
 	if (inode)
 		iput(inode);
 
@@ -1634,15 +1599,6 @@
 		    (osb->osb_flags & OCFS_OSB_FLAGS_BEING_DISMOUNTED))
 			finish = 1;
 
-		if (down_trylock(&osb->trans_lock) != 0) {
-			LOG_TRACE_ARGS("commit thread: trylock failed, miss=%d\n", misses);
-			if (++misses < OCFS_COMMIT_MISS_MAX && finish == 0)
-				continue;
-			LOG_TRACE_ARGS("commit thread: about to down\n");
-			down(&osb->trans_lock);
-			misses = 0;
-		}
-
 		status = ocfs_commit_cache(osb, 0);
 		if (status < 0)
 			LOG_ERROR_STATUS(status);
@@ -1651,8 +1607,6 @@
 			break;
 	}
 
-
-
 	/* Flush all scheduled tasks */
 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
 	flush_scheduled_work ();
@@ -1683,23 +1637,27 @@
 
 	LOG_ENTRY_ARGS("(data_flush = %u)\n", data_flush);
 
-	if (down_trylock(&osb->trans_lock) == 0)
-		BUG();
-
 	journal = osb->journal;
 
 	if (atomic_read(&journal->num_trans) == 0) {
-		up(&osb->trans_lock);
-
 		LOG_TRACE_STR("No transactions for me to flush!");
 		goto flush_data;
 	}
 
 	/* flush all pending commits and checkpoint the journal. */
+	down_write(&journal->trans_barrier);
+
+	/* check again, this time locked :) */
+	if (atomic_read(&journal->num_trans) == 0) {
+		up_write(&journal->trans_barrier);
+		goto flush_data;
+	}
+
 	journal_lock_updates(journal->k_journal);
 	status = journal_flush(journal->k_journal);
+
+	up_write(&journal->trans_barrier);
 	if (status < 0) {
-		up(&osb->trans_lock);
 		journal_unlock_updates(journal->k_journal);
 
 		LOG_ERROR_STATUS(status);
@@ -1709,8 +1667,6 @@
 	LOG_TRACE_ARGS("flushing %d transactions\n", 
 		       atomic_read(&journal->num_trans));
 
-	atomic_set(&journal->num_trans, 0);
-
 	/* now we can run an unlock against any pending handles and
 	 * release them. */
 	down(&journal->commit_sem);
@@ -1732,9 +1688,6 @@
 	up(&journal->commit_sem);
 
 	osb->needs_flush = 0;
-	/* shutdown code wants to hold the trans lock */
-	if (journal->state != OCFS_JOURNAL_IN_SHUTDOWN)
-		up(&osb->trans_lock);
 
 	down(&commit->c_lock);
 	list_for_each_safe(p, n, &commit->c_list) {
@@ -1744,6 +1697,8 @@
 			LOG_ERROR_STATUS((status = tmpstat));
 		list_del(&(handle->h_list));
 		kfree(handle);
+
+		atomic_dec(&journal->num_trans);
 	}
 	up(&commit->c_lock);
 

Modified: branches/format-changes/src/lockres.c
===================================================================
--- branches/format-changes/src/lockres.c	2004-06-09 06:06:33 UTC (rev 1030)
+++ branches/format-changes/src/lockres.c	2004-06-09 07:11:07 UTC (rev 1031)
@@ -132,6 +132,10 @@
 	return status;
 }				/* ocfs_find_update_res */
 
+
+#define ocfs_container_of(ptr, type, member) ({                      \
+        const typeof( ((type *)0)->member ) *__mptr = (ptr);    \
+         (type *)( (char *)__mptr - offsetof(type,member) );})
 /*
  * ocfs_acquire_lockres()
  *
@@ -144,6 +148,8 @@
 	unsigned long jif = 0;
 	int status = 0;
 	int cnt = 0;
+	struct inode *inode;
+	ocfs_inode_private *ip;
 
 	LOG_ENTRY_ARGS ("(0x%p, %u)\n", lockres, timeout);
 
@@ -160,6 +166,11 @@
 		if (lockres->in_use) {
 			if (lockres->thread_id != mypid) {
 				spin_unlock (&lockres->lock_mutex);
+				LOG_ERROR_ARGS ("lockpid=%d, newpid=%d,"
+						" timedout\n",
+						lockres->thread_id, mypid);
+				BUG();
+
 				if (jif && jif < jiffies) {
 					LOG_TRACE_ARGS ("lockpid=%d, newpid=%d,"
 						" timedout\n",
@@ -175,7 +186,7 @@
 				}
 				ocfs_sleep (OCFS_NM_HEARTBEAT_TIME / 10);
 			}
-			else {
+			else  {
 				printk("lockres in_use=%d, pid=%d, mypid=%d\n", lockres->in_use, lockres->thread_id, mypid);
 				BUG();
 				lockres->in_use++;
@@ -186,6 +197,13 @@
 			lockres->in_use = 1;
 			lockres->thread_id = mypid;
 			spin_unlock (&lockres->lock_mutex);
+			ip = ocfs_container_of(lockres, ocfs_inode_private, i_lockres);
+			inode = ip->inode;
+			if (down_trylock(&OCFS_I(inode)->ip_io_sem) == 0) {
+				LOG_ERROR_ARGS("locking lockres without io_sem! ino = %lu, offset = %llu\n", inode->i_ino, OCFS_I(inode)->feoff);
+
+				BUG();
+			}
 			break;
 		}
 	}

Modified: branches/format-changes/src/namei.c
===================================================================
--- branches/format-changes/src/namei.c	2004-06-09 06:06:33 UTC (rev 1030)
+++ branches/format-changes/src/namei.c	2004-06-09 07:11:07 UTC (rev 1031)
@@ -88,8 +88,8 @@
 #endif
 {
 	int status;
-	ocfs2_dinode *fe;
-	struct buffer_head *fe_bh = NULL, *dirent_bh = NULL;
+	__u64 fe_off;
+	struct buffer_head *dirent_bh = NULL;
 	struct inode *inode = NULL;
 	struct super_block *sb = dir->i_sb;
 	struct dentry *ret;
@@ -108,21 +108,15 @@
 	LOG_TRACE_ARGS("about to call find_files_on_disk with inode=%p\n", 
 		       dir);
 
-	status = ocfs_find_files_on_disk (osb, dentry->d_name.name, dentry->d_name.len, 
-					  &fe_bh, dir, inode, 1, &dirent_bh, &dirent);
+	down(&OCFS_I(dir)->ip_io_sem);
+	status = ocfs_find_files_on_disk(osb, dentry->d_name.name,
+					 dentry->d_name.len, &fe_off,
+					 dir, 1, &dirent_bh, &dirent);
+	up(&OCFS_I(dir)->ip_io_sem);
 	if (status < 0)
 		goto bail_add;
 	
-	fe = OCFS_BH_GET_DATA_READ(fe_bh);
-	if (!IS_VALID_FILE_ENTRY(fe)) {
-		printk("ocfs2: invalid file entry!  parent=%llu, name='%*s'\n",
-		       GET_INODE_FEOFF(dir), dentry->d_name.len, 
-		       dentry->d_name.name);
-		BUG();
-	}
-	OCFS_BH_PUT_DATA(fe_bh);
-
-	inode = ocfs_iget(osb, 0, fe_bh);
+	inode = ocfs_iget(osb, fe_off);
 	if (!inode) {
 		LOG_ERROR_STR("Could not create inode!");
 		ret = ERR_PTR (-EACCES);
@@ -135,8 +129,6 @@
 	ret = NULL;
 
 bail:
-	if (fe_bh)
-		brelse(fe_bh);
 	if (dirent_bh)
 		brelse(dirent_bh);
 	
@@ -159,6 +151,8 @@
 	LOG_ENTRY_ARGS ("(0x%p, 0x%p, %d, %d, '%*s')\n", dir, dentry, mode,
 			dev, dentry->d_name.len, dentry->d_name.name);
 
+	down(&OCFS_I(dir)->ip_io_sem);
+
 	/* get our super block */
 	osb = OCFS_SB(dir->i_sb);
 	if (osb->osb_flags & OCFS_OSB_FLAGS_SHUTDOWN) {
@@ -204,8 +198,9 @@
 	}
 
 	/* Ok, we got the lock -- we'd better add it to our transaction */
-	ocfs_journal_add_lock(handle, OCFS_DLM_ENABLE_CACHE_LOCK, 
-			      FLAG_FILE_CREATE | FLAG_DIR, parent_fe_bh, dir);
+	ocfs_handle_add_lock(handle, OCFS_DLM_ENABLE_CACHE_LOCK, 
+			     FLAG_FILE_CREATE | FLAG_DIR, parent_fe_bh, dir, 
+			     0);
 
 	/* do the real work now. */
 	status = ocfs_mknod_locked(osb, dir, dentry, mode, dev,
@@ -229,9 +224,9 @@
 
 	ocfs_init_lockres (osb, inode);
 
+	ocfs_handle_add_inode(handle, inode);
 	status = ocfs_update_lockres (osb, GET_INODE_FEOFF(inode), 
 				      &new_fe_bh, NULL, 0, inode, 0, 0);
-
 	if (S_ISDIR (mode)) {
 		struct buffer_head *newdirbh = NULL;
 		int retval = 0;
@@ -295,6 +290,8 @@
 	if ((status < 0) && handle)
 		ocfs_abort_trans(handle);
 
+	up(&OCFS_I(dir)->ip_io_sem);
+
 	if (status == -ENOSPC)
 		LOG_TRACE_STR ("Disk is full");
 	else if (status < 0 && status != -EINTR)
@@ -364,7 +361,7 @@
 		LOG_ERROR_STATUS (status);
 		goto leave;
 	}
-		
+
 	status = ocfs_read_bh(osb, disk_off, new_fe_bh,
                               OCFS_BH_CACHED, inode);
 
@@ -511,7 +508,7 @@
 	struct inode *inode = dentry->d_inode;
 	int retval = -EBUSY;
 	ocfs_super *osb = OCFS_SB(dir->i_sb);
-	__u64 fileOff = GET_INODE_FEOFF(inode);
+	__u64 fe_off = GET_INODE_FEOFF(inode);
 	struct inode *parentInode = dentry->d_parent->d_inode;
 	ocfs2_dinode *fe = NULL;
 	__u32 lockFlags = (S_ISDIR (inode->i_mode) ? (FLAG_FILE_DELETE | FLAG_DIR) : FLAG_FILE_DELETE);
@@ -527,7 +524,7 @@
 	LOG_ENTRY_ARGS ("(0x%p, 0x%p, '%*s')\n", dir, dentry,
 			dentry->d_name.len, dentry->d_name.name);
 
-	LOG_TRACE_ARGS("ino = %llu\n", fileOff);
+	LOG_TRACE_ARGS("ino = %llu\n", fe_off);
 
 	status = -EBUSY;
 
@@ -535,7 +532,7 @@
 		LOG_TRACE_STR ("dentry is not empty, cannot delete");
 		goto bail;
 	} else if (OCFS_I(inode)->open_hndl_cnt > 0) {
-		LOG_TRACE_ARGS ("Cannot remove an open file (open_hndl_cnt = %u, fileOff = %llu, d_count=%u)\n", OCFS_I(inode)->open_hndl_cnt, fileOff, atomic_read(&dentry->d_count));
+		LOG_TRACE_ARGS ("Cannot remove an open file (open_hndl_cnt = %u, fe_off = %llu, d_count=%u)\n", OCFS_I(inode)->open_hndl_cnt, fe_off, atomic_read(&dentry->d_count));
 		goto bail;
 	} else if (inode == osb->root_inode) {
 		LOG_TRACE_STR ("Cannot delete the root directory");
@@ -547,16 +544,19 @@
 	spin_lock(&oin_num_ext_lock);
 	if (OCFS_I(inode)->num_extends) {
 		LOG_ERROR_ARGS ("Cannot remove a file with = "
-				"%u, pending extends (fileOff "
+				"%u, pending extends (fe_off "
 				"= %llu)\n", 
 				OCFS_I(inode)->num_extends,
-				fileOff);
+				fe_off);
 		spin_unlock(&oin_num_ext_lock);
 		status = -EBUSY;
 		goto bail;
 	}
 	spin_unlock(&oin_num_ext_lock);
 
+	down(&OCFS_I(dir)->ip_io_sem);
+	down(&OCFS_I(inode)->ip_io_sem);
+
 	handle = ocfs_start_trans(osb, OCFS_FILE_DELETE_CREDITS);
 	if (handle == NULL) {
 		LOG_ERROR_STATUS (status = -ENOMEM);
@@ -575,16 +575,26 @@
 	got_parent = 1;
 
 	/* this will re-read the directory now with the EXCLUSIVE */
-	/* lock already held; it will also return the fe_bh to us */
-	status = ocfs_find_files_on_disk (osb, dentry->d_name.name, dentry->d_name.len, 
-					  &fe_bh, parentInode, 
-					  inode, 0, &dirent_bh, &dirent);
+	/* lock already held; it will also return the fe_off to us */
+	status = ocfs_find_files_on_disk(osb, dentry->d_name.name,
+					 dentry->d_name.len, &fe_off,
+					 parentInode, 0, &dirent_bh,
+					 &dirent);
 	if (status < 0) {
 		LOG_ERROR_STATUS(status);
 		goto leave;
 	}
 
-	status = ocfs_acquire_lock (osb, fileOff, OCFS_DLM_EXCLUSIVE_LOCK,
+	if (fe_off != GET_INODE_FEOFF(inode))
+		BUG();
+
+	status = ocfs_read_bh(osb, fe_off, &fe_bh, OCFS_BH_CACHED, inode);
+	if (status < 0) {
+		LOG_ERROR_STATUS(status);
+		goto leave;
+	}
+
+	status = ocfs_acquire_lock (osb, fe_off, OCFS_DLM_EXCLUSIVE_LOCK,
 			lockFlags, &fe_bh, inode);
 	if (status < 0) {
 		if (status != -EINTR)
@@ -698,7 +708,7 @@
 	/* need this to alert dentry-owners on other nodes */
 	/* Release the file lock if we acquired it */
 	if (got_file) {
-		tmpstat = ocfs_release_lock(osb, fileOff, 
+		tmpstat = ocfs_release_lock(osb, fe_off, 
 					    OCFS_DLM_EXCLUSIVE_LOCK, 
 					    lockFlags, fe_bh, inode);
 		if (tmpstat < 0)
@@ -728,6 +738,9 @@
 		if (drop_inode) 
 			SET_INODE_DELETED(inode);
 	}
+
+	up(&OCFS_I(inode)->ip_io_sem);
+	up(&OCFS_I(dir)->ip_io_sem);
 bail:
 	if (status < 0 && status != -ENOTEMPTY && 
 	    status != -EPERM && status != -EBUSY && status != -EINTR) {
@@ -838,18 +851,38 @@
 		}
 	} else if (handle) {
 		if (id2_locked)
-			ocfs_journal_add_lock(handle, type2, flags2, 
-					      *bh2, inode2);
-		ocfs_journal_add_lock(handle, type1, flags1, *bh1, 
-				      inode1);
+			ocfs_handle_add_lock(handle, type2, flags2, 
+					     *bh2, inode2, 0);
+		ocfs_handle_add_lock(handle, type1, flags1, *bh1, 
+				     inode1, 0);
 	}
 
 	LOG_EXIT_STATUS(status);
 	return(status);
 } /* ocfs_double_lock */
 
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
+static inline void double_down(struct semaphore *s1, struct semaphore *s2)
+{
+	if (s1 != s2) {
+		if ((unsigned long) s1 < (unsigned long) s2) {
+			struct semaphore *tmp = s2;
+			s2 = s1; s1 = tmp;
+		}
+		down(s1);
+	}
+	down(s2);
+}
 
+static inline void double_up(struct semaphore *s1, struct semaphore *s2)
+{
+	up(s1);
+	if (s1 != s2)
+		up(s2);
+}
 
+#endif
+
 #define PARENT_INO(buffer) \
 	((struct ocfs2_dir_entry *) ((char *) buffer + \
 	le16_to_cpu(((struct ocfs2_dir_entry *) buffer)->rec_len)))->inode
@@ -901,10 +934,13 @@
 	/* new parent dir offset */
 	newDirOff = GET_INODE_FEOFF(new_dir);
 	
+	double_down(&OCFS_I(old_dir)->ip_io_sem, &OCFS_I(new_dir)->ip_io_sem);
+	down(&OCFS_I(old_inode)->ip_io_sem);
 
 	if (new_inode) {
 		if (ocfs_inc_icount(new_inode) < 0)
 			BUG();
+		down(&OCFS_I(new_inode)->ip_io_sem);
 	}
 
 	if (atomic_read (&old_dentry->d_count) > 2) {
@@ -1033,8 +1069,10 @@
 
 	/* check if the target already exists (in which case we need
 	 * to delete it */
-	status = ocfs_find_files_on_disk(osb, new_dentry->d_name.name, new_dentry->d_name.len, 
-					 &newfe_bh, new_dir, new_inode, 0, &new_de_bh, &new_de);
+	status = ocfs_find_files_on_disk(osb, new_dentry->d_name.name,
+					 new_dentry->d_name.len, 
+					 &newfe_lockid, new_dir, 0,
+					 &new_de_bh, &new_de);
 	/* The only error we allow here is -ENOENT because the new
 	 * file not existing is perfectly valid. */
 	if ((status < 0) && (status != -ENOENT)) {
@@ -1047,14 +1085,26 @@
 	/* In case we need to overwrite an existing file, we blow it
 	 * away first */
 	if (new_de) {
+		if (newfe_lockid != GET_INODE_FEOFF(new_inode))
+			BUG();
+
+		status = ocfs_read_bh(osb, newfe_lockid, &newfe_bh, 
+				      OCFS_BH_CACHED, new_inode);
+		if (status < 0) {
+			LOG_ERROR_STATUS(status);
+			goto finally;
+		}
+
 		/* TODO: change this block to the ext3-style orphan model */
 		newfe = OCFS_BH_GET_DATA_READ(newfe_bh);
 		if (S_ISDIR(newfe->i_mode))
 			newfe_flags = FLAG_DIR;
 		newfe_flags |= FLAG_FILE_DELETE;
-		newfe_lockid =
-	     		newfe->i_blkno << osb->sb->s_blocksize_bits;
 
+		if (newfe_lockid !=
+		    (newfe->i_blkno << osb->sb->s_blocksize_bits))
+			BUG();
+
 		OCFS_BH_PUT_DATA(newfe_bh);
 		newfe = NULL;
 
@@ -1241,8 +1291,13 @@
 				  newfe_flags, NULL, new_inode);
 	}
 
-	if (new_inode)
+	double_up(&OCFS_I(old_dir)->ip_io_sem, &OCFS_I(new_dir)->ip_io_sem);
+	up(&OCFS_I(old_inode)->ip_io_sem);
+
+	if (new_inode) {
+		up(&OCFS_I(new_inode)->ip_io_sem);
 		iput(new_inode);
+	}
 
 	if (tmpfe)
 		ocfs_release_file_entry (tmpfe);
@@ -1304,17 +1359,23 @@
 	sb = dir->i_sb;
 	osb = OCFS_SB(sb);
 
+	down(&OCFS_I(dir)->ip_io_sem);
+
 	inode = new_inode (sb);
 	if (IS_ERR (inode)) {
 		status = PTR_ERR(inode);
+		inode = NULL;
 		LOG_ERROR_STR("new_inode failed!");
 		goto bail;
 	}
 
 	if (ocfs_inode_init_private(inode)) {
 		LOG_ERROR_STATUS(status = -ENOMEM);
+		iput(inode);
+		inode = NULL;
 		goto bail;
 	}
+	down(&OCFS_I(inode)->ip_io_sem);
 
 	l = strlen (symname) + 1;
 	newsize = l - 1;
@@ -1408,6 +1469,10 @@
 	}
 
 bail:
+	if (inode)
+		up(&OCFS_I(inode)->ip_io_sem);
+	up(&OCFS_I(dir)->ip_io_sem);
+
 	if (new_fe_bh) {
 		if (fe)
 			OCFS_BH_PUT_DATA(new_fe_bh);

Modified: branches/format-changes/src/nm.c
===================================================================
--- branches/format-changes/src/nm.c	2004-06-09 06:06:33 UTC (rev 1030)
+++ branches/format-changes/src/nm.c	2004-06-09 07:11:07 UTC (rev 1031)
@@ -87,7 +87,7 @@
 	"REMASTER_THIS",        // remaster lock to me
 	"REMASTER_REQUESTOR",   // remaster lock to requestor
 	"DROP_READONLY",        // RO cachelock needs to convert to RW
-	"READONLY"
+	"READONLY",
 };
 #endif
 
@@ -525,6 +525,11 @@
 			my_node_wins = (node_num < osb->node_num);
 	}
 
+//	if (flags & FLAG_DROP_LINK) {
+//		vote_type = RELEASE_DENTRY;
+//		goto done;
+//	}
+
 	if (flags & FLAG_DROP_READONLY) {
 		vote_type = DROP_READONLY;
 		goto done;
@@ -635,7 +640,7 @@
 	int inc_inode_seq = 0;
 	int disk_vote = (ctxt->request_method == DISK_VOTE);
 	int comm_vote = (ctxt->request_method == COMM_VOTE);
-	int have_i_sem = 0;
+	int have_io_sem = 0;
 	ocfs_publish *publish = (disk_vote ? ctxt->u.publish : NULL);
 	ocfs_dlm_msg *dlm_msg = (comm_vote ? ctxt->u.dlm_msg : NULL);
 	__u32 node_num = ctxt->node_num;
@@ -687,7 +692,7 @@
 	if ((flags & (FLAG_FILE_DELETE | FLAG_FILE_RENAME)) && (flags & FLAG_RELEASE_LOCK))
 		inode = NULL;
 	else {
-		inode = ocfs_iget(osb, lock_id, NULL);
+		inode = ocfs_iget(osb, lock_id);
 		if (!inode) {
 			status = -EFAIL;
 			LOG_ERROR_ARGS("Could not find inode: lock_id = %llu, "
@@ -697,8 +702,8 @@
 			goto leave;
 		}
 
-		down(&inode->i_sem);
-		have_i_sem = 1;
+		down(&OCFS_I(inode)->ip_io_sem);
+		have_io_sem = 1;
 
 		lockres = GET_INODE_LOCKRES(inode);
 		status = ocfs_update_lockres (osb, lock_id, NULL, NULL,
@@ -772,7 +777,21 @@
 			}
 			vote_response = FLAG_VOTE_OIN_UPDATED;
 			break;
-		
+
+#if 0
+		case RELEASE_DENTRY:
+			if (!inode)
+				BUG();
+
+			/* we always vote yes on this one. */
+			vote_response = FLAG_VOTE_NODE;
+			printk("going to prune dentries for inode %lu\n",
+			       inode->i_ino);
+
+			d_prune_aliases (inode);
+			inode->i_nlink--;
+			break;
+#endif
 		case DELETE_RENAME_RELEASE:
 			/* ACK and done */
 			vote_response = FLAG_VOTE_NODE;
@@ -842,15 +861,15 @@
 #else
 					fsync_inode_buffers (inode);
 #endif
-					up(&inode->i_sem);
-					have_i_sem = 0;
+					up(&OCFS_I(inode)->ip_io_sem);
+					have_io_sem = 0;
 				}
 				break;
 			}
 
 			if (inode) {
-				up(&inode->i_sem);
-				have_i_sem = 0;
+				up(&OCFS_I(inode)->ip_io_sem);
+				have_io_sem = 0;
 			}
 
 			/* Set the always update master on open flag */
@@ -953,7 +972,7 @@
 				ocfs_node_map_clear_bit(&lockres->readonly_map, osb->node_num);
 				if (!ocfs_node_map_is_empty(&lockres->readonly_map)) {
 					OCFS_ASSERT(lockres->readonly_node == osb->node_num);
-#warning need to make sure inode is not NULL in process_vote
+					OCFS_ASSERT(inode);
 					status = ocfs_drop_readonly_cache_lock(osb, inode, 1);
 					if (status < 0)
 						LOG_ERROR_STATUS(status);
@@ -1079,9 +1098,9 @@
 			 * the actual IO that a readdir may have in 
 			 * progress, if it's possible to have a corrupt 
 			 * readdir.  for now, skip it.
-			 * NOTE: can't just take i_sem because lock order
-			 * needs to be i_sem->lockres... would have to 
-			 * drop lockres, take i_sem, take lockres, then 
+			 * NOTE: can't just take io_sem because lock order
+			 * needs to be io_sem->lockres... would have to 
+			 * drop lockres, take io_sem, take lockres, then 
 			 * recheck all the conditions to see if still 
 			 * appropriate, then do the work and drop both.
 			 * seems like a lot of work.  almost as many lines
@@ -1237,8 +1256,8 @@
 	if (inode) {
 		if (inc_inode_seq)
 			ocfs_inc_inode_seq(osb, inode, 1);
-		if (have_i_sem)
-			up(&inode->i_sem);
+		if (have_io_sem)
+			up(&OCFS_I(inode)->ip_io_sem);
 		iput(inode);
 	}
 
@@ -1365,6 +1384,7 @@
 
 	if (yield) {
 		/* this will wait until process_vote gets to the release */
+		down(&OCFS_I(inode)->ip_io_sem);
 		ocfs_acquire_lockres(lockres, 0); // ocfs_process_vote ocfs_acquire_lock
 	}
 
@@ -1401,6 +1421,9 @@
 			if (yield) {
 				/* from nm thread, give some time to waiters */
 				ocfs_release_lockres(lockres); // ocfs_process_vote ocfs_acquire_lock
+				up(&OCFS_I(inode)->ip_io_sem);
+
+				down(&OCFS_I(inode)->ip_io_sem);
 				ocfs_acquire_lockres(lockres, 0); // ocfs_process_vote ocfs_acquire_lock
 			}
 			continue;
@@ -1417,8 +1440,10 @@
 	lockres->lock_state &= ~FLAG_READONLY_DROPPING;
 
 leave:
-	if (yield)
+	if (yield) {
 		ocfs_release_lockres(lockres); // ocfs_process_vote ocfs_acquire_lock
+		up(&OCFS_I(inode)->ip_io_sem);
+	}
 
 	if (inode)
 		iput(inode);

Modified: branches/format-changes/src/ocfs.h
===================================================================
--- branches/format-changes/src/ocfs.h	2004-06-09 06:06:33 UTC (rev 1030)
+++ branches/format-changes/src/ocfs.h	2004-06-09 07:11:07 UTC (rev 1031)
@@ -172,7 +172,7 @@
 #define  FLAG_FILE_CREATE_DIR     0x00000040
 #define  FLAG_FILE_UPDATE_OIN     0x00000080
 #define  FLAG_FILE_RELEASE_MASTER 0x00000100
-#define  FLAG_FILE_UNUSED2        0x00000200
+#define  FLAG_DROP_LINK           0x00000200
 #define  FLAG_CHANGE_MASTER       0x00000400
 #define  FLAG_ADD_OIN_MAP         0x00000800
 #define  FLAG_DIR                 0x00001000
@@ -802,18 +802,13 @@
 	__u64 last_upd_seq_num;
 };
 
-/* OCFS2 Inode Private Data
- *
- * feoff/voteoff can change during rename. Luckily, rename takes a ton
- * of locks and does several checks, so you're safe reading these values
- * if any of the following is true:
- *  1) you have i_sem
- *  2) you have priv_sem
- *  3) open_hndl_cnt > 0 
- */
+struct _ocfs_journal_handle;
+
+/* OCFS2 Inode Private Data */
 typedef struct _ocfs_inode_private
 {
-	/* always valid, just a simple back pointer. */
+	/* inode and feoff fields never change and are always safe to
+	 * read. */
 	struct inode     *inode;
 
 	__u64             feoff;
@@ -821,12 +816,24 @@
 	/* These fields are protected by priv_sem */
 	struct semaphore  priv_sem;
 	__u32             open_hndl_cnt;
-	int              needs_verification;
+	int               needs_verification;
 	__u64             chng_seq_num;
 	ocfs_extent_map   map;
 	__s64             alloc_size;
 	__u32             oin_flags;
 
+	/* This protects io on the metadata buffers related to this
+	 * inode. We also consider an "abort_trans" an I/O as it will
+	 * revert the buffer back to a previous state. */
+	struct semaphore  ip_io_sem;
+
+	/* Used by the journalling code to attach an inode to a
+	 * handle.  These are protected by ip_io_sem in order to lock
+	 * out other I/O to the inode until we either commit or
+	 * abort. */
+	struct list_head            ip_handle_list;
+	struct _ocfs_journal_handle *ip_handle;
+
 	/* inode_extend_sem locks out extends on behalf of other nodes. */
 	struct semaphore  inode_extend_sem;
 
@@ -848,8 +855,6 @@
 
 	ocfs_lock_res     i_lockres;
 	__u32 		  i_dir_start_lookup;
-
-	struct list_head  handle_list;
 } ocfs_inode_private;
 
 /* Eventually, the 'flags' and 'oin_flags' fields need to be
@@ -894,7 +899,6 @@
 #define GET_INODE_FEOFF(i) OCFS_I(i)->feoff
 
 
-#warning take this out when all the lockres stuff checks out
 #define GET_INODE_LOCKRES(i) ({ if (i==NULL) BUG(); (&(OCFS_I(i)->i_lockres)); })
 
 typedef enum _ocfs_vol_state
@@ -1088,7 +1092,6 @@
 	__u32 cfg_numblocks;
 	struct semaphore publish_lock;  /* protects r/w to publish sector */
 	atomic_t node_req_vote;         /* set when node's vote req pending */
-	struct semaphore trans_lock;	/* serializes transactions */
 	int publish_dirty;
 	struct list_head needs_flush_head;
 	wait_queue_head_t flush_event;
@@ -1542,17 +1545,6 @@
 	[S_IFLNK >> S_SHIFT]    OCFS_FT_SYMLINK,
 };
 
-
-typedef struct _ocfs_find_inode_args
-{
-	__u64 feoff;
-	struct buffer_head *fe_bh;
-	unsigned long ino;
-	__u32 flags;
-} ocfs_find_inode_args;
-
-#define OCFS_FIND_INODE_FLAG_SYSFILE              0x00000002
-
 /* timeout structure taken from Ben's aio.c */
 typedef struct _ocfs_timeout {
 	struct timer_list	timer;
@@ -1829,22 +1821,6 @@
 	return (u32)(blocks >> b_to_c_bits);
 }
 
-
-/*  
- *  Trans Lock:
- *  Right now OCFS2 only supports a single transaction at a
- *  time. Transactions are locked out by using trans_lock. 
- */
-static inline void ocfs_take_trans_lock(ocfs_super *osb)
-{
-	down(&osb->trans_lock);
-}
-
-static inline void ocfs_release_trans_lock(ocfs_super *osb)
-{
-	up(&osb->trans_lock);
-}
-
 typedef struct _ocfs_journal_handle ocfs_journal_handle;
 
 #endif /* !OCFS_H */

Modified: branches/format-changes/src/ocfs_buffer_head.h
===================================================================
--- branches/format-changes/src/ocfs_buffer_head.h	2004-06-09 06:06:33 UTC (rev 1030)
+++ branches/format-changes/src/ocfs_buffer_head.h	2004-06-09 07:11:07 UTC (rev 1031)
@@ -120,6 +120,8 @@
 
 	while (1) {
 		if (ocfs_bh_sem_lock(bh) == OCFS_BH_SEM_WAIT_ON_MODIFY) {
+			BUG();
+
 			ocfs_bh_sem_unlock(bh);
 			wait_on_buffer_modified(bh);
 		} else {
@@ -180,6 +182,7 @@
 			       "this process is not the lock "
 			       "holder!\n");
 #endif
+			BUG();
 			ocfs_bh_sem_unlock(bh);
 			wait_on_buffer_modified(bh);
 		} else {
@@ -210,6 +213,8 @@
 		       "this process is not the lock "
 		       "holder!\n");
 #endif
+		LOG_ERROR_STR("Trylock about to BUG()");
+		BUG();
 		ocfs_bh_sem_unlock(bh);
 		return NULL;
 	}

Modified: branches/format-changes/src/ocfs_journal.h
===================================================================
--- branches/format-changes/src/ocfs_journal.h	2004-06-09 06:06:33 UTC (rev 1030)
+++ branches/format-changes/src/ocfs_journal.h	2004-06-09 07:11:07 UTC (rev 1031)
@@ -46,8 +46,6 @@
 struct _ocfs2_dinode;
 struct _ocfs_journal_handle;
 
-/* most of the ocfs_journal structure is protected by the
- * trans_lock. BEWARE. */
 typedef struct _ocfs_journal ocfs_journal;
 struct _ocfs_journal {
 	enum ocfs_journal_state   state;      /* Journals current state   */
@@ -71,12 +69,6 @@
 						 to access file entry	  */
 	atomic_t                  num_trans;  /* Number of transactions 
 					       * currently in the system. */
-	struct _ocfs_journal_handle *curr;    /* pointer to currently
-					       * running handle. In
-					       * the future when we do
-					       * multiple concurrent
-					       * transactions this may
-					       * become a list.*/
 	/* locking order: trans_lock -> commit_sem -> journal.curr.list_lock */
 	struct semaphore          commit_sem; /* protects *everything*
 					       * in the commited list
@@ -86,6 +78,7 @@
 	struct list_head          commited;   /* doubly linked list of all
 					       * commited handles awaiting
 					       * checkpointing.           */
+	struct rw_semaphore       trans_barrier;
 };
 
 typedef struct _ocfs_journal_lock ocfs_journal_lock;
@@ -94,6 +87,7 @@
 	__u32 flags;
 	struct buffer_head *bh;
 	struct inode *inode;
+	int req_io_sem;
 	struct list_head lock_list;
 };
 
@@ -120,7 +114,7 @@
 	int                 num_buffs;
 	struct buffer_head  **buffs;
 
-	/* The following three fields are for ocfs_journal_add_lock */
+	/* The following three fields are for ocfs_handle_add_lock */
 	spinlock_t          list_lock; /* Used to protect the 'locks'
 					* list. Only used if the
 					* handle is the same as
@@ -242,9 +236,9 @@
  *                          buffer. Will have to call ocfs_journal_dirty once
  *                          we've actually dirtied it. Type is one of . or .
  *  ocfs_journal_dirty    - Mark a journalled buffer as having dirty data.
- *  ocfs_journal_add_lock - Sometimes we need to delay lock release
+ *  ocfs_handle_add_lock  - Sometimes we need to delay lock release
  *                          until after a transaction has been completed. Use
- *                          ocfs_journal_add_lock to indicate that a lock needs
+ *                          ocfs_handle_add_lock to indicate that a lock needs
  *                          to be released at the end of that handle. Locks 
  *                          will be released in the order that they are added. 
  *  ocfs_handle_add_inode - Add a locked inode to a transaction.
@@ -293,10 +287,11 @@
  */
 int                  ocfs_journal_dirty(ocfs_journal_handle *handle, 
 					struct buffer_head *bh);
-void                 ocfs_journal_add_lock(ocfs_journal_handle *handle, 
-					   __u32 type, __u32 flags, 
-					   struct buffer_head *bh, 
-					   struct inode *inode);
+void                 ocfs_handle_add_lock(ocfs_journal_handle *handle, 
+					  __u32 type, __u32 flags, 
+					  struct buffer_head *bh, 
+					  struct inode *inode,
+					  int req_io_sem);
 /*
  * Some transactions require us to leave inodes in a locked state
  * until we either commit or abort because the buffer state can change

Modified: branches/format-changes/src/super.c
===================================================================
--- branches/format-changes/src/super.c	2004-06-09 06:06:33 UTC (rev 1030)
+++ branches/format-changes/src/super.c	2004-06-09 07:11:07 UTC (rev 1031)
@@ -233,13 +233,14 @@
 
 	LOG_ENTRY();
 
-	new = ocfs_iget(osb, OCFS_SYS_ROOT_INODE_OFF(osb), NULL);
+	new = ocfs_iget(osb, OCFS_SYS_ROOT_INODE_OFF(osb));
 	if (!new) {
 		LOG_ERROR_STATUS(status = -EINVAL);
 		goto bail;
 	}
 	osb->sys_root_inode = new;
-	for (i=0; i<NUM_SYSTEM_INODES; i++) {
+
+	for (i = 0; i < NUM_SYSTEM_INODES; i++) {
 		new = ocfs_get_system_file_inode(osb, i, osb->node_num);
 		if (!new) {
 			ocfs_release_system_inodes(osb);
@@ -248,6 +249,7 @@
 		}
 		osb->system_inodes[i] = new;
 	}
+
 bail:
 	LOG_EXIT_STATUS(status);
 	return(status);
@@ -940,7 +942,7 @@
 	osb->vol_state = VOLUME_ENABLED;
 	up (&(osb->osb_res));
 
-	inode = ocfs_iget(osb, OCFS_ROOT_INODE_FE_OFF(osb), NULL);
+	inode = ocfs_iget(osb, OCFS_ROOT_INODE_FE_OFF(osb));
 	if (!inode) {
 		status = -EIO;
 		LOG_ERROR_STATUS (status);
@@ -1364,7 +1366,6 @@
 	init_MUTEX (&(osb->osb_res));
 	init_MUTEX (&(osb->recovery_lock));
 	init_MUTEX (&(osb->comm_lock));
-	init_MUTEX (&(osb->trans_lock));
 	init_MUTEX (&(osb->extend_sem));
 	init_MUTEX (&(osb->cfg_lock));
 	init_MUTEX (&(osb->vote_sem));

Modified: branches/format-changes/src/sysfile.c
===================================================================
--- branches/format-changes/src/sysfile.c	2004-06-09 06:06:33 UTC (rev 1030)
+++ branches/format-changes/src/sysfile.c	2004-06-09 07:11:07 UTC (rev 1031)
@@ -86,32 +86,30 @@
 {
 	char namebuf[40];
 	struct inode *inode = NULL;
-	struct buffer_head *fe_bh = NULL;
+	__u64 fe_off = 0;
 	struct buffer_head *dirent_bh = NULL;
 	struct ocfs2_dir_entry *de = NULL;
 	int status = 0;
 
 	if (file_type == GLOBAL_BITMAP_SYSTEM_INODE)
-		// "There Can Be Only One!"
+		/* "There Can Be Only One!" */
 		sprintf(namebuf, system_file_names[file_type]);
 	else
 		sprintf(namebuf, system_file_names[file_type], node);
 
-	status = ocfs_find_files_on_disk (osb, namebuf, strlen(namebuf),
-					  &fe_bh, osb->sys_root_inode, 
-					  NULL, 1, &dirent_bh, &de);
+	status = ocfs_find_files_on_disk(osb, namebuf, strlen(namebuf),
+					 &fe_off, osb->sys_root_inode, 
+					 1, &dirent_bh, &de);
 	if (status < 0) {
 		goto bail;
 	}
 
-	inode = ocfs_iget(osb, 0, fe_bh);
+	inode = ocfs_iget(osb, fe_off);
 	if (!inode) {
 		LOG_ERROR_STR("Could not create inode!");
 		goto bail;
 	}
 bail:
-	if (fe_bh)
-		brelse(fe_bh);
 	if (dirent_bh)
 		brelse(dirent_bh);
 	return inode;
@@ -326,6 +324,7 @@
 	int numbhs, i;
 	char *data;
 	struct buffer_head **bhs;
+	struct inode *ext_alloc_inode = NULL;
 
 	LOG_ENTRY_ARGS ("(FileId = %u, Size = %llu)\n", FileId, FileSize);
 
@@ -377,6 +376,14 @@
 		    osb->vol_layout.data_start_off;
 		actualLength = numClusterAlloc * osb->vol_layout.cluster_size;
 
+		ext_alloc_inode = igrab(osb->system_inodes[EXTENT_ALLOC_BITMAP_SYSTEM_INODE]);
+		if (!ext_alloc_inode) {
+			status = -EFAIL;
+			LOG_ERROR_STATUS(status);
+			goto leave;
+		}
+
+		ocfs_handle_add_inode(handle, ext_alloc_inode);
 		status = ocfs_allocate_extent(osb, fe_bh, handle,  
 					      actualDiskOffset >> osb->sb->s_blocksize_bits,
 					      actualLength >> osb->s_clustersize_bits,
@@ -446,6 +453,9 @@
 		OCFS_BH_PUT_DATA(fe_bh);
 	if (local_fe)
 		brelse(fe_bh);
+	if (ext_alloc_inode)
+		iput(ext_alloc_inode);
+
 	LOG_EXIT_STATUS (status);
 	return status;
 }				/* ocfs_extend_system_file */