[Ocfs2-commits]
jlbec commits r1031 - in branches/format-changes: . src
svn-commits at oss.oracle.com
svn-commits at oss.oracle.com
Wed Jun 9 03:11:08 CDT 2004
Author: jlbec
Date: 2004-06-09 02:11:07 -0500 (Wed, 09 Jun 2004)
New Revision: 1031
Modified:
branches/format-changes/TODO
branches/format-changes/src/alloc.c
branches/format-changes/src/dir.c
branches/format-changes/src/dir.h
branches/format-changes/src/dlm.c
branches/format-changes/src/file.c
branches/format-changes/src/hash.c
branches/format-changes/src/inode.c
branches/format-changes/src/inode.h
branches/format-changes/src/journal.c
branches/format-changes/src/lockres.c
branches/format-changes/src/namei.c
branches/format-changes/src/nm.c
branches/format-changes/src/ocfs.h
branches/format-changes/src/ocfs_buffer_head.h
branches/format-changes/src/ocfs_journal.h
branches/format-changes/src/super.c
branches/format-changes/src/sysfile.c
Log:
o Merged 1002:1029 from trunk:
- [1024] Fix add-inode-to-handle-bugs
- [1024] find_files_on_disk does not read fe
- [1024] No fe_bh to ocfs_iget
- [1024] Remove trans_lock
- [1024] Add ip_io_sem
- [1024] BUGs in bh_sem to catch ip_io_sem problems
- [1024] BUG in acquire_lockres to catch ip_io_sem problems
- [1024] Fix journal_{stop,start} race
- [1024] Orphan inode flags
- [1024] Fix extent alloc locking
- [1024] Async local alloc moves
- [1025] Update TODO
- [1026] Remove ocfs_checkpoint_handle and ocfs_revoke_handle
- [1027] Remove ocfs_wait_for_disk_lock_release
- [1028] double_{down,up} fix
- [1029] Remove stray printk
Modified: branches/format-changes/TODO
===================================================================
--- branches/format-changes/TODO 2004-06-09 06:06:33 UTC (rev 1030)
+++ branches/format-changes/TODO 2004-06-09 07:11:07 UTC (rev 1031)
@@ -20,16 +20,6 @@
DISK_LOCK_SEQNUM(fe) = changeSeqNum;
code as it is equally useless.
-* Make all our 64-bit divides into shifts, this way we won't need
- divdi3.c anymore.
-
-* Combine the ocfs_inode_private and ocfs_inode_num structs, keeping the
- inode hash, but just using a unified struct as the inode private data, and
- the hash element (there are a bunch of redundant fields)
-
-* move things to use i_sem to lock out inode changes instead of the bh sem
- hash we have now.
-
* get rid of as much of lockres as possible.
* make slabs for: ocfs_journal_handle, and ocfs_journal_copyout and maybe
@@ -47,9 +37,6 @@
* now that acquire_lock and release_lock take inodes, get rid of the lock_id
and other redundant arguments in the prototype.
-* Fix the bug where deleting a directory with no files in it's first
- dirnode, but lots in it's others succeeds when it should fail.
-
* Go through the code and remove every spot where we look inside a bh for a
value which is right there on an inode (like, whether it's a directory or
not, fe / vote offsets, lock flags, etc). This includes passing redundant
Modified: branches/format-changes/src/alloc.c
===================================================================
--- branches/format-changes/src/alloc.c 2004-06-09 06:06:33 UTC (rev 1030)
+++ branches/format-changes/src/alloc.c 2004-06-09 07:11:07 UTC (rev 1031)
@@ -329,7 +329,7 @@
}
/* start the transaction here to preserve ordering with the
- * bitmap i_sems... */
+ * bitmap io_sems... */
handle = ocfs_start_trans(osb, credits);
if (!handle) {
status = -ENOMEM;
@@ -343,13 +343,13 @@
osb->vol_layout.root_int_off;
for (i = 0; i < osb->max_nodes; i++, lock_id += osb->sect_size) {
if (free_ext_node[i] != NULL) {
- extnode_inode[i] = ocfs_iget(osb, lock_id, NULL);
+ extnode_inode[i] = ocfs_iget(osb, lock_id);
if (!extnode_inode[i]) {
status = -EINVAL;
LOG_ERROR_STATUS (status);
goto abort;
}
- down(&extnode_inode[i]->i_sem);
+ down(&OCFS_I(extnode_inode[i])->ip_io_sem);
status = ocfs_acquire_lock (osb, lock_id,
OCFS_DLM_EXCLUSIVE_LOCK,
@@ -357,7 +357,7 @@
&ugly_hack_bh,
extnode_inode[i]);
if (status < 0) {
- up(&extnode_inode[i]->i_sem);
+ up(&OCFS_I(extnode_inode[i])->ip_io_sem);
iput(extnode_inode[i]);
extnode_inode[i] = NULL;
if (status != -EINTR)
@@ -378,14 +378,14 @@
LOG_ERROR_STATUS (status);
goto abort;
}
- down(&vol_inode->i_sem);
+ down(&OCFS_I(vol_inode)->ip_io_sem);
status = ocfs_acquire_lock (osb, OCFS_BITMAP_LOCK_OFFSET(osb),
OCFS_DLM_EXCLUSIVE_LOCK,
FLAG_FILE_CREATE,
&globalbh, vol_inode);
if (status < 0) {
- up(&vol_inode->i_sem);
+ up(&OCFS_I(vol_inode)->ip_io_sem);
iput(vol_inode);
vol_inode = NULL;
@@ -472,12 +472,12 @@
for (i = 0; i < osb->max_nodes; i++) {
if (extnode_inode[i]) {
- up(&extnode_inode[i]->i_sem);
+ up(&OCFS_I(extnode_inode[i])->ip_io_sem);
iput(extnode_inode[i]);
}
}
if (vol_inode) {
- up(&vol_inode->i_sem);
+ up(&OCFS_I(vol_inode)->ip_io_sem);
iput(vol_inode);
}
@@ -2783,6 +2783,9 @@
OCFS_ASSERT (osb);
+ if ((bitmap_inode && !lock_bh) || (lock_bh && !bitmap_inode))
+ BUG();
+
if (!bitmap_inode) {
bitmap_inode = igrab(osb->system_inodes[GLOBAL_BITMAP_SYSTEM_INODE]);
if (!bitmap_inode) {
@@ -2809,8 +2812,8 @@
LOG_ERROR_STATUS (status);
goto leave;
}
- ocfs_journal_add_lock(handle, OCFS_DLM_EXCLUSIVE_LOCK,
- 0, bh, bitmap_inode);
+ ocfs_handle_add_lock(handle, OCFS_DLM_EXCLUSIVE_LOCK,
+ 0, bh, bitmap_inode, 1);
}
status = ocfs_journal_access(handle, bh, OCFS_JOURNAL_ACCESS_WRITE);
@@ -3009,8 +3012,18 @@
/* Allocate a block of size blocksize from the relevant file/bitmap */
OCFS_ASSERT (blockSize);
+ if (down_trylock(&OCFS_I(inode)->ip_io_sem) == 0) {
+ LOG_TRACE_ARGS("Uhoh, asking me to allocate on an unlocked system file! (type = %u, i_ino = %lu)\n", Type, inode->i_ino);
+ BUG();
+ }
+
/* Get a lock on the file */
lockId = (bm_file * osb->sect_size) + osb->vol_layout.root_int_off;
+ if (lockId != GET_INODE_FEOFF(inode)) {
+ LOG_TRACE_ARGS("lockId = %llu, offset = %llu\n", lockId,
+ GET_INODE_FEOFF(inode));
+ BUG();
+ }
status = ocfs_acquire_lock (osb, lockId, OCFS_DLM_EXCLUSIVE_LOCK,
FLAG_FILE_CREATE, &bh, inode);
if (status < 0) {
@@ -3018,9 +3031,9 @@
goto leave;
}
- ocfs_journal_add_lock(handle, OCFS_DLM_EXCLUSIVE_LOCK,
+ ocfs_handle_add_lock(handle, OCFS_DLM_EXCLUSIVE_LOCK,
FLAG_FILE_CREATE,
- bh, inode);
+ bh, inode, 1);
status = ocfs_ugly_hack(handle, bh);
if (status < 0) {
@@ -3590,10 +3603,6 @@
ocfs_shutdown_local_alloc(osb, NULL, 0,
0);
- /* we want to make sure an empty alloc
- * hits disk. */
- ocfs_handle_set_sync(handle, 1);
-
/* the bh might not have been dirtied to
* the journal yet. */
tmpstat = ocfs_journal_dirty(handle,
@@ -3644,8 +3653,8 @@
goto bail;
}
- ocfs_journal_add_lock(handle, OCFS_DLM_EXCLUSIVE_LOCK, 0,
- main_bm_bh, main_bm_inode);
+ ocfs_handle_add_lock(handle, OCFS_DLM_EXCLUSIVE_LOCK, 0,
+ main_bm_bh, main_bm_inode, 1);
status = ocfs_sync_local_to_main(osb, &(handle->commit_bits),
NULL, 0);
@@ -3752,6 +3761,10 @@
use_global = 0;
if (!use_global) {
+ if (handle->flags & OCFS_HANDLE_LOCAL_ALLOC) {
+ printk("whoa, I already have local alloc sem!?!\n");
+ BUG();
+ }
down(&osb->local_alloc_sem);
handle->flags |= OCFS_HANDLE_LOCAL_ALLOC;
status = ocfs_find_space_from_local(osb, bitswanted,
Modified: branches/format-changes/src/dir.c
===================================================================
--- branches/format-changes/src/dir.c 2004-06-09 06:06:33 UTC (rev 1030)
+++ branches/format-changes/src/dir.c 2004-06-09 07:11:07 UTC (rev 1031)
@@ -182,10 +182,14 @@
/* ocfs_find_files_on_disk()
- * NOTE: this should always be called with inode->i_sem taken!
+ * NOTE: this should always be called with parent dir ip_io_sem taken!
*/
/* parent off changed to file entry offset of parent! */
-int ocfs_find_files_on_disk (ocfs_super * osb, const char *name, int namelen, struct buffer_head ** fe_bh, struct inode *inode, struct inode *file_inode, int take_lock, struct buffer_head **dirent_bh, struct ocfs2_dir_entry **dirent)
+int ocfs_find_files_on_disk(ocfs_super *osb, const char *name,
+ int namelen, __u64 *fe_off,
+ struct inode *inode, int take_lock,
+ struct buffer_head **dirent_bh,
+ struct ocfs2_dir_entry **dirent)
{
int status = -ENOENT;
int tmpstat;
@@ -194,8 +198,8 @@
__u32 lock_type = OCFS_DLM_ENABLE_CACHE_LOCK;
__u64 parent_off = GET_INODE_FEOFF(inode);
- LOG_ENTRY_ARGS ("(osb=%p, parent=%llu, name='%*s', fe_bh=%p, inode=%p)\n",
- osb, parent_off, namelen, name, fe_bh, inode);
+ LOG_ENTRY_ARGS ("(osb=%p, parent=%llu, name='%*s', fe_off=%p, inode=%p)\n",
+ osb, parent_off, namelen, name, fe_off, inode);
if (take_lock) {
/* Get a lock on the directory... */
@@ -215,12 +219,9 @@
if (!*dirent_bh || !*dirent)
goto leave;
- status = ocfs_read_bh(osb, (*dirent)->inode, fe_bh, OCFS_BH_CACHED, file_inode);
- if (status < 0) {
- brelse(*dirent_bh);
- LOG_ERROR_STATUS(status);
- status = -ENOENT;
- }
+ *fe_off = (*dirent)->inode;
+
+ status = 0;
leave:
if (take_lock && lock_acq)
Modified: branches/format-changes/src/dir.h
===================================================================
--- branches/format-changes/src/dir.h 2004-06-09 06:06:33 UTC (rev 1030)
+++ branches/format-changes/src/dir.h 2004-06-09 07:11:07 UTC (rev 1031)
@@ -28,10 +28,9 @@
#define OCFS2_DIR_H
int empty_dir(struct inode *inode); /* FIXME: to namei.c */
-int ocfs_find_files_on_disk(ocfs_super *osb, const char *name, int namelen,
- struct buffer_head **fe_bh,
- struct inode *inode,
- struct inode *file_inode, int take_lock,
+int ocfs_find_files_on_disk(ocfs_super *osb, const char *name,
+ int namelen, __u64 *fe_off,
+ struct inode *inode, int take_lock,
struct buffer_head **dirent_bh,
struct ocfs2_dir_entry **dirent);
int ocfs_readdir(struct file *filp, void *dirent, filldir_t filldir);
Modified: branches/format-changes/src/dlm.c
===================================================================
--- branches/format-changes/src/dlm.c 2004-06-09 06:06:33 UTC (rev 1030)
+++ branches/format-changes/src/dlm.c 2004-06-09 07:11:07 UTC (rev 1031)
@@ -215,85 +215,6 @@
} /* ocfs_disk_request_vote */
/*
- * ocfs_wait_for_disk_lock_release()
- *
- * @osb: ocfs super block for the volume
- * @offset:
- * @time_to_wait:
- * @lock_type: lowest level to which a lock must deprecate for us to break out.
- *
- * Returns 0 of success, < 0 if error.
- */
-static int ocfs_wait_for_disk_lock_release (ocfs_super * osb, __u64 offset, __u32 time_to_wait, __u32 lock_type)
-{
- int status = -ETIMEDOUT;
- int tmpstat = -ETIMEDOUT;
- __u32 timewaited = 0;
- ocfs2_dinode *fe = NULL;
- struct buffer_head *bh = NULL;
- __u32 curr_master;
- __u8 lock_level;
-
- LOG_ENTRY ();
-
- /* Create a sepearate thread which should set the event of the */
- /* resource after N retries. */
-
- while (time_to_wait > timewaited) {
- bh = NULL;
- fe = NULL;
-
- tmpstat = ocfs_read_bh (osb, offset, &bh, 0, NULL);
- if (tmpstat < 0) {
- LOG_ERROR_STATUS (status = tmpstat);
- goto finally;
- }
- fe = OCFS_BH_GET_DATA_READ(bh);
- curr_master = DISK_LOCK(fe)->curr_master;
- lock_level = DISK_LOCK(fe)->file_lock;
- OCFS_BH_PUT_DATA(bh);
-
- /* This will always be zero when the first Node comes up after reboot */
- /* (for volume lock) */
- if ((curr_master == OCFS_INVALID_NODE_NUM) ||
- (curr_master == osb->node_num)) {
- goto got_it;
- }
-
- if (!ocfs_node_is_alive(&osb->publ_map, curr_master)) {
- /* Reset the lock as not owned and return success?? */
- /* This needs to be under some sort of cluster wide lock */
- fe = OCFS_BH_GET_DATA_WRITE(bh);
- DISK_LOCK(fe)->curr_master = OCFS_INVALID_NODE_NUM;
- DISK_LOCK(fe)->file_lock = OCFS_DLM_NO_LOCK;
- OCFS_BH_PUT_DATA(bh);
- tmpstat = ocfs_write_bh (osb, bh, 0, NULL);
- if (tmpstat < 0) {
- LOG_ERROR_STATUS (status = tmpstat);
- }
- goto got_it;
- }
-
- /* If we are here in the code it means the local node is not the master */
- if (lock_level <= lock_type)
- goto got_it;
-
- brelse(bh);
- ocfs_sleep (WAIT_FOR_VOTE_INCREMENT);
- timewaited += WAIT_FOR_VOTE_INCREMENT;
- continue;
-got_it:
- brelse(bh);
- status = 0;
- break;
- }
-
-finally:
- LOG_EXIT_STATUS (status);
- return status;
-} /* ocfs_wait_for_disk_lock_release */
-
-/*
* ocfs_wait_for_lock_release()
* inode is definitely non NULL
*/
@@ -733,8 +654,8 @@
status = ocfs_journal_dirty(handle, *bh);
lockres->lock_holders++;
#warning I hope these lock flags are alright.
- ocfs_journal_add_lock(handle, lockres->lock_type, 0,
- *bh, inode);
+ ocfs_handle_add_lock(handle, lockres->lock_type, 0,
+ *bh, inode, 0);
} else
status = ocfs_write_bh (osb, *bh, 0, inode);
if (status < 0)
Modified: branches/format-changes/src/file.c
===================================================================
--- branches/format-changes/src/file.c 2004-06-09 06:06:33 UTC (rev 1030)
+++ branches/format-changes/src/file.c 2004-06-09 07:11:07 UTC (rev 1031)
@@ -92,7 +92,7 @@
/*
* ocfs_inode_notify_open()
*
- * you should be holding i_sem and priv_sem in this function.
+ * you should be holding io_sem and priv_sem in this function.
* If needed add ourselves to the open map. Only call this
* on 1st open of a file. Marks the oin as "in use"
*/
@@ -196,7 +196,7 @@
/* kch - for an open request we are already given the
* inode, and therefore we are given the oin too */
- down(&inode->i_sem);
+ down (&(OCFS_I(inode)->ip_io_sem));
down (&(OCFS_I(inode)->priv_sem));
have_oin_sem = 1;
@@ -205,14 +205,14 @@
status = ocfs_read_bh(osb, GET_INODE_FEOFF(inode), &fe_bh,
OCFS_BH_CACHED, inode);
if (status < 0) {
- up(&inode->i_sem);
+ up(&OCFS_I(inode)->ip_io_sem);
LOG_ERROR_STATUS(status);
goto leave;
}
status = ocfs_inode_notify_open(osb, fe_bh, NULL, inode);
if (status < 0) {
- up(&inode->i_sem);
+ up(&OCFS_I(inode)->ip_io_sem);
LOG_ERROR_STATUS(status);
if (status != -EINTR) {
LOG_ERROR_ARGS("Open request made for nonexistent "
@@ -226,7 +226,7 @@
status = ocfs_inode_fill_ext_map (osb, fe_bh, inode);
if (status < 0) {
- up(&inode->i_sem);
+ up(&OCFS_I(inode)->ip_io_sem);
LOG_ERROR_STATUS(status);
goto leave;
}
@@ -236,13 +236,13 @@
status = ocfs_verify_update_inode (osb, inode, &truncate_pages,
0);
if (status < 0) {
- up(&inode->i_sem);
+ up(&OCFS_I(inode)->ip_io_sem);
LOG_ERROR_STATUS (status);
goto leave;
}
}
- up(&inode->i_sem);
+ up(&OCFS_I(inode)->ip_io_sem);
/* yes, hold onto priv_sem. */
if (OCFS_I(inode)->open_hndl_cnt > 0) {
@@ -662,8 +662,8 @@
if (status < 0) {
ocfs_abort_trans(handle);
} else {
- ocfs_journal_add_lock(handle, locktype, lockFlags,
- bh, inode);
+ ocfs_handle_add_lock(handle, locktype, lockFlags,
+ bh, inode, 0);
have_disk_lock = 0;
ocfs_commit_trans(handle);
@@ -762,10 +762,12 @@
if (OCFS_I(inode)->needs_verification) {
LOG_TRACE_STR ("OIN_NEEDS_VERIFICATION");
+ down (&(OCFS_I(inode)->ip_io_sem));
down (&(OCFS_I(inode)->priv_sem));
status = ocfs_verify_update_inode (osb, inode, &needs_trunc,
0);
up (&(OCFS_I(inode)->priv_sem));
+ up (&(OCFS_I(inode)->ip_io_sem));
if (needs_trunc)
ocfs_truncate_inode_pages(inode, 0);
if (status < 0) {
@@ -802,8 +804,9 @@
LOG_TRACE_ARGS
("Will need more allocation: have=%llu, need=%llu\n",
OCFS_I(inode)->alloc_size, newsize);
-
+ down(&OCFS_I(inode)->ip_io_sem);
status = ocfs_extend_file (osb, newsize, GET_INODE_FEOFF(inode), NULL, inode, NULL);
+ up(&OCFS_I(inode)->ip_io_sem);
if (status < 0) {
if (status != -EINTR && status != -ENOSPC) {
LOG_ERROR_STATUS (status);
@@ -890,12 +893,12 @@
if (OCFS_I(inode)->needs_verification) {
/* yay, locking hell! */
- down(&inode->i_sem);
+ down(&OCFS_I(inode)->ip_io_sem);
down (&(OCFS_I(inode)->priv_sem));
status = ocfs_verify_update_inode (osb, inode, &needs_trunc,
0);
up (&(OCFS_I(inode)->priv_sem));
- up(&inode->i_sem);
+ up(&OCFS_I(inode)->ip_io_sem);
if (needs_trunc)
ocfs_truncate_inode_pages(inode, 0);
if (status < 0) {
@@ -1128,7 +1131,6 @@
OCFS_BH_PUT_DATA(bh);
fe = NULL;
-
if (passed_handle == NULL) {
credits = ocfs_calc_extend_credits(((__u32) alloc_size),
osb->vol_layout.cluster_size);
@@ -1296,8 +1298,8 @@
} else {
lockFlags |= FLAG_FILE_UPDATE_OIN;
- ocfs_journal_add_lock(handle, locktype,
- lockFlags, bh, inode);
+ ocfs_handle_add_lock(handle, locktype,
+ lockFlags, bh, inode, 0);
have_disk_lock = 0;
ocfs_commit_trans(handle);
@@ -1348,10 +1350,7 @@
osb = OCFS_SB(inode->i_sb);
- /* NOTE: Other filesystems get away without locking this, but
- * we're clustered and this has to hit disk now... */
- if (!(attr->ia_valid & ATTR_SIZE))
- down(&inode->i_sem);
+ down(&OCFS_I(inode)->ip_io_sem);
if (!dentry->d_parent || !dentry->d_parent->d_inode) {
LOG_ERROR_STR ("bad inode or root inode");
@@ -1452,8 +1451,7 @@
inode_setattr (inode, attr);
bail:
- if (!(attr->ia_valid & ATTR_SIZE))
- up(&inode->i_sem);
+ up(&OCFS_I(inode)->ip_io_sem);
#ifndef BH_SEM_LEAK_CHECKING
if (error < 0)
Modified: branches/format-changes/src/hash.c
===================================================================
--- branches/format-changes/src/hash.c 2004-06-09 06:06:33 UTC (rev 1030)
+++ branches/format-changes/src/hash.c 2004-06-09 07:11:07 UTC (rev 1031)
@@ -263,6 +263,7 @@
#ifdef VERBOSE_BH_SEM
LOG_TRACE_ARGS("need to wait... modified and pid is %d\n", sem->s_pid);
#endif
+ LOG_ERROR_ARGS("Uhoh, read lock wanted on modified buffer! (pid=%d, block=%lu)\n", sem->s_pid, bh->b_blocknr);
ret = OCFS_BH_SEM_WAIT_ON_MODIFY;
} else {
#ifdef VERBOSE_BH_SEM
@@ -336,8 +337,9 @@
/* refcount as if it weren't modified */
ocfs_bh_sem_get(sem);
} else if (sem->s_pid != current->pid) {
- LOG_TRACE_ARGS("need to wait... modified and pid is %d\n", sem->s_pid);
+// LOG_TRACE_ARGS("need to wait... modified and pid is %d\n", sem->s_pid);
ret = OCFS_BH_SEM_WAIT_ON_MODIFY;
+ LOG_ERROR_ARGS("Uhoh, write lock wanted on modified buffer! (pid=%d, block=%lu)\n", sem->s_pid, bh->b_blocknr);
}
} else {
//LOG_TRACE_ARGS("buffer NOT modified\n");
Modified: branches/format-changes/src/inode.c
===================================================================
--- branches/format-changes/src/inode.c 2004-06-09 06:06:33 UTC (rev 1030)
+++ branches/format-changes/src/inode.c 2004-06-09 07:11:07 UTC (rev 1031)
@@ -68,6 +68,16 @@
extern struct semaphore recovery_list_sem;
+typedef struct _ocfs_find_inode_args
+{
+ __u64 feoff;
+ unsigned long ino;
+ __u32 flags;
+}
+ocfs_find_inode_args;
+
+#define OCFS_FIND_INODE_FLAG_SYSFILE 0x00000002
+
static int ocfs_readpage (struct file *file, struct page *page);
static int ocfs_prepare_write (struct file *file, struct page *page, unsigned from, unsigned to);
static int ocfs_commit_write (struct file *file, struct page *page, unsigned from, unsigned to);
@@ -180,25 +190,16 @@
/*
* ocfs_iget()
- *
- * Not all fields are required, pick your poison:
- * * fe_bh only -- voteoff and feoff should both be zero then.
- * * voteoff and feoff -- fe_bh can be NULL.
- * If AND ONLY IF the inode has no file entry (as in the main bitmap),
- * are you allowed to have feoff = 0.
- *
- * If you give me both, I'll prefer fe_bh.
+ * feoff is *required*
*/
-struct inode *ocfs_iget(ocfs_super *osb, __u64 feoff,
- struct buffer_head *fe_bh)
+struct inode *ocfs_iget(ocfs_super *osb, __u64 feoff)
{
struct inode *inode = NULL;
struct super_block *sb = osb->sb;
- ocfs2_dinode *fe;
ocfs_find_inode_args args;
__u32 flags = 0;
- LOG_ENTRY_ARGS("(feoff = %llu, fe_bh = %p)\n", feoff, fe_bh);
+ LOG_ENTRY_ARGS("(feoff = %llu)\n", feoff);
/* Shortcut: if they ask for the root dirnode, just return
* it. */
@@ -228,22 +229,6 @@
goto bail;
}
- /* Ok, lets try to be smart here. We need a very specific set
- * of arguments to get our inode. Figure these out from the
- * available data. */
- if (fe_bh) {
- /* best case -- we can figure out what we need from
- * the file entry! */
- fe = OCFS_BH_GET_DATA_READ(fe_bh);
- if (!IS_VALID_FILE_ENTRY(fe)) {
- OCFS_BH_PUT_DATA(fe_bh);
- LOG_ERROR_STATUS(-EINVAL);
- goto bail;
- }
- feoff = fe->i_blkno << osb->sb->s_blocksize_bits;
- OCFS_BH_PUT_DATA(fe_bh);
- }
-
/* Ok. By now we've either got the offsets passed to us by the
* caller, or we just pulled them off the bh. Lets do some
* sanity checks to make sure they're OK. */
@@ -257,7 +242,6 @@
flags |= OCFS_FIND_INODE_FLAG_SYSFILE;
args.feoff = feoff;
- args.fe_bh = fe_bh;
args.flags = flags;
args.ino = ino_from_off(sb, feoff);
@@ -319,7 +303,6 @@
{
ocfs_find_inode_args *args = NULL;
int ret = 0;
- ocfs2_dinode *fe = NULL;
LOG_ENTRY_ARGS ("(0x%p, %lu, 0x%p)\n", inode, ino, opaque);
@@ -337,8 +320,6 @@
ret = 1;
bail:
- if (fe)
- OCFS_BH_PUT_DATA(args->fe_bh);
LOG_EXIT_INT (ret);
return ret;
} /* ocfs_find_inode */
@@ -371,8 +352,11 @@
i->open_hndl_cnt = 0;
ocfs_extent_map_init (&i->map);
INIT_LIST_HEAD(&i->recovery_list);
- INIT_LIST_HEAD(&i->handle_list);
+ INIT_LIST_HEAD(&i->ip_handle_list);
+ i->ip_handle = NULL;
+ init_MUTEX(&i->ip_io_sem);
+
/* These should be set in read_inode2. */
i->alloc_size = 0ULL;
i->feoff = 0ULL;
@@ -546,19 +530,15 @@
feoff = args->feoff;
sysfile = (args->flags & OCFS_FIND_INODE_FLAG_SYSFILE);
- /* Uhoh, they didn't give us a buffer. Read the FE off
- * disk. This is safe because the kernel only does one
- * read_inode2 for a new inode, and if it doesn't exist yet
- * then nobody can be working on it! */
- if (!args->fe_bh) {
- status = ocfs_read_bh(osb, args->feoff, &bh, 0, NULL);
- if (status < 0) {
- LOG_ERROR_STATUS(status);
- make_bad_inode (inode);
- goto bail;
- }
- } else
- bh = args->fe_bh;
+ /* Read the FE off disk. This is safe because the kernel only
+ * does one read_inode2 for a new inode, and if it doesn't
+ * exist yet then nobody can be working on it! */
+ status = ocfs_read_bh(osb, args->feoff, &bh, 0, NULL);
+ if (status < 0) {
+ LOG_ERROR_STATUS(status);
+ make_bad_inode (inode);
+ goto bail;
+ }
fe = OCFS_BH_GET_DATA_READ(bh);
@@ -580,7 +560,7 @@
if (fe)
OCFS_BH_PUT_DATA(bh);
- if (args && !args->fe_bh && bh)
+ if (args && bh)
brelse(bh);
LOG_EXIT ();
@@ -604,7 +584,6 @@
{
ocfs_find_inode_args *args = NULL;
int ret = 0;
- ocfs2_dinode *fe = NULL;
LOG_ENTRY_ARGS ("(0x%p, %lu, %llu, 0x%p)\n", inode, inode->i_ino, GET_INODE_FEOFF(inode), opaque);
@@ -624,8 +603,6 @@
ret = 1;
bail:
- if (fe)
- OCFS_BH_PUT_DATA(args->fe_bh);
LOG_EXIT_INT (ret);
return ret;
} /* ocfs_find_actor */
@@ -696,7 +673,7 @@
osb = OCFS_SB(inode->i_sb);
if (!inode->u.generic_ip) {
- LOG_ERROR_ARGS("inode %llu has no generic_ip!\n", GET_INODE_FEOFF(inode));
+ LOG_ERROR_ARGS("inode %lu has no generic_ip!\n", inode->i_ino);
goto bail;
}
@@ -1733,7 +1710,7 @@
osb = OCFS_SB(inode->i_sb);
- down (&inode->i_sem);
+ down (&(OCFS_I(inode)->ip_io_sem));
down (&(OCFS_I(inode)->priv_sem));
if (INODE_DELETED(inode)) {
@@ -1762,7 +1739,7 @@
bail:
up (&(OCFS_I(inode)->priv_sem));
- up (&inode->i_sem);
+ up (&(OCFS_I(inode)->ip_io_sem));
if (needs_trunc)
ocfs_truncate_inode_pages(inode, 0);
Modified: branches/format-changes/src/inode.h
===================================================================
--- branches/format-changes/src/inode.h 2004-06-09 06:06:33 UTC (rev 1030)
+++ branches/format-changes/src/inode.h 2004-06-09 07:11:07 UTC (rev 1031)
@@ -33,8 +33,7 @@
struct inode * inode, int block,
int create, int *err, int reada);
void ocfs_clear_inode(struct inode *inode);
-struct inode *ocfs_iget(ocfs_super *osb, __u64 feoff,
- struct buffer_head *fe_bh);
+struct inode *ocfs_iget(ocfs_super *osb, __u64 feoff);
int ocfs_inode_init_private(struct inode *inode);
int ocfs_inode_revalidate(struct dentry *dentry);
void ocfs_populate_inode(struct inode *inode, ocfs2_dinode *fe,
Modified: branches/format-changes/src/journal.c
===================================================================
--- branches/format-changes/src/journal.c 2004-06-09 06:06:33 UTC (rev 1030)
+++ branches/format-changes/src/journal.c 2004-06-09 07:11:07 UTC (rev 1031)
@@ -56,23 +56,6 @@
TRANS_CACHE
} release_locks_action;
-/*
- * Trans Lock:
- * Right now OCFS2 only supports a single transaction at a
- * time. Transactions are locked out by using trans_lock.
- */
-#define ocfs_take_trans_lock(osb) \
- do { \
- down(&osb->trans_lock); \
- } while (0)
-
-#define ocfs_release_trans_lock(osb) \
- do { \
- up (&osb->trans_lock); \
- } while (0)
-
-static int ocfs_checkpoint_handle(ocfs_journal_handle *handle);
-static int ocfs_revoke_handle(ocfs_journal_handle *handle);
static int ocfs_reset_publish (ocfs_super * osb, __u64 node_num);
static int ocfs_journal_release_locks(ocfs_journal_handle *handle, release_locks_action action);
static int ocfs_force_read_journal(ocfs_super *osb, __u64 size,
@@ -81,6 +64,46 @@
static int __ocfs_recovery_thread(void *arg);
static int ocfs_commit_cache (ocfs_super * osb, int data_flush);
+/*
+ * JBD in 2.4 kernels has a bug in that it doesn't do any locking of
+ * the t_updates transaction variable. If we don't serialize calls to
+ * journal_start/journal_stop, then it can get way out of whack,
+ * resulting in either a crash or a lockup. As far as I can tell, they
+ * never hit this bug in ext3 because those calls somehow manage to
+ * get serialized. I wish I didn't have to use lock_kernel here, but
+ * we actually want the "drop on sleep" behavior which we can't get
+ * with any other lock.
+ *
+ * 2.6 does it the right way by spinlocking around its structures.
+ *
+ * These two should be moved to compat.h when it exists.
+ */
+static inline handle_t *ocfs_journal_start(journal_t *journal, int nblocks)
+{
+ handle_t * h;
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)
+ lock_kernel();
+#endif
+ h = journal_start(journal, nblocks);
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)
+ unlock_kernel();
+#endif
+ return(h);
+}
+
+static inline int ocfs_journal_stop(handle_t *handle)
+{
+ int status;
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)
+ lock_kernel();
+#endif
+ status = journal_stop(handle);
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)
+ unlock_kernel();
+#endif
+ return(status);
+}
+
/* DO NOT EVER CALL THIS FUNCTION WITH A LOCKED BUFFER HEAD! */
ocfs_journal_handle * ocfs_start_trans(ocfs_super *osb, int max_buffs)
{
@@ -92,9 +115,11 @@
if (!osb || !osb->journal->k_journal)
BUG();
- /* for now, we only do one transaction at a time. Eventually
- * this and trans_in_progress need to be replaced. */
- ocfs_take_trans_lock(osb);
+ /* JBD might support this, but our journalling code doesn't yet. */
+ if (journal_current_handle()) {
+ LOG_ERROR_STR("Recursive transaction attempted!");
+ BUG();
+ }
retval = ocfs_malloc(sizeof(*retval));
if (!retval) {
@@ -130,9 +155,13 @@
retval->osb = osb;
retval->commit_bits = NULL;
+ down_read(&osb->journal->trans_barrier);
+
/* actually start the transaction now */
- retval->k_handle = journal_start(journal, max_buffs);
+ retval->k_handle = ocfs_journal_start(journal, max_buffs);
if (IS_ERR(retval->k_handle)) {
+ up_read(&osb->journal->trans_barrier);
+
LOG_ERROR_STR("journal_start() failed!");
LOG_ERROR_STATUS((int)PTR_ERR(retval->k_handle));
retval->k_handle = NULL;
@@ -141,10 +170,6 @@
atomic_inc(&(osb->journal->num_trans));
- down(&osb->journal->commit_sem);
- osb->journal->curr = retval;
- up(&osb->journal->commit_sem);
-
/* default handle flags! */
ocfs_handle_set_sync(retval, 1);
ocfs_handle_set_checkpoint(retval, 1);
@@ -153,7 +178,6 @@
return(retval);
done_free:
- ocfs_release_trans_lock(osb);
if (retval) {
if (retval->buffs)
@@ -164,93 +188,6 @@
return(NULL);
} /* ocfs_start_trans */
-#define OCFS_JOURNAL_CHECKPOINT_RETRIES 3
-static int ocfs_checkpoint_handle(ocfs_journal_handle *handle)
-{
- int retval = 0;
- ocfs_super *osb = NULL;
- int i;
-
- LOG_ENTRY();
-
- osb = handle->osb;
-
- if (!handle->num_buffs)
- goto done;
-
- /* Try up to 3 times to checkpoint the handle */
- for (i = 0; i < OCFS_JOURNAL_CHECKPOINT_RETRIES; i++) {
- retval = ocfs_write_bhs(osb, handle->buffs, handle->num_buffs,
- OCFS_BH_IGNORE_JBD, NULL);
- if (retval < 0)
- LOG_ERROR_STATUS(retval);
- else
- break;
- }
-
-done:
-
- LOG_EXIT_STATUS(retval);
- return(retval);
-}
-
-/* This function expects the journal handle to have already been
- * commited to disk (and it'd better have been checkpointed too!)
- *
- * Tells JBD to revoke these buffers from the on disk journal so if we
- * crash later, they won't be replayed.
- *
- * The call to journal_revoke does a brelse. It also winds up removing
- * the journal_head from the buffer, and therefore the JBD bit is no
- * longer set. We do a get_bh before calling journal_revoke so that
- * the count doesn't change.
- */
-static int ocfs_revoke_handle(ocfs_journal_handle *handle)
-{
- int retval = 0;
- struct buffer_head *bh;
- handle_t *new_handle = NULL;
- ocfs_journal *journal;
- int i;
-
- LOG_ENTRY();
-
- journal = handle->journal;
-
- if (!handle->num_buffs)
- goto done;
-
- new_handle = journal_start(journal->k_journal, handle->num_buffs);
- if (IS_ERR(new_handle)) {
- retval = PTR_ERR(new_handle);
- new_handle = NULL;
- LOG_ERROR_STATUS(retval);
- LOG_ERROR_STR("Could not start revoke transaction!");
- goto done;
- }
-
- new_handle->h_sync = 1;
-
- for(i = 0; i < handle->num_buffs; i++) {
- bh = handle->buffs[i];
-
- get_bh(bh); /* want to keep this around after the revoke */
-
- retval = journal_revoke(new_handle, bh->b_blocknr, bh);
- if (retval < 0) {
- LOG_ERROR_STR("Could not revoke buffer!");
- goto done;
- }
- }
-
-done:
- if (new_handle)
- journal_stop(new_handle);
-
- LOG_EXIT_STATUS(retval);
- return(retval);
-}
-
void ocfs_handle_add_inode(ocfs_journal_handle *handle, struct inode *inode)
{
if (!handle)
@@ -259,22 +196,31 @@
if (!inode)
BUG();
- if (!list_empty(&OCFS_I(inode)->handle_list)) {
+ if (OCFS_I(inode)->ip_handle == handle) {
+ /* sanity check */
+ if (list_empty(&OCFS_I(inode)->ip_handle_list))
+ BUG();
+
/* I think this can happen to the main bitmap inode if
* we extend a regular file and also have to extend a
* system file in the same transaction */
- LOG_ERROR_ARGS("Inode %lu already has a transaction!\n",
+ LOG_TRACE_ARGS("Inode %lu already added to transaction!\n",
inode->i_ino);
return;
}
atomic_inc(&inode->i_count);
- down(&inode->i_sem);
+ down(&OCFS_I(inode)->ip_io_sem);
- list_del(&(OCFS_I(inode)->handle_list));
- list_add_tail(&(OCFS_I(inode)->handle_list), &(handle->inode_list));
+ /* sanity check */
+ if (OCFS_I(inode)->ip_handle)
+ BUG();
+ OCFS_I(inode)->ip_handle = handle;
+ list_del(&(OCFS_I(inode)->ip_handle_list));
+ list_add_tail(&(OCFS_I(inode)->ip_handle_list), &(handle->inode_list));
+
return;
}
@@ -285,21 +231,22 @@
ocfs_inode_private *ip;
list_for_each_safe(p, n, &handle->inode_list) {
- ip = list_entry(p, ocfs_inode_private, handle_list);
+ ip = list_entry(p, ocfs_inode_private, ip_handle_list);
inode = ip->inode;
- list_del(&OCFS_I(inode)->handle_list);
- INIT_LIST_HEAD(&OCFS_I(inode)->handle_list);
+ OCFS_I(inode)->ip_handle = NULL;
+ list_del(&OCFS_I(inode)->ip_handle_list);
+ INIT_LIST_HEAD(&OCFS_I(inode)->ip_handle_list);
- up(&inode->i_sem);
+ up(&OCFS_I(inode)->ip_io_sem);
iput(inode);
}
return;
}
-/* This does no locking of the handle, so make sure that the handle
- * isn't on journal->curr. If the handle is on journal->commited, then
- * you want to be holding the commit_sem before calling this. */
+/* This does no locking of the handle. If the handle is on
+ * journal->commited, then you want to be holding the commit_sem
+ * before calling this. */
static int ocfs_journal_release_locks(ocfs_journal_handle *handle,
release_locks_action action)
{
@@ -313,9 +260,6 @@
osb = handle->osb;
- if (osb->journal->curr == handle)
- BUG();
-
LOG_TRACE_ARGS("num_locks = %d\n", handle->num_locks);
list_for_each_safe(p, n, &(handle->locks)) {
@@ -325,13 +269,12 @@
BUG();
/* The cache list holds unlocked inodes */
- if (action == TRANS_CACHE)
- down(&lock->inode->i_sem);
+ if (action == TRANS_CACHE || lock->req_io_sem)
+ down(&OCFS_I(lock->inode)->ip_io_sem);
/* The file may have been deleted before we got to
* this lock release. If so, just skip it. */
- if ((!lock->inode)
- || (lock->inode && !INODE_DELETED(lock->inode))) {
+ if (!INODE_DELETED(lock->inode)) {
tmpstat = ocfs_release_lock(osb,
GET_INODE_FEOFF(lock->inode),
@@ -349,13 +292,12 @@
}
}
- if (action == TRANS_CACHE)
- up(&lock->inode->i_sem);
+ if (action == TRANS_CACHE || lock->req_io_sem)
+ up(&OCFS_I(lock->inode)->ip_io_sem);
if (lock->bh != NULL)
brelse(lock->bh);
- if (lock->inode)
- iput(lock->inode);
+ iput(lock->inode);
list_del(&(lock->lock_list));
handle->num_locks--;
kfree(lock);
@@ -410,6 +352,21 @@
} \
} while (0)
+static inline int ocfs_journal_flush(ocfs_journal *journal)
+{ /* Flush journal->k_journal with trans_barrier write-held and jbd updates locked; returns journal_flush()'s status. */
+ int retval;
+ /* Write side of trans_barrier; the commit/abort paths drop their side with up_read(). NOTE(review): presumably taken for read when a handle starts - confirm. */
+ down_write(&journal->trans_barrier);
+ journal_lock_updates(journal->k_journal);
+ /* The flush itself runs with both locks held. */
+ retval = journal_flush(journal->k_journal);
+ /* Unlock in the reverse order of acquisition. */
+ journal_unlock_updates(journal->k_journal);
+ up_write(&journal->trans_barrier);
+
+ return(retval);
+}
+
/*
* ocfs_commit_trans
*/
@@ -447,15 +404,31 @@
else
kern_handle->h_sync = 0;
+ /* Ok, we're done changing these buffers now... */
+ for(i = 0; i < handle->num_buffs; i++)
+ ocfs_clear_buffer_modified(handle->buffs[i]);
+
+ /* release inode semaphores we took during this transaction */
+ ocfs_handle_unlock_inodes(handle);
+ if (handle->flags & OCFS_HANDLE_LOCAL_ALLOC)
+ up(&osb->local_alloc_sem);
+
/* actually stop the transaction. if we've set h_sync,
* it'll have been commited when we return */
- retval = journal_stop(kern_handle);
+ retval = ocfs_journal_stop(kern_handle);
if (retval < 0) {
LOG_ERROR_STATUS(retval);
LOG_ERROR_STR("Could not commit transaction");
BUG();
}
+ /* in the checkpoint case we decrement num_trans here, as there's
+ * nothing for the commit thread to do on our behalf. */
+ if (checkpoint)
+ atomic_dec(&(osb->journal->num_trans));
+
+ up_read(&journal->trans_barrier);
+
handle->k_handle = NULL; /* it's been free'd in journal_stop */
/* In the future we'll try to queue up as many
@@ -463,41 +436,31 @@
* will checkpoint and revoke everything from that
* transaction. */
if (checkpoint) {
- /* checkpoint from buffer_head list */
- retval = ocfs_checkpoint_handle(handle);
- if (retval < 0) {
- LOG_ERROR_STR("Could not checkpoint transaction!");
- BUG();
- }
-
- /* revoke from buffer_head list, commit revoke records */
- retval = ocfs_revoke_handle(handle);
- if (retval < 0) {
- LOG_ERROR_STR("Could not completely revoke "
- "transaction!");
- BUG();
- }
- } else {
- /* If we're not checkpointing, we have to be careful
- * to also clear the modified bits. */
- for(i = 0; i < handle->num_buffs; i++)
- ocfs_clear_buffer_modified(handle->buffs[i]);
+ retval = ocfs_journal_flush(journal);
+ if (retval < 0)
+ LOG_ERROR_STATUS(retval);
}
-/* done: */
+ /* Do the next few steps before we put the handle on any lists
+ * where it might be freed! */
for(i = 0; i < handle->num_buffs; i++) {
brelse(handle->buffs[i]);
handle->buffs[i] = NULL;
}
handle->num_buffs = 0;
+ if (handle->buffs) {
+ kfree(handle->buffs);
+ handle->buffs = NULL;
+ }
- down(&journal->commit_sem);
- journal->curr = NULL;
+ /* At this point, we don't need the copyout buffers. */
+ ocfs_handle_free_all_copyout(handle);
- if (checkpoint) {
- up(&journal->commit_sem);
- atomic_dec(&(osb->journal->num_trans));
+ commit_head = handle->commit_bits;
+ handle->commit_bits = NULL;
+/* done: */
+ if (checkpoint) {
/* Release locks associated with this handle. */
retval = ocfs_journal_release_locks(handle, TRANS_COMMIT);
if (retval < 0)
@@ -506,37 +469,17 @@
/* If we're not going to checkpoint the handle on
* commit then we need to add it to our journals list
* so it can be done later */
+ down(&journal->commit_sem);
list_add_tail(&(handle->h_list), &(journal->commited));
osb->needs_flush = 1;
up(&journal->commit_sem);
+ /* Ok, any references to the handle after this are
+ * unsafe as it might be processed (and free'd from
+ * memory) by the commit thread! */
}
- /* At this point, we don't need the copyout buffers. */
- ocfs_handle_free_all_copyout(handle);
-
- /* we don't free the kernel handle because jbd has freed it. */
- if (handle->buffs) {
- kfree(handle->buffs);
- handle->buffs = NULL;
- }
-
- /* save off while we still have trans lock */
- commit_head = handle->commit_bits;
- handle->commit_bits = NULL;
-
- /* release inode semaphores we took during this transaction */
- ocfs_handle_unlock_inodes(handle);
- if (handle->flags | OCFS_HANDLE_LOCAL_ALLOC)
- up(&osb->local_alloc_sem);
-
- /* This has to happen after we release the other locks. */
- ocfs_release_trans_lock(osb);
-
- if (commit_head && (retval == 0)) {
- if (!sync)
- BUG();
+ if (commit_head && (retval == 0))
ocfs_process_bitmap_free_head(osb, commit_head);
- }
ocfs_free_bitmap_free_head(commit_head);
if (checkpoint)
@@ -638,28 +581,33 @@
}
}
+ for(i = 0; i < handle->num_buffs; i++)
+ ocfs_clear_buffer_modified(handle->buffs[i]);
+
+ /* release inode semaphores we took during this transaction */
+ ocfs_handle_unlock_inodes(handle);
+ if (handle->flags & OCFS_HANDLE_LOCAL_ALLOC)
+ up(&osb->local_alloc_sem);
+
/* done copying them, free it now. */
ocfs_handle_free_all_copyout(handle);
/* want to force our handle to disk in abort case. */
handle->k_handle->h_sync = 1;
- retval = journal_stop(handle->k_handle);
+ retval = ocfs_journal_stop(handle->k_handle);
if (retval < 0) {
LOG_ERROR_STR("Could not commit aborted transaction!");
LOG_ERROR_STATUS(retval);
}
+ atomic_dec(&(osb->journal->num_trans));
+ up_read(&journal->trans_barrier);
+
handle->k_handle = NULL;
- atomic_dec(&(osb->journal->num_trans));
/* done: */
-
- down(&osb->journal->commit_sem);
- osb->journal->curr = NULL;
- up(&osb->journal->commit_sem);
-
if (handle->num_buffs) {
/* Ok, we now want to fill our buffers with the older (but
* valid) data, instead of leaving them with the aborted
@@ -667,31 +615,19 @@
* transactions in the journal so that we know that disk
* reflects the latest correct blocks. After that, we just
* repopulate the buffers from disk. */
- journal_lock_updates(journal->k_journal);
- retval = journal_flush(journal->k_journal);
- journal_unlock_updates(journal->k_journal);
+ retval = ocfs_journal_flush(journal);
if (retval < 0)
LOG_ERROR_STATUS(retval);
}
- for(i = 0; i < handle->num_buffs; i++) {
- ocfs_clear_buffer_modified(handle->buffs[i]);
+ for(i = 0; i < handle->num_buffs; i++)
brelse(handle->buffs[i]);
- }
/* drop locks associated with the handle here. */
retval = ocfs_journal_release_locks(handle, TRANS_ABORT);
if (retval < 0)
LOG_ERROR_STATUS(retval);
- /* release inode semaphores we took during this transaction */
- ocfs_handle_unlock_inodes(handle);
- if (handle->flags | OCFS_HANDLE_LOCAL_ALLOC)
- up(&osb->local_alloc_sem);
-
- /* This has to happen after we release the other locks. */
- ocfs_release_trans_lock(osb);
-
/* Should only be processed in commit. */
ocfs_free_bitmap_free_head(handle->commit_bits);
@@ -882,8 +818,9 @@
/* We are expecting to be run on the current running transaction, so
* we use the spin_lock here. You really shouldn't be calling this on
* other transactions anyway... */
-void ocfs_journal_add_lock(ocfs_journal_handle *handle, __u32 type, __u32 flags,
- struct buffer_head *bh, struct inode *inode)
+void ocfs_handle_add_lock(ocfs_journal_handle *handle, __u32 type,
+ __u32 flags, struct buffer_head *bh,
+ struct inode *inode, int req_io_sem)
{
ocfs_journal_lock *lock;
@@ -902,13 +839,13 @@
lock->flags = flags;
lock->bh = bh;
lock->inode = inode;
+ lock->req_io_sem = req_io_sem;
if (bh)
get_bh(bh);
-
- if (inode)
- atomic_inc(&inode->i_count);
+ atomic_inc(&inode->i_count);
+
spin_lock(&handle->list_lock);
list_add_tail(&(lock->lock_list), &(handle->locks));
handle->num_locks++;
@@ -943,7 +880,7 @@
osb->vol_layout.root_int_off;
/* Ok, look up the inode for our journal */
- inode = ocfs_iget(osb, lock_id, NULL);
+ inode = ocfs_iget(osb, lock_id);
if (inode == NULL) {
LOG_ERROR_STR("access error");
status = -EACCES;
@@ -956,12 +893,18 @@
status = -EACCES;
goto done;
}
+
+ down(&OCFS_I(inode)->ip_io_sem);
+
+
SET_INODE_JOURNAL(inode);
/* TODO: Use another type of lock. */
status = ocfs_acquire_lock (osb, lock_id, OCFS_DLM_EXCLUSIVE_LOCK,
FLAG_FILE_CREATE, &bh, inode);
if (status < 0) {
+ up(&OCFS_I(inode)->ip_io_sem);
+
if (status != -EINTR)
LOG_ERROR_STR("Could not get lock on journal!");
goto done;
@@ -979,6 +922,8 @@
if (status < 0) {
OCFS_BH_PUT_DATA(bh);
fe = NULL;
+ up(&OCFS_I(inode)->ip_io_sem);
+
goto done;
}
@@ -1001,6 +946,8 @@
DLOCK_FLAG_OPEN_MAP|DLOCK_FLAG_ADD_SELF,
&bh, inode, NULL);
if (status < 0) {
+ up(&OCFS_I(inode)->ip_io_sem);
+
LOG_ERROR_STATUS(status);
goto done;
}
@@ -1008,6 +955,8 @@
LOG_TRACE_ARGS("inode->alloc_size = %llu\n",
OCFS_I(inode)->alloc_size);
+ up(&OCFS_I(inode)->ip_io_sem);
+
/* call the kernels journal init function now */
k_journal = journal_init_inode(inode);
if (k_journal == NULL) {
@@ -1030,7 +979,9 @@
osb->journal->lockbh = bh;
osb->journal->lock_id = lock_id;
atomic_set(&(osb->journal->num_trans), 0);
+ init_rwsem(&(osb->journal->trans_barrier));
osb->journal->state = OCFS_JOURNAL_LOADED;
+
status = 0;
done:
if (status < 0) {
@@ -1039,13 +990,15 @@
OCFS_BH_PUT_DATA(bh);
brelse(bh);
}
- if (inode)
+ if (inode) {
OCFS_I(inode)->open_hndl_cnt--;
+ iput(inode);
+ }
}
LOG_EXIT_STATUS(status);
return(status);
-}
+} /* ocfs_journal_init */
/*
if the journal has been ocfs_malloc'd it needs to be freed after this call.
@@ -1083,9 +1036,7 @@
* release any locks that are still held.
* set the SHUTDOWN flag and release the trans lock.
* the commit thread will take the trans lock for us below. */
- down(&osb->trans_lock);
journal->state = OCFS_JOURNAL_IN_SHUTDOWN;
- up(&osb->trans_lock);
/* wake the commit thread */
atomic_set (&osb->flush_event_woken, 1);
@@ -1108,20 +1059,22 @@
OCFS_I(inode)->open_hndl_cnt--;
+ down(&OCFS_I(inode)->ip_io_sem);
/* unlock our journal */
status = ocfs_release_lock (osb, journal->lock_id,
OCFS_DLM_EXCLUSIVE_LOCK,
FLAG_FILE_CREATE,
journal->lockbh, inode);
+ up(&OCFS_I(inode)->ip_io_sem);
if (status < 0)
LOG_ERROR_STATUS (status);
-
+
brelse (journal->lockbh);
journal->lockbh = NULL;
journal->state = OCFS_JOURNAL_FREE;
- up (&osb->trans_lock);
+// up_write(&journal->trans_barrier);
done:
if (inode)
iput(inode);
@@ -1381,7 +1334,7 @@
+ osb->vol_layout.root_int_off;
/* Ok, look up the inode for our journal */
- inode = ocfs_iget(osb, lock_id, NULL);
+ inode = ocfs_iget(osb, lock_id);
if (inode == NULL) {
LOG_ERROR_STR("access error");
status = -EACCES;
@@ -1394,6 +1347,9 @@
status = -EACCES;
goto done;
}
+
+ down(&OCFS_I(inode)->ip_io_sem);
+
SET_INODE_JOURNAL(inode);
/* Should not ever be called to recover ourselves -- in that
@@ -1405,6 +1361,8 @@
OCFS_DLM_EXCLUSIVE_LOCK,
FLAG_FILE_CREATE|FLAG_FILE_RECOVERY,
&bh, inode);
+
+ up(&OCFS_I(inode)->ip_io_sem);
if (status < 0) {
LOG_TRACE_ARGS("status returned from acquire_lock=%d\n",
status);
@@ -1433,9 +1391,12 @@
OCFS_I(inode)->alloc_size = alloc_size;
/* add this node to openmap and update disk lock */
+ down(&OCFS_I(inode)->ip_io_sem);
+
status = ocfs_update_disk_lock (osb,
DLOCK_FLAG_OPEN_MAP|DLOCK_FLAG_ADD_SELF,
&bh, inode, NULL);
+ up(&OCFS_I(inode)->ip_io_sem);
if (status < 0) {
LOG_ERROR_STATUS(status);
goto done;
@@ -1501,11 +1462,15 @@
up(&(osb->recovery_lock));
/* drop the lock on this nodes journal */
- if (got_lock)
+ if (got_lock) {
+ down(&OCFS_I(inode)->ip_io_sem);
+
status = ocfs_release_lock(osb, lock_id,
OCFS_DLM_EXCLUSIVE_LOCK,
FLAG_FILE_CREATE|FLAG_FILE_RECOVERY,
bh, inode);
+ up(&OCFS_I(inode)->ip_io_sem);
+ }
if (inode)
iput(inode);
@@ -1634,15 +1599,6 @@
(osb->osb_flags & OCFS_OSB_FLAGS_BEING_DISMOUNTED))
finish = 1;
- if (down_trylock(&osb->trans_lock) != 0) {
- LOG_TRACE_ARGS("commit thread: trylock failed, miss=%d\n", misses);
- if (++misses < OCFS_COMMIT_MISS_MAX && finish == 0)
- continue;
- LOG_TRACE_ARGS("commit thread: about to down\n");
- down(&osb->trans_lock);
- misses = 0;
- }
-
status = ocfs_commit_cache(osb, 0);
if (status < 0)
LOG_ERROR_STATUS(status);
@@ -1651,8 +1607,6 @@
break;
}
-
-
/* Flush all scheduled tasks */
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
flush_scheduled_work ();
@@ -1683,23 +1637,27 @@
LOG_ENTRY_ARGS("(data_flush = %u)\n", data_flush);
- if (down_trylock(&osb->trans_lock) == 0)
- BUG();
-
journal = osb->journal;
if (atomic_read(&journal->num_trans) == 0) {
- up(&osb->trans_lock);
-
LOG_TRACE_STR("No transactions for me to flush!");
goto flush_data;
}
/* flush all pending commits and checkpoint the journal. */
+ down_write(&journal->trans_barrier);
+
+ /* check again, this time locked :) */
+ if (atomic_read(&journal->num_trans) == 0) {
+ up_write(&journal->trans_barrier);
+ goto flush_data;
+ }
+
journal_lock_updates(journal->k_journal);
status = journal_flush(journal->k_journal);
+
+ up_write(&journal->trans_barrier);
if (status < 0) {
- up(&osb->trans_lock);
journal_unlock_updates(journal->k_journal);
LOG_ERROR_STATUS(status);
@@ -1709,8 +1667,6 @@
LOG_TRACE_ARGS("flushing %d transactions\n",
atomic_read(&journal->num_trans));
- atomic_set(&journal->num_trans, 0);
-
/* now we can run an unlock against any pending handles and
* release them. */
down(&journal->commit_sem);
@@ -1732,9 +1688,6 @@
up(&journal->commit_sem);
osb->needs_flush = 0;
- /* shutdown code wants to hold the trans lock */
- if (journal->state != OCFS_JOURNAL_IN_SHUTDOWN)
- up(&osb->trans_lock);
down(&commit->c_lock);
list_for_each_safe(p, n, &commit->c_list) {
@@ -1744,6 +1697,8 @@
LOG_ERROR_STATUS((status = tmpstat));
list_del(&(handle->h_list));
kfree(handle);
+
+ atomic_dec(&journal->num_trans);
}
up(&commit->c_lock);
Modified: branches/format-changes/src/lockres.c
===================================================================
--- branches/format-changes/src/lockres.c 2004-06-09 06:06:33 UTC (rev 1030)
+++ branches/format-changes/src/lockres.c 2004-06-09 07:11:07 UTC (rev 1031)
@@ -132,6 +132,10 @@
return status;
} /* ocfs_find_update_res */
+/* Given ptr, the address of field `member` inside a `type`, yield a pointer to the enclosing `type`; the typed temporary makes the compiler check ptr against the member's type. */
+#define ocfs_container_of(ptr, type, member) ({ \
+ const typeof( ((type *)0)->member ) *__mptr = (ptr); \
+ (type *)( (char *)__mptr - offsetof(type,member) );})
/*
* ocfs_acquire_lockres()
*
@@ -144,6 +148,8 @@
unsigned long jif = 0;
int status = 0;
int cnt = 0;
+ struct inode *inode;
+ ocfs_inode_private *ip;
LOG_ENTRY_ARGS ("(0x%p, %u)\n", lockres, timeout);
@@ -160,6 +166,11 @@
if (lockres->in_use) {
if (lockres->thread_id != mypid) {
spin_unlock (&lockres->lock_mutex);
+ LOG_ERROR_ARGS ("lockpid=%d, newpid=%d,"
+ " timedout\n",
+ lockres->thread_id, mypid);
+ BUG();
+
if (jif && jif < jiffies) {
LOG_TRACE_ARGS ("lockpid=%d, newpid=%d,"
" timedout\n",
@@ -175,7 +186,7 @@
}
ocfs_sleep (OCFS_NM_HEARTBEAT_TIME / 10);
}
- else {
+ else {
printk("lockres in_use=%d, pid=%d, mypid=%d\n", lockres->in_use, lockres->thread_id, mypid);
BUG();
lockres->in_use++;
@@ -186,6 +197,13 @@
lockres->in_use = 1;
lockres->thread_id = mypid;
spin_unlock (&lockres->lock_mutex);
+ ip = ocfs_container_of(lockres, ocfs_inode_private, i_lockres);
+ inode = ip->inode;
+ if (down_trylock(&OCFS_I(inode)->ip_io_sem) == 0) {
+ LOG_ERROR_ARGS("locking lockres without io_sem! ino = %lu, offset = %llu\n", inode->i_ino, OCFS_I(inode)->feoff);
+
+ BUG();
+ }
break;
}
}
Modified: branches/format-changes/src/namei.c
===================================================================
--- branches/format-changes/src/namei.c 2004-06-09 06:06:33 UTC (rev 1030)
+++ branches/format-changes/src/namei.c 2004-06-09 07:11:07 UTC (rev 1031)
@@ -88,8 +88,8 @@
#endif
{
int status;
- ocfs2_dinode *fe;
- struct buffer_head *fe_bh = NULL, *dirent_bh = NULL;
+ __u64 fe_off;
+ struct buffer_head *dirent_bh = NULL;
struct inode *inode = NULL;
struct super_block *sb = dir->i_sb;
struct dentry *ret;
@@ -108,21 +108,15 @@
LOG_TRACE_ARGS("about to call find_files_on_disk with inode=%p\n",
dir);
- status = ocfs_find_files_on_disk (osb, dentry->d_name.name, dentry->d_name.len,
- &fe_bh, dir, inode, 1, &dirent_bh, &dirent);
+ down(&OCFS_I(dir)->ip_io_sem);
+ status = ocfs_find_files_on_disk(osb, dentry->d_name.name,
+ dentry->d_name.len, &fe_off,
+ dir, 1, &dirent_bh, &dirent);
+ up(&OCFS_I(dir)->ip_io_sem);
if (status < 0)
goto bail_add;
- fe = OCFS_BH_GET_DATA_READ(fe_bh);
- if (!IS_VALID_FILE_ENTRY(fe)) {
- printk("ocfs2: invalid file entry! parent=%llu, name='%*s'\n",
- GET_INODE_FEOFF(dir), dentry->d_name.len,
- dentry->d_name.name);
- BUG();
- }
- OCFS_BH_PUT_DATA(fe_bh);
-
- inode = ocfs_iget(osb, 0, fe_bh);
+ inode = ocfs_iget(osb, fe_off);
if (!inode) {
LOG_ERROR_STR("Could not create inode!");
ret = ERR_PTR (-EACCES);
@@ -135,8 +129,6 @@
ret = NULL;
bail:
- if (fe_bh)
- brelse(fe_bh);
if (dirent_bh)
brelse(dirent_bh);
@@ -159,6 +151,8 @@
LOG_ENTRY_ARGS ("(0x%p, 0x%p, %d, %d, '%*s')\n", dir, dentry, mode,
dev, dentry->d_name.len, dentry->d_name.name);
+ down(&OCFS_I(dir)->ip_io_sem);
+
/* get our super block */
osb = OCFS_SB(dir->i_sb);
if (osb->osb_flags & OCFS_OSB_FLAGS_SHUTDOWN) {
@@ -204,8 +198,9 @@
}
/* Ok, we got the lock -- we'd better add it to our transaction */
- ocfs_journal_add_lock(handle, OCFS_DLM_ENABLE_CACHE_LOCK,
- FLAG_FILE_CREATE | FLAG_DIR, parent_fe_bh, dir);
+ ocfs_handle_add_lock(handle, OCFS_DLM_ENABLE_CACHE_LOCK,
+ FLAG_FILE_CREATE | FLAG_DIR, parent_fe_bh, dir,
+ 0);
/* do the real work now. */
status = ocfs_mknod_locked(osb, dir, dentry, mode, dev,
@@ -229,9 +224,9 @@
ocfs_init_lockres (osb, inode);
+ ocfs_handle_add_inode(handle, inode);
status = ocfs_update_lockres (osb, GET_INODE_FEOFF(inode),
&new_fe_bh, NULL, 0, inode, 0, 0);
-
if (S_ISDIR (mode)) {
struct buffer_head *newdirbh = NULL;
int retval = 0;
@@ -295,6 +290,8 @@
if ((status < 0) && handle)
ocfs_abort_trans(handle);
+ up(&OCFS_I(dir)->ip_io_sem);
+
if (status == -ENOSPC)
LOG_TRACE_STR ("Disk is full");
else if (status < 0 && status != -EINTR)
@@ -364,7 +361,7 @@
LOG_ERROR_STATUS (status);
goto leave;
}
-
+
status = ocfs_read_bh(osb, disk_off, new_fe_bh,
OCFS_BH_CACHED, inode);
@@ -511,7 +508,7 @@
struct inode *inode = dentry->d_inode;
int retval = -EBUSY;
ocfs_super *osb = OCFS_SB(dir->i_sb);
- __u64 fileOff = GET_INODE_FEOFF(inode);
+ __u64 fe_off = GET_INODE_FEOFF(inode);
struct inode *parentInode = dentry->d_parent->d_inode;
ocfs2_dinode *fe = NULL;
__u32 lockFlags = (S_ISDIR (inode->i_mode) ? (FLAG_FILE_DELETE | FLAG_DIR) : FLAG_FILE_DELETE);
@@ -527,7 +524,7 @@
LOG_ENTRY_ARGS ("(0x%p, 0x%p, '%*s')\n", dir, dentry,
dentry->d_name.len, dentry->d_name.name);
- LOG_TRACE_ARGS("ino = %llu\n", fileOff);
+ LOG_TRACE_ARGS("ino = %llu\n", fe_off);
status = -EBUSY;
@@ -535,7 +532,7 @@
LOG_TRACE_STR ("dentry is not empty, cannot delete");
goto bail;
} else if (OCFS_I(inode)->open_hndl_cnt > 0) {
- LOG_TRACE_ARGS ("Cannot remove an open file (open_hndl_cnt = %u, fileOff = %llu, d_count=%u)\n", OCFS_I(inode)->open_hndl_cnt, fileOff, atomic_read(&dentry->d_count));
+ LOG_TRACE_ARGS ("Cannot remove an open file (open_hndl_cnt = %u, fe_off = %llu, d_count=%u)\n", OCFS_I(inode)->open_hndl_cnt, fe_off, atomic_read(&dentry->d_count));
goto bail;
} else if (inode == osb->root_inode) {
LOG_TRACE_STR ("Cannot delete the root directory");
@@ -547,16 +544,19 @@
spin_lock(&oin_num_ext_lock);
if (OCFS_I(inode)->num_extends) {
LOG_ERROR_ARGS ("Cannot remove a file with = "
- "%u, pending extends (fileOff "
+ "%u, pending extends (fe_off "
"= %llu)\n",
OCFS_I(inode)->num_extends,
- fileOff);
+ fe_off);
spin_unlock(&oin_num_ext_lock);
status = -EBUSY;
goto bail;
}
spin_unlock(&oin_num_ext_lock);
+ down(&OCFS_I(dir)->ip_io_sem);
+ down(&OCFS_I(inode)->ip_io_sem);
+
handle = ocfs_start_trans(osb, OCFS_FILE_DELETE_CREDITS);
if (handle == NULL) {
LOG_ERROR_STATUS (status = -ENOMEM);
@@ -575,16 +575,26 @@
got_parent = 1;
/* this will re-read the directory now with the EXCLUSIVE */
- /* lock already held; it will also return the fe_bh to us */
- status = ocfs_find_files_on_disk (osb, dentry->d_name.name, dentry->d_name.len,
- &fe_bh, parentInode,
- inode, 0, &dirent_bh, &dirent);
+ /* lock already held; it will also return the fe_off to us */
+ status = ocfs_find_files_on_disk(osb, dentry->d_name.name,
+ dentry->d_name.len, &fe_off,
+ parentInode, 0, &dirent_bh,
+ &dirent);
if (status < 0) {
LOG_ERROR_STATUS(status);
goto leave;
}
- status = ocfs_acquire_lock (osb, fileOff, OCFS_DLM_EXCLUSIVE_LOCK,
+ if (fe_off != GET_INODE_FEOFF(inode))
+ BUG();
+
+ status = ocfs_read_bh(osb, fe_off, &fe_bh, OCFS_BH_CACHED, inode);
+ if (status < 0) {
+ LOG_ERROR_STATUS(status);
+ goto leave;
+ }
+
+ status = ocfs_acquire_lock (osb, fe_off, OCFS_DLM_EXCLUSIVE_LOCK,
lockFlags, &fe_bh, inode);
if (status < 0) {
if (status != -EINTR)
@@ -698,7 +708,7 @@
/* need this to alert dentry-owners on other nodes */
/* Release the file lock if we acquired it */
if (got_file) {
- tmpstat = ocfs_release_lock(osb, fileOff,
+ tmpstat = ocfs_release_lock(osb, fe_off,
OCFS_DLM_EXCLUSIVE_LOCK,
lockFlags, fe_bh, inode);
if (tmpstat < 0)
@@ -728,6 +738,9 @@
if (drop_inode)
SET_INODE_DELETED(inode);
}
+
+ up(&OCFS_I(inode)->ip_io_sem);
+ up(&OCFS_I(dir)->ip_io_sem);
bail:
if (status < 0 && status != -ENOTEMPTY &&
status != -EPERM && status != -EBUSY && status != -EINTR) {
@@ -838,18 +851,38 @@
}
} else if (handle) {
if (id2_locked)
- ocfs_journal_add_lock(handle, type2, flags2,
- *bh2, inode2);
- ocfs_journal_add_lock(handle, type1, flags1, *bh1,
- inode1);
+ ocfs_handle_add_lock(handle, type2, flags2,
+ *bh2, inode2, 0);
+ ocfs_handle_add_lock(handle, type1, flags1, *bh1,
+ inode1, 0);
}
LOG_EXIT_STATUS(status);
return(status);
} /* ocfs_double_lock */
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0) /* NOTE(review): presumably older kernels already provide these helpers - confirm */
+static inline void double_down(struct semaphore *s1, struct semaphore *s2) /* take both semaphores in a fixed, address-based order */
+{
+ if (s1 != s2) { /* same semaphore passed twice: fall through and take it only once */
+ if ((unsigned long) s1 < (unsigned long) s2) { /* make s1 the higher address */
+ struct semaphore *tmp = s2;
+ s2 = s1; s1 = tmp;
+ }
+ down(s1); /* higher address first, so callers passing the pair in either order lock in the same order */
+ }
+ down(s2);
+}
+static inline void double_up(struct semaphore *s1, struct semaphore *s2) /* release a pair taken with double_down() */
+{
+ up(s1);
+ if (s1 != s2) /* release the second one only if it is a different semaphore */
+ up(s2);
+}
+#endif
+
#define PARENT_INO(buffer) \
((struct ocfs2_dir_entry *) ((char *) buffer + \
le16_to_cpu(((struct ocfs2_dir_entry *) buffer)->rec_len)))->inode
@@ -901,10 +934,13 @@
/* new parent dir offset */
newDirOff = GET_INODE_FEOFF(new_dir);
+ double_down(&OCFS_I(old_dir)->ip_io_sem, &OCFS_I(new_dir)->ip_io_sem);
+ down(&OCFS_I(old_inode)->ip_io_sem);
if (new_inode) {
if (ocfs_inc_icount(new_inode) < 0)
BUG();
+ down(&OCFS_I(new_inode)->ip_io_sem);
}
if (atomic_read (&old_dentry->d_count) > 2) {
@@ -1033,8 +1069,10 @@
/* check if the target already exists (in which case we need
* to delete it */
- status = ocfs_find_files_on_disk(osb, new_dentry->d_name.name, new_dentry->d_name.len,
- &newfe_bh, new_dir, new_inode, 0, &new_de_bh, &new_de);
+ status = ocfs_find_files_on_disk(osb, new_dentry->d_name.name,
+ new_dentry->d_name.len,
+ &newfe_lockid, new_dir, 0,
+ &new_de_bh, &new_de);
/* The only error we allow here is -ENOENT because the new
* file not existing is perfectly valid. */
if ((status < 0) && (status != -ENOENT)) {
@@ -1047,14 +1085,26 @@
/* In case we need to overwrite an existing file, we blow it
* away first */
if (new_de) {
+ if (newfe_lockid != GET_INODE_FEOFF(new_inode))
+ BUG();
+
+ status = ocfs_read_bh(osb, newfe_lockid, &newfe_bh,
+ OCFS_BH_CACHED, new_inode);
+ if (status < 0) {
+ LOG_ERROR_STATUS(status);
+ goto finally;
+ }
+
/* TODO: change this block to the ext3-style orphan model */
newfe = OCFS_BH_GET_DATA_READ(newfe_bh);
if (S_ISDIR(newfe->i_mode))
newfe_flags = FLAG_DIR;
newfe_flags |= FLAG_FILE_DELETE;
- newfe_lockid =
- newfe->i_blkno << osb->sb->s_blocksize_bits;
+ if (newfe_lockid !=
+ (newfe->i_blkno << osb->sb->s_blocksize_bits))
+ BUG();
+
OCFS_BH_PUT_DATA(newfe_bh);
newfe = NULL;
@@ -1241,8 +1291,13 @@
newfe_flags, NULL, new_inode);
}
- if (new_inode)
+ double_up(&OCFS_I(old_dir)->ip_io_sem, &OCFS_I(new_dir)->ip_io_sem);
+ up(&OCFS_I(old_inode)->ip_io_sem);
+
+ if (new_inode) {
+ up(&OCFS_I(new_inode)->ip_io_sem);
iput(new_inode);
+ }
if (tmpfe)
ocfs_release_file_entry (tmpfe);
@@ -1304,17 +1359,23 @@
sb = dir->i_sb;
osb = OCFS_SB(sb);
+ down(&OCFS_I(dir)->ip_io_sem);
+
inode = new_inode (sb);
if (IS_ERR (inode)) {
status = PTR_ERR(inode);
+ inode = NULL;
LOG_ERROR_STR("new_inode failed!");
goto bail;
}
if (ocfs_inode_init_private(inode)) {
LOG_ERROR_STATUS(status = -ENOMEM);
+ iput(inode);
+ inode = NULL;
goto bail;
}
+ down(&OCFS_I(inode)->ip_io_sem);
l = strlen (symname) + 1;
newsize = l - 1;
@@ -1408,6 +1469,10 @@
}
bail:
+ if (inode)
+ up(&OCFS_I(inode)->ip_io_sem);
+ up(&OCFS_I(dir)->ip_io_sem);
+
if (new_fe_bh) {
if (fe)
OCFS_BH_PUT_DATA(new_fe_bh);
Modified: branches/format-changes/src/nm.c
===================================================================
--- branches/format-changes/src/nm.c 2004-06-09 06:06:33 UTC (rev 1030)
+++ branches/format-changes/src/nm.c 2004-06-09 07:11:07 UTC (rev 1031)
@@ -87,7 +87,7 @@
"REMASTER_THIS", // remaster lock to me
"REMASTER_REQUESTOR", // remaster lock to requestor
"DROP_READONLY", // RO cachelock needs to convert to RW
- "READONLY"
+ "READONLY",
};
#endif
@@ -525,6 +525,11 @@
my_node_wins = (node_num < osb->node_num);
}
+// if (flags & FLAG_DROP_LINK) {
+// vote_type = RELEASE_DENTRY;
+// goto done;
+// }
+
if (flags & FLAG_DROP_READONLY) {
vote_type = DROP_READONLY;
goto done;
@@ -635,7 +640,7 @@
int inc_inode_seq = 0;
int disk_vote = (ctxt->request_method == DISK_VOTE);
int comm_vote = (ctxt->request_method == COMM_VOTE);
- int have_i_sem = 0;
+ int have_io_sem = 0;
ocfs_publish *publish = (disk_vote ? ctxt->u.publish : NULL);
ocfs_dlm_msg *dlm_msg = (comm_vote ? ctxt->u.dlm_msg : NULL);
__u32 node_num = ctxt->node_num;
@@ -687,7 +692,7 @@
if ((flags & (FLAG_FILE_DELETE | FLAG_FILE_RENAME)) && (flags & FLAG_RELEASE_LOCK))
inode = NULL;
else {
- inode = ocfs_iget(osb, lock_id, NULL);
+ inode = ocfs_iget(osb, lock_id);
if (!inode) {
status = -EFAIL;
LOG_ERROR_ARGS("Could not find inode: lock_id = %llu, "
@@ -697,8 +702,8 @@
goto leave;
}
- down(&inode->i_sem);
- have_i_sem = 1;
+ down(&OCFS_I(inode)->ip_io_sem);
+ have_io_sem = 1;
lockres = GET_INODE_LOCKRES(inode);
status = ocfs_update_lockres (osb, lock_id, NULL, NULL,
@@ -772,7 +777,21 @@
}
vote_response = FLAG_VOTE_OIN_UPDATED;
break;
-
+
+#if 0
+ case RELEASE_DENTRY:
+ if (!inode)
+ BUG();
+
+ /* we always vote yes on this one. */
+ vote_response = FLAG_VOTE_NODE;
+ printk("going to prune dentries for inode %lu\n",
+ inode->i_ino);
+
+ d_prune_aliases (inode);
+ inode->i_nlink--;
+ break;
+#endif
case DELETE_RENAME_RELEASE:
/* ACK and done */
vote_response = FLAG_VOTE_NODE;
@@ -842,15 +861,15 @@
#else
fsync_inode_buffers (inode);
#endif
- up(&inode->i_sem);
- have_i_sem = 0;
+ up(&OCFS_I(inode)->ip_io_sem);
+ have_io_sem = 0;
}
break;
}
if (inode) {
- up(&inode->i_sem);
- have_i_sem = 0;
+ up(&OCFS_I(inode)->ip_io_sem);
+ have_io_sem = 0;
}
/* Set the always update master on open flag */
@@ -953,7 +972,7 @@
ocfs_node_map_clear_bit(&lockres->readonly_map, osb->node_num);
if (!ocfs_node_map_is_empty(&lockres->readonly_map)) {
OCFS_ASSERT(lockres->readonly_node == osb->node_num);
-#warning need to make sure inode is not NULL in process_vote
+ OCFS_ASSERT(inode);
status = ocfs_drop_readonly_cache_lock(osb, inode, 1);
if (status < 0)
LOG_ERROR_STATUS(status);
@@ -1079,9 +1098,9 @@
* the actual IO that a readdir may have in
* progress, if it's possible to have a corrupt
* readdir. for now, skip it.
- * NOTE: can't just take i_sem because lock order
- * needs to be i_sem->lockres... would have to
- * drop lockres, take i_sem, take lockres, then
+ * NOTE: can't just take io_sem because lock order
+ * needs to be io_sem->lockres... would have to
+ * drop lockres, take io_sem, take lockres, then
* recheck all the conditions to see if still
* appropriate, then do the work and drop both.
* seems like a lot of work. almost as many lines
@@ -1237,8 +1256,8 @@
if (inode) {
if (inc_inode_seq)
ocfs_inc_inode_seq(osb, inode, 1);
- if (have_i_sem)
- up(&inode->i_sem);
+ if (have_io_sem)
+ up(&OCFS_I(inode)->ip_io_sem);
iput(inode);
}
@@ -1365,6 +1384,7 @@
if (yield) {
/* this will wait until process_vote gets to the release */
+ down(&OCFS_I(inode)->ip_io_sem);
ocfs_acquire_lockres(lockres, 0); // ocfs_process_vote ocfs_acquire_lock
}
@@ -1401,6 +1421,9 @@
if (yield) {
/* from nm thread, give some time to waiters */
ocfs_release_lockres(lockres); // ocfs_process_vote ocfs_acquire_lock
+ up(&OCFS_I(inode)->ip_io_sem);
+
+ down(&OCFS_I(inode)->ip_io_sem);
ocfs_acquire_lockres(lockres, 0); // ocfs_process_vote ocfs_acquire_lock
}
continue;
@@ -1417,8 +1440,10 @@
lockres->lock_state &= ~FLAG_READONLY_DROPPING;
leave:
- if (yield)
+ if (yield) {
ocfs_release_lockres(lockres); // ocfs_process_vote ocfs_acquire_lock
+ up(&OCFS_I(inode)->ip_io_sem);
+ }
if (inode)
iput(inode);
Modified: branches/format-changes/src/ocfs.h
===================================================================
--- branches/format-changes/src/ocfs.h 2004-06-09 06:06:33 UTC (rev 1030)
+++ branches/format-changes/src/ocfs.h 2004-06-09 07:11:07 UTC (rev 1031)
@@ -172,7 +172,7 @@
#define FLAG_FILE_CREATE_DIR 0x00000040
#define FLAG_FILE_UPDATE_OIN 0x00000080
#define FLAG_FILE_RELEASE_MASTER 0x00000100
-#define FLAG_FILE_UNUSED2 0x00000200
+#define FLAG_DROP_LINK 0x00000200
#define FLAG_CHANGE_MASTER 0x00000400
#define FLAG_ADD_OIN_MAP 0x00000800
#define FLAG_DIR 0x00001000
@@ -802,18 +802,13 @@
__u64 last_upd_seq_num;
};
-/* OCFS2 Inode Private Data
- *
- * feoff/voteoff can change during rename. Luckily, rename takes a ton
- * of locks and does several checks, so you're safe reading these values
- * if any of the following is true:
- * 1) you have i_sem
- * 2) you have priv_sem
- * 3) open_hndl_cnt > 0
- */
+struct _ocfs_journal_handle;
+
+/* OCFS2 Inode Private Data */
typedef struct _ocfs_inode_private
{
- /* always valid, just a simple back pointer. */
+ /* inode and feoff fields never change and are always safe to
+ * read. */
struct inode *inode;
__u64 feoff;
@@ -821,12 +816,24 @@
/* These fields are protected by priv_sem */
struct semaphore priv_sem;
__u32 open_hndl_cnt;
- int needs_verification;
+ int needs_verification;
__u64 chng_seq_num;
ocfs_extent_map map;
__s64 alloc_size;
__u32 oin_flags;
+ /* This protects io on the metadata buffers related to this
+ * inode. We also consider an "abort_trans" an I/O as it will
+ * revert the buffer back to a previous state. */
+ struct semaphore ip_io_sem;
+
+ /* Used by the journalling code to attach an inode to a
+ * handle. These are protected by ip_io_sem in order to lock
+ * out other I/O to the inode until we either commit or
+ * abort. */
+ struct list_head ip_handle_list;
+ struct _ocfs_journal_handle *ip_handle;
+
/* inode_extend_sem locks out extends on behalf of other nodes. */
struct semaphore inode_extend_sem;
@@ -848,8 +855,6 @@
ocfs_lock_res i_lockres;
__u32 i_dir_start_lookup;
-
- struct list_head handle_list;
} ocfs_inode_private;
/* Eventually, the 'flags' and 'oin_flags' fields need to be
@@ -894,7 +899,6 @@
#define GET_INODE_FEOFF(i) OCFS_I(i)->feoff
-#warning take this out when all the lockres stuff checks out
#define GET_INODE_LOCKRES(i) ({ if (i==NULL) BUG(); (&(OCFS_I(i)->i_lockres)); })
typedef enum _ocfs_vol_state
@@ -1088,7 +1092,6 @@
__u32 cfg_numblocks;
struct semaphore publish_lock; /* protects r/w to publish sector */
atomic_t node_req_vote; /* set when node's vote req pending */
- struct semaphore trans_lock; /* serializes transactions */
int publish_dirty;
struct list_head needs_flush_head;
wait_queue_head_t flush_event;
@@ -1542,17 +1545,6 @@
[S_IFLNK >> S_SHIFT] OCFS_FT_SYMLINK,
};
-
-typedef struct _ocfs_find_inode_args
-{
- __u64 feoff;
- struct buffer_head *fe_bh;
- unsigned long ino;
- __u32 flags;
-} ocfs_find_inode_args;
-
-#define OCFS_FIND_INODE_FLAG_SYSFILE 0x00000002
-
/* timeout structure taken from Ben's aio.c */
typedef struct _ocfs_timeout {
struct timer_list timer;
@@ -1829,22 +1821,6 @@
return (u32)(blocks >> b_to_c_bits);
}
-
-/*
- * Trans Lock:
- * Right now OCFS2 only supports a single transaction at a
- * time. Transactions are locked out by using trans_lock.
- */
-static inline void ocfs_take_trans_lock(ocfs_super *osb)
-{
- down(&osb->trans_lock);
-}
-
-static inline void ocfs_release_trans_lock(ocfs_super *osb)
-{
- up(&osb->trans_lock);
-}
-
typedef struct _ocfs_journal_handle ocfs_journal_handle;
#endif /* !OCFS_H */
Modified: branches/format-changes/src/ocfs_buffer_head.h
===================================================================
--- branches/format-changes/src/ocfs_buffer_head.h 2004-06-09 06:06:33 UTC (rev 1030)
+++ branches/format-changes/src/ocfs_buffer_head.h 2004-06-09 07:11:07 UTC (rev 1031)
@@ -120,6 +120,8 @@
while (1) {
if (ocfs_bh_sem_lock(bh) == OCFS_BH_SEM_WAIT_ON_MODIFY) {
+ BUG();
+
ocfs_bh_sem_unlock(bh);
wait_on_buffer_modified(bh);
} else {
@@ -180,6 +182,7 @@
"this process is not the lock "
"holder!\n");
#endif
+ BUG();
ocfs_bh_sem_unlock(bh);
wait_on_buffer_modified(bh);
} else {
@@ -210,6 +213,8 @@
"this process is not the lock "
"holder!\n");
#endif
+ LOG_ERROR_STR("Trylock about to BUG()");
+ BUG();
ocfs_bh_sem_unlock(bh);
return NULL;
}
Modified: branches/format-changes/src/ocfs_journal.h
===================================================================
--- branches/format-changes/src/ocfs_journal.h 2004-06-09 06:06:33 UTC (rev 1030)
+++ branches/format-changes/src/ocfs_journal.h 2004-06-09 07:11:07 UTC (rev 1031)
@@ -46,8 +46,6 @@
struct _ocfs2_dinode;
struct _ocfs_journal_handle;
-/* most of the ocfs_journal structure is protected by the
- * trans_lock. BEWARE. */
typedef struct _ocfs_journal ocfs_journal;
struct _ocfs_journal {
enum ocfs_journal_state state; /* Journals current state */
@@ -71,12 +69,6 @@
to access file entry */
atomic_t num_trans; /* Number of transactions
* currently in the system. */
- struct _ocfs_journal_handle *curr; /* pointer to currently
- * running handle. In
- * the future when we do
- * multiple concurrent
- * transactions this may
- * become a list.*/
/* locking order: trans_lock -> commit_sem -> journal.curr.list_lock */
struct semaphore commit_sem; /* protects *everything*
* in the commited list
@@ -86,6 +78,7 @@
struct list_head commited; /* doubly linked list of all
* commited handles awaiting
* checkpointing. */
+ struct rw_semaphore trans_barrier;
};
typedef struct _ocfs_journal_lock ocfs_journal_lock;
@@ -94,6 +87,7 @@
__u32 flags;
struct buffer_head *bh;
struct inode *inode;
+ int req_io_sem;
struct list_head lock_list;
};
@@ -120,7 +114,7 @@
int num_buffs;
struct buffer_head **buffs;
- /* The following three fields are for ocfs_journal_add_lock */
+ /* The following three fields are for ocfs_handle_add_lock */
spinlock_t list_lock; /* Used to protect the 'locks'
* list. Only used if the
* handle is the same as
@@ -242,9 +236,9 @@
* buffer. Will have to call ocfs_journal_dirty once
* we've actually dirtied it. Type is one of . or .
* ocfs_journal_dirty - Mark a journalled buffer as having dirty data.
- * ocfs_journal_add_lock - Sometimes we need to delay lock release
+ * ocfs_handle_add_lock - Sometimes we need to delay lock release
* until after a transaction has been completed. Use
- * ocfs_journal_add_lock to indicate that a lock needs
+ * ocfs_handle_add_lock to indicate that a lock needs
* to be released at the end of that handle. Locks
* will be released in the order that they are added.
* ocfs_handle_add_inode - Add a locked inode to a transaction.
@@ -293,10 +287,11 @@
*/
int ocfs_journal_dirty(ocfs_journal_handle *handle,
struct buffer_head *bh);
-void ocfs_journal_add_lock(ocfs_journal_handle *handle,
- __u32 type, __u32 flags,
- struct buffer_head *bh,
- struct inode *inode);
+void ocfs_handle_add_lock(ocfs_journal_handle *handle,
+ __u32 type, __u32 flags,
+ struct buffer_head *bh,
+ struct inode *inode,
+ int req_io_sem);
/*
* Some transactions require us to leave inodes in a locked state
* until we either commit or abort because the buffer state can change
Modified: branches/format-changes/src/super.c
===================================================================
--- branches/format-changes/src/super.c 2004-06-09 06:06:33 UTC (rev 1030)
+++ branches/format-changes/src/super.c 2004-06-09 07:11:07 UTC (rev 1031)
@@ -233,13 +233,14 @@
LOG_ENTRY();
- new = ocfs_iget(osb, OCFS_SYS_ROOT_INODE_OFF(osb), NULL);
+ new = ocfs_iget(osb, OCFS_SYS_ROOT_INODE_OFF(osb));
if (!new) {
LOG_ERROR_STATUS(status = -EINVAL);
goto bail;
}
osb->sys_root_inode = new;
- for (i=0; i<NUM_SYSTEM_INODES; i++) {
+
+ for (i = 0; i < NUM_SYSTEM_INODES; i++) {
new = ocfs_get_system_file_inode(osb, i, osb->node_num);
if (!new) {
ocfs_release_system_inodes(osb);
@@ -248,6 +249,7 @@
}
osb->system_inodes[i] = new;
}
+
bail:
LOG_EXIT_STATUS(status);
return(status);
@@ -940,7 +942,7 @@
osb->vol_state = VOLUME_ENABLED;
up (&(osb->osb_res));
- inode = ocfs_iget(osb, OCFS_ROOT_INODE_FE_OFF(osb), NULL);
+ inode = ocfs_iget(osb, OCFS_ROOT_INODE_FE_OFF(osb));
if (!inode) {
status = -EIO;
LOG_ERROR_STATUS (status);
@@ -1364,7 +1366,6 @@
init_MUTEX (&(osb->osb_res));
init_MUTEX (&(osb->recovery_lock));
init_MUTEX (&(osb->comm_lock));
- init_MUTEX (&(osb->trans_lock));
init_MUTEX (&(osb->extend_sem));
init_MUTEX (&(osb->cfg_lock));
init_MUTEX (&(osb->vote_sem));
Modified: branches/format-changes/src/sysfile.c
===================================================================
--- branches/format-changes/src/sysfile.c 2004-06-09 06:06:33 UTC (rev 1030)
+++ branches/format-changes/src/sysfile.c 2004-06-09 07:11:07 UTC (rev 1031)
@@ -86,32 +86,30 @@
{
char namebuf[40];
struct inode *inode = NULL;
- struct buffer_head *fe_bh = NULL;
+ __u64 fe_off = 0;
struct buffer_head *dirent_bh = NULL;
struct ocfs2_dir_entry *de = NULL;
int status = 0;
if (file_type == GLOBAL_BITMAP_SYSTEM_INODE)
- // "There Can Be Only One!"
+ /* "There Can Be Only One!" */
sprintf(namebuf, system_file_names[file_type]);
else
sprintf(namebuf, system_file_names[file_type], node);
- status = ocfs_find_files_on_disk (osb, namebuf, strlen(namebuf),
- &fe_bh, osb->sys_root_inode,
- NULL, 1, &dirent_bh, &de);
+ status = ocfs_find_files_on_disk(osb, namebuf, strlen(namebuf),
+ &fe_off, osb->sys_root_inode,
+ 1, &dirent_bh, &de);
if (status < 0) {
goto bail;
}
- inode = ocfs_iget(osb, 0, fe_bh);
+ inode = ocfs_iget(osb, fe_off);
if (!inode) {
LOG_ERROR_STR("Could not create inode!");
goto bail;
}
bail:
- if (fe_bh)
- brelse(fe_bh);
if (dirent_bh)
brelse(dirent_bh);
return inode;
@@ -326,6 +324,7 @@
int numbhs, i;
char *data;
struct buffer_head **bhs;
+ struct inode *ext_alloc_inode = NULL;
LOG_ENTRY_ARGS ("(FileId = %u, Size = %llu)\n", FileId, FileSize);
@@ -377,6 +376,14 @@
osb->vol_layout.data_start_off;
actualLength = numClusterAlloc * osb->vol_layout.cluster_size;
+ ext_alloc_inode = igrab(osb->system_inodes[EXTENT_ALLOC_BITMAP_SYSTEM_INODE]);
+ if (!ext_alloc_inode) {
+ status = -EFAIL;
+ LOG_ERROR_STATUS(status);
+ goto leave;
+ }
+
+ ocfs_handle_add_inode(handle, ext_alloc_inode);
status = ocfs_allocate_extent(osb, fe_bh, handle,
actualDiskOffset >> osb->sb->s_blocksize_bits,
actualLength >> osb->s_clustersize_bits,
@@ -446,6 +453,9 @@
OCFS_BH_PUT_DATA(fe_bh);
if (local_fe)
brelse(fe_bh);
+ if (ext_alloc_inode)
+ iput(ext_alloc_inode);
+
LOG_EXIT_STATUS (status);
return status;
} /* ocfs_extend_system_file */
More information about the Ocfs2-commits mailing list