[Ocfs2-commits] mfasheh commits r1106 - in trunk: . src src/inc
svn-commits at oss.oracle.com
svn-commits at oss.oracle.com
Tue Jun 15 18:55:35 CDT 2004
Author: mfasheh
Date: 2004-06-15 17:55:33 -0500 (Tue, 15 Jun 2004)
New Revision: 1106
Modified:
trunk/TODO
trunk/src/alloc.c
trunk/src/file.c
trunk/src/inc/ocfs.h
trunk/src/inc/proto.h
trunk/src/inode.c
trunk/src/journal.c
trunk/src/lockres.c
trunk/src/namei.c
trunk/src/nm.c
Log:
* turn ip_io_sem into an rwsem.
* finish locking down disk I/O. I believe I've got all paths now...
* turn inode_extend_sem into an rwsem. This way multiple people can be
doing read/writes to a file at the same time (it was blocking
otherwise). Fix a deadlock that can happen between most other locks
and the inode_extend_sem by taking it outside of any other lock
context, the only exception being the lockres in process_vote.
* make lockres locking semaphore based instead of our lame recursive
lock. There are no more recursive locks in ocfs.
* increment i_count on an inode when we put it on the recovery list
and iput it when we take it off. This needs some testing...
Modified: trunk/TODO
===================================================================
--- trunk/TODO 2004-06-15 22:29:45 UTC (rev 1105)
+++ trunk/TODO 2004-06-15 22:55:33 UTC (rev 1106)
@@ -20,16 +20,9 @@
DISK_LOCK_SEQNUM(fe) = changeSeqNum;
code as it is equally useless.
-* get rid of as much of lockres as possible.
-
* make slabs for: ocfs_journal_handle, and ocfs_journal_copyout and maybe
ocfs_journal_lock
-* when we put the inode on the recovery list we should inc i_count, and just
- be sure to iput it when we remove it off that list, and clean up the lists
- during shutdown (before we start doing all our iputs otherwise we'll leak
- those inodes)
-
* fops, iops on bitmap file (and maybe other system files) should probably be
different and / or special cased. In a related todo: Get rid of the
INODE_JOURNAL flag on our inodes and use the system file flag instead.
Modified: trunk/src/alloc.c
===================================================================
--- trunk/src/alloc.c 2004-06-15 22:29:45 UTC (rev 1105)
+++ trunk/src/alloc.c 2004-06-15 22:55:33 UTC (rev 1106)
@@ -321,7 +321,7 @@
LOG_ERROR_STATUS (status);
goto abort;
}
- down(&OCFS_I(extnode_inode[i])->ip_io_sem);
+ down_write(&OCFS_I(extnode_inode[i])->ip_io_sem);
status = ocfs_acquire_lock (osb,
OCFS_DLM_EXCLUSIVE_LOCK,
@@ -329,7 +329,7 @@
&ugly_hack_bh,
extnode_inode[i]);
if (status < 0) {
- up(&OCFS_I(extnode_inode[i])->ip_io_sem);
+ up_write(&OCFS_I(extnode_inode[i])->ip_io_sem);
iput(extnode_inode[i]);
extnode_inode[i] = NULL;
if (status != -EINTR)
@@ -350,13 +350,13 @@
LOG_ERROR_STATUS (status);
goto abort;
}
- down(&OCFS_I(vol_inode)->ip_io_sem);
+ down_write(&OCFS_I(vol_inode)->ip_io_sem);
status = ocfs_acquire_lock (osb, OCFS_DLM_EXCLUSIVE_LOCK,
FLAG_FILE_CREATE,
&globalbh, vol_inode);
if (status < 0) {
- up(&OCFS_I(vol_inode)->ip_io_sem);
+ up_write(&OCFS_I(vol_inode)->ip_io_sem);
iput(vol_inode);
vol_inode = NULL;
@@ -442,7 +442,7 @@
if (extnode_inode) {
for (i = 0; i < OCFS_MAXIMUM_NODES; i++) {
if (extnode_inode[i]) {
- up(&OCFS_I(extnode_inode[i])->ip_io_sem);
+ up_write(&OCFS_I(extnode_inode[i])->ip_io_sem);
iput(extnode_inode[i]);
}
}
@@ -450,7 +450,7 @@
}
if (vol_inode) {
- up(&OCFS_I(vol_inode)->ip_io_sem);
+ up_write(&OCFS_I(vol_inode)->ip_io_sem);
iput(vol_inode);
}
@@ -2422,7 +2422,9 @@
* decoded and updated in the extent map.
*
*/
-int ocfs_lookup_file_allocation (ocfs_super * osb, __s64 Vbo, __s64 * Lbo, __u32 sectors, u32 *sector_count, struct inode *inode)
+int ocfs_lookup_file_allocation (ocfs_super * osb, __s64 Vbo, __s64 * Lbo,
+ __u32 sectors, u32 *sector_count,
+ struct inode *inode, int locked)
{
int status = -EFAIL;
ocfs_file_entry *fe = NULL;
@@ -2434,28 +2436,76 @@
__s64 localVbo;
__u64 cnt;
__u32 NumIndex;
+ int have_io_sem = 0;
- LOG_ENTRY_ARGS("(vbo=%llu, sectors=%u, inode=%llu)\n", Vbo, sectors, GET_INODE_FEOFF(inode));
+ LOG_ENTRY_ARGS("(vbo=%llu, sectors=%u, inode=%llu)\n", Vbo, sectors,
+ GET_INODE_FEOFF(inode));
OCFS_ASSERT (osb);
OCFS_ASSERT (inode);
- if (INODE_JOURNAL(inode) || Vbo < OCFS_I(inode)->alloc_size) {
- if (ocfs_lookup_extent_map_entry (osb, &(OCFS_I(inode)->map),
- Vbo, Lbo, &cnt, &NumIndex) && cnt >= sectors) {
- status = 0;
- goto finally;
- }
+ /* for direct io we want to skip all locking. If you're a
+ * system file, ip_io_sem should already have been taken
+ * before coming here. */
+ if (((OCFS_I(inode)->oin_flags & OCFS_OIN_OPEN_FOR_DIRECTIO)
+ || OCFS_I(inode)->flags & OCFS_INODE_SYSTEM_FILE)
+ && (!locked))
+ printk("ocfs2: inode %lu, locked = %d, open direct = %u, "
+ "sysfile = %u\n", inode->i_ino, locked,
+ (OCFS_I(inode)->oin_flags & OCFS_OIN_OPEN_FOR_DIRECTIO),
+ (OCFS_I(inode)->flags & OCFS_INODE_SYSTEM_FILE));
+
+ if (!locked)
+ down(&(OCFS_I(inode)->priv_sem));
+
+check_alloc_sz:
+ status = 0;
+
+ if (Vbo < OCFS_I(inode)->alloc_size)
+ status = ocfs_lookup_extent_map_entry(osb,
+ &(OCFS_I(inode)->map),
+ Vbo, Lbo, &cnt,
+ &NumIndex);
+
+ if (!locked)
+ up(&(OCFS_I(inode)->priv_sem));
+
+ if (status && cnt >= sectors) {
+ /* Found a what we were looking for. */
+ status = 0;
+ goto finally;
}
+ /* Ok, we didn't find it in the extent map (or we need to
+ * refresh as alloc sizes don't match up. */
+
+ if (!locked) {
+ /* yay for lock ordering. We must take ip_io_sem
+ * before priv_sem. */
+ down_read(&OCFS_I(inode)->ip_io_sem);
+ down(&OCFS_I(inode)->priv_sem);
+ }
+
+ /* Make sure we still need to hit disk. */
+ if (Vbo >= OCFS_I(inode)->alloc_size) {
+ if (!locked)
+ up_read(&OCFS_I(inode)->ip_io_sem);
+ goto check_alloc_sz;
+ }
+ up(&OCFS_I(inode)->priv_sem);
+
+ if (!locked)
+ have_io_sem = 1;
+
remainingLength = sectors;
localVbo = Vbo;
-
+
/* We are looking for a Vbo, but it is not in the Map or not Valid. */
/* Thus we have to go to the disk, and update the Map */
/* Read the file Entry corresponding to this */
- status = ocfs_read_bh(osb, GET_INODE_FEOFF(inode), &fe_bh, OCFS_BH_COND_CACHED, inode);
+ status = ocfs_read_bh(osb, GET_INODE_FEOFF(inode), &fe_bh,
+ OCFS_BH_COND_CACHED, inode);
if (status < 0) {
LOG_ERROR_STATUS (status);
goto finally;
@@ -2468,7 +2518,7 @@
goto finally;
}
- if (!INODE_JOURNAL(inode) && Vbo >= (__s64) fe->alloc_size) {
+ if (Vbo >= (__s64) fe->alloc_size) {
LOG_ERROR_ARGS ("vbo=%llu, fe->alloc_sz=%llu alloc_size=%llu",
Vbo, fe->alloc_size,
OCFS_I(inode)->alloc_size);
@@ -2477,8 +2527,12 @@
}
if (fe->local_ext) {
+ if (!locked)
+ down(&(OCFS_I(inode)->priv_sem));
status = ocfs_update_extent_map (osb, &OCFS_I(inode)->map, fe,
NULL, NULL, LOCAL_EXT);
+ if (!locked)
+ up(&(OCFS_I(inode)->priv_sem));
if (status < 0) {
LOG_ERROR_STATUS (status);
goto finally;
@@ -2487,7 +2541,8 @@
/* Extents are branched and we are no longer using
* Local Extents for this File Entry. */
- status = ocfs_get_leaf_extent (osb, fe, localVbo, &ext_bh, inode);
+ status = ocfs_get_leaf_extent(osb, fe, localVbo, &ext_bh,
+ inode);
if (status < 0) {
LOG_ERROR_STATUS (status);
goto finally;
@@ -2495,8 +2550,16 @@
OcfsExtent = OCFS_BH_GET_DATA_READ(ext_bh);
while (1) {
- status = ocfs_update_extent_map (osb, &OCFS_I(inode)->map, OcfsExtent,
- &localVbo, &remainingLength, NONLOCAL_EXT);
+ if (!locked)
+ down(&(OCFS_I(inode)->priv_sem));
+ status = ocfs_update_extent_map (osb,
+ &OCFS_I(inode)->map,
+ OcfsExtent,
+ &localVbo,
+ &remainingLength,
+ NONLOCAL_EXT);
+ if (!locked)
+ up(&(OCFS_I(inode)->priv_sem));
if (status < 0) {
LOG_ERROR_STATUS(status);
goto finally;
@@ -2523,7 +2586,8 @@
status = ocfs_read_bh(osb, next_data_ext,
&ext_bh,
- OCFS_BH_COND_CACHED, inode);
+ OCFS_BH_COND_CACHED,
+ inode);
if (status < 0) {
LOG_ERROR_STATUS(status);
goto finally;
@@ -2539,17 +2603,24 @@
}
}
- if (ocfs_lookup_extent_map_entry (osb, &(OCFS_I(inode)->map), Vbo, Lbo, &cnt, &NumIndex) &&
- cnt >= sectors) {
+ if (!locked)
+ down(&(OCFS_I(inode)->priv_sem));
+ if (ocfs_lookup_extent_map_entry (osb, &(OCFS_I(inode)->map), Vbo, Lbo,
+ &cnt, &NumIndex) && cnt >= sectors) {
status = 0;
} else
status = -EFAIL;
+ if (!locked)
+ up(&(OCFS_I(inode)->priv_sem));
/* want to return cnt only if asked for it */
if (sector_count)
*sector_count = (u32) cnt;
finally:
+ if (have_io_sem)
+ up_read(&OCFS_I(inode)->ip_io_sem);
+
if (fe_bh) {
if (fe)
OCFS_BH_PUT_DATA(fe_bh);
@@ -2565,7 +2636,6 @@
return (status);
} /* ocfs_lookup_file_allocation */
-
/* ocfs_get_leaf_extent()
* '*data_exent_bh' should be NULL.
*/
@@ -2917,11 +2987,6 @@
/* Allocate a block of size blocksize from the relevant file/bitmap */
OCFS_ASSERT (blockSize);
- if (down_trylock(&OCFS_I(inode)->ip_io_sem) == 0) {
- LOG_TRACE_ARGS("Uhoh, asking me to allocate on an unlocked system file! (type = %u, i_ino = %lu)\n", Type, inode->i_ino);
- BUG();
- }
-
status = ocfs_acquire_lock (osb, OCFS_DLM_EXCLUSIVE_LOCK,
FLAG_FILE_CREATE, &bh, inode);
if (status < 0) {
Modified: trunk/src/file.c
===================================================================
--- trunk/src/file.c 2004-06-15 22:29:45 UTC (rev 1105)
+++ trunk/src/file.c 2004-06-15 22:55:33 UTC (rev 1106)
@@ -165,7 +165,8 @@
int truncate_pages = 0;
LOG_ENTRY_ARGS ("(0x%p, 0x%p, '%*s')\n", inode, file,
- file->f_dentry->d_name.len, file->f_dentry->d_name.name);
+ file->f_dentry->d_name.len,
+ file->f_dentry->d_name.name);
osb = OCFS_SB(inode->i_sb);
@@ -177,7 +178,7 @@
/* kch - for an open request we are already given the
* inode, and therefore we are given the oin too */
- down (&(OCFS_I(inode)->ip_io_sem));
+ down_write (&(OCFS_I(inode)->ip_io_sem));
down (&(OCFS_I(inode)->priv_sem));
have_oin_sem = 1;
@@ -186,14 +187,14 @@
status = ocfs_read_bh(osb, GET_INODE_FEOFF(inode), &fe_bh,
OCFS_BH_CACHED, inode);
if (status < 0) {
- up(&OCFS_I(inode)->ip_io_sem);
+ up_write(&OCFS_I(inode)->ip_io_sem);
LOG_ERROR_STATUS(status);
goto leave;
}
status = ocfs_inode_notify_open(osb, fe_bh, NULL, inode);
if (status < 0) {
- up(&OCFS_I(inode)->ip_io_sem);
+ up_write(&OCFS_I(inode)->ip_io_sem);
LOG_ERROR_STATUS(status);
if (status != -EINTR) {
LOG_ERROR_ARGS("Open request made for nonexistent "
@@ -207,7 +208,7 @@
status = ocfs_inode_fill_ext_map (osb, fe_bh, inode);
if (status < 0) {
- up(&OCFS_I(inode)->ip_io_sem);
+ up_write(&OCFS_I(inode)->ip_io_sem);
LOG_ERROR_STATUS(status);
goto leave;
}
@@ -217,13 +218,13 @@
status = ocfs_verify_update_inode (osb, inode, &truncate_pages,
0);
if (status < 0) {
- up(&OCFS_I(inode)->ip_io_sem);
+ up_write(&OCFS_I(inode)->ip_io_sem);
LOG_ERROR_STATUS (status);
goto leave;
}
}
- up(&OCFS_I(inode)->ip_io_sem);
+ up_write(&OCFS_I(inode)->ip_io_sem);
/* yes, hold onto priv_sem. */
if (OCFS_I(inode)->open_hndl_cnt > 0) {
@@ -321,6 +322,7 @@
ocfs_super * osb;
struct dentry *dentry;
int last_close = 0;
+ int dec = 0;
LOG_ENTRY_ARGS ("(0x%p, 0x%p, '%*s')\n", inode, file,
file->f_dentry->d_name.len, file->f_dentry->d_name.name);
@@ -368,7 +370,9 @@
OCFS_I(inode)->num_extends = 0;
list_del(&OCFS_I(inode)->recovery_list);
INIT_LIST_HEAD(&OCFS_I(inode)->recovery_list);
- up(&OCFS_I(inode)->inode_extend_sem);
+ up_write(&OCFS_I(inode)->inode_extend_sem);
+
+ dec = 1;
}
spin_unlock(&oin_num_ext_lock);
up(&recovery_list_sem);
@@ -384,6 +388,9 @@
bail:
// ocfs_bh_sem_hash_cleanup_pid(ocfs_getpid());
+ if (dec)
+ iput(inode);
+
LOG_EXIT_INT (0);
return 0;
} /* ocfs_file_release */
@@ -751,12 +758,12 @@
if (OCFS_I(inode)->needs_verification) {
LOG_TRACE_STR ("OIN_NEEDS_VERIFICATION");
- down (&(OCFS_I(inode)->ip_io_sem));
+ down_read (&(OCFS_I(inode)->ip_io_sem));
down (&(OCFS_I(inode)->priv_sem));
status = ocfs_verify_update_inode (osb, inode, &needs_trunc,
0);
up (&(OCFS_I(inode)->priv_sem));
- up (&(OCFS_I(inode)->ip_io_sem));
+ up_read (&(OCFS_I(inode)->ip_io_sem));
if (needs_trunc)
ocfs_truncate_inode_pages(inode, 0);
if (status < 0) {
@@ -793,9 +800,9 @@
LOG_TRACE_ARGS
("Will need more allocation: have=%llu, need=%llu\n",
OCFS_I(inode)->alloc_size, newsize);
- down(&OCFS_I(inode)->ip_io_sem);
+ down_write(&OCFS_I(inode)->ip_io_sem);
status = ocfs_extend_file (osb, newsize, GET_INODE_FEOFF(inode), NULL, inode, NULL);
- up(&OCFS_I(inode)->ip_io_sem);
+ up_write(&OCFS_I(inode)->ip_io_sem);
if (status < 0) {
if (status != -EINTR && status != -ENOSPC) {
LOG_ERROR_STATUS (status);
@@ -882,12 +889,12 @@
if (OCFS_I(inode)->needs_verification) {
/* yay, locking hell! */
- down(&OCFS_I(inode)->ip_io_sem);
+ down_read(&OCFS_I(inode)->ip_io_sem);
down (&(OCFS_I(inode)->priv_sem));
status = ocfs_verify_update_inode (osb, inode, &needs_trunc,
0);
up (&(OCFS_I(inode)->priv_sem));
- up(&OCFS_I(inode)->ip_io_sem);
+ up_read(&OCFS_I(inode)->ip_io_sem);
if (needs_trunc)
ocfs_truncate_inode_pages(inode, 0);
if (status < 0) {
@@ -1331,7 +1338,7 @@
osb = OCFS_SB(inode->i_sb);
- down(&OCFS_I(inode)->ip_io_sem);
+ down_write(&OCFS_I(inode)->ip_io_sem);
if (!dentry->d_parent || !dentry->d_parent->d_inode) {
LOG_ERROR_STR ("bad inode or root inode");
@@ -1432,7 +1439,7 @@
inode_setattr (inode, attr);
bail:
- up(&OCFS_I(inode)->ip_io_sem);
+ up_write(&OCFS_I(inode)->ip_io_sem);
#ifndef BH_SEM_LEAK_CHECKING
if (error < 0)
Modified: trunk/src/inc/ocfs.h
===================================================================
--- trunk/src/inc/ocfs.h 2004-06-15 22:29:45 UTC (rev 1105)
+++ trunk/src/inc/ocfs.h 2004-06-15 22:55:33 UTC (rev 1106)
@@ -1084,11 +1084,9 @@
__u32 master_node_num; /* Master Node */
__u32 lock_state;
__u32 lock_holders;
- __u8 in_use;
__u8 lock_type;
- int thread_id; // XXX
atomic_t lr_ref_cnt; /* When 0, freed */ // XXX
- spinlock_t lock_mutex; // XXX
+ struct semaphore lock_mutex;
__u32 readonly_node;
__u64 readonly_map;
__u64 oin_openmap;
@@ -1118,7 +1116,7 @@
/* This protects io on the metadata buffers related to this
* inode. We also consider an "abort_trans" an I/O as it will
* revert the buffer back to a previous state. */
- struct semaphore ip_io_sem;
+ struct rw_semaphore ip_io_sem;
/* Used by the journalling code to attach an inode to a
* handle. These are protected by ip_io_sem in order to lock
@@ -1128,7 +1126,7 @@
struct _ocfs_journal_handle *ip_handle;
/* inode_extend_sem locks out extends on behalf of other nodes. */
- struct semaphore inode_extend_sem;
+ struct rw_semaphore inode_extend_sem;
struct list_head recovery_list; /* protected by recovery_list_sem */
__u32 num_extends; /* protected by oin_num_ext_lock */
Modified: trunk/src/inc/proto.h
===================================================================
--- trunk/src/inc/proto.h 2004-06-15 22:29:45 UTC (rev 1105)
+++ trunk/src/inc/proto.h 2004-06-15 22:55:33 UTC (rev 1106)
@@ -63,8 +63,9 @@
struct inode *inode);
int ocfs_load_local_alloc(ocfs_super *osb);
int ocfs_lookup_file_allocation(ocfs_super *osb, __s64 Vbo,
- __s64 *Lbo, __u32 sectors,
- u32 *sector_count, struct inode *inode);
+ __s64 *Lbo, __u32 sectors,
+ u32 *sector_count, struct inode *inode,
+ int locked);
int ocfs_process_bitmap_free_head(ocfs_super *osb,
ocfs_bitmap_free_head *f);
int ocfs_recover_local_alloc(ocfs_super *osb, int node_num);
Modified: trunk/src/inode.c
===================================================================
--- trunk/src/inode.c 2004-06-15 22:29:45 UTC (rev 1105)
+++ trunk/src/inode.c 2004-06-15 22:55:33 UTC (rev 1106)
@@ -314,14 +314,14 @@
i->flags = 0;
atomic_set(&i->i_clean_buffer_seq, 0);
init_MUTEX(&(i->priv_sem));
- init_MUTEX(&(i->inode_extend_sem));
+ init_rwsem(&(i->inode_extend_sem));
i->open_hndl_cnt = 0;
ocfs_extent_map_init (&i->map);
INIT_LIST_HEAD(&i->recovery_list);
INIT_LIST_HEAD(&i->ip_handle_list);
i->ip_handle = NULL;
- init_MUTEX(&i->ip_io_sem);
+ init_rwsem(&i->ip_io_sem);
/* These should be set in read_inode2. */
i->alloc_size = 0ULL;
@@ -717,12 +717,12 @@
/* take ip_io_sem on the inode, only to avoid a warning in
* acquire_lockres. We can get rid of it when we get rid of
* acquire_lockres */
- down(&OCFS_I(inode)->ip_io_sem);
+ down_write(&OCFS_I(inode)->ip_io_sem);
if (S_ISDIR(inode->i_mode))
lock_flags |= FLAG_DIR;
status = ocfs_acquire_lock(osb, OCFS_DLM_EXCLUSIVE_LOCK, lock_flags,
&fe_bh, inode);
- up(&OCFS_I(inode)->ip_io_sem);
+ up_write(&OCFS_I(inode)->ip_io_sem);
if (status < 0) {
/* EBUSY here is assumed to mean that other nodes are
* still using the inode. We're done here though, so
@@ -788,10 +788,10 @@
ocfs_abort_trans(handle);
if (release_disk_lock) {
- down(&OCFS_I(inode)->ip_io_sem);
+ down_write(&OCFS_I(inode)->ip_io_sem);
status = ocfs_release_lock(osb, OCFS_DLM_EXCLUSIVE_LOCK,
lock_flags, fe_bh, inode);
- up(&OCFS_I(inode)->ip_io_sem);
+ up_write(&OCFS_I(inode)->ip_io_sem);
if (status < 0)
LOG_ERROR_STATUS(status);
}
@@ -1062,7 +1062,7 @@
}
// do we need extend sem? no extend dlm message for dirs
- tmperr = ocfs_lookup_file_allocation(osb, vbo, &lbo, 1, NULL, inode);
+ tmperr = ocfs_lookup_file_allocation(osb, vbo, &lbo, 1, NULL, inode,1);
if (tmperr < 0)
goto fail;
@@ -1119,13 +1119,19 @@
__s64 vbo = 0;
__s64 lbo = 0;
__u32 len;
- int oin_locked = 0;
+ int open_direct;
LOG_ENTRY_ARGS ("(0x%p, %llu, 0x%p, %d)\n", inode,
(unsigned long long)iblock, bh_result, create);
+ open_direct = OCFS_I(inode)->oin_flags & OCFS_OIN_OPEN_FOR_DIRECTIO;
+
if (S_ISLNK (inode->i_mode)) {
- err = ocfs_symlink_get_block (inode, iblock, bh_result, create);
+ /* this always does I/O for some reason. */
+ down_read(&OCFS_I(inode)->ip_io_sem);
+ err = ocfs_symlink_get_block (inode, iblock, bh_result,
+ create);
+ up_read(&OCFS_I(inode)->ip_io_sem);
goto bail;
}
@@ -1139,23 +1145,27 @@
if (!INODE_JOURNAL(inode) && vbo >= OCFS_I(inode)->alloc_size) {
LOG_TRACE_STR("Extending allocation");
- err = ocfs_extend_file(osb, vbo + osb->sect_size, GET_INODE_FEOFF(inode), NULL, inode, NULL);
+ LOG_ERROR_ARGS("extending inode %lu in get_block!!\n",
+ inode->i_ino);
+ down_write(&OCFS_I(inode)->ip_io_sem);
+ err = ocfs_extend_file(osb, vbo + osb->sect_size,
+ GET_INODE_FEOFF(inode), NULL, inode,
+ NULL);
+ up_write(&OCFS_I(inode)->ip_io_sem);
if (err < 0) {
err = -ENOSPC;
LOG_ERROR_STATUS (err);
goto bail;
}
}
-
- if (!(OCFS_I(inode)->oin_flags & OCFS_OIN_OPEN_FOR_DIRECTIO)) {
- down(&(OCFS_I(inode)->priv_sem));
- oin_locked = 1;
- }
len = 1;
- down(&OCFS_I(inode)->inode_extend_sem);
- err = ocfs_lookup_file_allocation(osb, vbo, &lbo, len, NULL, inode);
- up(&OCFS_I(inode)->inode_extend_sem);
+ if (!open_direct)
+ down_read(&OCFS_I(inode)->inode_extend_sem);
+ err = ocfs_lookup_file_allocation(osb, vbo, &lbo, len, NULL,
+ inode, open_direct);
+ if (!open_direct)
+ up_read(&OCFS_I(inode)->inode_extend_sem);
if (err < 0) {
LOG_ERROR_ARGS ("vbo=%lld lbo=%lld len=%u", vbo, lbo, len);
goto bail;
@@ -1176,10 +1186,6 @@
if (err < 0)
err = -EIO;
- if (oin_locked && !(OCFS_I(inode)->oin_flags & OCFS_OIN_OPEN_FOR_DIRECTIO)) {
- up(&(OCFS_I(inode)->priv_sem));
- }
-
LOG_EXIT_INT (err);
return err;
} /* ocfs_get_block */
@@ -1216,7 +1222,8 @@
vbo = (__s64) block << inode->i_sb->s_blocksize_bits;
len = 1;
- err = ocfs_lookup_file_allocation(osb, vbo, &lbo, len, NULL, inode);
+ err = ocfs_lookup_file_allocation(osb, vbo, &lbo, len, NULL,
+ inode, 1);
if (err < 0) {
LOG_ERROR_ARGS ("vbo=%lld lbo=%lld len=%u", vbo,
lbo, len);
@@ -1255,7 +1262,8 @@
vbo = (__s64) iblock << inode->i_sb->s_blocksize_bits;
len = 1;
- err = ocfs_lookup_file_allocation (osb, vbo, &lbo, len, NULL, inode);
+ err = ocfs_lookup_file_allocation(osb, vbo, &lbo, len, NULL,
+ inode, 1);
if (err < 0) {
LOG_ERROR_STATUS (err);
err = -1;
@@ -1404,7 +1412,7 @@
* our logical offset */
/* TODO: Try our damndest to give sizes in multiples of PAGE_SIZE */
status = ocfs_lookup_file_allocation(osb, vbo, &lbo, max_blocks,
- &new_size, inode);
+ &new_size, inode, 1);
/* Do whatever we need to the buffer_head */
if (set_new) {
@@ -1884,7 +1892,7 @@
osb = OCFS_SB(inode->i_sb);
- down (&(OCFS_I(inode)->ip_io_sem));
+ down_read (&(OCFS_I(inode)->ip_io_sem));
down (&(OCFS_I(inode)->priv_sem));
if (INODE_DELETED(inode)) {
@@ -1913,7 +1921,7 @@
bail:
up (&(OCFS_I(inode)->priv_sem));
- up (&(OCFS_I(inode)->ip_io_sem));
+ up_read (&(OCFS_I(inode)->ip_io_sem));
if (needs_trunc)
ocfs_truncate_inode_pages(inode, 0);
Modified: trunk/src/journal.c
===================================================================
--- trunk/src/journal.c 2004-06-15 22:29:45 UTC (rev 1105)
+++ trunk/src/journal.c 2004-06-15 22:55:33 UTC (rev 1106)
@@ -196,7 +196,8 @@
atomic_inc(&inode->i_count);
- down(&OCFS_I(inode)->ip_io_sem);
+ /* we're obviously changing it... */
+ down_write(&OCFS_I(inode)->ip_io_sem);
/* sanity check */
if (OCFS_I(inode)->ip_handle)
@@ -223,7 +224,7 @@
list_del(&OCFS_I(inode)->ip_handle_list);
INIT_LIST_HEAD(&OCFS_I(inode)->ip_handle_list);
- up(&OCFS_I(inode)->ip_io_sem);
+ up_write(&OCFS_I(inode)->ip_io_sem);
iput(inode);
}
return;
@@ -255,7 +256,7 @@
/* The cache list holds unlocked inodes */
if (action == TRANS_CACHE || lock->req_io_sem)
- down(&OCFS_I(lock->inode)->ip_io_sem);
+ down_write(&OCFS_I(lock->inode)->ip_io_sem);
/* The file may have been deleted before we got to
* this lock release. If so, just skip it. */
@@ -277,7 +278,7 @@
}
if (action == TRANS_CACHE || lock->req_io_sem)
- up(&OCFS_I(lock->inode)->ip_io_sem);
+ up_write(&OCFS_I(lock->inode)->ip_io_sem);
if (lock->bh != NULL)
brelse(lock->bh);
@@ -882,7 +883,7 @@
goto done;
}
- down(&OCFS_I(inode)->ip_io_sem);
+ down_write(&OCFS_I(inode)->ip_io_sem);
SET_INODE_JOURNAL(inode);
@@ -891,7 +892,7 @@
status = ocfs_acquire_lock (osb, OCFS_DLM_EXCLUSIVE_LOCK,
FLAG_FILE_CREATE, &bh, inode);
if (status < 0) {
- up(&OCFS_I(inode)->ip_io_sem);
+ up_write(&OCFS_I(inode)->ip_io_sem);
if (status != -EINTR)
LOG_ERROR_STR("Could not get lock on journal!");
@@ -910,7 +911,7 @@
if (status < 0) {
OCFS_BH_PUT_DATA(bh);
fe = NULL;
- up(&OCFS_I(inode)->ip_io_sem);
+ up_write(&OCFS_I(inode)->ip_io_sem);
goto done;
}
@@ -934,7 +935,7 @@
DLOCK_FLAG_OPEN_MAP|DLOCK_FLAG_ADD_SELF,
&bh, inode, NULL);
if (status < 0) {
- up(&OCFS_I(inode)->ip_io_sem);
+ up_write(&OCFS_I(inode)->ip_io_sem);
LOG_ERROR_STATUS(status);
goto done;
@@ -943,7 +944,7 @@
LOG_TRACE_ARGS("inode->alloc_size = %llu\n",
OCFS_I(inode)->alloc_size);
- up(&OCFS_I(inode)->ip_io_sem);
+ up_write(&OCFS_I(inode)->ip_io_sem);
/* call the kernels journal init function now */
k_journal = journal_init_inode(inode);
@@ -1047,12 +1048,12 @@
OCFS_I(inode)->open_hndl_cnt--;
- down(&OCFS_I(inode)->ip_io_sem);
+ down_write(&OCFS_I(inode)->ip_io_sem);
/* unlock our journal */
status = ocfs_release_lock (osb, OCFS_DLM_EXCLUSIVE_LOCK,
FLAG_FILE_CREATE,
journal->lockbh, inode);
- up(&OCFS_I(inode)->ip_io_sem);
+ up_write(&OCFS_I(inode)->ip_io_sem);
if (status < 0)
LOG_ERROR_STATUS (status);
@@ -1161,7 +1162,7 @@
LOG_TRACE_ARGS("Force reading %u blocks\n", totalblks);
status = ocfs_lookup_file_allocation(osb, vbo, &lbo, size, NULL,
- inode);
+ inode, 1);
if (status < 0) {
LOG_ERROR_STATUS(status);
goto bail;
@@ -1335,7 +1336,7 @@
goto done;
}
- down(&OCFS_I(inode)->ip_io_sem);
+ down_write(&OCFS_I(inode)->ip_io_sem);
SET_INODE_JOURNAL(inode);
@@ -1348,7 +1349,7 @@
FLAG_FILE_CREATE|FLAG_FILE_RECOVERY,
&bh, inode);
- up(&OCFS_I(inode)->ip_io_sem);
+ up_write(&OCFS_I(inode)->ip_io_sem);
if (status < 0) {
LOG_TRACE_ARGS("status returned from acquire_lock=%d\n",
status);
@@ -1377,12 +1378,12 @@
OCFS_I(inode)->alloc_size = alloc_size;
/* add this node to openmap and update disk lock */
- down(&OCFS_I(inode)->ip_io_sem);
+ down_write(&OCFS_I(inode)->ip_io_sem);
status = ocfs_update_disk_lock (osb,
DLOCK_FLAG_OPEN_MAP|DLOCK_FLAG_ADD_SELF,
&bh, inode, NULL);
- up(&OCFS_I(inode)->ip_io_sem);
+ up_write(&OCFS_I(inode)->ip_io_sem);
if (status < 0) {
LOG_ERROR_STATUS(status);
goto done;
@@ -1449,12 +1450,12 @@
/* drop the lock on this nodes journal */
if (got_lock) {
- down(&OCFS_I(inode)->ip_io_sem);
+ down_write(&OCFS_I(inode)->ip_io_sem);
status = ocfs_release_lock(osb, OCFS_DLM_EXCLUSIVE_LOCK,
FLAG_FILE_CREATE|FLAG_FILE_RECOVERY,
bh, inode);
- up(&OCFS_I(inode)->ip_io_sem);
+ up_write(&OCFS_I(inode)->ip_io_sem);
}
if (inode)
iput(inode);
Modified: trunk/src/lockres.c
===================================================================
--- trunk/src/lockres.c 2004-06-15 22:29:45 UTC (rev 1105)
+++ trunk/src/lockres.c 2004-06-15 22:55:33 UTC (rev 1106)
@@ -119,10 +119,6 @@
return status;
} /* ocfs_find_update_res */
-
-#define ocfs_container_of(ptr, type, member) ({ \
- const typeof( ((type *)0)->member ) *__mptr = (ptr); \
- (type *)( (char *)__mptr - offsetof(type,member) );})
/*
* ocfs_acquire_lockres()
*
@@ -131,68 +127,33 @@
*/
int ocfs_acquire_lockres (ocfs_lock_res * lockres, __u32 timeout)
{
- int mypid;
- unsigned long jif = 0;
- int status = 0;
- int cnt = 0;
- struct inode *inode;
- ocfs_inode_private *ip;
+ unsigned long jif;
+ int status;
LOG_ENTRY_ARGS ("(0x%p, %u)\n", lockres, timeout);
OCFS_ASSERT(lockres);
- mypid = ocfs_getpid ();
+ if (!timeout) {
+ down(&lockres->lock_mutex);
+ status = 0;
+ goto bail;
+ }
- if (timeout)
- jif = jiffies + (timeout * HZ / 1000);
+ jif = jiffies + (timeout * HZ / 1000);
- while (1) {
- spin_lock (&lockres->lock_mutex);
+ while(1) {
+ if (!down_trylock(&lockres->lock_mutex)) {
+ status = 0;
+ break;
+ }
- if (lockres->in_use) {
- if (lockres->thread_id != mypid) {
- spin_unlock (&lockres->lock_mutex);
- LOG_ERROR_ARGS ("lockpid=%d, newpid=%d,"
- " timedout\n",
- lockres->thread_id, mypid);
- BUG();
-
- if (jif && jif < jiffies) {
- LOG_TRACE_ARGS ("lockpid=%d, newpid=%d,"
- " timedout\n",
- lockres->thread_id, mypid);
- status = -ETIMEDOUT;
- goto bail;
- }
-
- if (++cnt == 10) {
- LOG_TRACE_ARGS ("lockpid=%d, newpid=%d\n",
- lockres->thread_id, mypid);
- cnt = 0;
- }
- ocfs_sleep (OCFS_NM_HEARTBEAT_TIME / 10);
- }
- else {
- printk("lockres in_use=%d, pid=%d, mypid=%d\n", lockres->in_use, lockres->thread_id, mypid);
- BUG();
- lockres->in_use++;
- spin_unlock (&lockres->lock_mutex);
- break;
- }
- } else {
- lockres->in_use = 1;
- lockres->thread_id = mypid;
- spin_unlock (&lockres->lock_mutex);
- ip = ocfs_container_of(lockres, ocfs_inode_private, i_lockres);
- inode = ip->inode;
- if (down_trylock(&OCFS_I(inode)->ip_io_sem) == 0) {
- LOG_ERROR_ARGS("locking lockres without io_sem! ino = %lu, offset = %llu\n", inode->i_ino, OCFS_I(inode)->feoff);
-
- BUG();
- }
+ if (jif < jiffies) {
+ status = -ETIMEDOUT;
break;
}
+
+ ocfs_sleep (OCFS_NM_HEARTBEAT_TIME / 10);
}
bail:
@@ -210,20 +171,7 @@
OCFS_ASSERT(lockres);
- spin_lock (&lockres->lock_mutex);
- if (lockres->in_use == 0) {
- LOG_ERROR_ARGS("Releasing lockres with inuse 0: 0x%p\n", lockres);
- BUG();
- } else {
- if (lockres->thread_id != current->pid)
- LOG_ERROR_ARGS("PID %d is trying to release lockres held by PID %d\n",
- current->pid, lockres->thread_id);
- lockres->in_use--;
- if (lockres->in_use == 0) {
- lockres->thread_id = 0;
- }
- }
- spin_unlock (&lockres->lock_mutex);
+ up(&lockres->lock_mutex);
LOG_EXIT ();
return;
@@ -244,10 +192,9 @@
lockres->master_node_num = OCFS_INVALID_NODE_NUM;
lockres->last_upd_seq_num = 0;
lockres->oin_openmap = 0;
- lockres->in_use = 0;
lockres->lock_state = 0;
- spin_lock_init (&lockres->lock_mutex);
+ init_MUTEX(&lockres->lock_mutex);
atomic_set (&lockres->lr_ref_cnt, 0);
lockres->readonly_map = 0ULL;
Modified: trunk/src/namei.c
===================================================================
--- trunk/src/namei.c 2004-06-15 22:29:45 UTC (rev 1105)
+++ trunk/src/namei.c 2004-06-15 22:55:33 UTC (rev 1106)
@@ -32,8 +32,6 @@
#define OCFS_DEBUG_CONTEXT OCFS_DEBUG_CONTEXT_NAMEI
-extern spinlock_t oin_num_ext_lock;
-
static int inline search_dirblock(struct buffer_head * bh, struct inode *dir,
const char *name, int namelen,
unsigned long offset,
@@ -65,7 +63,7 @@
struct inode *inode, ocfs_file_entry *fe);
static struct dentry_operations ocfs_dentry_ops = {
- .d_revalidate = ocfs_dentry_revalidate // let's test it out!
+ .d_revalidate = ocfs_dentry_revalidate
};
static inline int ocfs_add_entry(ocfs_journal_handle *handle,
@@ -110,11 +108,11 @@
LOG_TRACE_ARGS("about to call find_files_on_disk with inode=%p\n",
dir);
- down(&OCFS_I(dir)->ip_io_sem);
+ down_read(&OCFS_I(dir)->ip_io_sem);
status = ocfs_find_files_on_disk(osb, dentry->d_name.name,
dentry->d_name.len, &fe_off, dir, 1,
&dirent_bh, &dirent);
- up(&OCFS_I(dir)->ip_io_sem);
+ up_read(&OCFS_I(dir)->ip_io_sem);
if (status < 0)
goto bail_add;
@@ -153,7 +151,7 @@
LOG_ENTRY_ARGS ("(0x%p, 0x%p, %d, %d, '%*s')\n", dir, dentry, mode,
dev, dentry->d_name.len, dentry->d_name.name);
- down(&OCFS_I(dir)->ip_io_sem);
+ down_write(&OCFS_I(dir)->ip_io_sem);
/* get our super block */
osb = OCFS_SB(dir->i_sb);
@@ -287,7 +285,7 @@
if ((status < 0) && handle)
ocfs_abort_trans(handle);
- up(&OCFS_I(dir)->ip_io_sem);
+ up_write(&OCFS_I(dir)->ip_io_sem);
if (status == -ENOSPC)
LOG_TRACE_STR ("Disk is full");
@@ -555,22 +553,9 @@
}
status = -EFAIL;
- spin_lock(&oin_num_ext_lock);
- if (OCFS_I(inode)->num_extends) {
- LOG_ERROR_ARGS ("Cannot remove a file with = "
- "%u, pending extends (fe_off "
- "= %llu)\n",
- OCFS_I(inode)->num_extends,
- fe_off);
- spin_unlock(&oin_num_ext_lock);
- status = -EBUSY;
- goto bail;
- }
- spin_unlock(&oin_num_ext_lock);
+ down_write(&OCFS_I(dir)->ip_io_sem);
+ down_write(&OCFS_I(inode)->ip_io_sem);
- down(&OCFS_I(dir)->ip_io_sem);
- down(&OCFS_I(inode)->ip_io_sem);
-
handle = ocfs_start_trans(osb, OCFS_FILE_DELETE_CREDITS);
if (handle == NULL) {
LOG_ERROR_STATUS (status = -ENOMEM);
@@ -725,8 +710,8 @@
}
- up(&OCFS_I(inode)->ip_io_sem);
- up(&OCFS_I(dir)->ip_io_sem);
+ up_write(&OCFS_I(inode)->ip_io_sem);
+ up_write(&OCFS_I(dir)->ip_io_sem);
bail:
if (status < 0 && status != -ENOTEMPTY &&
status != -EPERM && status != -EBUSY && status != -EINTR) {
@@ -850,28 +835,27 @@
return(status);
} /* ocfs_double_lock */
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
-static inline void double_down(struct semaphore *s1, struct semaphore *s2)
+static inline void double_down_write(struct rw_semaphore *s1,
+ struct rw_semaphore *s2)
{
if (s1 != s2) {
if ((unsigned long) s1 < (unsigned long) s2) {
- struct semaphore *tmp = s2;
+ struct rw_semaphore *tmp = s2;
s2 = s1; s1 = tmp;
}
- down(s1);
+ down_write(s1);
}
- down(s2);
+ down_write(s2);
}
-static inline void double_up(struct semaphore *s1, struct semaphore *s2)
+static inline void double_up_write(struct rw_semaphore *s1,
+ struct rw_semaphore *s2)
{
- up(s1);
+ up_write(s1);
if (s1 != s2)
- up(s2);
+ up_write(s2);
}
-#endif
-
#define PARENT_INO(buffer) \
((struct ocfs2_dir_entry *) ((char *) buffer + \
le16_to_cpu(((struct ocfs2_dir_entry *) buffer)->rec_len)))->inode
@@ -915,13 +899,14 @@
oldfe_lockid = GET_INODE_FEOFF(old_inode);
- double_down(&OCFS_I(old_dir)->ip_io_sem, &OCFS_I(new_dir)->ip_io_sem);
- down(&OCFS_I(old_inode)->ip_io_sem);
+ double_down_write(&OCFS_I(old_dir)->ip_io_sem,
+ &OCFS_I(new_dir)->ip_io_sem);
+ down_write(&OCFS_I(old_inode)->ip_io_sem);
if (new_inode) {
if (ocfs_inc_icount(new_inode) < 0)
BUG();
- down(&OCFS_I(new_inode)->ip_io_sem);
+ down_write(&OCFS_I(new_inode)->ip_io_sem);
}
if (atomic_read (&old_dentry->d_count) > 2) {
@@ -943,13 +928,6 @@
status = -EBUSY;
goto bail;
}
- spin_lock(&oin_num_ext_lock);
- if (OCFS_I(old_inode)->num_extends) {
- spin_unlock(&oin_num_ext_lock);
- status = -EBUSY;
- goto bail;
- }
- spin_unlock(&oin_num_ext_lock);
/* start our transaction */
handle = ocfs_start_trans(osb, OCFS_FILE_RENAME_CREDITS);
@@ -1220,11 +1198,12 @@
newfe_flags, NULL, new_inode);
}
- double_up(&OCFS_I(old_dir)->ip_io_sem, &OCFS_I(new_dir)->ip_io_sem);
- up(&OCFS_I(old_inode)->ip_io_sem);
+ double_up_write(&OCFS_I(old_dir)->ip_io_sem,
+ &OCFS_I(new_dir)->ip_io_sem);
+ up_write(&OCFS_I(old_inode)->ip_io_sem);
if (new_inode) {
- up(&OCFS_I(new_inode)->ip_io_sem);
+ up_write(&OCFS_I(new_inode)->ip_io_sem);
iput(new_inode);
}
@@ -1283,7 +1262,7 @@
sb = dir->i_sb;
osb = OCFS_SB(sb);
- down(&OCFS_I(dir)->ip_io_sem);
+ down_write(&OCFS_I(dir)->ip_io_sem);
inode = new_inode (sb);
if (IS_ERR (inode)) {
@@ -1299,7 +1278,7 @@
inode = NULL;
goto bail;
}
- down(&OCFS_I(inode)->ip_io_sem);
+ down_write(&OCFS_I(inode)->ip_io_sem);
l = strlen (symname) + 1;
newsize = l - 1;
@@ -1391,8 +1370,8 @@
bail:
if (inode)
- up(&OCFS_I(inode)->ip_io_sem);
- up(&OCFS_I(dir)->ip_io_sem);
+ up_write(&OCFS_I(inode)->ip_io_sem);
+ up_write(&OCFS_I(dir)->ip_io_sem);
if (new_fe_bh) {
if (fe)
Modified: trunk/src/nm.c
===================================================================
--- trunk/src/nm.c 2004-06-15 22:29:45 UTC (rev 1105)
+++ trunk/src/nm.c 2004-06-15 22:55:33 UTC (rev 1106)
@@ -51,6 +51,10 @@
int yield;
} ocfs_ro_cache_drop_ctxt;
+static void ocfs_mark_inode_for_extend(ocfs_super *osb, struct inode *inode,
+ __u32 node_num);
+static void ocfs_clear_inode_for_extend(ocfs_super *osb, struct inode *inode,
+ __u32 node_num);
void ocfs_process_vote_worker(void *val);
@@ -576,7 +580,112 @@
return vote_type;
}
+static void ocfs_mark_inode_for_extend(ocfs_super *osb, struct inode *inode,
+ __u32 node_num)
+{
+ down(&OCFS_I(inode)->priv_sem);
+
+ /* if we don't have it open, then don't bother with this. */
+ if (!OCFS_I(inode)->open_hndl_cnt) {
+ up(&OCFS_I(inode)->priv_sem);
+ return;
+ }
+
+ spin_lock(&oin_num_ext_lock);
+
+ if (OCFS_I(inode)->num_extends < 0)
+ BUG();
+
+ /* this isn't the 1st extend against the inode, so just inc
+ * the counter. */
+ if (OCFS_I(inode)->num_extends > 0) {
+ OCFS_I(inode)->num_extends++;
+ spin_unlock(&oin_num_ext_lock);
+ up(&OCFS_I(inode)->priv_sem);
+ return;
+ }
+
+ /* ok, we're going to have to take the extend sem. We can't do
+ * this holding priv_sem so we drop it and recheck after we've
+ * got it. */
+ spin_unlock(&oin_num_ext_lock);
+ up(&OCFS_I(inode)->priv_sem);
+
+ /* take the extend_sem on behalf of
+ * this other node. It won't be
+ * released until he does his last
+ * release broadcast. This has the
+ * effect of locking out
+ * lookup_file_allocation on this
+ * inode. */
+ down_write(&OCFS_I(inode)->inode_extend_sem);
+
+ down(&OCFS_I(inode)->priv_sem);
+ if (!OCFS_I(inode)->open_hndl_cnt) {
+ up_write(&OCFS_I(inode)->inode_extend_sem);
+ up(&OCFS_I(inode)->priv_sem);
+ return;
+ }
+
+ atomic_inc(&inode->i_count);
+
+ /* Ok, we've still got it open. Put this guy on the recovery
+ * list in case the extending node dies. */
+ down(&recovery_list_sem);
+ spin_lock(&oin_num_ext_lock);
+ OCFS_I(inode)->num_extends++;
+ list_add_tail(&OCFS_I(inode)->recovery_list,
+ &osb->lock_recovery_lists[node_num]);
+ spin_unlock(&oin_num_ext_lock);
+ up(&recovery_list_sem);
+
+ up(&OCFS_I(inode)->priv_sem);
+ return;
+}
+
+static void ocfs_clear_inode_for_extend(ocfs_super *osb, struct inode *inode,
+ __u32 node_num)
+{
+ int dec = 0;
+
+ down(&OCFS_I(inode)->priv_sem);
+
+ /* if we no longer have it open, then the close path has dealt
+ * with this. */
+ if (!OCFS_I(inode)->open_hndl_cnt)
+ goto done;
+
+ down(&recovery_list_sem);
+ spin_lock(&oin_num_ext_lock);
+
+ OCFS_I(inode)->num_extends--;
+
+ if (OCFS_I(inode)->num_extends < 0)
+ BUG();
+
+ if (!OCFS_I(inode)->num_extends) {
+ list_del(&OCFS_I(inode)->recovery_list);
+ INIT_LIST_HEAD(&OCFS_I(inode)->recovery_list);
+
+ up_write(&OCFS_I(inode)->inode_extend_sem);
+
+ dec = 1;
+ }
+
+ spin_unlock(&oin_num_ext_lock);
+ up(&recovery_list_sem);
+
+done:
+ up(&OCFS_I(inode)->priv_sem);
+
+ /* we want iputs to happen outside of as many locks as possible. */
+ if (dec)
+ iput(inode);
+
+ return;
+}
+
/* Search the journals committed transactions list for a given
* inode. If it's in there, return true, zero otherwise and -1 on
* error. Must hold the journal->commit_sem before going here! */
@@ -703,7 +812,7 @@
* Please see the note in ocfs_delete_inode. */
osb->voting_ino = inode->i_ino;
- down(&OCFS_I(inode)->ip_io_sem);
+ down_write(&OCFS_I(inode)->ip_io_sem);
have_io_sem = 1;
lockres = GET_INODE_LOCKRES(inode);
@@ -737,15 +846,17 @@
printk("Invalid request! flags = 0x%x\n", flags);
#endif
- /* get_process_vote_action will only allow CHANGE_MASTER, RELEASE_CACHE, and
- * ADD_OIN_MAP on a CACHE lock held by this node. the CHANGE_MASTER/RELEASE_CACHE
- * path needs to check the readonly map to see if any nodes need to be updated. this
- * is not necessary for the ADD_OIN_MAP path since it cannot actually modify any
- * data or metadata under the lock.
+ /* get_process_vote_action will only allow CHANGE_MASTER,
+ * RELEASE_CACHE, and ADD_OIN_MAP on a CACHE lock held by this
+ * node. the CHANGE_MASTER/RELEASE_CACHE path needs to check
+ * the readonly map to see if any nodes need to be updated.
+ * this is not necessary for the ADD_OIN_MAP path since it
+ * cannot actually modify any data or metadata under the lock.
*/
if (disk_vote) {
- /* Zero out the vote for everybody, if any already set and hung */
+ /* Zero out the vote for everybody, if any already set
+ * and hung */
vote = OCFS_BH_GET_DATA_WRITE(vote_bh);
for (i = 0; i < num_nodes; i++)
vote->vote[i] = 0;
@@ -1129,6 +1240,9 @@
break;
}
+ up_write(&OCFS_I(inode)->ip_io_sem);
+ have_io_sem = 0;
+
if (inode && (flags & (FLAG_FILE_EXTEND|FLAG_FILE_TRUNCATE)) &&
((flags & FLAG_ACQUIRE_LOCK && vote_response==FLAG_VOTE_NODE) ||
(flags & FLAG_RELEASE_LOCK))) {
@@ -1137,57 +1251,14 @@
"extend" : "truncate", flags & FLAG_RELEASE_LOCK ?
"release" : "acquire", inode, node_num);
- down(&OCFS_I(inode)->priv_sem);
- if (OCFS_I(inode)->open_hndl_cnt
- && (flags & FLAG_ACQUIRE_LOCK)) {
- spin_lock(&oin_num_ext_lock);
-
- if (OCFS_I(inode)->num_extends < 0)
- BUG();
-
- if (OCFS_I(inode)->num_extends > 0) {
- OCFS_I(inode)->num_extends++;
- spin_unlock(&oin_num_ext_lock);
- } else {
- spin_unlock(&oin_num_ext_lock);
-
- /* take the extend_sem on behalf of
- * this other node. It won't be
- * released until he does his last
- * release broadcast. This has the
- * effect of locking out
- * lookup_file_allocation on this
- * inode. */
- down(&OCFS_I(inode)->inode_extend_sem);
-
- down(&recovery_list_sem);
- spin_lock(&oin_num_ext_lock);
- OCFS_I(inode)->num_extends++;
- list_add_tail(&OCFS_I(inode)->recovery_list, &osb->lock_recovery_lists[node_num]);
- spin_unlock(&oin_num_ext_lock);
- up(&recovery_list_sem);
- }
- } else if (OCFS_I(inode)->open_hndl_cnt
- && (flags & FLAG_RELEASE_LOCK)) {
- down(&recovery_list_sem);
- spin_lock(&oin_num_ext_lock);
-
- OCFS_I(inode)->num_extends--;
-
- if (OCFS_I(inode)->num_extends < 0)
- BUG();
-
- if (!OCFS_I(inode)->num_extends) {
- list_del(&OCFS_I(inode)->recovery_list);
- INIT_LIST_HEAD(&OCFS_I(inode)->recovery_list);
-
- up(&OCFS_I(inode)->inode_extend_sem);
- }
- spin_unlock(&oin_num_ext_lock);
-
- up(&recovery_list_sem);
+ if (flags & FLAG_ACQUIRE_LOCK)
+ ocfs_mark_inode_for_extend(osb, inode, node_num);
+ else if (flags & FLAG_RELEASE_LOCK)
+ ocfs_clear_inode_for_extend(osb, inode, node_num);
+ else {
+ printk("uhoh, bad vote flags! 0x%x\n", flags);
+ BUG();
}
- up(&OCFS_I(inode)->priv_sem);
}
if (disk_vote) {
@@ -1223,16 +1294,15 @@
}
}
- if (lockres) {
+ if (lockres)
ocfs_release_lockres (lockres); // ocfs_process_vote
- }
leave:
if (inode) {
if (inc_inode_seq)
ocfs_inc_inode_seq(osb, inode, 1);
if (have_io_sem)
- up(&OCFS_I(inode)->ip_io_sem);
+ up_write(&OCFS_I(inode)->ip_io_sem);
}
if (inode)
@@ -1290,6 +1360,7 @@
LOG_ENTRY_ARGS("(node_num = %u)\n", node_num);
+start:
down(&recovery_list_sem);
list_for_each_safe (iter, temp, &osb->lock_recovery_lists[node_num]) {
i = list_entry (iter, ocfs_inode_private, recovery_list);
@@ -1301,7 +1372,12 @@
OCFS_I(inode)->num_extends = 0;
list_del(&OCFS_I(inode)->recovery_list);
INIT_LIST_HEAD(&OCFS_I(inode)->recovery_list);
- up(&OCFS_I(inode)->inode_extend_sem);
+ up_write(&OCFS_I(inode)->inode_extend_sem);
+
+ spin_unlock(&oin_num_ext_lock);
+ up (&recovery_list_sem);
+ iput(inode);
+ goto start;
} else
LOG_ERROR_STR("oin is in recovery list, but has zero extend counter value!");
@@ -1362,7 +1438,7 @@
if (yield) {
/* this will wait until process_vote gets to the release */
- down(&OCFS_I(inode)->ip_io_sem);
+ down_write(&OCFS_I(inode)->ip_io_sem);
ocfs_acquire_lockres(lockres, 0); // ocfs_process_vote ocfs_acquire_lock
}
@@ -1397,9 +1473,9 @@
if (yield) {
/* from nm thread, give some time to waiters */
ocfs_release_lockres(lockres); // ocfs_process_vote ocfs_acquire_lock
- up(&OCFS_I(inode)->ip_io_sem);
+ up_write(&OCFS_I(inode)->ip_io_sem);
- down(&OCFS_I(inode)->ip_io_sem);
+ down_write(&OCFS_I(inode)->ip_io_sem);
ocfs_acquire_lockres(lockres, 0); // ocfs_process_vote ocfs_acquire_lock
}
continue;
@@ -1418,7 +1494,7 @@
leave:
if (yield) {
ocfs_release_lockres(lockres); // ocfs_process_vote ocfs_acquire_lock
- up(&OCFS_I(inode)->ip_io_sem);
+ up_write(&OCFS_I(inode)->ip_io_sem);
}
if (inode)
More information about the Ocfs2-commits
mailing list