[Ocfs2-commits]
jlbec commits r1110 - in branches/format-changes: . src
svn-commits at oss.oracle.com
svn-commits at oss.oracle.com
Wed Jun 16 15:08:42 CDT 2004
Author: jlbec
Date: 2004-06-16 14:08:40 -0500 (Wed, 16 Jun 2004)
New Revision: 1110
Modified:
branches/format-changes/TODO
branches/format-changes/src/alloc.c
branches/format-changes/src/alloc.h
branches/format-changes/src/dlm.c
branches/format-changes/src/file.c
branches/format-changes/src/inode.c
branches/format-changes/src/journal.c
branches/format-changes/src/lockres.c
branches/format-changes/src/namei.c
branches/format-changes/src/nm.c
branches/format-changes/src/ocfs.h
branches/format-changes/src/super.c
branches/format-changes/src/sysfile.c
Log:
o Merge 1064:1106 from trunk:
- [1105] Communicate lock release for extend/trunc to other
nodes.
- [1106] Turn ip_io_sem into an rwsem.
- [1106] Turn inode_extend_sem into an rwsem.
- [1106] Remove lockres recursive lock (no recursive locks!).
- [1106] Hold i_count on inode when on the recovery list.
Modified: branches/format-changes/TODO
===================================================================
--- branches/format-changes/TODO 2004-06-16 00:06:38 UTC (rev 1109)
+++ branches/format-changes/TODO 2004-06-16 19:08:40 UTC (rev 1110)
@@ -14,16 +14,9 @@
* Make bitmap free functions do their job without relocking the bitmaps for
each record.
-* get rid of as much of lockres as possible.
-
* make slabs for: ocfs_journal_handle, and ocfs_journal_copyout and maybe
ocfs_journal_lock
-* when we put the inode on the recovery list we should inc i_count, and just
- be sure to iput it when we remove it off that list, and clean up the lists
- during shutdown (before we start doing all our iputs otherwise we'll leak
- those inodes)
-
* fops, iops on bitmap file (and maybe other system files) should probably be
different and / or special cased. In a related todo: Get rid of the
INODE_JOURNAL flag on our inodes and use the system file flag instead.
Modified: branches/format-changes/src/alloc.c
===================================================================
--- branches/format-changes/src/alloc.c 2004-06-16 00:06:38 UTC (rev 1109)
+++ branches/format-changes/src/alloc.c 2004-06-16 19:08:40 UTC (rev 1110)
@@ -250,7 +250,6 @@
struct inode **extnode_inode = NULL;
struct inode *vol_inode = NULL;
__u32 tmp_indx;
- __u64 lock_id;
struct buffer_head *globalbh = NULL;
ocfs_journal_handle *handle = NULL;
int credits = 33; /* one for each potential sysfile fe. This
@@ -358,8 +357,7 @@
LOG_ERROR_STATUS (status);
goto abort;
}
- lock_id = GET_INODE_FEOFF(extnode_inode[i]);
- down(&OCFS_I(extnode_inode[i])->ip_io_sem);
+ down_write(&OCFS_I(extnode_inode[i])->ip_io_sem);
status = ocfs_acquire_lock (osb,
OCFS_DLM_EXCLUSIVE_LOCK,
@@ -367,7 +365,7 @@
&ugly_hack_bh,
extnode_inode[i]);
if (status < 0) {
- up(&OCFS_I(extnode_inode[i])->ip_io_sem);
+ up_write(&OCFS_I(extnode_inode[i])->ip_io_sem);
iput(extnode_inode[i]);
extnode_inode[i] = NULL;
if (status != -EINTR)
@@ -388,13 +386,13 @@
LOG_ERROR_STATUS (status);
goto abort;
}
- down(&OCFS_I(vol_inode)->ip_io_sem);
+ down_write(&OCFS_I(vol_inode)->ip_io_sem);
status = ocfs_acquire_lock(osb, OCFS_DLM_EXCLUSIVE_LOCK,
FLAG_FILE_CREATE,
&globalbh, vol_inode);
if (status < 0) {
- up(&OCFS_I(vol_inode)->ip_io_sem);
+ up_write(&OCFS_I(vol_inode)->ip_io_sem);
iput(vol_inode);
vol_inode = NULL;
@@ -408,15 +406,16 @@
/* free vol block */
if (free_vol_bits != NULL)
- ocfs_free_vol_block (osb, handle, free_vol_bits, -1,
- DISK_ALLOC_VOLUME);
+ ocfs_free_vol_block(osb, handle, free_vol_bits, -1,
+ DISK_ALLOC_VOLUME);
/* We can potentiallly loose some allocation for dirNodes or extent */
/* nodes but they should not be much... */
for (i = 0; i < osb->max_nodes; i++) {
if (free_ext_node[i] != NULL)
- ocfs_free_vol_block (osb, handle, free_ext_node[i], i,
- DISK_ALLOC_EXTENT_NODE);
+ ocfs_free_vol_block(osb, handle,
+ free_ext_node[i], i,
+ DISK_ALLOC_EXTENT_NODE);
}
/* release all locks */
@@ -479,7 +478,7 @@
if (extnode_inode) {
for (i = 0; i < osb->max_nodes; i++) {
if (extnode_inode[i]) {
- up(&OCFS_I(extnode_inode[i])->ip_io_sem);
+ up_write(&OCFS_I(extnode_inode[i])->ip_io_sem);
iput(extnode_inode[i]);
}
}
@@ -487,7 +486,7 @@
}
if (vol_inode) {
- up(&OCFS_I(vol_inode)->ip_io_sem);
+ up_write(&OCFS_I(vol_inode)->ip_io_sem);
iput(vol_inode);
}
@@ -547,7 +546,10 @@
* ocfs_free_vol_block()
*
*/
-static int ocfs_free_vol_block (ocfs_super * osb, ocfs_journal_handle *handle, ocfs_free_rec * FreeLog, __u32 NodeNum, __u32 Type)
+static int ocfs_free_vol_block(ocfs_super *osb,
+ ocfs_journal_handle *handle,
+ ocfs_free_rec *FreeLog, __u32 NodeNum,
+ __u32 Type)
{
int status = 0;
__u64 fileSize = 0;
@@ -611,7 +613,8 @@
bitmapblocks = ocfs_blocks_for_bits(osb->sb,
tmpbitmap->validbits);
- status = ocfs_read_system_file(osb, file_type, NodeNum, AllocBitmap.chunk,
+ status = ocfs_read_system_file(osb, file_type, NodeNum,
+ AllocBitmap.chunk,
bitmapblocks << osb->sb->s_blocksize_bits);
if (status < 0) {
LOG_ERROR_STATUS (status);
@@ -2611,8 +2614,9 @@
*
*/
int ocfs_lookup_file_allocation(ocfs_super *osb, __s64 Vbo,
- __s64 *Lbo, __u32 sectors,
- __s64 *contig_bytes, struct inode *inode)
+ __s64 *Lbo, __u32 blocks,
+ __s64 *contig_bytes,
+ struct inode *inode, int locked)
{
int status = -EFAIL;
ocfs2_dinode *fe = NULL;
@@ -2624,31 +2628,78 @@
__s64 localVbo;
__u64 cnt;
__u32 NumIndex;
+ int have_io_sem = 0;
- LOG_ENTRY_ARGS("(vbo=%llu, sectors=%u, inode=%llu)\n", Vbo, sectors, GET_INODE_FEOFF(inode));
+ LOG_ENTRY_ARGS("(vbo=%llu, blocks=%u, inode=%llu)\n", Vbo, blocks,
+ GET_INODE_FEOFF(inode));
OCFS_ASSERT (osb);
OCFS_ASSERT (inode);
- if (INODE_JOURNAL(inode) || Vbo < OCFS_I(inode)->alloc_size) {
- if (ocfs_lookup_extent_map_entry(osb,
- &(OCFS_I(inode)->map),
- Vbo, Lbo, &cnt,
- &NumIndex) &&
- (cnt >= (__u64)sectors << osb->sb->s_blocksize_bits)) {
- status = 0;
- goto success;
- }
+ /* for direct io we want to skip all locking. If you're a
+ * system file, ip_io_sem should already have been taken
+ * before coming here. */
+ if (((OCFS_I(inode)->oin_flags & OCFS_OIN_OPEN_FOR_DIRECTIO)
+ || OCFS_I(inode)->flags & OCFS_INODE_SYSTEM_FILE)
+ && (!locked))
+ printk("ocfs2: inode %lu, locked = %d, open direct = %u, "
+ "sysfile = %u\n", inode->i_ino, locked,
+ (OCFS_I(inode)->oin_flags & OCFS_OIN_OPEN_FOR_DIRECTIO),
+ (OCFS_I(inode)->flags & OCFS_INODE_SYSTEM_FILE));
+
+ if (!locked)
+ down(&(OCFS_I(inode)->priv_sem));
+
+check_alloc_sz:
+ status = 0;
+
+ if (Vbo < OCFS_I(inode)->alloc_size)
+ status = ocfs_lookup_extent_map_entry(osb,
+ &(OCFS_I(inode)->map),
+ Vbo, Lbo, &cnt,
+ &NumIndex);
+
+ if (!locked)
+ up(&(OCFS_I(inode)->priv_sem));
+
+ if (status &&
+ (cnt >= (u64)blocks << osb->sb->s_blocksize_bits)) {
+ /* Found what we were looking for. */
+ status = 0;
+ goto success;
}
- remainingLength = sectors << osb->sb->s_blocksize_bits;
+ /* Ok, we didn't find it in the extent map (or we need to
+ * refresh as alloc sizes don't match up). */
+
+ if (!locked) {
+ /* yay for lock ordering. We must take ip_io_sem
+ * before priv_sem. */
+ down_read(&OCFS_I(inode)->ip_io_sem);
+ down(&OCFS_I(inode)->priv_sem);
+ }
+
+ /* Make sure we still need to hit disk. */
+ if (Vbo >= OCFS_I(inode)->alloc_size) {
+ if (!locked)
+ up_read(&OCFS_I(inode)->ip_io_sem);
+ goto check_alloc_sz;
+ }
+ up(&OCFS_I(inode)->priv_sem);
+
+ if (!locked)
+ have_io_sem = 1;
+
+ remainingLength = blocks << osb->sb->s_blocksize_bits;
localVbo = Vbo;
-
- /* We are looking for a Vbo, but it is not in the Map or not Valid. */
- /* Thus we have to go to the disk, and update the Map */
+ /*
+ * We are looking for a Vbo, but it is not in the Map or not
+ * Valid. Thus we have to go to the disk, and update the Map
+ */
+
/* Read the file Entry corresponding to this */
- status = ocfs_read_bh(osb, GET_INODE_FEOFF(inode), &fe_bh,
+ status = ocfs_read_bh(osb, GET_INODE_FEOFF(inode), &fe_bh,
OCFS_BH_COND_CACHED, inode);
if (status < 0) {
LOG_ERROR_STATUS (status);
@@ -2662,8 +2713,7 @@
goto finally;
}
- if (!INODE_JOURNAL(inode) &&
- (Vbo >= (__s64)((u64)fe->i_clusters << osb->s_clustersize_bits))) {
+ if (Vbo >= (__s64)((u64)fe->i_clusters << osb->s_clustersize_bits)) {
LOG_ERROR_ARGS ("vbo=%llu, fe->i_clusters=%u alloc_size=%llu",
Vbo, fe->i_clusters,
OCFS_I(inode)->alloc_size);
@@ -2672,9 +2722,13 @@
}
if (fe->id2.i_list.l_tree_depth < 0) {
+ if (!locked)
+ down(&(OCFS_I(inode)->priv_sem));
status = ocfs_update_extent_map(osb,
&OCFS_I(inode)->map, fe,
NULL, NULL, LOCAL_EXT);
+ if (!locked)
+ up(&(OCFS_I(inode)->priv_sem));
if (status < 0) {
LOG_ERROR_STATUS (status);
goto finally;
@@ -2692,11 +2746,15 @@
eb = OCFS_BH_GET_DATA_READ(eb_bh);
while (1) {
+ if (!locked)
+ down(&(OCFS_I(inode)->priv_sem));
status = ocfs_update_extent_map(osb,
&OCFS_I(inode)->map,
eb, &localVbo,
&remainingLength,
NONLOCAL_EXT);
+ if (!locked)
+ up(&(OCFS_I(inode)->priv_sem));
if (status < 0) {
LOG_ERROR_STATUS(status);
goto finally;
@@ -2724,7 +2782,8 @@
status = ocfs_read_bh(osb,
next_leaf << osb->sb->s_blocksize_bits,
&eb_bh,
- OCFS_BH_COND_CACHED, inode);
+ OCFS_BH_COND_CACHED,
+ inode);
if (status < 0) {
LOG_ERROR_STATUS(status);
goto finally;
@@ -2741,13 +2800,18 @@
}
}
- if (ocfs_lookup_extent_map_entry (osb, &(OCFS_I(inode)->map), Vbo, Lbo, &cnt, &NumIndex) &&
- cnt >= sectors) {
+ if (!locked)
+ down(&(OCFS_I(inode)->priv_sem));
+ if (ocfs_lookup_extent_map_entry(osb, &(OCFS_I(inode)->map),
+ Vbo, Lbo, &cnt, &NumIndex) &&
+ (cnt >= (u64)blocks << osb->sb->s_blocksize_bits)) {
status = 0;
} else {
status = -EFAIL;
printk("failed to lookup extmap... vbo=%llu mapcount=%d\n", Vbo, OCFS_I(inode)->map.count);
}
+ if (!locked)
+ up(&(OCFS_I(inode)->priv_sem));
success:
/* want to return cnt only if asked for it */
@@ -2757,6 +2821,9 @@
LOG_TRACE_ARGS("returning contig_bytes=%lld, lbo=%lld\n", cnt, *Lbo);
finally:
+ if (have_io_sem)
+ up_read(&OCFS_I(inode)->ip_io_sem);
+
if (fe_bh) {
if (fe)
OCFS_BH_PUT_DATA(fe_bh);
@@ -2772,7 +2839,6 @@
return (status);
} /* ocfs_lookup_file_allocation */
-
/* ocfs_get_leaf_extent()
* '*data_exent_bh' should be NULL.
*/
@@ -3085,7 +3151,10 @@
*
* You need to be holding node_alloc_sem!
*/
-int ocfs_alloc_node_block (ocfs_super * osb, __u64 bytes_wanted, __u64 * DiskOffset, __u64 * file_off, __u32 NodeNum, __u32 Type, ocfs_journal_handle *handle)
+int ocfs_alloc_node_block(ocfs_super *osb, __u64 bytes_wanted,
+ __u64 *DiskOffset, __u64 *file_off,
+ __u32 NodeNum, __u32 Type,
+ ocfs_journal_handle *handle)
{
int status = 0;
int startbh, numblocks;
@@ -3147,11 +3216,6 @@
/* Allocate a block of size blocksize from the relevant file/bitmap */
OCFS_ASSERT (blockSize);
- if (down_trylock(&OCFS_I(inode)->ip_io_sem) == 0) {
- LOG_TRACE_ARGS("Uhoh, asking me to allocate on an unlocked system file! (type = %u, i_ino = %lu)\n", Type, inode->i_ino);
- BUG();
- }
-
status = ocfs_acquire_lock (osb, OCFS_DLM_EXCLUSIVE_LOCK,
FLAG_FILE_CREATE, &bh, inode);
if (status < 0) {
@@ -3160,8 +3224,8 @@
}
ocfs_handle_add_lock(handle, OCFS_DLM_EXCLUSIVE_LOCK,
- FLAG_FILE_CREATE,
- bh, inode, 1);
+ FLAG_FILE_CREATE,
+ bh, inode, 1);
status = ocfs_ugly_hack(handle, bh);
if (status < 0) {
@@ -3238,9 +3302,10 @@
LOG_TRACE_ARGS("extending the alloc file to %llu\n",
newFileSize + extent);
- status = ocfs_extend_file (osb, newFileSize + extent,
- GET_INODE_FEOFF(alloc_inode),
- handle, alloc_inode, NULL, 1, NULL);
+ status = ocfs_extend_file(osb, newFileSize + extent,
+ GET_INODE_FEOFF(alloc_inode),
+ handle, alloc_inode, NULL, 1,
+ NULL);
if (status < 0) {
LOG_ERROR_STATUS (status);
goto leave;
@@ -3305,7 +3370,8 @@
foundBit * blockSize, Type, blockSize, foundBit, alloc_file);
status = ocfs_lookup_file_allocation(osb, foundBit * blockSize,
- DiskOffset, 1, NULL, alloc_inode);
+ DiskOffset, 1, NULL,
+ alloc_inode, 1);
if (status < 0 || *DiskOffset == 0) {
if (!status)
status = -EINVAL;
Modified: branches/format-changes/src/alloc.h
===================================================================
--- branches/format-changes/src/alloc.h 2004-06-16 00:06:38 UTC (rev 1109)
+++ branches/format-changes/src/alloc.h 2004-06-16 19:08:40 UTC (rev 1110)
@@ -64,8 +64,10 @@
struct inode *inode);
int ocfs_load_local_alloc(ocfs_super *osb);
int ocfs_lookup_file_allocation(ocfs_super *osb, __s64 Vbo,
- __s64 *Lbo, __u32 sectors,
- __s64 *contig_bytes, struct inode *inode);
+ __s64 *Lbo, __u32 blocks,
+ __s64 *contig_bytes,
+ struct inode *inode,
+ int locked);
int ocfs_process_bitmap_free_head(ocfs_super *osb,
ocfs_bitmap_free_head *f);
int ocfs_recover_local_alloc(ocfs_super *osb, int node_num);
Modified: branches/format-changes/src/dlm.c
===================================================================
--- branches/format-changes/src/dlm.c 2004-06-16 00:06:38 UTC (rev 1109)
+++ branches/format-changes/src/dlm.c 2004-06-16 19:08:40 UTC (rev 1110)
@@ -1214,7 +1214,8 @@
if ((lockres->lock_type == OCFS_DLM_ENABLE_CACHE_LOCK) &&
(lockres->master_node_num == osb->node_num) &&
- !(flags & FLAG_FILE_DELETE)) {
+ !(flags & FLAG_FILE_DELETE) && !(flags & FLAG_FILE_EXTEND)
+ && !(FLAG_FILE_TRUNCATE)) {
status = 0;
goto finally;
}
Modified: branches/format-changes/src/file.c
===================================================================
--- branches/format-changes/src/file.c 2004-06-16 00:06:38 UTC (rev 1109)
+++ branches/format-changes/src/file.c 2004-06-16 19:08:40 UTC (rev 1110)
@@ -187,7 +187,8 @@
int truncate_pages = 0;
LOG_ENTRY_ARGS ("(0x%p, 0x%p, '%*s')\n", inode, file,
- file->f_dentry->d_name.len, file->f_dentry->d_name.name);
+ file->f_dentry->d_name.len,
+ file->f_dentry->d_name.name);
osb = OCFS_SB(inode->i_sb);
@@ -199,7 +200,7 @@
/* kch - for an open request we are already given the
* inode, and therefore we are given the oin too */
- down (&(OCFS_I(inode)->ip_io_sem));
+ down_write (&(OCFS_I(inode)->ip_io_sem));
down (&(OCFS_I(inode)->priv_sem));
have_oin_sem = 1;
@@ -208,14 +209,14 @@
status = ocfs_read_bh(osb, GET_INODE_FEOFF(inode), &fe_bh,
OCFS_BH_CACHED, inode);
if (status < 0) {
- up(&OCFS_I(inode)->ip_io_sem);
+ up_write(&OCFS_I(inode)->ip_io_sem);
LOG_ERROR_STATUS(status);
goto leave;
}
status = ocfs_inode_notify_open(osb, fe_bh, NULL, inode);
if (status < 0) {
- up(&OCFS_I(inode)->ip_io_sem);
+ up_write(&OCFS_I(inode)->ip_io_sem);
LOG_ERROR_STATUS(status);
if (status != -EINTR) {
LOG_ERROR_ARGS("Open request made for nonexistent "
@@ -229,7 +230,7 @@
status = ocfs_inode_fill_ext_map (osb, fe_bh, inode);
if (status < 0) {
- up(&OCFS_I(inode)->ip_io_sem);
+ up_write(&OCFS_I(inode)->ip_io_sem);
LOG_ERROR_STATUS(status);
goto leave;
}
@@ -239,13 +240,13 @@
status = ocfs_verify_update_inode (osb, inode, &truncate_pages,
0);
if (status < 0) {
- up(&OCFS_I(inode)->ip_io_sem);
+ up_write(&OCFS_I(inode)->ip_io_sem);
LOG_ERROR_STATUS (status);
goto leave;
}
}
- up(&OCFS_I(inode)->ip_io_sem);
+ up_write(&OCFS_I(inode)->ip_io_sem);
/* yes, hold onto priv_sem. */
if (OCFS_I(inode)->open_hndl_cnt > 0) {
@@ -343,6 +344,7 @@
ocfs_super * osb;
struct dentry *dentry;
int last_close = 0;
+ int dec = 0;
LOG_ENTRY_ARGS ("(0x%p, 0x%p, '%*s')\n", inode, file,
file->f_dentry->d_name.len, file->f_dentry->d_name.name);
@@ -390,7 +392,9 @@
OCFS_I(inode)->num_extends = 0;
list_del(&OCFS_I(inode)->recovery_list);
INIT_LIST_HEAD(&OCFS_I(inode)->recovery_list);
- up(&OCFS_I(inode)->inode_extend_sem);
+ up_write(&OCFS_I(inode)->inode_extend_sem);
+
+ dec = 1;
}
spin_unlock(&oin_num_ext_lock);
up(&recovery_list_sem);
@@ -406,6 +410,9 @@
bail:
// ocfs_bh_sem_hash_cleanup_pid(current->pid);
+ if (dec)
+ iput(inode);
+
LOG_EXIT_INT (0);
return 0;
} /* ocfs_file_release */
@@ -765,12 +772,12 @@
if (OCFS_I(inode)->needs_verification) {
LOG_TRACE_STR ("OIN_NEEDS_VERIFICATION");
- down (&(OCFS_I(inode)->ip_io_sem));
+ down_read (&(OCFS_I(inode)->ip_io_sem));
down (&(OCFS_I(inode)->priv_sem));
status = ocfs_verify_update_inode (osb, inode, &needs_trunc,
0);
up (&(OCFS_I(inode)->priv_sem));
- up (&(OCFS_I(inode)->ip_io_sem));
+ up_read (&(OCFS_I(inode)->ip_io_sem));
if (needs_trunc)
ocfs_truncate_inode_pages(inode, 0);
if (status < 0) {
@@ -807,10 +814,10 @@
LOG_TRACE_ARGS
("Will need more allocation: have=%llu, need=%llu\n",
OCFS_I(inode)->alloc_size, newsize);
- down(&OCFS_I(inode)->ip_io_sem);
+ down_write(&OCFS_I(inode)->ip_io_sem);
status = ocfs_extend_file (osb, newsize, GET_INODE_FEOFF(inode), NULL, inode,
NULL, 0, NULL);
- up(&OCFS_I(inode)->ip_io_sem);
+ up_write(&OCFS_I(inode)->ip_io_sem);
if (status < 0) {
if (status != -EINTR && status != -ENOSPC) {
LOG_ERROR_STATUS (status);
@@ -897,12 +904,12 @@
if (OCFS_I(inode)->needs_verification) {
/* yay, locking hell! */
- down(&OCFS_I(inode)->ip_io_sem);
+ down_read(&OCFS_I(inode)->ip_io_sem);
down (&(OCFS_I(inode)->priv_sem));
status = ocfs_verify_update_inode (osb, inode, &needs_trunc,
0);
up (&(OCFS_I(inode)->priv_sem));
- up(&OCFS_I(inode)->ip_io_sem);
+ up_read(&OCFS_I(inode)->ip_io_sem);
if (needs_trunc)
ocfs_truncate_inode_pages(inode, 0);
if (status < 0) {
@@ -1081,7 +1088,10 @@
/* ocfs_extend_file()
*
*/
-int ocfs_extend_file (ocfs_super * osb, __u64 file_size, __u64 file_off, ocfs_journal_handle *passed_handle, struct inode *inode, struct iattr *attr, int system_file, struct buffer_head *fe_bh)
+int ocfs_extend_file(ocfs_super *osb, __u64 file_size, __u64 file_off,
+ ocfs_journal_handle *passed_handle,
+ struct inode *inode, struct iattr *attr,
+ int system_file, struct buffer_head *fe_bh)
{
int status = 0;
int tmpstat;
@@ -1404,7 +1414,7 @@
osb = OCFS_SB(inode->i_sb);
- down(&OCFS_I(inode)->ip_io_sem);
+ down_write(&OCFS_I(inode)->ip_io_sem);
if (!dentry->d_parent || !dentry->d_parent->d_inode) {
LOG_ERROR_STR ("bad inode or root inode");
@@ -1505,7 +1515,7 @@
inode_setattr (inode, attr);
bail:
- up(&OCFS_I(inode)->ip_io_sem);
+ up_write(&OCFS_I(inode)->ip_io_sem);
#ifndef BH_SEM_LEAK_CHECKING
if (error < 0)
Modified: branches/format-changes/src/inode.c
===================================================================
--- branches/format-changes/src/inode.c 2004-06-16 00:06:38 UTC (rev 1109)
+++ branches/format-changes/src/inode.c 2004-06-16 19:08:40 UTC (rev 1110)
@@ -320,14 +320,14 @@
i->flags = 0;
atomic_set(&i->i_clean_buffer_seq, 0);
init_MUTEX(&(i->priv_sem));
- init_MUTEX(&(i->inode_extend_sem));
+ init_rwsem(&(i->inode_extend_sem));
i->open_hndl_cnt = 0;
ocfs_extent_map_init (&i->map);
INIT_LIST_HEAD(&i->recovery_list);
INIT_LIST_HEAD(&i->ip_handle_list);
i->ip_handle = NULL;
- init_MUTEX(&i->ip_io_sem);
+ init_rwsem(&i->ip_io_sem);
/* These should be set in read_inode2. */
i->alloc_size = 0ULL;
@@ -717,12 +717,12 @@
/* take ip_io_sem on the inode, only to avoid a warning in
* acquire_lockres. We can get rid of it when we get rid of
* acquire_lockres */
- down(&OCFS_I(inode)->ip_io_sem);
+ down_write(&OCFS_I(inode)->ip_io_sem);
if (S_ISDIR(inode->i_mode))
lock_flags |= FLAG_DIR;
status = ocfs_acquire_lock(osb, OCFS_DLM_EXCLUSIVE_LOCK, lock_flags,
&fe_bh, inode);
- up(&OCFS_I(inode)->ip_io_sem);
+ up_write(&OCFS_I(inode)->ip_io_sem);
if (status < 0) {
/* EBUSY here is assumed to mean that other nodes are
* still using the inode. We're done here though, so
@@ -787,10 +787,10 @@
ocfs_abort_trans(handle);
if (release_disk_lock) {
- down(&OCFS_I(inode)->ip_io_sem);
+ down_write(&OCFS_I(inode)->ip_io_sem);
status = ocfs_release_lock(osb, OCFS_DLM_EXCLUSIVE_LOCK,
lock_flags, fe_bh, inode);
- up(&OCFS_I(inode)->ip_io_sem);
+ up_write(&OCFS_I(inode)->ip_io_sem);
if (status < 0)
LOG_ERROR_STATUS(status);
}
@@ -1065,8 +1065,13 @@
}
/* do we need extend sem? no extend dlm message for dirs */
+ /*
+ * UGLY: last argument to lookup_file_allocation() (locked) is
+ * forced to '1' here, even though we don't have the lock. This
+ * is to force fast, unlocked operation. Get A Real DLM.
+ */
tmperr = ocfs_lookup_file_allocation(osb, vbo, &lbo, 1, NULL,
- inode);
+ inode, 1);
if (tmperr < 0)
goto fail;
@@ -1121,13 +1126,19 @@
__s64 vbo = 0;
__s64 lbo = 0;
__u32 len;
- int oin_locked = 0;
+ int open_direct;
LOG_ENTRY_ARGS ("(0x%p, %llu, 0x%p, %d)\n", inode,
(unsigned long long)iblock, bh_result, create);
+ open_direct = OCFS_I(inode)->oin_flags & OCFS_OIN_OPEN_FOR_DIRECTIO;
+
if (S_ISLNK (inode->i_mode)) {
- err = ocfs_symlink_get_block (inode, iblock, bh_result, create);
+ /* this always does I/O for some reason. */
+ down_read(&OCFS_I(inode)->ip_io_sem);
+ err = ocfs_symlink_get_block (inode, iblock, bh_result,
+ create);
+ up_read(&OCFS_I(inode)->ip_io_sem);
goto bail;
}
@@ -1140,26 +1151,33 @@
vbo = (__s64) iblock << inode->i_sb->s_blocksize_bits;
if (!INODE_JOURNAL(inode) && vbo >= OCFS_I(inode)->alloc_size) {
+ int vbo_pad;
+
+ vbo_pad = inode->i_sb->s_blocksize;
+ vbo_pad -= vbo & (s64)(inode->i_sb->s_blocksize - 1);
+
LOG_TRACE_STR("Extending allocation");
- err = ocfs_extend_file(osb, vbo + osb->sb->s_blocksize,
- GET_INODE_FEOFF(inode), NULL, inode,
- NULL, 0, NULL);
+ LOG_ERROR_ARGS("extending inode %lu in get_block!!\n",
+ inode->i_ino);
+ down_write(&OCFS_I(inode)->ip_io_sem);
+ err = ocfs_extend_file(osb, vbo + vbo_pad,
+ GET_INODE_FEOFF(inode),
+ NULL, inode, NULL, 0, NULL);
+ up_write(&OCFS_I(inode)->ip_io_sem);
if (err < 0) {
err = -ENOSPC;
LOG_ERROR_STATUS (err);
goto bail;
}
}
-
- if (!(OCFS_I(inode)->oin_flags & OCFS_OIN_OPEN_FOR_DIRECTIO)) {
- down(&(OCFS_I(inode)->priv_sem));
- oin_locked = 1;
- }
len = 1;
- down(&OCFS_I(inode)->inode_extend_sem);
- err = ocfs_lookup_file_allocation(osb, vbo, &lbo, len, NULL, inode);
- up(&OCFS_I(inode)->inode_extend_sem);
+ if (!open_direct)
+ down_read(&OCFS_I(inode)->inode_extend_sem);
+ err = ocfs_lookup_file_allocation(osb, vbo, &lbo, len, NULL,
+ inode, open_direct);
+ if (!open_direct)
+ up_read(&OCFS_I(inode)->inode_extend_sem);
if (err < 0) {
LOG_ERROR_ARGS ("vbo=%lld lbo=%lld len=%u", vbo, lbo, len);
goto bail;
@@ -1180,10 +1198,6 @@
if (err < 0)
err = -EIO;
- if (oin_locked && !(OCFS_I(inode)->oin_flags & OCFS_OIN_OPEN_FOR_DIRECTIO)) {
- up(&(OCFS_I(inode)->priv_sem));
- }
-
LOG_EXIT_INT (err);
return err;
} /* ocfs_get_block */
@@ -1220,7 +1234,8 @@
vbo = (__s64) block << inode->i_sb->s_blocksize_bits;
len = 1;
- err = ocfs_lookup_file_allocation(osb, vbo, &lbo, len, NULL, inode);
+ err = ocfs_lookup_file_allocation(osb, vbo, &lbo, len, NULL,
+ inode, 1);
if (err < 0) {
LOG_ERROR_ARGS ("vbo=%lld lbo=%lld len=%u", vbo,
lbo, len);
@@ -1259,7 +1274,8 @@
vbo = (__s64) iblock << inode->i_sb->s_blocksize_bits;
len = 1;
- err = ocfs_lookup_file_allocation (osb, vbo, &lbo, len, NULL, inode);
+ err = ocfs_lookup_file_allocation(osb, vbo, &lbo, len, NULL,
+ inode, 1);
if (err < 0) {
LOG_ERROR_STATUS (err);
err = -1;
@@ -1408,7 +1424,7 @@
* our logical offset */
/* TODO: Try our damndest to give sizes in multiples of PAGE_SIZE */
status = ocfs_lookup_file_allocation(osb, vbo, &lbo, max_blocks,
- &new_size, inode);
+ &new_size, inode, 1);
/* Do whatever we need to the buffer_head */
if (set_new) {
@@ -1890,7 +1906,7 @@
osb = OCFS_SB(inode->i_sb);
- down (&(OCFS_I(inode)->ip_io_sem));
+ down_read (&(OCFS_I(inode)->ip_io_sem));
down (&(OCFS_I(inode)->priv_sem));
if (INODE_DELETED(inode)) {
@@ -1919,7 +1935,7 @@
bail:
up (&(OCFS_I(inode)->priv_sem));
- up (&(OCFS_I(inode)->ip_io_sem));
+ up_read (&(OCFS_I(inode)->ip_io_sem));
if (needs_trunc)
ocfs_truncate_inode_pages(inode, 0);
Modified: branches/format-changes/src/journal.c
===================================================================
--- branches/format-changes/src/journal.c 2004-06-16 00:06:38 UTC (rev 1109)
+++ branches/format-changes/src/journal.c 2004-06-16 19:08:40 UTC (rev 1110)
@@ -219,7 +219,8 @@
atomic_inc(&inode->i_count);
- down(&OCFS_I(inode)->ip_io_sem);
+ /* we're obviously changing it... */
+ down_write(&OCFS_I(inode)->ip_io_sem);
/* sanity check */
if (OCFS_I(inode)->ip_handle)
@@ -246,7 +247,7 @@
list_del(&OCFS_I(inode)->ip_handle_list);
INIT_LIST_HEAD(&OCFS_I(inode)->ip_handle_list);
- up(&OCFS_I(inode)->ip_io_sem);
+ up_write(&OCFS_I(inode)->ip_io_sem);
iput(inode);
}
return;
@@ -278,7 +279,7 @@
/* The cache list holds unlocked inodes */
if (action == TRANS_CACHE || lock->req_io_sem)
- down(&OCFS_I(lock->inode)->ip_io_sem);
+ down_write(&OCFS_I(lock->inode)->ip_io_sem);
/* The file may have been deleted before we got to
* this lock release. If so, just skip it. */
@@ -300,7 +301,7 @@
}
if (action == TRANS_CACHE || lock->req_io_sem)
- up(&OCFS_I(lock->inode)->ip_io_sem);
+ up_write(&OCFS_I(lock->inode)->ip_io_sem);
if (lock->bh != NULL)
brelse(lock->bh);
@@ -901,7 +902,7 @@
}
lock_id = GET_INODE_FEOFF(inode);
- down(&OCFS_I(inode)->ip_io_sem);
+ down_write(&OCFS_I(inode)->ip_io_sem);
SET_INODE_JOURNAL(inode);
@@ -910,7 +911,7 @@
status = ocfs_acquire_lock (osb, OCFS_DLM_EXCLUSIVE_LOCK,
FLAG_FILE_CREATE, &bh, inode);
if (status < 0) {
- up(&OCFS_I(inode)->ip_io_sem);
+ up_write(&OCFS_I(inode)->ip_io_sem);
if (status != -EINTR)
LOG_ERROR_STR("Could not get lock on journal!");
@@ -929,7 +930,7 @@
if (status < 0) {
OCFS_BH_PUT_DATA(bh);
fe = NULL;
- up(&OCFS_I(inode)->ip_io_sem);
+ up_write(&OCFS_I(inode)->ip_io_sem);
goto done;
}
@@ -953,7 +954,7 @@
DLOCK_FLAG_OPEN_MAP|DLOCK_FLAG_ADD_SELF,
&bh, inode, NULL);
if (status < 0) {
- up(&OCFS_I(inode)->ip_io_sem);
+ up_write(&OCFS_I(inode)->ip_io_sem);
LOG_ERROR_STATUS(status);
goto done;
@@ -962,7 +963,7 @@
LOG_TRACE_ARGS("inode->alloc_size = %llu\n",
OCFS_I(inode)->alloc_size);
- up(&OCFS_I(inode)->ip_io_sem);
+ up_write(&OCFS_I(inode)->ip_io_sem);
/* call the kernels journal init function now */
k_journal = journal_init_inode(inode);
@@ -1066,12 +1067,12 @@
OCFS_I(inode)->open_hndl_cnt--;
- down(&OCFS_I(inode)->ip_io_sem);
+ down_write(&OCFS_I(inode)->ip_io_sem);
/* unlock our journal */
status = ocfs_release_lock (osb, OCFS_DLM_EXCLUSIVE_LOCK,
FLAG_FILE_CREATE,
journal->lockbh, inode);
- up(&OCFS_I(inode)->ip_io_sem);
+ up_write(&OCFS_I(inode)->ip_io_sem);
if (status < 0)
LOG_ERROR_STATUS (status);
@@ -1180,7 +1181,7 @@
LOG_TRACE_ARGS("Force reading %u blocks\n", totalblks);
status = ocfs_lookup_file_allocation(osb, vbo, &lbo, size, NULL,
- inode);
+ inode, 1);
if (status < 0) {
LOG_ERROR_STATUS(status);
goto bail;
@@ -1351,7 +1352,7 @@
}
lock_id = GET_INODE_FEOFF(inode);
- down(&OCFS_I(inode)->ip_io_sem);
+ down_write(&OCFS_I(inode)->ip_io_sem);
SET_INODE_JOURNAL(inode);
@@ -1364,7 +1365,7 @@
FLAG_FILE_CREATE|FLAG_FILE_RECOVERY,
&bh, inode);
- up(&OCFS_I(inode)->ip_io_sem);
+ up_write(&OCFS_I(inode)->ip_io_sem);
if (status < 0) {
LOG_TRACE_ARGS("status returned from acquire_lock=%d\n",
status);
@@ -1393,12 +1394,12 @@
OCFS_I(inode)->alloc_size = alloc_size;
/* add this node to openmap and update disk lock */
- down(&OCFS_I(inode)->ip_io_sem);
+ down_write(&OCFS_I(inode)->ip_io_sem);
status = ocfs_update_disk_lock (osb,
DLOCK_FLAG_OPEN_MAP|DLOCK_FLAG_ADD_SELF,
&bh, inode, NULL);
- up(&OCFS_I(inode)->ip_io_sem);
+ up_write(&OCFS_I(inode)->ip_io_sem);
if (status < 0) {
LOG_ERROR_STATUS(status);
goto done;
@@ -1465,12 +1466,12 @@
/* drop the lock on this nodes journal */
if (got_lock) {
- down(&OCFS_I(inode)->ip_io_sem);
+ down_write(&OCFS_I(inode)->ip_io_sem);
status = ocfs_release_lock(osb, OCFS_DLM_EXCLUSIVE_LOCK,
FLAG_FILE_CREATE|FLAG_FILE_RECOVERY,
bh, inode);
- up(&OCFS_I(inode)->ip_io_sem);
+ up_write(&OCFS_I(inode)->ip_io_sem);
}
if (inode)
iput(inode);
Modified: branches/format-changes/src/lockres.c
===================================================================
--- branches/format-changes/src/lockres.c 2004-06-16 00:06:38 UTC (rev 1109)
+++ branches/format-changes/src/lockres.c 2004-06-16 19:08:40 UTC (rev 1110)
@@ -134,10 +134,6 @@
return status;
} /* ocfs_find_update_res */
-
-#define ocfs_container_of(ptr, type, member) ({ \
- const typeof( ((type *)0)->member ) *__mptr = (ptr); \
- (type *)( (char *)__mptr - offsetof(type,member) );})
/*
* ocfs_acquire_lockres()
*
@@ -146,68 +142,33 @@
*/
int ocfs_acquire_lockres (ocfs_lock_res * lockres, __u32 timeout)
{
- int mypid;
- unsigned long jif = 0;
- int status = 0;
- int cnt = 0;
- struct inode *inode;
- ocfs_inode_private *ip;
+ unsigned long jif;
+ int status;
LOG_ENTRY_ARGS ("(0x%p, %u)\n", lockres, timeout);
OCFS_ASSERT(lockres);
- mypid = current->pid;
+ if (!timeout) {
+ down(&lockres->lock_mutex);
+ status = 0;
+ goto bail;
+ }
- if (timeout)
- jif = jiffies + (timeout * HZ / 1000);
+ jif = jiffies + (timeout * HZ / 1000);
- while (1) {
- spin_lock (&lockres->lock_mutex);
+ while(1) {
+ if (!down_trylock(&lockres->lock_mutex)) {
+ status = 0;
+ break;
+ }
- if (lockres->in_use) {
- if (lockres->thread_id != mypid) {
- spin_unlock (&lockres->lock_mutex);
- LOG_ERROR_ARGS ("lockpid=%d, newpid=%d,"
- " timedout\n",
- lockres->thread_id, mypid);
- BUG();
-
- if (jif && jif < jiffies) {
- LOG_TRACE_ARGS ("lockpid=%d, newpid=%d,"
- " timedout\n",
- lockres->thread_id, mypid);
- status = -ETIMEDOUT;
- goto bail;
- }
-
- if (++cnt == 10) {
- LOG_TRACE_ARGS ("lockpid=%d, newpid=%d\n",
- lockres->thread_id, mypid);
- cnt = 0;
- }
- ocfs_sleep (OCFS_NM_HEARTBEAT_TIME / 10);
- }
- else {
- printk("lockres in_use=%d, pid=%d, mypid=%d\n", lockres->in_use, lockres->thread_id, mypid);
- BUG();
- lockres->in_use++;
- spin_unlock (&lockres->lock_mutex);
- break;
- }
- } else {
- lockres->in_use = 1;
- lockres->thread_id = mypid;
- spin_unlock (&lockres->lock_mutex);
- ip = ocfs_container_of(lockres, ocfs_inode_private, i_lockres);
- inode = ip->inode;
- if (down_trylock(&OCFS_I(inode)->ip_io_sem) == 0) {
- LOG_ERROR_ARGS("locking lockres without io_sem! ino = %lu, offset = %llu\n", inode->i_ino, OCFS_I(inode)->feoff);
-
- //BUG();
- }
+ if (jif < jiffies) {
+ status = -ETIMEDOUT;
break;
}
+
+ ocfs_sleep (OCFS_NM_HEARTBEAT_TIME / 10);
}
bail:
@@ -225,20 +186,7 @@
OCFS_ASSERT(lockres);
- spin_lock (&lockres->lock_mutex);
- if (lockres->in_use == 0) {
- LOG_ERROR_ARGS("Releasing lockres with inuse 0: 0x%p\n", lockres);
- BUG();
- } else {
- if (lockres->thread_id != current->pid)
- LOG_ERROR_ARGS("PID %d is trying to release lockres held by PID %d\n",
- current->pid, lockres->thread_id);
- lockres->in_use--;
- if (lockres->in_use == 0) {
- lockres->thread_id = 0;
- }
- }
- spin_unlock (&lockres->lock_mutex);
+ up(&lockres->lock_mutex);
LOG_EXIT ();
return;
@@ -259,10 +207,9 @@
lockres->master_node_num = OCFS_INVALID_NODE_NUM;
lockres->last_upd_seq_num = 0;
ocfs_node_map_init(osb, &lockres->oin_openmap);
- lockres->in_use = 0;
lockres->lock_state = 0;
- spin_lock_init (&lockres->lock_mutex);
+ init_MUTEX(&lockres->lock_mutex);
atomic_set (&lockres->lr_ref_cnt, 0);
ocfs_node_map_init(osb, &lockres->readonly_map);
Modified: branches/format-changes/src/namei.c
===================================================================
--- branches/format-changes/src/namei.c 2004-06-16 00:06:38 UTC (rev 1109)
+++ branches/format-changes/src/namei.c 2004-06-16 19:08:40 UTC (rev 1110)
@@ -67,8 +67,6 @@
#define OCFS_DEBUG_CONTEXT OCFS_DEBUG_CONTEXT_NAMEI
-extern spinlock_t oin_num_ext_lock;
-
static int inline search_dirblock(struct buffer_head *bh,
struct inode *dir,
const char *name, int namelen,
@@ -147,11 +145,11 @@
LOG_TRACE_ARGS("about to call find_files_on_disk with inode=%p\n",
dir);
- down(&OCFS_I(dir)->ip_io_sem);
+ down_read(&OCFS_I(dir)->ip_io_sem);
status = ocfs_find_files_on_disk(osb, dentry->d_name.name,
dentry->d_name.len, &fe_off,
dir, 1, &dirent_bh, &dirent);
- up(&OCFS_I(dir)->ip_io_sem);
+ up_read(&OCFS_I(dir)->ip_io_sem);
if (status < 0)
goto bail_add;
@@ -190,7 +188,7 @@
LOG_ENTRY_ARGS ("(0x%p, 0x%p, %d, %d, '%*s')\n", dir, dentry, mode,
dev, dentry->d_name.len, dentry->d_name.name);
- down(&OCFS_I(dir)->ip_io_sem);
+ down_write(&OCFS_I(dir)->ip_io_sem);
/* get our super block */
osb = OCFS_SB(dir->i_sb);
@@ -327,7 +325,7 @@
if ((status < 0) && handle)
ocfs_abort_trans(handle);
- up(&OCFS_I(dir)->ip_io_sem);
+ up_write(&OCFS_I(dir)->ip_io_sem);
if (status == -ENOSPC)
LOG_TRACE_STR ("Disk is full");
@@ -586,22 +584,9 @@
}
status = -EFAIL;
- spin_lock(&oin_num_ext_lock);
- if (OCFS_I(inode)->num_extends) {
- LOG_ERROR_ARGS ("Cannot remove a file with = "
- "%u, pending extends (fe_off "
- "= %llu)\n",
- OCFS_I(inode)->num_extends,
- fe_off);
- spin_unlock(&oin_num_ext_lock);
- status = -EBUSY;
- goto bail;
- }
- spin_unlock(&oin_num_ext_lock);
+ down_write(&OCFS_I(dir)->ip_io_sem);
+ down_write(&OCFS_I(inode)->ip_io_sem);
- down(&OCFS_I(dir)->ip_io_sem);
- down(&OCFS_I(inode)->ip_io_sem);
-
handle = ocfs_start_trans(osb, OCFS_FILE_DELETE_CREDITS);
if (handle == NULL) {
LOG_ERROR_STATUS (status = -ENOMEM);
@@ -757,8 +742,8 @@
}
- up(&OCFS_I(inode)->ip_io_sem);
- up(&OCFS_I(dir)->ip_io_sem);
+ up_write(&OCFS_I(inode)->ip_io_sem);
+ up_write(&OCFS_I(dir)->ip_io_sem);
bail:
if (status < 0 && status != -ENOTEMPTY &&
status != -EPERM && status != -EBUSY && status != -EINTR) {
@@ -882,28 +867,27 @@
return(status);
} /* ocfs_double_lock */
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
-static inline void double_down(struct semaphore *s1, struct semaphore *s2)
+static inline void double_down_write(struct rw_semaphore *s1,
+ struct rw_semaphore *s2)
{
if (s1 != s2) {
if ((unsigned long) s1 < (unsigned long) s2) {
- struct semaphore *tmp = s2;
+ struct rw_semaphore *tmp = s2;
s2 = s1; s1 = tmp;
}
- down(s1);
+ down_write(s1);
}
- down(s2);
+ down_write(s2);
}
-static inline void double_up(struct semaphore *s1, struct semaphore *s2)
+static inline void double_up_write(struct rw_semaphore *s1,
+ struct rw_semaphore *s2)
{
- up(s1);
+ up_write(s1);
if (s1 != s2)
- up(s2);
+ up_write(s2);
}
-#endif
-
#define PARENT_INO(buffer) \
((struct ocfs2_dir_entry *) ((char *) buffer + \
le16_to_cpu(((struct ocfs2_dir_entry *) buffer)->rec_len)))->inode
@@ -945,13 +929,14 @@
oldfe_lockid = GET_INODE_FEOFF(old_inode);
- double_down(&OCFS_I(old_dir)->ip_io_sem, &OCFS_I(new_dir)->ip_io_sem);
- down(&OCFS_I(old_inode)->ip_io_sem);
+ double_down_write(&OCFS_I(old_dir)->ip_io_sem,
+ &OCFS_I(new_dir)->ip_io_sem);
+ down_write(&OCFS_I(old_inode)->ip_io_sem);
if (new_inode) {
if (ocfs_inc_icount(new_inode) < 0)
BUG();
- down(&OCFS_I(new_inode)->ip_io_sem);
+ down_write(&OCFS_I(new_inode)->ip_io_sem);
}
if (atomic_read (&old_dentry->d_count) > 2) {
@@ -973,13 +958,6 @@
status = -EBUSY;
goto bail;
}
- spin_lock(&oin_num_ext_lock);
- if (OCFS_I(old_inode)->num_extends) {
- spin_unlock(&oin_num_ext_lock);
- status = -EBUSY;
- goto bail;
- }
- spin_unlock(&oin_num_ext_lock);
/* start our transaction */
handle = ocfs_start_trans(osb, OCFS_FILE_RENAME_CREDITS);
@@ -1251,11 +1229,12 @@
newfe_flags, NULL, new_inode);
}
- double_up(&OCFS_I(old_dir)->ip_io_sem, &OCFS_I(new_dir)->ip_io_sem);
- up(&OCFS_I(old_inode)->ip_io_sem);
+ double_up_write(&OCFS_I(old_dir)->ip_io_sem,
+ &OCFS_I(new_dir)->ip_io_sem);
+ up_write(&OCFS_I(old_inode)->ip_io_sem);
if (new_inode) {
- up(&OCFS_I(new_inode)->ip_io_sem);
+ up_write(&OCFS_I(new_inode)->ip_io_sem);
iput(new_inode);
}
@@ -1307,7 +1286,7 @@
sb = dir->i_sb;
osb = OCFS_SB(sb);
- down(&OCFS_I(dir)->ip_io_sem);
+ down_write(&OCFS_I(dir)->ip_io_sem);
inode = new_inode (sb);
if (IS_ERR (inode)) {
@@ -1323,7 +1302,7 @@
inode = NULL;
goto bail;
}
- down(&OCFS_I(inode)->ip_io_sem);
+ down_write(&OCFS_I(inode)->ip_io_sem);
l = strlen (symname) + 1;
newsize = l - 1;
@@ -1415,8 +1394,8 @@
bail:
if (inode)
- up(&OCFS_I(inode)->ip_io_sem);
- up(&OCFS_I(dir)->ip_io_sem);
+ up_write(&OCFS_I(inode)->ip_io_sem);
+ up_write(&OCFS_I(dir)->ip_io_sem);
if (new_fe_bh) {
if (fe)
Modified: branches/format-changes/src/nm.c
===================================================================
--- branches/format-changes/src/nm.c 2004-06-16 00:06:38 UTC (rev 1109)
+++ branches/format-changes/src/nm.c 2004-06-16 19:08:40 UTC (rev 1110)
@@ -73,6 +73,10 @@
int yield;
} ocfs_ro_cache_drop_ctxt;
+static void ocfs_mark_inode_for_extend(ocfs_super *osb, struct inode *inode,
+ __u32 node_num);
+static void ocfs_clear_inode_for_extend(ocfs_super *osb, struct inode *inode,
+ __u32 node_num);
void ocfs_process_vote_worker(void *val);
@@ -582,7 +586,112 @@
return vote_type;
}
+static void ocfs_mark_inode_for_extend(ocfs_super *osb, struct inode *inode,
+ __u32 node_num)
+{
+ down(&OCFS_I(inode)->priv_sem);
+
+ /* if we don't have it open, then don't bother with this. */
+ if (!OCFS_I(inode)->open_hndl_cnt) {
+ up(&OCFS_I(inode)->priv_sem);
+ return;
+ }
+
+ spin_lock(&oin_num_ext_lock);
+
+ if (OCFS_I(inode)->num_extends < 0)
+ BUG();
+
+ /* this isn't the 1st extend against the inode, so just inc
+ * the counter. */
+ if (OCFS_I(inode)->num_extends > 0) {
+ OCFS_I(inode)->num_extends++;
+ spin_unlock(&oin_num_ext_lock);
+ up(&OCFS_I(inode)->priv_sem);
+ return;
+ }
+
+ /* ok, we're going to have to take the extend sem. We can't do
+ * this holding priv_sem so we drop it and recheck after we've
+ * got it. */
+ spin_unlock(&oin_num_ext_lock);
+ up(&OCFS_I(inode)->priv_sem);
+
+ /* take the extend_sem on behalf of
+ * this other node. It won't be
+ * released until he does his last
+ * release broadcast. This has the
+ * effect of locking out
+ * lookup_file_allocation on this
+ * inode. */
+ down_write(&OCFS_I(inode)->inode_extend_sem);
+
+ down(&OCFS_I(inode)->priv_sem);
+ if (!OCFS_I(inode)->open_hndl_cnt) {
+ up_write(&OCFS_I(inode)->inode_extend_sem);
+ up(&OCFS_I(inode)->priv_sem);
+ return;
+ }
+
+ atomic_inc(&inode->i_count);
+
+ /* Ok, we've still got it open. Put this guy on the recovery
+ * list in case the extending node dies. */
+ down(&recovery_list_sem);
+ spin_lock(&oin_num_ext_lock);
+ OCFS_I(inode)->num_extends++;
+ list_add_tail(&OCFS_I(inode)->recovery_list,
+ &osb->lock_recovery_lists[node_num]);
+ spin_unlock(&oin_num_ext_lock);
+ up(&recovery_list_sem);
+
+ up(&OCFS_I(inode)->priv_sem);
+ return;
+}
+
+static void ocfs_clear_inode_for_extend(ocfs_super *osb, struct inode *inode,
+ __u32 node_num)
+{
+ int dec = 0;
+
+ down(&OCFS_I(inode)->priv_sem);
+
+ /* if we no longer have it open, then the close path has dealt
+ * with this. */
+ if (!OCFS_I(inode)->open_hndl_cnt)
+ goto done;
+
+ down(&recovery_list_sem);
+ spin_lock(&oin_num_ext_lock);
+
+ OCFS_I(inode)->num_extends--;
+
+ if (OCFS_I(inode)->num_extends < 0)
+ BUG();
+
+ if (!OCFS_I(inode)->num_extends) {
+ list_del(&OCFS_I(inode)->recovery_list);
+ INIT_LIST_HEAD(&OCFS_I(inode)->recovery_list);
+
+ up_write(&OCFS_I(inode)->inode_extend_sem);
+
+ dec = 1;
+ }
+
+ spin_unlock(&oin_num_ext_lock);
+ up(&recovery_list_sem);
+
+done:
+ up(&OCFS_I(inode)->priv_sem);
+
+ /* we want iputs to happen outside of as many locks as possible. */
+ if (dec)
+ iput(inode);
+
+ return;
+}
+
/* Search the journals committed transactions list for a given
* inode. If it's in there, return true, zero otherwise and -1 on
* error. Must hold the journal->commit_sem before going here! */
@@ -709,7 +818,7 @@
* Please see the note in ocfs_delete_inode. */
osb->voting_ino = inode->i_ino;
- down(&OCFS_I(inode)->ip_io_sem);
+ down_write(&OCFS_I(inode)->ip_io_sem);
have_io_sem = 1;
lockres = GET_INODE_LOCKRES(inode);
@@ -745,15 +854,17 @@
printk("Invalid request! flags = 0x%x\n", flags);
#endif
- /* get_process_vote_action will only allow CHANGE_MASTER, RELEASE_CACHE, and
- * ADD_OIN_MAP on a CACHE lock held by this node. the CHANGE_MASTER/RELEASE_CACHE
- * path needs to check the readonly map to see if any nodes need to be updated. this
- * is not necessary for the ADD_OIN_MAP path since it cannot actually modify any
- * data or metadata under the lock.
+ /* get_process_vote_action will only allow CHANGE_MASTER,
+ * RELEASE_CACHE, and ADD_OIN_MAP on a CACHE lock held by this
+ * node. the CHANGE_MASTER/RELEASE_CACHE path needs to check
+ * the readonly map to see if any nodes need to be updated.
+ * this is not necessary for the ADD_OIN_MAP path since it
+ * cannot actually modify any data or metadata under the lock.
*/
if (disk_vote) {
- /* Zero out the vote, if any already set and hung */
+ /* Zero out the vote for everybody, if any already set
+ * and hung */
vote = OCFS_BH_GET_DATA_WRITE(vote_bh);
vote->node = (__u8)-1;
vote->type = 0;
@@ -1142,6 +1253,9 @@
break;
}
+ up_write(&OCFS_I(inode)->ip_io_sem);
+ have_io_sem = 0;
+
if (inode && (flags & (FLAG_FILE_EXTEND|FLAG_FILE_TRUNCATE)) &&
((flags & FLAG_ACQUIRE_LOCK && vote_response==FLAG_VOTE_NODE) ||
(flags & FLAG_RELEASE_LOCK))) {
@@ -1150,57 +1264,14 @@
"extend" : "truncate", flags & FLAG_RELEASE_LOCK ?
"release" : "acquire", inode, node_num);
- down(&OCFS_I(inode)->priv_sem);
- if (OCFS_I(inode)->open_hndl_cnt
- && (flags & FLAG_ACQUIRE_LOCK)) {
- spin_lock(&oin_num_ext_lock);
-
- if (OCFS_I(inode)->num_extends < 0)
- BUG();
-
- if (OCFS_I(inode)->num_extends > 0) {
- OCFS_I(inode)->num_extends++;
- spin_unlock(&oin_num_ext_lock);
- } else {
- spin_unlock(&oin_num_ext_lock);
-
- /* take the extend_sem on behalf of
- * this other node. It won't be
- * released until he does his last
- * release broadcast. This has the
- * effect of locking out
- * lookup_file_allocation on this
- * inode. */
- down(&OCFS_I(inode)->inode_extend_sem);
-
- down(&recovery_list_sem);
- spin_lock(&oin_num_ext_lock);
- OCFS_I(inode)->num_extends++;
- list_add_tail(&OCFS_I(inode)->recovery_list, &osb->lock_recovery_lists[node_num]);
- spin_unlock(&oin_num_ext_lock);
- up(&recovery_list_sem);
- }
- } else if (OCFS_I(inode)->open_hndl_cnt
- && (flags & FLAG_RELEASE_LOCK)) {
- down(&recovery_list_sem);
- spin_lock(&oin_num_ext_lock);
-
- OCFS_I(inode)->num_extends--;
-
- if (OCFS_I(inode)->num_extends < 0)
- BUG();
-
- if (!OCFS_I(inode)->num_extends) {
- list_del(&OCFS_I(inode)->recovery_list);
- INIT_LIST_HEAD(&OCFS_I(inode)->recovery_list);
-
- up(&OCFS_I(inode)->inode_extend_sem);
- }
- spin_unlock(&oin_num_ext_lock);
-
- up(&recovery_list_sem);
+ if (flags & FLAG_ACQUIRE_LOCK)
+ ocfs_mark_inode_for_extend(osb, inode, node_num);
+ else if (flags & FLAG_RELEASE_LOCK)
+ ocfs_clear_inode_for_extend(osb, inode, node_num);
+ else {
+ printk("uhoh, bad vote flags! 0x%x\n", flags);
+ BUG();
}
- up(&OCFS_I(inode)->priv_sem);
}
if (disk_vote) {
@@ -1237,16 +1308,15 @@
}
}
- if (lockres) {
+ if (lockres)
ocfs_release_lockres (lockres); // ocfs_process_vote
- }
leave:
if (inode) {
if (inc_inode_seq)
ocfs_inc_inode_seq(osb, inode, 1);
if (have_io_sem)
- up(&OCFS_I(inode)->ip_io_sem);
+ up_write(&OCFS_I(inode)->ip_io_sem);
}
if (inode)
@@ -1304,6 +1374,7 @@
LOG_ENTRY_ARGS("(node_num = %u)\n", node_num);
+start:
down(&recovery_list_sem);
list_for_each_safe (iter, temp, &osb->lock_recovery_lists[node_num]) {
i = list_entry (iter, ocfs_inode_private, recovery_list);
@@ -1315,7 +1386,12 @@
OCFS_I(inode)->num_extends = 0;
list_del(&OCFS_I(inode)->recovery_list);
INIT_LIST_HEAD(&OCFS_I(inode)->recovery_list);
- up(&OCFS_I(inode)->inode_extend_sem);
+ up_write(&OCFS_I(inode)->inode_extend_sem);
+
+ spin_unlock(&oin_num_ext_lock);
+ up (&recovery_list_sem);
+ iput(inode);
+ goto start;
} else
LOG_ERROR_STR("oin is in recovery list, but has zero extend counter value!");
@@ -1376,7 +1452,7 @@
if (yield) {
/* this will wait until process_vote gets to the release */
- down(&OCFS_I(inode)->ip_io_sem);
+ down_write(&OCFS_I(inode)->ip_io_sem);
ocfs_acquire_lockres(lockres, 0); // ocfs_process_vote ocfs_acquire_lock
}
@@ -1413,9 +1489,9 @@
if (yield) {
/* from nm thread, give some time to waiters */
ocfs_release_lockres(lockres); // ocfs_process_vote ocfs_acquire_lock
- up(&OCFS_I(inode)->ip_io_sem);
+ up_write(&OCFS_I(inode)->ip_io_sem);
- down(&OCFS_I(inode)->ip_io_sem);
+ down_write(&OCFS_I(inode)->ip_io_sem);
ocfs_acquire_lockres(lockres, 0); // ocfs_process_vote ocfs_acquire_lock
}
continue;
@@ -1434,7 +1510,7 @@
leave:
if (yield) {
ocfs_release_lockres(lockres); // ocfs_process_vote ocfs_acquire_lock
- up(&OCFS_I(inode)->ip_io_sem);
+ up_write(&OCFS_I(inode)->ip_io_sem);
}
if (inode)
Modified: branches/format-changes/src/ocfs.h
===================================================================
--- branches/format-changes/src/ocfs.h 2004-06-16 00:06:38 UTC (rev 1109)
+++ branches/format-changes/src/ocfs.h 2004-06-16 19:08:40 UTC (rev 1110)
@@ -568,11 +568,9 @@
__u32 master_node_num; /* Master Node */
__u32 lock_state;
__u32 lock_holders;
- __u8 in_use;
__u8 lock_type;
- int thread_id; // XXX
atomic_t lr_ref_cnt; /* When 0, freed */ // XXX
- spinlock_t lock_mutex; // XXX
+ struct semaphore lock_mutex;
__u32 readonly_node;
ocfs_node_map readonly_map;
ocfs_node_map oin_openmap;
@@ -602,7 +600,7 @@
/* This protects io on the metadata buffers related to this
* inode. We also consider an "abort_trans" an I/O as it will
* revert the buffer back to a previous state. */
- struct semaphore ip_io_sem;
+ struct rw_semaphore ip_io_sem;
/* Used by the journalling code to attach an inode to a
* handle. These are protected by ip_io_sem in order to lock
@@ -612,7 +610,7 @@
struct _ocfs_journal_handle *ip_handle;
/* inode_extend_sem locks out extends on behalf of other nodes. */
- struct semaphore inode_extend_sem;
+ struct rw_semaphore inode_extend_sem;
struct list_head recovery_list; /* protected by recovery_list_sem */
__u32 num_extends; /* protected by oin_num_ext_lock */
Modified: branches/format-changes/src/super.c
===================================================================
--- branches/format-changes/src/super.c 2004-06-16 00:06:38 UTC (rev 1109)
+++ branches/format-changes/src/super.c 2004-06-16 19:08:40 UTC (rev 1110)
@@ -1574,7 +1574,9 @@
LOG_ERROR_STATUS(status = -EINVAL);
goto bail;
}
- status = ocfs_lookup_file_allocation (osb, 0ULL, &ret, 1, NULL, inode);
+ /* We're in the mount path, pretend locked=1 */
+ status = ocfs_lookup_file_allocation(osb, 0ULL, &ret, 1, NULL,
+ inode, 1);
if (status < 0) {
LOG_ERROR_STATUS(status);
goto bail;
@@ -1638,7 +1640,9 @@
LOG_ERROR_STATUS(status = -EINVAL);
goto bail;
}
- status = ocfs_lookup_file_allocation (osb, 0ULL, &ret, 1, NULL, inode);
+ /* We're in the mount path, pretend locked=1 */
+ status = ocfs_lookup_file_allocation(osb, 0ULL, &ret, 1, NULL,
+ inode, 1);
if (status < 0) {
LOG_ERROR_STATUS(status);
goto bail;
Modified: branches/format-changes/src/sysfile.c
===================================================================
--- branches/format-changes/src/sysfile.c 2004-06-16 00:06:38 UTC (rev 1109)
+++ branches/format-changes/src/sysfile.c 2004-06-16 19:08:40 UTC (rev 1110)
@@ -130,7 +130,8 @@
* after you're done with them!
*
*/
-int ocfs_read_system_file (ocfs_super * osb, int type, __u32 node, struct buffer_head *bhs[], __u64 Length)
+int ocfs_read_system_file(ocfs_super *osb, int type, __u32 node,
+ struct buffer_head *bhs[], __u64 Length)
{
int status = 0, i;
__u64 ret = 0;
@@ -150,10 +151,14 @@
i = 0;
blocks = Length >> osb->sb->s_blocksize_bits;
while (blocks > 0) {
- status = ocfs_lookup_file_allocation (osb, off, &ret, blocks,
- &contig_bytes, inode);
- contig_blocks = contig_bytes >> osb->sb->s_blocksize_bits;
- if (contig_bytes != contig_blocks << osb->sb->s_blocksize_bits) {
+ status = ocfs_lookup_file_allocation(osb, off, &ret,
+ blocks,
+ &contig_bytes,
+ inode, 1);
+ contig_blocks =
+ contig_bytes >> osb->sb->s_blocksize_bits;
+ if (contig_bytes !=
+ (contig_blocks << osb->sb->s_blocksize_bits)) {
LOG_ERROR_ARGS("unaligned system file read! off=%llu, "
"contig_bytes=%llu\n", off, contig_bytes);
LOG_ERROR_STATUS(status=-EINVAL);
More information about the Ocfs2-commits
mailing list