[Ocfs2-commits] mfasheh commits r1364 - trunk/src
svn-commits at oss.oracle.com
svn-commits at oss.oracle.com
Thu Aug 19 18:18:02 CDT 2004
Author: mfasheh
Date: 2004-08-19 17:18:00 -0500 (Thu, 19 Aug 2004)
New Revision: 1364
Modified:
trunk/src/alloc.c
trunk/src/aops.c
trunk/src/dlm.c
trunk/src/file.c
trunk/src/inode.c
trunk/src/journal.c
trunk/src/lockres.c
trunk/src/lockres.h
trunk/src/namei.c
trunk/src/nm.c
trunk/src/ocfs.h
trunk/src/ocfs_journal.h
trunk/src/vote.c
Log:
* get rid of most unused lock flags (FLAG_DIR is still there), and
re-arrange them so they're easier to read. All nodes in a cluster will have
to be simultaneously upgraded after this revision as a result though (the
values have changed).
* Fix ocfs_inode_is_new to be nicer in return value. Also, fix a bug where
it wasn't dealing with system files properly.
* We were printing out an error on -EBUSY in vote.c and dlm.c even though
it's an entirely valid return vote (can't delete because file is still
open).
* ocfs_notify_on_open() didn't reloop properly on -ETIMEDOUT
* fix up ocfs_update_lockres() to do things correctly, and to only read when
it's absolutely necessary. As a result it's safe to call this from
process_vote now (so I removed that #warning)
* we had a nasty bug in which certain lock requests (like a release_dentry
for instance) skipped the whole CHANGE_MASTER stuff on the other node. We
now make acquire_lock put that in there for those types of requests and
process_vote can handle a change master along with most other requests now
too.
* clean up the flag computation in acquire_lock and new_lock_function.
- we now avoid doing lock writes (and cache zapping) unless it's actually
necessary.
- new_lock_function() takes on the full task of figuring out who to vote
with, as opposed to before where it was split between that and
acquire_lock.
- stuff is actually readable now.
* fix a bug in new_lock_function() where we were setting
lockres->master_node_num unconditionally. It's now set only when we're going
to write out a new lock state.
* update and fill out the lockres in symlink and in read_inode, two critical
places where we were leaving the lockres with invalid data.
* as a result of these last two cleanups, I managed to get rid of a case in
process_vote where we had a stale lockres which was causing voting issues.
* BUG() in find_inode/find_actor if we have no generic_ip.
Modified: trunk/src/alloc.c
===================================================================
--- trunk/src/alloc.c 2004-08-19 20:38:04 UTC (rev 1363)
+++ trunk/src/alloc.c 2004-08-19 22:18:00 UTC (rev 1364)
@@ -362,7 +362,7 @@
status = ocfs_acquire_lock (osb,
OCFS_LKM_EXMODE,
- FLAG_FILE_CREATE,
+ 0,
NULL,
inode_alloc_inode[i]);
if (status < 0) {
@@ -373,7 +373,7 @@
goto abort;
}
ocfs_handle_add_lock(handle, OCFS_LKM_EXMODE,
- FLAG_FILE_CREATE,
+ 0,
inode_alloc_inode[i]);
}
}
@@ -391,7 +391,7 @@
status = ocfs_acquire_lock (osb,
OCFS_LKM_EXMODE,
- FLAG_FILE_CREATE,
+ 0,
NULL,
ext_alloc_inode[i]);
if (status < 0) {
@@ -402,7 +402,7 @@
goto abort;
}
ocfs_handle_add_lock(handle, OCFS_LKM_EXMODE,
- FLAG_FILE_CREATE,
+ 0,
ext_alloc_inode[i]);
}
}
@@ -417,7 +417,7 @@
ocfs_handle_add_inode(handle, vol_inode);
status = ocfs_acquire_lock(osb, OCFS_LKM_EXMODE,
- FLAG_FILE_CREATE,
+ 0,
&globalbh, vol_inode);
if (status < 0) {
iput(vol_inode);
@@ -428,7 +428,7 @@
goto abort;
}
ocfs_handle_add_lock(handle, OCFS_LKM_EXMODE,
- FLAG_FILE_CREATE, vol_inode);
+ 0, vol_inode);
}
/* free vol block */
@@ -3140,14 +3140,14 @@
OCFS_ASSERT (blockSize);
status = ocfs_acquire_lock (osb, OCFS_LKM_EXMODE,
- FLAG_FILE_CREATE, &bh, inode);
+ 0, &bh, inode);
if (status < 0) {
LOG_ERROR_STATUS (status);
goto leave;
}
ocfs_handle_add_lock(handle, OCFS_LKM_EXMODE,
- FLAG_FILE_CREATE, inode);
+ 0, inode);
numBits = ((bytes_wanted + (blockSize-1)) >> blockSizeBits);
numBytes = (u64)numBits << blockSizeBits;
Modified: trunk/src/aops.c
===================================================================
--- trunk/src/aops.c 2004-08-19 20:38:04 UTC (rev 1363)
+++ trunk/src/aops.c 2004-08-19 22:18:00 UTC (rev 1364)
@@ -87,7 +87,7 @@
/* We don't use the page cache to create symlink data, so if
* need be, copy it over from the buffer cache. */
- if (!buffer_uptodate(bh_result) && !ocfs_inode_is_new(osb, inode)) {
+ if (!buffer_uptodate(bh_result) && ocfs_inode_is_new(osb, inode)) {
buffer_cache_bh = sb_getblk(osb->sb,
fe->id2.i_list.l_recs[0].e_blkno + iblock);
if (!buffer_cache_bh) {
@@ -100,7 +100,7 @@
* the bh, even if it commits while we're doing the
* copy, the data is still good. */
if (buffer_jbd(buffer_cache_bh)
- && !ocfs_inode_is_new(osb, inode)) {
+ && ocfs_inode_is_new(osb, inode)) {
kaddr = kmap_atomic(bh_result->b_page, KM_USER0);
if (!kaddr) {
LOG_ERROR_ARGS("couldn't kmap!\n");
Modified: trunk/src/dlm.c
===================================================================
--- trunk/src/dlm.c 2004-08-19 20:38:04 UTC (rev 1363)
+++ trunk/src/dlm.c 2004-08-19 22:18:00 UTC (rev 1364)
@@ -451,7 +451,7 @@
ocfs_acquire_lockres(lockres, 0); // ocfs_file_open
flags = FLAG_TRUNCATE_PAGES;
- if (!ocfs_inode_is_new(osb, inode))
+ if (ocfs_inode_is_new(osb, inode))
flags |= FLAG_FAST_PATH_LOCK;
status = new_lock_function(osb, lockres->lock_type,
@@ -460,7 +460,7 @@
if (status != -EAGAIN)
LOG_ERROR_STATUS (status);
ocfs_release_lockres (lockres); // ocfs_file_open ocfs_symlink
- if (status == -EAGAIN) {
+ if (status == -EAGAIN || status == -ETIMEDOUT) {
if (ocfs_task_interruptible ()) {
LOG_TRACE_ARGS("interrupted... lockid=%llu\n",
OCFS_I(inode)->ip_blkno << inode->i_sb->s_blocksize_bits);
@@ -468,6 +468,7 @@
goto bail;
}
ocfs_sleep (50);
+ status = -EAGAIN;
continue;
}
@@ -512,8 +513,7 @@
__u32 flags, struct buffer_head **bh, struct inode *inode)
{
int status = -EFAIL;
- ocfs2_dinode *disklock = NULL;
- __u32 updated = 0;
+ __u32 updated;
ocfs_lock_res *lockres = GET_INODE_LOCKRES(inode);
int k = 0;
struct buffer_head *tmpbh = NULL, **b = NULL;
@@ -526,7 +526,7 @@
LOG_ENTRY_ARGS ("(0x%p, %u, %u, 0x%p)\n",
osb, lock_type, flags, bh);
-
+
OCFS_ASSERT(lock_type != OCFS_LKM_NLMODE);
OCFS_ASSERT(inode);
@@ -540,41 +540,31 @@
flags |= FLAG_ACQUIRE_LOCK;
- status = ocfs_update_lockres (osb, b, &updated, 0, inode,
- 1, 0);
- if (status < 0) {
- LOG_ERROR_STATUS (status);
- goto bail;
+ *b = sb_getblk(osb->sb, OCFS_I(inode)->ip_blkno);
+ if (*b == NULL) {
+ status = -EIO;
+ LOG_ERROR_STATUS(status);
+ goto finally;
}
+ updated = 0;
again:
ocfs_acquire_lockres (lockres, 0); // ocfs_acquire_lock
LOG_TRACE_ARGS("attempting to get lock, pass: %d\n", ++k);
- if (lockres->master_node_num == osb->node_num)
- updated = 1;
-
if (!updated) {
- status = ocfs_read_bh (osb, lock_id, b, 0, inode);
+ status = ocfs_update_lockres(osb, *b, 0, inode, 1, 1);
if (status < 0) {
- ocfs_release_lockres (lockres); // ocfs_acquire_lock
+ ocfs_release_lockres (lockres);
LOG_ERROR_STATUS (status);
goto finally;
}
- disklock = (ocfs2_dinode *) (*b)->b_data;
-
- if (!IS_VALID_FILE_ENTRY(disklock))
- BUG();
-
- if (lockres->master_node_num != osb->node_num ||
- lockres->master_node_num != DISK_LOCK(disklock)->dl_master) {
- lockres->master_node_num = DISK_LOCK(disklock)->dl_master;
- lockres->lock_type = DISK_LOCK(disklock)->dl_level;
- }
- updated = 1;
}
+ /* alright, if we own it then no more updates are necessary. */
+ if (lockres->master_node_num == osb->node_num)
+ updated = 1;
reevaluate:
no_owner = (lockres->master_node_num == OCFS_INVALID_NODE_NUM);
@@ -648,46 +638,50 @@
goto finally;
}
- if (lockres->master_node_num != osb->node_num &&
- (wait_on_recovery || no_owner || owner_dead)) {
- extra_lock_flags = FLAG_REMASTER;
- } else if (flags & (FLAG_FILE_DELETE | FLAG_FILE_RENAME
- | FLAG_RELEASE_DENTRY)) {
- if (ocfs_inode_is_new(osb, inode)!=0) {
- extra_lock_flags = 0;
- } else if (lockres->lock_type == OCFS_LKM_EXMODE)
- extra_lock_flags = FLAG_FAST_PATH_LOCK;
- else
- extra_lock_flags = FLAG_CHANGE_MASTER;
- } else if (lockres->master_node_num == osb->node_num) {
- if (flags & (FLAG_FILE_EXTEND | FLAG_FILE_TRUNCATE) &&
- ocfs_inode_is_new(osb, inode)!=0)
- extra_lock_flags = FLAG_REMASTER;
- else if (lockres->lock_type == OCFS_LKM_EXMODE)
- extra_lock_flags = FLAG_FAST_PATH_LOCK;
- else
- extra_lock_flags = FLAG_CHANGE_MASTER;
- } else {
- extra_lock_flags = FLAG_CHANGE_MASTER;
+ if (ocfs_inode_is_new(osb, inode)) {
+ if (lockres->master_node_num != osb->node_num) {
+ printk("inode is new, but lockres is out of date! "
+ "owner = %d, type = %d\n",
+ lockres->master_node_num, lockres->lock_type);
+
+ BUG();
+ }
+ extra_lock_flags |= FLAG_FAST_PATH_LOCK;
}
-
+ /* some lock requests are simple messages and don't require a
+ * master change. */
+ if ((flags & FLAG_TRUNCATE_PAGES) || (flags & FLAG_FILE_RENAME))
+ goto do_lock;
+
+ if ((lockres->master_node_num != osb->node_num)
+ && (wait_on_recovery || no_owner || owner_dead)) {
+ extra_lock_flags |= FLAG_REMASTER;
+ } else if (lockres->master_node_num != osb->node_num) {
+ extra_lock_flags |= FLAG_CHANGE_MASTER;
+ }
+
do_lock:
LOG_TRACE_ARGS("lockres: master=%d, locktype=%d, flags: %08x\n",
- lockres->master_node_num, lockres->lock_type, flags|extra_lock_flags);
-
+ lockres->master_node_num, lockres->lock_type,
+ flags|extra_lock_flags);
+
#ifdef VERBOSE_LOCKING_TRACE
- printk("lockres: lockid=%llu, this=%d, master=%d, locktype=%d, flags=%08x, ronode=%d\n",
- OCFS_I(inode)->ip_blkno << inode->i_sb->s_blocksize_bits, osb->node_num, lockres->master_node_num, lockres->lock_type,
- flags|extra_lock_flags, lockres->readonly_node);
+ printk("lockres: lockid=%llu, this=%d, master=%d, locktype=%d, "
+ "flags=%08x, ronode=%d\n",
+ OCFS_I(inode)->ip_blkno << inode->i_sb->s_blocksize_bits,
+ osb->node_num, lockres->master_node_num, lockres->lock_type,
+ flags|extra_lock_flags, lockres->readonly_node);
#endif
- if (wait_on_recovery && !((flags|extra_lock_flags) & FLAG_FILE_RECOVERY)) {
+ if (wait_on_recovery
+ && !((flags|extra_lock_flags) & FLAG_FILE_RECOVERY)) {
int waitcnt = 0;
LOG_TRACE_ARGS("Waiting on node %u to be recovered\n",
lockres->master_node_num);
while (1) {
LOG_TRACE_ARGS("waitcnt = %d\n", waitcnt);
- if (!ocfs_node_is_recovering(osb, lockres->master_node_num))
+ if (!ocfs_node_is_recovering(osb,
+ lockres->master_node_num))
break;
ocfs_sleep(500);
}
@@ -696,10 +690,18 @@
status = new_lock_function(osb, lock_type, flags|extra_lock_flags,
*b, &disk_vote, inode);
if (status < 0) {
+ /* if we're going to jump back up, we want to update
+ * if we're not the master... */
+ if (lockres->master_node_num != osb->node_num)
+ updated = 0;
ocfs_release_lockres (lockres); // ocfs_acquire_lock
if (status == -EAGAIN || status == -ETIMEDOUT) {
if (status == -ETIMEDOUT)
- LOG_ERROR_ARGS("Timed out acquiring lock for inode %llu, (lockid = %llu) retrying...\n", OCFS_I(inode)->ip_blkno, lock_id);
+ LOG_ERROR_ARGS("Timed out acquiring lock for "
+ "inode %llu, (lockid = %llu) "
+ "retrying...\n",
+ OCFS_I(inode)->ip_blkno,
+ lock_id);
if (ocfs_task_interruptible ()) {
LOG_TRACE_ARGS("interrupted... lockid=%llu\n",
@@ -708,7 +710,6 @@
goto finally;
}
ocfs_sleep (50);
- updated = 0;
goto again;
}
goto finally;
@@ -729,7 +730,6 @@
ocfs_release_lockres (lockres); // ocfs_acquire_lock
finally:
-bail:
if (tmpbh)
brelse(tmpbh);
@@ -885,9 +885,6 @@
goto finally;
}
- if (flags & FLAG_FILE_RELEASE_MASTER)
- lockres->master_node_num = OCFS_INVALID_NODE_NUM;
-
OCFS_ASSERT(lockres->uncommitted_holders <= num_ident);
/* we don't want to send over a count for any size change
@@ -959,16 +956,20 @@
} else if (flags & (FLAG_DROP_READONLY | FLAG_TRUNCATE_PAGES)) {
need_lock_write = 0;
need_to_zap_buffers = 0;
- } else if (flags & (FLAG_READDIR | FLAG_FILE_RELEASE_CACHE)) {
+ } else if (flags & FLAG_READDIR) {
need_lock_write = 0;
need_to_zap_buffers = 1;
+ } else if (!bh) {
+ need_lock_write = 0;
+ need_to_zap_buffers = 0;
} else {
- if (bh) {
- fe = (ocfs2_dinode *) bh->b_data;
- /* may not need to rewrite the lock later if we already have a cachelock */
- need_lock_write = (DISK_LOCK(fe)->dl_master != osb->node_num ||
- DISK_LOCK(fe)->dl_level != OCFS_LKM_EXMODE);
- } else
+ fe = (ocfs2_dinode *) bh->b_data;
+ /* may not need to rewrite the lock later if
+ * we already have a cachelock */
+ if ((DISK_LOCK(fe)->dl_master == osb->node_num)
+ && (DISK_LOCK(fe)->dl_level != requested_lock))
+ need_lock_write = 1;
+ else
need_lock_write = 0;
need_to_zap_buffers = 0;
}
@@ -978,11 +979,10 @@
goto vote_success;
+#define BROADCAST_FLAGS (FLAG_FILE_DELETE | FLAG_FILE_RENAME | FLAG_RELEASE_DENTRY | FLAG_FILE_EXTEND | FLAG_FILE_TRUNCATE | FLAG_FILE_UPDATE_OIN | FLAG_TRUNCATE_PAGES | FLAG_DROP_READONLY | FLAG_REMASTER)
/* figure out who to vote with */
- if (flags & (FLAG_REMASTER | FLAG_FILE_DELETE |
- FLAG_FILE_RENAME | FLAG_DROP_READONLY |
- FLAG_RELEASE_DENTRY | FLAG_TRUNCATE_PAGES)) {
+ if (flags & BROADCAST_FLAGS) {
ocfs_node_map_dup(osb, &vote_map, &osb->publ_map); /* broadcast */
/* only nodes that see this is readonly */
if (flags & FLAG_DROP_READONLY)
@@ -1013,7 +1013,8 @@
if (status < 0) {
if (status != -EAGAIN &&
status != -ETIMEDOUT &&
- status != -EINTR)
+ status != -EINTR &&
+ status != -EBUSY)
LOG_ERROR_STATUS(status);
goto bail;
}
@@ -1037,14 +1038,11 @@
lockres->readonly_node = lockres->master_node_num;
goto bail;
}
-
- /* update the lockres */
- if (!(flags & FLAG_FILE_RELEASE_CACHE))
- lockres->master_node_num = osb->node_num;
- lockres->lock_type = requested_lock;
/* update the disk lock */
if (need_lock_write) {
+ lockres->lock_type = requested_lock;
+ lockres->master_node_num = osb->node_num;
status = ocfs_update_disk_lock (osb, lock_write_flags, &bh, inode);
if (status < 0)
LOG_ERROR_STATUS (status);
Modified: trunk/src/file.c
===================================================================
--- trunk/src/file.c 2004-08-19 20:38:04 UTC (rev 1363)
+++ trunk/src/file.c 2004-08-19 22:18:00 UTC (rev 1364)
@@ -488,7 +488,7 @@
{
int status = 0;
ocfs2_dinode *fileEntry = NULL;
- u32 lock_flags = FLAG_FILE_UPDATE;
+ u32 lock_flags = 0;
struct buffer_head *bh = NULL;
ocfs_journal_handle *handle = NULL;
Modified: trunk/src/inode.c
===================================================================
--- trunk/src/inode.c 2004-08-19 20:38:04 UTC (rev 1363)
+++ trunk/src/inode.c 2004-08-19 22:18:00 UTC (rev 1364)
@@ -178,6 +178,12 @@
LOG_ENTRY_ARGS ("(0x%p, %lu, %llu, 0x%p)\n", inode, inode->i_ino, OCFS_I(inode)->ip_blkno, opaque);
+ if (!inode->u.generic_ip) {
+ LOG_ERROR_ARGS("inode %lu has no generic_ip (is_bad_inode = "
+ "%d)!\n", inode->i_ino, is_bad_inode(inode));
+ BUG();
+ }
+
if (inode == NULL)
goto bail;
@@ -231,6 +237,12 @@
LOG_ENTRY_ARGS ("(0x%p, %lu, 0x%p)\n", inode, ino, opaque);
+ if (!inode->u.generic_ip) {
+ LOG_ERROR_ARGS("inode %lu has no generic_ip (is_bad_inode = "
+ "%d)!\n", inode->i_ino, is_bad_inode(inode));
+ BUG();
+ }
+
/* We have unique inode numbers so these are just sanity
* checks at this point. */
if (opaque == NULL || inode == NULL)
@@ -494,6 +506,11 @@
OCFS_SET_FLAG(OCFS_I(inode)->ip_flags, OCFS_INODE_SYSTEM_FILE);
ocfs_init_lockres (osb, inode);
+ status = ocfs_update_lockres(osb, bh, 0, inode, 0, 1);
+ if (status < 0) {
+ make_bad_inode(inode);
+ goto bail;
+ }
status = 0;
bail:
@@ -599,15 +616,13 @@
ocfs_handle_add_inode(handle, orphan_dir_inode);
- status = ocfs_acquire_lock(osb, OCFS_LKM_EXMODE,
- FLAG_FILE_CREATE | FLAG_DIR,
+ status = ocfs_acquire_lock(osb, OCFS_LKM_EXMODE, FLAG_DIR,
&orphan_dir_bh, orphan_dir_inode);
if (status < 0) {
LOG_ERROR_STATUS(status);
goto bail_locked;
}
- ocfs_handle_add_lock(handle, OCFS_LKM_EXMODE,
- FLAG_FILE_CREATE | FLAG_DIR,
+ ocfs_handle_add_lock(handle, OCFS_LKM_EXMODE, FLAG_DIR,
orphan_dir_inode);
if (S_ISDIR(inode->i_mode))
@@ -1074,7 +1089,7 @@
up(&OCFS_I(inode)->ip_sem);
drop_ip_sem = 0;
- status = ocfs_update_lockres (osb, &fe_bh, NULL, 0, inode, 0,
+ status = ocfs_update_lockres (osb, fe_bh, 0, inode, 0,
lockres_locked);
status = 0;
Modified: trunk/src/journal.c
===================================================================
--- trunk/src/journal.c 2004-08-19 20:38:04 UTC (rev 1363)
+++ trunk/src/journal.c 2004-08-19 22:18:00 UTC (rev 1364)
@@ -1023,7 +1023,7 @@
SET_INODE_JOURNAL(inode);
status = ocfs_acquire_lock (osb, OCFS_LKM_EXMODE,
- FLAG_FILE_CREATE, &bh, inode);
+ 0, &bh, inode);
if (status < 0) {
up_write(&OCFS_I(inode)->ip_io_sem);
@@ -1170,8 +1170,7 @@
OCFS_I(inode)->ip_open_cnt--;
/* unlock our journal */
- status = ocfs_release_lock (osb, OCFS_LKM_EXMODE,
- FLAG_FILE_CREATE, inode);
+ status = ocfs_release_lock (osb, OCFS_LKM_EXMODE, 0, inode);
if (status < 0)
LOG_ERROR_STATUS (status);
@@ -1460,8 +1459,7 @@
if (osb->node_num == node_num)
BUG();
- status = ocfs_acquire_lock (osb, OCFS_LKM_EXMODE,
- FLAG_FILE_CREATE|FLAG_FILE_RECOVERY,
+ status = ocfs_acquire_lock (osb, OCFS_LKM_EXMODE, FLAG_FILE_RECOVERY,
&bh, inode);
up_write(&OCFS_I(inode)->ip_io_sem);
@@ -1554,8 +1552,8 @@
/* drop the lock on this nodes journal */
if (got_lock) {
tmpstat = ocfs_release_lock(osb, OCFS_LKM_EXMODE,
- FLAG_FILE_CREATE|FLAG_FILE_RECOVERY,
- inode);
+ FLAG_FILE_RECOVERY,
+ inode);
if (tmpstat < 0)
LOG_ERROR_STATUS(tmpstat);
}
Modified: trunk/src/lockres.c
===================================================================
--- trunk/src/lockres.c 2004-08-19 20:38:04 UTC (rev 1363)
+++ trunk/src/lockres.c 2004-08-19 22:18:00 UTC (rev 1364)
@@ -47,90 +47,93 @@
#define OCFS_DEBUG_CONTEXT OCFS_DEBUG_CONTEXT_LOCKRES
-
-
/*
* ocfs_update_lockres()
*
* @osb: ocfs super block for the volume
* @fe: corresponding file entry
- * @updated: set to 1 if lockres is refreshed from disk
*
* the lockres is refreshed from the disk.
*
* Returns 0 if success, < 0 if error.
*/
-int ocfs_update_lockres(ocfs_super *osb, struct buffer_head **bh,
- __u32 *updated, __u32 timeout,
- struct inode *inode, int reread, int locked)
+int ocfs_update_lockres(ocfs_super *osb, struct buffer_head *bh,
+ __u32 timeout, struct inode *inode, int reread,
+ int locked)
{
int status = 0;
- struct buffer_head *tmpbh = NULL, **b = NULL;
ocfs2_dinode *fe;
int flags;
+ int drop_bh = 0;
ocfs_lock_res *lockres = GET_INODE_LOCKRES(inode);;
- LOG_ENTRY_ARGS("(0x%p, %llu, 0x%p, 0x%p, 0x%p)\n", osb,
+ LOG_ENTRY_ARGS("(0x%p, %llu, 0x%p, 0x%p)\n", osb,
OCFS_I(inode)->ip_blkno << inode->i_sb->s_blocksize_bits,
- lockres, bh, updated);
+ lockres, bh);
- b = (bh == NULL) ? &tmpbh : bh;
+ /* hey, you can't do that! ;) */
+ if ((!bh) && !reread)
+ BUG();
+ if (!bh)
+ drop_bh = 1;
+
+ if (!locked)
+ status = ocfs_acquire_lockres (lockres, timeout);
+ if (status < 0) {
+ LOG_TRACE_ARGS ("Timedout locking lockres for inode: %llu\n",
+ OCFS_I(inode)->ip_blkno);
+ goto out;
+ }
+
+ /* Behavior for process_vote: if you don't pass a buffer, then
+ * we'll only read if you're not he master. */
+ if ((bh == NULL) && (lockres->master_node_num == osb->node_num))
+ goto out_unlock;
+
if (reread) {
flags = lockres->master_node_num == osb->node_num ?
OCFS_BH_CACHED : 0;
status = ocfs_read_bh(osb,
OCFS_I(inode)->ip_blkno << inode->i_sb->s_blocksize_bits,
- b, flags, inode);
+ &bh, flags, inode);
if (status < 0) {
LOG_ERROR_STATUS (status);
- goto finally;
+ goto out_unlock;
}
}
- if (lockres->master_node_num != osb->node_num || !reread) {
- if (!locked)
- status = ocfs_acquire_lockres (lockres, timeout); // ocfs_file_open ocfs_symlink ocfs_acquire_lock ocfs_process_vote ocfs_dentry_revalidate ocfs_file_write ocfs_file_read ocfs_setattr ocfs_getattr ocfs_direct_IO_get_blocks ocfs_rename
- if (status < 0) {
- LOG_TRACE_ARGS ("Timedout locking lockres for id: %llu\n",
- OCFS_I(inode)->ip_blkno << inode->i_sb->s_blocksize_bits);
- goto finally;
- }
+ fe = (ocfs2_dinode *) bh->b_data;
+ if (!IS_VALID_FILE_ENTRY(fe))
+ BUG();
- fe = (ocfs2_dinode *) (*b)->b_data;
- if (!IS_VALID_FILE_ENTRY(fe))
- BUG();
+ lockres->lock_type = DISK_LOCK(fe)->dl_level;
+ lockres->master_node_num = DISK_LOCK(fe)->dl_master;
- lockres->lock_type = DISK_LOCK(fe)->dl_level;
- lockres->master_node_num = DISK_LOCK(fe)->dl_master;
+ if (lockres->readonly_node != OCFS_INVALID_NODE_NUM &&
+ lockres->readonly_node != lockres->master_node_num) {
+ LOG_ERROR_ARGS("no longer readonly! ronode=%d, master=%d, lockid=%llu\n",
+ lockres->readonly_node, lockres->master_node_num,
+ OCFS_I(inode)->ip_blkno << inode->i_sb->s_blocksize_bits);
+ lockres->readonly_node = OCFS_INVALID_NODE_NUM;
+ }
- if (lockres->readonly_node != OCFS_INVALID_NODE_NUM &&
- lockres->readonly_node != lockres->master_node_num) {
- LOG_ERROR_ARGS("no longer readonly! ronode=%d, master=%d, lockid=%llu\n",
- lockres->readonly_node, lockres->master_node_num,
- OCFS_I(inode)->ip_blkno << inode->i_sb->s_blocksize_bits);
- lockres->readonly_node = OCFS_INVALID_NODE_NUM;
- }
+out_unlock:
+ if (!locked)
+ ocfs_release_lockres (lockres);
- if (!locked)
- ocfs_release_lockres (lockres); // ocfs_file_open ocfs_symlink ocfs_acquire_lock ocfs_process_vote ocfs_dentry_revalidate ocfs_file_write ocfs_file_read ocfs_setattr ocfs_getattr ocfs_direct_IO_get_blocks ocfs_rename
-
- if (updated)
- *updated = 1;
- }
-
-finally:
+out:
if (status < 0) {
if (status != -ETIMEDOUT)
LOG_ERROR_STATUS (status);
}
- if (tmpbh)
- brelse(tmpbh);
+ if (bh && drop_bh)
+ brelse(bh);
LOG_EXIT_STATUS (status);
return status;
-} /* ocfs_find_update_res */
+} /* ocfs_update_lockres */
/*
* ocfs_acquire_lockres()
@@ -201,18 +204,15 @@
LOG_ENTRY_ARGS ("(0x%p, 0x%p)\n", osb, lockres);
- lockres->lock_type = OCFS_LKM_NLMODE;
lockres->master_node_num = OCFS_INVALID_NODE_NUM;
lockres->lock_state = 0;
-
+ lockres->lock_holders = 0;
+ lockres->uncommitted_holders = 0;
+ lockres->lock_type = OCFS_LKM_NLMODE;
init_MUTEX(&lockres->lock_mutex);
-
+ lockres->readonly_node = OCFS_INVALID_NODE_NUM;
ocfs_node_map_init(osb, &lockres->readonly_map);
- lockres->readonly_node = OCFS_INVALID_NODE_NUM;
- lockres->lock_holders = 0;
- lockres->uncommitted_holders = 0;
-
LOG_EXIT ();
return;
} /* ocfs_init_lockres */
Modified: trunk/src/lockres.h
===================================================================
--- trunk/src/lockres.h 2004-08-19 20:38:04 UTC (rev 1363)
+++ trunk/src/lockres.h 2004-08-19 22:18:00 UTC (rev 1364)
@@ -33,8 +33,8 @@
int ocfs_acquire_lockres(ocfs_lock_res *lockres, __u32 timeout);
void ocfs_init_lockres(ocfs_super *osb, struct inode *inode);
void ocfs_release_lockres(ocfs_lock_res *lockres);
-int ocfs_update_lockres(ocfs_super *osb, struct buffer_head **bh,
- __u32 *updated, __u32 timeout, struct inode *inode,
+int ocfs_update_lockres(ocfs_super *osb, struct buffer_head *bh,
+ __u32 timeout, struct inode *inode,
int reread, int locked);
#endif /* OCFS2_LOCKRES_H */
Modified: trunk/src/namei.c
===================================================================
--- trunk/src/namei.c 2004-08-19 20:38:04 UTC (rev 1363)
+++ trunk/src/namei.c 2004-08-19 22:18:00 UTC (rev 1364)
@@ -230,8 +230,7 @@
}
/* lock the parent directory */
- status = ocfs_acquire_lock (osb, OCFS_LKM_EXMODE,
- FLAG_FILE_CREATE | FLAG_DIR,
+ status = ocfs_acquire_lock (osb, OCFS_LKM_EXMODE, FLAG_DIR,
&parent_fe_bh, dir);
if (status < 0) {
if (status != -EINTR)
@@ -240,8 +239,7 @@
}
/* Ok, we got the lock -- we'd better add it to our transaction */
- ocfs_handle_add_lock(handle, OCFS_LKM_EXMODE,
- FLAG_FILE_CREATE | FLAG_DIR, dir);
+ ocfs_handle_add_lock(handle, OCFS_LKM_EXMODE, FLAG_DIR, dir);
/* do the real work now. */
status = ocfs_mknod_locked(osb, dir, dentry, mode, dev,
@@ -267,7 +265,7 @@
ocfs_init_lockres(osb, inode);
- status = ocfs_update_lockres(osb, &new_fe_bh, NULL, 0, inode, 0, 0);
+ status = ocfs_update_lockres(osb, new_fe_bh, 0, inode, 0, 0);
if (S_ISDIR (mode)) {
struct buffer_head *newdirbh = NULL;
int retval = 0;
@@ -574,29 +572,25 @@
down_write(&OCFS_I(dir)->ip_io_sem);
drop_dir_sem = 1;
/* lock the parent directory */
- err = ocfs_acquire_lock (osb, OCFS_LKM_EXMODE,
- FLAG_FILE_CREATE | FLAG_DIR,
+ err = ocfs_acquire_lock (osb, OCFS_LKM_EXMODE, FLAG_DIR,
&parent_fe_bh, dir);
if (err < 0) {
if (err != -EINTR)
LOG_ERROR_STATUS (err);
goto bail;
}
- ocfs_handle_add_lock(handle, OCFS_LKM_EXMODE,
- FLAG_FILE_CREATE|FLAG_DIR, dir);
+ ocfs_handle_add_lock(handle, OCFS_LKM_EXMODE, FLAG_DIR, dir);
down_write(&OCFS_I(inode)->ip_io_sem);
drop_inode_sem = 1;
- err = ocfs_acquire_lock (osb, OCFS_LKM_EXMODE,
- FLAG_FILE_CREATE,
- &fe_bh, inode);
+ err = ocfs_acquire_lock (osb, OCFS_LKM_EXMODE, 0, &fe_bh, inode);
if (err < 0) {
if (err != -EINTR)
LOG_ERROR_STATUS (err);
goto bail;
}
- ocfs_handle_add_lock(handle, OCFS_LKM_EXMODE,
- FLAG_FILE_CREATE|FLAG_FILE_UPDATE_OIN, inode);
+ ocfs_handle_add_lock(handle, OCFS_LKM_EXMODE, FLAG_FILE_UPDATE_OIN,
+ inode);
fe = (ocfs2_dinode *) fe_bh->b_data;
if (fe->i_links_count >= OCFS2_LINK_MAX) {
@@ -699,17 +693,13 @@
goto leave;
}
- /* lock parent directory, yes we use FLAG_FILE_CREATE even
- * though we're deleting ;) */
- status = ocfs_acquire_lock(osb, OCFS_LKM_EXMODE,
- FLAG_FILE_CREATE|FLAG_DIR, &parent_node_bh,
- parentInode);
+ status = ocfs_acquire_lock(osb, OCFS_LKM_EXMODE, FLAG_DIR,
+ &parent_node_bh, parentInode);
if (status < 0) {
LOG_ERROR_STATUS(status);
goto leave;
}
- ocfs_handle_add_lock(handle, OCFS_LKM_EXMODE,
- FLAG_FILE_CREATE|FLAG_DIR, parentInode);
+ ocfs_handle_add_lock(handle, OCFS_LKM_EXMODE, FLAG_DIR, parentInode);
/* this will re-read the directory now with the EXCLUSIVE */
/* lock already held; it will also return the blkno to us */
@@ -961,7 +951,6 @@
__u64 oldfe_lockid;
u64 newfe_blkno;
ocfs_journal_handle *handle = NULL;
- __u32 dir_lock_flags = FLAG_FILE_CREATE | FLAG_DIR;
struct buffer_head *old_dir_bh = NULL;
struct buffer_head *new_dir_bh = NULL;
__u32 oldfe_flags = FLAG_RELEASE_DENTRY | FLAG_FILE_RENAME;
@@ -1019,10 +1008,10 @@
/* if old and new are the same, this'll just do one lock. */
status = ocfs_double_lock(osb, handle,
OCFS_LKM_EXMODE,
- dir_lock_flags,
+ FLAG_DIR,
&old_dir_bh, old_dir,
OCFS_LKM_EXMODE,
- dir_lock_flags,
+ FLAG_DIR,
&new_dir_bh, new_dir);
if (status < 0) {
LOG_ERROR_STATUS(status);
@@ -1483,8 +1472,7 @@
}
/* lock the parent directory */
- status = ocfs_acquire_lock(osb, OCFS_LKM_EXMODE,
- FLAG_FILE_CREATE | FLAG_DIR,
+ status = ocfs_acquire_lock(osb, OCFS_LKM_EXMODE, FLAG_DIR,
&parent_fe_bh, dir);
if (status < 0) {
if (status != -EINTR)
@@ -1492,8 +1480,7 @@
goto abort_trans;
}
- ocfs_handle_add_lock(handle, OCFS_LKM_EXMODE,
- FLAG_FILE_CREATE|FLAG_DIR, dir);
+ ocfs_handle_add_lock(handle, OCFS_LKM_EXMODE, FLAG_DIR, dir);
status = ocfs_mknod_locked(osb, dir, dentry,
S_IFLNK | S_IRWXUGO, 0,
@@ -1516,6 +1503,9 @@
ocfs_inode_set_new(osb, inode);
ocfs_init_lockres(osb, inode);
+ status = ocfs_update_lockres(osb, new_fe_bh, 0, inode, 0, 0);
+ if (status < 0)
+ LOG_ERROR_STATUS(status);
status = ocfs_extend_file(osb, newsize, handle, inode, NULL, 0,
new_fe_bh);
@@ -1951,15 +1941,14 @@
ocfs_handle_add_inode(handle, orphan_dir_inode);
/* disk lock orphan dir here. */
- status = ocfs_acquire_lock(osb, OCFS_LKM_EXMODE,
- FLAG_FILE_CREATE | FLAG_DIR,
+ status = ocfs_acquire_lock(osb, OCFS_LKM_EXMODE, FLAG_DIR,
&orphan_dir_bh, orphan_dir_inode);
if (status < 0) {
LOG_ERROR_STATUS(status);
goto leave;
}
- ocfs_handle_add_lock(handle, OCFS_LKM_EXMODE,
- FLAG_FILE_CREATE | FLAG_DIR, orphan_dir_inode);
+ ocfs_handle_add_lock(handle, OCFS_LKM_EXMODE, FLAG_DIR,
+ orphan_dir_inode);
status = __ocfs_add_entry(handle, orphan_dir_inode, name, namelen,
inode, OCFS_I(inode)->ip_blkno,
Modified: trunk/src/nm.c
===================================================================
--- trunk/src/nm.c 2004-08-19 20:38:04 UTC (rev 1363)
+++ trunk/src/nm.c 2004-08-19 22:18:00 UTC (rev 1364)
@@ -79,6 +79,8 @@
__u32 node_num, u32 num_rel);
static void ocfs_process_vote_worker(void *val);
+static int ocfs_process_inode_delete(struct inode *inode);
+static void ocfs_commit_inode_delete(struct inode *inode);
#ifdef VERBOSE_PROCESS_VOTE
static const char *process_vote_strings[] = {
@@ -86,7 +88,6 @@
"UPDATE_OIN_INODE", // update both oin and inode
"DELETE_ACQUIRE",// delete or rename request
"DELETE_RELEASE",// delete or rename release request
- "RELEASE_CACHE", // release a cache lock I hold
"CHANGE_MASTER", // request to change master to requestor
"NOT_MASTER", // I am not master, retry
"REMASTER_THIS", // remaster lock to me
@@ -451,7 +452,6 @@
return 0;
} /* ocfs_volume_thread */
-
static inline int get_process_vote_action(ocfs_super * osb, ocfs_lock_res *lockres, __u32 node_num,
__u32 flags, int status, int *master_alive, struct inode *inode)
{
@@ -513,9 +513,7 @@
vote_type = DELETE_ACQUIRE;
else
vote_type = INVALID_REQUEST;
- } else if (flags & FLAG_FILE_RELEASE_CACHE)
- vote_type = RELEASE_CACHE;
- else if (flags & FLAG_FILE_UPDATE_OIN) {
+ } else if (flags & FLAG_FILE_UPDATE_OIN) {
if ((flags & FLAG_FILE_TRUNCATE) &&
(flags & FLAG_ACQUIRE_LOCK))
vote_type = TRUNCATE_PAGES;
@@ -540,8 +538,11 @@
}
if (inode == NULL &&
- (vote_type != DELETE_RELEASE && vote_type != TRUNCATE_PAGES))
+ (vote_type != DELETE_RELEASE && vote_type != TRUNCATE_PAGES)) {
+ printk("inode is null and it's not a delete release or a truncate pages!\n");
vote_type = INVALID_REQUEST;
+ }
+
done:
LOG_EXIT_STATUS(vote_type);
return vote_type;
@@ -732,6 +733,61 @@
#endif
+static int ocfs_process_inode_delete(struct inode *inode)
+{
+ int status;
+
+ LOG_TRACE_ARGS("DELETE vote on inode %lu, read "
+ "lnk_cnt = %u\n", inode->i_ino,
+ inode->i_nlink);
+
+ /* force this as ours may be out of date. */
+ inode->i_nlink = 0;
+
+ down (&(OCFS_I(inode)->ip_sem));
+ /* vote no if the file is still open. */
+ if (OCFS_I(inode)->ip_open_cnt > 0) {
+#ifdef VERBOSE_PROCESS_VOTE
+ printk("process_vote: open count = %u\n",
+ OCFS_I(inode)->ip_open_cnt);
+#endif
+ up(&(OCFS_I(inode)->ip_sem));
+ status = 0;
+ goto done;
+ }
+ up(&(OCFS_I(inode)->ip_sem));
+
+ /* vote no if someone's extending it. */
+ spin_lock(&oin_num_ext_lock);
+ if (OCFS_I(inode)->ip_num_extends) {
+ spin_unlock(&oin_num_ext_lock);
+#ifdef VERBOSE_PROCESS_VOTE
+ printk("process_vote: extends pending\n");
+#endif
+ status = 0;
+ goto done;
+ }
+ spin_unlock(&oin_num_ext_lock);
+
+ status = 1;
+done:
+ return(status);
+}
+
+static void ocfs_commit_inode_delete(struct inode *inode)
+{
+ SET_INODE_DELETED(inode);
+ /* if we vote yes, then we set the SKIP_DELETE
+ * flag on the inode so we don't try to delete
+ * it in delete_inode ourselves. */
+ OCFS_SET_FLAG(OCFS_I(inode)->ip_flags,
+ OCFS_INODE_SKIP_DELETE);
+
+ d_prune_aliases (inode);
+ sync_mapping_buffers(inode->i_mapping);
+ ocfs_truncate_inode_pages(inode, 0);
+}
+
/*
* ocfs_process_vote()
*
@@ -752,13 +808,14 @@
struct buffer_head *fe_bh = NULL, *vote_bh = NULL;
int vote_type = INVALID_REQUEST, vote_response = 0;
struct inode *inode = NULL;
- int master_alive = 1, is_dir = 0;
- int is_locked, open_handle;
+ int master_alive = 1;
+ int open_handle = 0;
int lockflags = 0;
int inc_inode_seq = 0;
int disk_vote = (ctxt->request_method == DISK_VOTE);
int comm_vote = (ctxt->request_method == COMM_VOTE);
int have_io_sem = 0;
+ int change_master_succeeded = 0;
ocfs_publish *publish = (disk_vote ? ctxt->u.publish : NULL);
ocfs_dlm_msg *dlm_msg = (comm_vote ? ctxt->u.dlm_msg : NULL);
__s16 node_num = ctxt->node_num;
@@ -809,6 +866,10 @@
}
}
+ /* if we timeout on any of the locks, we want to send a retry
+ * instead of letting the other guy's network timeout. */
+ vote_response = FLAG_VOTE_UPDATE_RETRY;
+
/* delete / rename is slightly different -- we don't want to
* look up the inode in the release case -- it should already
* be gone. Eventually what we'll really want to do is get it
@@ -832,7 +893,7 @@
"node=%u, seq=%llu, flags=0x%x\n",
lock_id, node_num, seq_num, flags);
LOG_ERROR_STATUS(status);
- goto leave;
+ goto vote;
}
/* ahh, so you find yourself asking "what the
@@ -841,7 +902,7 @@
osb->voting_ino = inode->i_ino;
#ifdef PROCESS_VOTE_TRYLOCK
- if (flags & (FLAG_FILE_RELEASE_CACHE|FLAG_CHANGE_MASTER)) {
+ if (flags & FLAG_CHANGE_MASTER) {
if (ocfs_io_sem_write_trylock(inode,
(OCFS_NM_HEARTBEAT_TIME/2))) {
LOG_ERROR_ARGS("Could not get i/o lock on "
@@ -851,33 +912,34 @@
have_io_sem = 1;
}
#else
- if (flags & (FLAG_FILE_RELEASE_CACHE|FLAG_CHANGE_MASTER)) {
+ if (flags & FLAG_CHANGE_MASTER) {
down_write(&OCFS_I(inode)->ip_io_sem);
have_io_sem = 1;
}
#endif
lockres = GET_INODE_LOCKRES(inode);
- status = ocfs_update_lockres (osb, NULL, NULL,
- (OCFS_NM_HEARTBEAT_TIME/2),
- inode, 1, 0);
- if (status < 0) {
- if (status != -ETIMEDOUT)
- LOG_ERROR_STATUS (status);
- goto leave;
- }
status = ocfs_acquire_lockres (lockres, (OCFS_NM_HEARTBEAT_TIME/2)); // ocfs_process_vote
if (status < 0) {
+ lockres = NULL;
LOG_TRACE_ARGS("Timedout locking lockres for id: %llu\n",
OCFS_I(inode)->ip_blkno << inode->i_sb->s_blocksize_bits);
+ goto vote;
+ }
+
+ /* since we pass a NULL bh, this'll only do a read if
+ * we're not the master. */
+ status = ocfs_update_lockres (osb, NULL,
+ (OCFS_NM_HEARTBEAT_TIME/2),
+ inode, 1, 1);
+ if (status < 0) {
+ if (status != -ETIMEDOUT)
+ LOG_ERROR_STATUS (status);
goto leave;
}
}
- lockflags = (OCFS_I(inode)->ip_flags & OCFS_INODE_SYSTEM_FILE)
- ? 0 : OCFS_BH_CACHED;
-
no_inode_ok:
/* fail here if no inode, unless this is a delete/rename release */
vote_type = get_process_vote_action(osb, lockres, node_num, flags,
@@ -889,7 +951,8 @@
(flags & FLAG_ACQUIRE_LOCK ? "ACQUIRE" : "MODIFY"), lock_id,
vote_type, process_vote_strings[vote_type], disk_vote ? "disk vote" : "net vote", num_ident);
if (vote_type == INVALID_REQUEST)
- printk("Invalid request! flags = 0x%x\n", flags);
+ printk("Invalid request! flags = 0x%x master=%d, level=%d\n",
+ flags, lockres->master_node_num, lockres->lock_state);
#endif
/* get_process_vote_action will only allow CHANGE_MASTER and
@@ -912,6 +975,134 @@
}
vote_response = 0;
+
+ /* some lock requests need to be processed before a possible
+ * change master. Beware however that the change_master might
+ * very well send a no vote, so you can't do things here that
+ * cannot be rolled back. */
+ switch (vote_type) {
+ case DELETE_ACQUIRE:
+ LOG_TRACE_STR("DELETE_ACQUIRE (part one)");
+ if (!ocfs_process_inode_delete(inode))
+ vote_response = FLAG_VOTE_OIN_ALREADY_INUSE;
+ else
+ vote_response = FLAG_VOTE_NODE;
+ break;
+ case INVALID_REQUEST:
+ /* we catch INVALID_REQUEST up here now as we
+ * don't want to do a change_master on a
+ * messed up vote... */
+ LOG_TRACE_STR("INVALID_REQUEST");
+ goto vote;
+ default:
+ break;
+ }
+
+ if (vote_response > FLAG_VOTE_NODE) {
+ /* we shouldn't even get to the other cases. */
+ goto vote;
+ }
+
+ if ((lockres->master_node_num == osb->node_num)
+ && (flags & FLAG_CHANGE_MASTER)) {
+ LOG_TRACE_STR("CHANGE_MASTER");
+
+ status = -EFAIL;
+#ifdef VERBOSE_PROCESS_VOTE
+ printk("process_vote: doing CHANGE_MASTER for this request\n");
+#endif
+ /* requestor will need to retry if anyone is using the
+ * lockres */
+ if (lockres->lock_holders > 0) {
+#ifdef VERBOSE_PROCESS_VOTE
+ printk("process_vote: Lock id (%llu) has %u "
+ "holders\n",
+ OCFS_I(inode)->ip_blkno << inode->i_sb->s_blocksize_bits,
+ lockres->lock_holders);
+#endif
+ // kick the commit thread
+ atomic_set(&osb->flush_event_woken, 1);
+ wake_up(&osb->flush_event);
+
+ vote_response = FLAG_VOTE_UPDATE_RETRY;
+ status = 0;
+ goto vote;
+ }
+
+ if (!have_io_sem) {
+#ifdef VERBOSE_PROCESS_VOTE
+ printk("process_vote: We haven't got io_sem on"
+ " Lock id (%llu)\n",
+ OCFS_I(inode)->ip_blkno << inode->i_sb->s_blocksize_bits);
+#endif
+ vote_response = FLAG_VOTE_UPDATE_RETRY;
+ status = 0;
+ goto vote;
+ }
+
+ /* this is currently a readonly cache lock.
+ * need to communicate to all the nodes in the
+ * map that lock will be changing to RW before we
+ * continue. RETRY this request while we spawn
+ * off a thread to collect up the communication */
+ if (!ocfs_node_map_is_empty(&lockres->readonly_map)) {
+ // assumption: node asking for vote has already dropped readonly_node
+ ocfs_node_map_clear_bit(&lockres->readonly_map, node_num);
+ // should not be in there, but...
+ ocfs_node_map_clear_bit(&lockres->readonly_map, osb->node_num);
+ if (!ocfs_node_map_is_empty(&lockres->readonly_map)) {
+ OCFS_ASSERT(lockres->readonly_node == osb->node_num);
+ OCFS_ASSERT(inode);
+ status = ocfs_drop_readonly_cache_lock(osb, inode, 1);
+ if (status < 0)
+ LOG_ERROR_STATUS(status);
+#ifdef VERBOSE_PROCESS_VOTE
+ printk("process_vote: node map is not "
+ "empty on readonly drop "
+ "request\n");
+#endif
+ vote_response = FLAG_VOTE_UPDATE_RETRY;
+ goto vote;
+ }
+ // no one left in map, so continue
+ lockres->readonly_node = OCFS_INVALID_NODE_NUM;
+ }
+
+ sync_mapping_buffers(inode->i_mapping);
+
+#warning do we need a truncate_inode_pages here?
+ lockflags = lockres->master_node_num == osb->node_num ?
+ OCFS_BH_CACHED : 0;
+ status = ocfs_read_bh(osb, lock_id, &fe_bh, lockflags, inode);
+ if (status < 0) {
+ LOG_ERROR_STATUS (status);
+ goto leave;
+ }
+
+ fe = (ocfs2_dinode *) fe_bh->b_data;
+ DISK_LOCK(fe)->dl_master = node_num;
+ DISK_LOCK(fe)->dl_level = OCFS_LKM_NLMODE;
+
+ /* need to do the write only if fe lock values
+ * need to change */
+ status = ocfs_write_bh(osb, fe_bh, 0, inode);
+ if (status < 0) {
+ LOG_ERROR_STATUS (status);
+ brelse(fe_bh);
+ goto leave;
+ }
+ lockres->master_node_num = node_num;
+ lockres->lock_type = OCFS_LKM_NLMODE;
+
+ brelse(fe_bh);
+ vote_response = FLAG_VOTE_NODE;
+ inc_inode_seq = 1;
+ status = 0;
+ change_master_succeeded = 1;
+ }
+
+ /* Below here, we can't have any of these cases failing if
+ * there was a successful change master request. */
open_handle = 0;
switch (vote_type) {
case UPDATE_OIN_INODE:
@@ -953,54 +1144,14 @@
break;
case DELETE_ACQUIRE:
- LOG_TRACE_STR("DELETE_ACQUIRE");
+ LOG_TRACE_STR("DELETE_ACQUIRE (part two)");
+ /* If we got this far, then we assume we've
+ * done the 1st part of the DELETE_ACQUIRE
+ * case and we just have to commit it. */
+ if (vote_response != FLAG_VOTE_NODE)
+ BUG();
- LOG_TRACE_ARGS("DELETE vote on inode %lu, read "
- "lnk_cnt = %u\n", inode->i_ino,
- inode->i_nlink);
-
- /* force this as ours may be out of date. */
- inode->i_nlink = 0;
-
- down (&(OCFS_I(inode)->ip_sem));
- /* vote no if the file is still open. */
- if (OCFS_I(inode)->ip_open_cnt > 0) {
-#ifdef VERBOSE_PROCESS_VOTE
- printk("process_vote: open count = %u\n",
- OCFS_I(inode)->ip_open_cnt);
-#endif
- vote_response = FLAG_VOTE_OIN_ALREADY_INUSE;
- up(&(OCFS_I(inode)->ip_sem));
- break;
- }
- up(&(OCFS_I(inode)->ip_sem));
-
- /* vote no if someone's extending it. */
- spin_lock(&oin_num_ext_lock);
- if (OCFS_I(inode)->ip_num_extends) {
- spin_unlock(&oin_num_ext_lock);
-#ifdef VERBOSE_PROCESS_VOTE
- printk("process_vote: extends pending\n");
-#endif
- vote_response = FLAG_VOTE_OIN_ALREADY_INUSE;
- break;
- }
- spin_unlock(&oin_num_ext_lock);
-
- vote_response = FLAG_VOTE_NODE;
-
- SET_INODE_DELETED(inode);
- /* if we vote yes, then we set the SKIP_DELETE
- * flag on the inode so we don't try to delete
- * it in delete_inode ourselves. */
- OCFS_SET_FLAG(OCFS_I(inode)->ip_flags,
- OCFS_INODE_SKIP_DELETE);
-
- inode->i_nlink = 0;
- d_prune_aliases (inode);
- sync_mapping_buffers(inode->i_mapping);
- ocfs_truncate_inode_pages(inode, 0);
- /* Alright, for the YES case, we're done here. */
+ ocfs_commit_inode_delete(inode);
break;
case READONLY:
@@ -1034,126 +1185,10 @@
status = 0;
break;
- case RELEASE_CACHE:
case CHANGE_MASTER:
-#warning nm thread could get this too early
- if (vote_type == RELEASE_CACHE)
- LOG_TRACE_STR("RELEASE_CACHE");
- else
- LOG_TRACE_STR("CHANGE_MASTER");
+ /* we dealt with this all above. */
+ break;
- status = -EFAIL;
-
- /* requestor will need to retry if anyone is using the lockres */
- if (lockres->lock_holders > 0) {
-#ifdef VERBOSE_PROCESS_VOTE
- printk("process_vote: Lock id (%llu) has %u "
- "holders\n",
- OCFS_I(inode)->ip_blkno << inode->i_sb->s_blocksize_bits,
- lockres->lock_holders);
-#endif
- // kick the commit thread
- atomic_set(&osb->flush_event_woken, 1);
- wake_up(&osb->flush_event);
-
- vote_response = FLAG_VOTE_UPDATE_RETRY;
- status = 0;
- break;
- }
-
- if (!have_io_sem) {
-#ifdef VERBOSE_PROCESS_VOTE
- printk("process_vote: We haven't got io_sem on"
- " Lock id (%llu)\n",
- OCFS_I(inode)->ip_blkno << inode->i_sb->s_blocksize_bits);
-#endif
- vote_response = FLAG_VOTE_UPDATE_RETRY;
- status = 0;
- break;
- }
-
- /* this is currently a readonly cache lock.
- * need to communicate to all the nodes in the
- * map that lock will be changing to RW before we
- * continue. RETRY this request while we spawn
- * off a thread to collect up the communication */
- if (!ocfs_node_map_is_empty(&lockres->readonly_map)) {
- // assumption: node asking for vote has already dropped readonly_node
- ocfs_node_map_clear_bit(&lockres->readonly_map, node_num);
- // should not be in there, but...
- ocfs_node_map_clear_bit(&lockres->readonly_map, osb->node_num);
- if (!ocfs_node_map_is_empty(&lockres->readonly_map)) {
- OCFS_ASSERT(lockres->readonly_node == osb->node_num);
- OCFS_ASSERT(inode);
- status = ocfs_drop_readonly_cache_lock(osb, inode, 1);
- if (status < 0)
- LOG_ERROR_STATUS(status);
-#ifdef VERBOSE_PROCESS_VOTE
- printk("process_vote: node map is not "
- "empty on readonly drop "
- "request\n");
-#endif
- vote_response = FLAG_VOTE_UPDATE_RETRY;
- break;
- }
- // noone left in map, so continue
- lockres->readonly_node = OCFS_INVALID_NODE_NUM;
- }
-
- sync_mapping_buffers(inode->i_mapping);
-
-#warning do we need a truncate_inode_pages here?
-
- /* nobody currently owns the lock so fastpath it */
- if (vote_type == CHANGE_MASTER)
- lockres->master_node_num = node_num;
-
-// if (oin != NULL) {
- lockres->lock_type = OCFS_LKM_NLMODE;
-// }
-
- status = ocfs_read_bh(osb, lock_id, &fe_bh, lockflags, inode);
- if (status < 0) {
- LOG_ERROR_STATUS (status);
- break;
- }
-
- fe = (ocfs2_dinode *) fe_bh->b_data;
- is_dir = S_ISDIR(fe->i_mode);
- is_locked = DISK_LOCK(fe)->dl_level > OCFS_LKM_NLMODE;
- if (vote_type == CHANGE_MASTER) {
- fe = (ocfs2_dinode *) fe_bh->b_data;
- DISK_LOCK(fe)->dl_master = node_num;
- }
-
- if (is_dir && vote_type == RELEASE_CACHE)
- ocfs_inc_inode_seq(osb, inode);
-
- /* need to do the write only if fe lock values need to change */
- if (is_locked || vote_type == CHANGE_MASTER) {
- if (vote_type == RELEASE_CACHE) {
- LOG_TRACE_STR("release cache vote, setting to NO_LOCK");
- fe = (ocfs2_dinode *) fe_bh->b_data;
- DISK_LOCK(fe)->dl_level = OCFS_LKM_NLMODE;
- }
- status = ocfs_write_bh(osb, fe_bh, 0, inode);
- if (status < 0) {
- LOG_ERROR_STATUS (status);
- brelse(fe_bh);
- break;
- }
- if (vote_type == RELEASE_CACHE) {
- lockres->lock_type = OCFS_LKM_NLMODE;
- }
- else // CHANGE_MASTER
- lockres->master_node_num = node_num;
- }
- brelse(fe_bh);
- vote_response = FLAG_VOTE_NODE;
- inc_inode_seq = 1;
- status = 0;
- break;
-
case DROP_READONLY:
/* TODO: may need locking in here to lock out
* the actual IO that a readdir may have in
@@ -1203,7 +1238,7 @@
LOG_TRACE_STR("REMASTER_REQUESTOR");
vote_response = FLAG_VOTE_NODE;
if (inode && OCFS_I(inode)->ip_open_cnt
- && flags & (FLAG_FILE_EXTEND|FLAG_FILE_UPDATE)
+ && flags & FLAG_FILE_EXTEND
&& !(flags & FLAG_DIR))
open_handle = 1;
break;
@@ -1232,6 +1267,10 @@
break;
}
+ if (change_master_succeeded &&
+ ((vote_response == 0) || (vote_response > FLAG_VOTE_OIN_UPDATED)))
+ BUG();
+
if(have_io_sem && inode)
up_write(&OCFS_I(inode)->ip_io_sem);
have_io_sem = 0;
@@ -1255,6 +1294,7 @@
}
}
+vote:
if (disk_vote) {
vote = (ocfs_vote *) vote_bh->b_data;
vote->lock_id = lock_id;
@@ -1288,10 +1328,12 @@
}
}
- if (lockres)
+leave:
+ if (lockres) {
ocfs_release_lockres (lockres); // ocfs_process_vote
+ lockres = NULL;
+ }
-leave:
if (!inode)
goto no_inode_leave;
Modified: trunk/src/ocfs.h
===================================================================
--- trunk/src/ocfs.h 2004-08-19 20:38:04 UTC (rev 1363)
+++ trunk/src/ocfs.h 2004-08-19 22:18:00 UTC (rev 1364)
@@ -81,7 +81,6 @@
UPDATE_OIN_INODE, // update both oin and inode
DELETE_ACQUIRE,// delete or rename acquire request
DELETE_RELEASE,// delete or rename release request
- RELEASE_CACHE, // release a cache lock I hold
CHANGE_MASTER, // request to change master to requestor
NOT_MASTER, // I am not master, retry
REMASTER_THIS, // remaster lock to me
@@ -120,41 +119,39 @@
** by ocfs_create_modify_file
*/
// FILEFLAG MASK
-#define FLAG_FILE_CREATE 0x00000001
-#define FLAG_FILE_EXTEND 0x00000002
-#define FLAG_FILE_DELETE 0x00000004
-#define FLAG_FILE_RENAME 0x00000008
-#define FLAG_FILE_UPDATE 0x00000010
+#define FLAG_ACQUIRE_LOCK 0x00000001
+#define FLAG_RELEASE_LOCK 0x00000002
+#define FLAG_FILE_EXTEND 0x00000004
+#define FLAG_FILE_DELETE 0x00000008
+#define FLAG_FILE_RENAME 0x00000010
#define FLAG_FILE_RECOVERY 0x00000020
-#define FLAG_FILE_CREATE_DIR 0x00000040
-#define FLAG_FILE_UPDATE_OIN 0x00000080
-#define FLAG_FILE_RELEASE_MASTER 0x00000100
-#define FLAG_RELEASE_DENTRY 0x00000200
-#define FLAG_CHANGE_MASTER 0x00000400
-#define FLAG_FILE_UNUSED5 0x00000800
-#define FLAG_DIR 0x00001000
-#define FLAG_REMASTER 0x00002000
-#define FLAG_FAST_PATH_LOCK 0x00004000
-#define FLAG_TRUNCATE_PAGES 0x00008000
-#define FLAG_FILE_UNUSED6 0x00010000
-//#define FLAG_DEL_NAME 0x00020000
-//#define FLAG_DEL_INODE 0x00040000
-#define FLAG_FILE_UNUSED7 0x00080000
-#define FLAG_FILE_UNUSED8 0x00100000
-#define FLAG_FILE_UNUSED9 0x00200000
-#define FLAG_FILE_RELEASE_CACHE 0x00400000
-#define FLAG_FILE_UNUSED10 0x00800000
-#define FLAG_FILE_UNUSED11 0x01000000
-#define FLAG_FILE_UNUSED12 0x02000000
-#define FLAG_FILE_UNUSED13 0x04000000
-#define FLAG_FILE_TRUNCATE 0x08000000
-#define FLAG_DROP_READONLY 0x10000000
-#define FLAG_READDIR 0x20000000
-#define FLAG_ACQUIRE_LOCK 0x40000000
-#define FLAG_RELEASE_LOCK 0x80000000
-
+#define FLAG_FILE_UPDATE_OIN 0x00000040
+#define FLAG_RELEASE_DENTRY 0x00000080
+#define FLAG_CHANGE_MASTER 0x00000100
+#define FLAG_DIR 0x00000200
+#define FLAG_REMASTER 0x00000400
+#define FLAG_FAST_PATH_LOCK 0x00000800
+#define FLAG_TRUNCATE_PAGES 0x00001000
+#define FLAG_FILE_TRUNCATE 0x00002000
+#define FLAG_DROP_READONLY 0x00004000
+#define FLAG_READDIR 0x00008000
+#define FLAG_FILE_UNUSED01 0x00010000
+#define FLAG_FILE_UNUSED02 0x00020000
+#define FLAG_FILE_UNUSED03 0x00040000
+#define FLAG_FILE_UNUSED04 0x00080000
+#define FLAG_FILE_UNUSED05 0x00100000
+#define FLAG_FILE_UNUSED06 0x00200000
+#define FLAG_FILE_UNUSED07 0x00400000
+#define FLAG_FILE_UNUSED08 0x00800000
+#define FLAG_FILE_UNUSED09 0x01000000
+#define FLAG_FILE_UNUSED10 0x02000000
+#define FLAG_FILE_UNUSED11 0x04000000
+#define FLAG_FILE_UNUSED12 0x08000000
+#define FLAG_FILE_UNUSED13 0x10000000
+#define FLAG_FILE_UNUSED14 0x20000000
+#define FLAG_FILE_UNUSED15 0x40000000
+#define FLAG_FILE_UNUSED16 0x80000000
-
#define OCFS_MAX_OSB_ID 65536
Modified: trunk/src/ocfs_journal.h
===================================================================
--- trunk/src/ocfs_journal.h 2004-08-19 20:38:04 UTC (rev 1363)
+++ trunk/src/ocfs_journal.h 2004-08-19 22:18:00 UTC (rev 1364)
@@ -119,16 +119,23 @@
/* convenience function to check if an inode has been checkpointed
* yet. Replaces ocfs_journal_new_file_search. Will do you a favor and
* set created_trans = 0 when you've been checkpointed.
- * returns '0' if the inode hasn't been checkpointed yet, 1 otherwise
+ * returns '1' if the inode hasn't been checkpointed yet, '0' if it has.
+ * System files are never considered new, so they always return '0'.
*/
static inline int ocfs_inode_is_new(ocfs_super *osb,
struct inode *inode)
{
int ret;
+
+ /* System files are never "new" as they're written out by
+ * mkfs. This helps us early during mount, before we have the
+ * journal open and trans_id could be junk. */
+ if (OCFS_I(inode)->ip_flags & OCFS_INODE_SYSTEM_FILE)
+ return(0);
spin_lock(&trans_inc_lock);
- ret = time_after(osb->journal->trans_id,
- OCFS_I(inode)->ip_created_trans);
- if (ret)
+ ret = !(time_after(osb->journal->trans_id,
+ OCFS_I(inode)->ip_created_trans));
+ if (!ret)
OCFS_I(inode)->ip_created_trans = 0;
spin_unlock(&trans_inc_lock);
return ret;
Modified: trunk/src/vote.c
===================================================================
--- trunk/src/vote.c 2004-08-19 20:38:04 UTC (rev 1363)
+++ trunk/src/vote.c 2004-08-19 22:18:00 UTC (rev 1364)
@@ -898,7 +898,7 @@
spin_lock (&obj->lock);
if (obj->vote_status >= 0 && obj->vote_state == VOTE_OBJ_STATE_FULL_REPLY) {
LOG_TRACE_ARGS ("OK vote, lockid=%llu\n", lock_id);
- } else if (obj->vote_status != -EAGAIN || obj->vote_state != VOTE_OBJ_STATE_FULL_REPLY) {
+ } else if ((obj->vote_status != -EAGAIN && obj->vote_status != -EBUSY) || obj->vote_state != VOTE_OBJ_STATE_FULL_REPLY) {
#warning "should we even be erroring here at all!"
LOG_ERROR_ARGS("inode %llu, vote_status=%d, vote_state=%d, "
"lockid=%llu, flags = 0x%x, asked type = %u "
More information about the Ocfs2-commits
mailing list