[Ocfs2-commits] khackel commits r1441 - trunk/src
svn-commits at oss.oracle.com
svn-commits at oss.oracle.com
Thu Sep 9 03:03:28 CDT 2004
Author: khackel
Date: 2004-09-09 03:03:26 -0500 (Thu, 09 Sep 2004)
New Revision: 1441
Modified:
trunk/src/dlm.c
trunk/src/inode.c
trunk/src/journal.c
trunk/src/lockres.c
trunk/src/lockres.h
trunk/src/nm.c
trunk/src/nm.h
trunk/src/ocfs.h
trunk/src/ocfs_log.h
trunk/src/vote.c
Log:
Changes the lockres lock from a mutex to an rwsem. Any use of the lockres that does not modify it should be changed to take the read lock, for performance.
Modified: trunk/src/dlm.c
===================================================================
--- trunk/src/dlm.c 2004-09-09 01:45:20 UTC (rev 1440)
+++ trunk/src/dlm.c 2004-09-09 08:03:26 UTC (rev 1441)
@@ -129,14 +129,14 @@
OCFS_I(inode)->ip_blkno, message_flags);
while (status == -EAGAIN) {
- ocfs_acquire_lockres(lockres, 0); // ocfs_file_open
+ ocfs_acquire_lockres_write(inode);
flags = message_flags;
if (ocfs_inode_is_new(osb, inode))
flags |= FLAG_FAST_PATH_LOCK;
if (ocfs_task_interruptible ()) {
- ocfs_release_lockres (lockres);
+ ocfs_release_lockres_write (inode);
LOG_TRACE_ARGS("interrupted... inode = %llu\n",
OCFS_I(inode)->ip_blkno);
status = -EINTR;
@@ -148,7 +148,7 @@
if (status < 0) {
if (status != -EAGAIN)
LOG_ERROR_STATUS (status);
- ocfs_release_lockres (lockres); // ocfs_file_open ocfs_symlink
+ ocfs_release_lockres_write (inode); // ocfs_file_open ocfs_symlink
if (status == -EAGAIN || status == -ETIMEDOUT) {
ocfs_sleep (50);
status = -EAGAIN;
@@ -157,7 +157,7 @@
goto bail;
}
- ocfs_release_lockres (lockres); // ocfs_file_open
+ ocfs_release_lockres_write (inode); // ocfs_file_open
}
bail:
LOG_EXIT_STATUS (status);
@@ -222,7 +222,7 @@
}
updated = 0;
- ocfs_acquire_lockres (lockres, 0); // ocfs_acquire_lock
+ ocfs_acquire_lockres_write (inode);
again:
LOG_TRACE_ARGS("attempting to get lock, pass: %d\n", ++k);
@@ -230,7 +230,7 @@
if (!updated) {
status = ocfs_update_lockres(osb, *bh, inode, 1);
if (status < 0) {
- ocfs_release_lockres (lockres);
+ ocfs_release_lockres_write (inode);
LOG_ERROR_STATUS (status);
goto finally;
}
@@ -248,7 +248,7 @@
lockres->master_node_num, no_owner?"no":"yes",
OCFS_I(inode)->ip_blkno << inode->i_sb->s_blocksize_bits);
LOG_ERROR_STATUS (status = -EINVAL);
- ocfs_release_lockres (lockres); // ocfs_acquire_lock
+ ocfs_release_lockres_write (inode); // ocfs_acquire_lock
goto finally;
}
@@ -258,27 +258,32 @@
ocfs_node_is_alive(&osb->publ_map,
lockres->master_node_num));
if ((owner_dead || wait_on_recovery) &&
- lockres->readonly_node == lockres->master_node_num) {
+ test_bit(LOCK_STATE_READONLY, &lockres->readonly_state)) {
// if owner is dead or in recovery and the lockres
- // has the readonly owner set, clear it
- lockres->readonly_node = OCFS_INVALID_NODE_NUM;
+ // has the readonly flag set, clear it
+ clear_bit(LOCK_STATE_READONLY, &lockres->readonly_state);
}
status = 0;
extra_lock_flags = 0;
if (flags & FLAG_READONLY) {
- if (lockres->readonly_node != OCFS_INVALID_NODE_NUM)
- goto skip_lock_write;
- if (lockres->master_node_num == osb->node_num &&
- lockres->lock_type == OCFS_LKM_EXMODE) {
- /* local node is master */
+ if (test_bit(LOCK_STATE_READONLY, &lockres->readonly_state) ||
+ (lockres->master_node_num == osb->node_num &&
+ lockres->lock_type == OCFS_LKM_EXMODE)) {
+ /* already readonly or local node is master */
+ /* THIS node will see it as readonly, but OTHER
+ * nodes will have to wait until lock_holders drops
+ * to 0 (to finish journal flush on this inode) */
+ if (!test_bit(LOCK_STATE_READONLY, &lockres->readonly_state)) {
#ifdef VERBOSE_LOCKING_TRACE
- printk("acquire_lock: lockid %llu, setting ronode, was=%d, now=%d, master=%d\n", lock_id,
- lockres->readonly_node, osb->node_num, lockres->master_node_num);
+ printk("acquire_lock: lockid %llu, setting readonly\n",
+ lock_id);
#endif
- lockres->readonly_node = osb->node_num;
- goto skip_lock_write;
+ set_bit(LOCK_STATE_READONLY, &lockres->readonly_state);
+ }
+ ocfs_release_lockres_write (inode);
+ goto finally;
}
if (lockres->master_node_num == OCFS_INVALID_NODE_NUM ||
@@ -295,17 +300,17 @@
goto do_lock;
}
+#warning NEED MORE HANDLING HERE NOW FOR DROPPING LOCAL READONLY!!!
// anything else is NOT a readonly request
- if (lockres->readonly_node != osb->node_num)
- lockres->readonly_node = OCFS_INVALID_NODE_NUM; // clear any owner
+ clear_bit(LOCK_STATE_READONLY, &lockres->readonly_state);
status = ocfs_wait_for_readonly_drop(osb, inode);
if (status < 0) {
if (status == -EAGAIN) {
// the rodrop thread is already running and needs the lockres
- ocfs_release_lockres(lockres); // ocfs_acquire_lock
+ ocfs_release_lockres_write(inode);
ocfs_sleep (OCFS_NM_HEARTBEAT_TIME / 10);
- ocfs_acquire_lockres(lockres, 0); // ocfs_acquire_lock
+ ocfs_acquire_lockres_write(inode);
goto reevaluate;
}
LOG_ERROR_STATUS(status);
@@ -342,10 +347,11 @@
#ifdef VERBOSE_LOCKING_TRACE
printk("acquire_lock: lockid=%llu, this=%d, master=%d, locktype=%d, "
- "flags=%08x, ronode=%d\n",
+ "flags=%08x, readonly=%s\n",
OCFS_I(inode)->ip_blkno << inode->i_sb->s_blocksize_bits,
osb->node_num, lockres->master_node_num, lockres->lock_type,
- flags|extra_lock_flags, lockres->readonly_node);
+ flags|extra_lock_flags,
+ test_bit(LOCK_STATE_READONLY, &lockres->readonly_state) ? "yes" : "no");
#endif
if (wait_on_recovery
&& !((flags|extra_lock_flags) & FLAG_FILE_RECOVERY)) {
@@ -362,7 +368,7 @@
}
if (ocfs_task_interruptible ()) {
- ocfs_release_lockres (lockres);
+ ocfs_release_lockres_write (inode);
LOG_TRACE_ARGS("interrupted... inode %llu\n",
OCFS_I(inode)->ip_blkno);
status = -EINTR;
@@ -371,7 +377,7 @@
status = new_lock_function(osb, lock_type, flags|extra_lock_flags, *bh, inode);
if (status < 0) {
- ocfs_release_lockres (lockres); // ocfs_acquire_lock
+ ocfs_release_lockres_write (inode); // ocfs_acquire_lock
if (status == -EAGAIN || status == -ETIMEDOUT) {
if (status == -ETIMEDOUT)
LOG_ERROR_ARGS("Timed out acquiring lock for "
@@ -380,7 +386,7 @@
OCFS_I(inode)->ip_blkno,
lock_id);
ocfs_sleep (50);
- ocfs_acquire_lockres(lockres, 0);
+ ocfs_acquire_lockres_write(inode);
/* if we're going to jump back up, we want to update
* if we're not the master... */
if (lockres->master_node_num != osb->node_num)
@@ -402,7 +408,7 @@
LOG_TRACE_ARGS("lockres->lock_holders = %u\n", lockres->lock_holders);
LOG_TRACE_ARGS("lockres->uncommitted_holders = %u\n",
lockres->uncommitted_holders);
- ocfs_release_lockres (lockres); // ocfs_acquire_lock
+ ocfs_release_lockres_write (inode); // ocfs_acquire_lock
if (atomic_read(&OCFS_I(inode)->ip_needs_verification)) {
ocfs2_dinode *fe = (ocfs2_dinode *) (*bh)->b_data;
@@ -430,6 +436,7 @@
__u64 lock_id;
__u32 num_to_send;
ocfs_node_map votemap;
+ int lockres_lock_held = NO_LOCK;
LOG_ENTRY_ARGS ("(0x%p, %u, %u, 0x%p)\n",
osb, lock_type, flags, lockres);
@@ -442,25 +449,33 @@
flags |= FLAG_RELEASE_LOCK;
- ocfs_acquire_lockres (lockres, 0); // ocfs_release_lock
+ ocfs_acquire_lockres_read (inode);
+ lockres_lock_held = READ_LOCK;
if ((lockres->lock_type == OCFS_LKM_EXMODE) &&
(lockres->master_node_num == osb->node_num) &&
- !(flags & FLAG_FILE_EXTEND) && !(FLAG_FILE_TRUNCATE)) {
+ !(flags & (FLAG_FILE_EXTEND|FLAG_FILE_TRUNCATE))) {
status = 0;
goto finally;
}
if (flags & FLAG_READONLY) {
if (lockres->lock_type != OCFS_LKM_EXMODE ||
- lockres->master_node_num != lockres->readonly_node ||
- lockres->master_node_num == OCFS_INVALID_NODE_NUM)
- LOG_ERROR_ARGS("READONLY release has issues! type=%d, master=%d, ronode=%d\n",
- lockres->lock_type, lockres->master_node_num, lockres->readonly_node);
+ lockres->master_node_num == OCFS_INVALID_NODE_NUM ||
+ !(test_bit(LOCK_STATE_READONLY, &lockres->readonly_state))) {
+ LOG_ERROR_ARGS("READONLY release has issues! type=%d, master=%d, readonly=%s\n",
+ lockres->lock_type, lockres->master_node_num,
+ test_bit(LOCK_STATE_READONLY, &lockres->readonly_state) ? "yes" : "no");
+ BUG();
+ }
status = 0;
- goto finally;
+ goto unlock;
}
+ ocfs_release_lockres_read(inode);
+ ocfs_acquire_lockres_write(inode);
+ lockres_lock_held = WRITE_LOCK;
+
OCFS_ASSERT(lockres->uncommitted_holders <= lockres->lock_holders);
num_to_send = num_ident;
@@ -518,9 +533,9 @@
status = -EAGAIN;
LOG_ERROR_ARGS("Timed out releasing lock for inode %llu, retrying...\n", OCFS_I(inode)->ip_blkno);
- ocfs_release_lockres(lockres);
+ ocfs_release_lockres_write(inode);
ocfs_sleep(200);
- ocfs_acquire_lockres(lockres, 0);
+ ocfs_acquire_lockres_write(inode);
continue;
} else
LOG_ERROR_STATUS (status);
@@ -535,8 +550,13 @@
#warning "is this wise, or shouldn't we be retrying the lock release later?"
lockres->lock_holders -= num_ident;
LOG_TRACE_ARGS("lockres->lock_holders = %u\n", lockres->lock_holders);
- ocfs_release_lockres (lockres); // ocfs_release_lock
- LOG_EXIT_STATUS (status);
+
+unlock:
+ if (lockres_lock_held == READ_LOCK)
+ ocfs_release_lockres_read (inode);
+ else if (lockres_lock_held == WRITE_LOCK)
+ ocfs_release_lockres_write (inode);
+ LOG_EXIT_STATUS (status);
return (status);
} /* ocfs_release_lock_full */
@@ -558,15 +578,14 @@
if (flags & FLAG_READONLY) {
if (flags & (FLAG_CHANGE_MASTER | FLAG_REMASTER)) {
- /* there is no readonly_node. treat like normal change master. */
+ /* not currently readonly. treat like normal change master. */
flags &= ~FLAG_READONLY;
}
} else if (flags & FLAG_CHANGE_MASTER) {
- /* non-readonly with CHANGE_MASTER should have no readonly_node */
- if (lockres->readonly_node != OCFS_INVALID_NODE_NUM) {
- LOG_ERROR_ARGS("change_master but readonly_node was %d\n",
- lockres->readonly_node);
- lockres->readonly_node = OCFS_INVALID_NODE_NUM;
+ /* non-readonly with CHANGE_MASTER should have no readonly flag */
+ if (test_bit(LOCK_STATE_READONLY, &lockres->readonly_state)) {
+ LOG_ERROR_ARGS("change_master but currently readonly\n");
+ clear_bit(LOCK_STATE_READONLY, &lockres->readonly_state);
}
}
if (flags & (FLAG_CHANGE_MASTER | FLAG_REMASTER)) {
@@ -646,6 +665,10 @@
/* just alerting owner on open */
if (flags & FLAG_TRUNCATE_PAGES)
goto bail;
+
+ /* converted EX to readonly EX */
+ if (flags & FLAG_READONLY)
+ goto bail;
/* drop readonly should remove anyone who has responded */
if (flags & FLAG_DROP_READONLY) {
@@ -653,12 +676,6 @@
goto bail;
}
- /* converted cachelock to readonly cachelock */
- if (flags & FLAG_READONLY) {
- lockres->readonly_node = lockres->master_node_num;
- goto bail;
- }
-
/* update the disk lock */
if (need_lock_write) {
lockres->lock_type = requested_lock;
@@ -683,9 +700,10 @@
}
bail:
- /* if we removed FLAG_READONLY above, set the readonly_node now */
- if (is_readonly && !(flags & FLAG_READONLY)) {
- lockres->readonly_node = lockres->master_node_num;
+ /* if we removed FLAG_READONLY above, or converted an
+ * EX to readonly, set the readonly state now */
+ if (status >= 0 && (is_readonly || flags & FLAG_READONLY)) {
+ set_bit(LOCK_STATE_READONLY, &lockres->readonly_state);
}
LOG_EXIT_STATUS (status);
Modified: trunk/src/inode.c
===================================================================
--- trunk/src/inode.c 2004-09-09 01:45:20 UTC (rev 1440)
+++ trunk/src/inode.c 2004-09-09 08:03:26 UTC (rev 1441)
@@ -881,6 +881,7 @@
struct inode *inode = dentry->d_inode;
int status = 0;
ocfs_super *osb;
+ ocfs_lock_res *lockres;
LOG_SET_CONTEXT(REVALIDATE);
@@ -910,14 +911,15 @@
goto bail;
}
+ lockres = GET_INODE_LOCKRES(inode);
/* if I hold cache lock, no revalidate needed */
- ocfs_acquire_lockres(GET_INODE_LOCKRES(inode), 0);
+ ocfs_acquire_lockres_read(inode);
if (ocfs_is_local_cache_lock(osb, inode)) {
- ocfs_release_lockres(GET_INODE_LOCKRES(inode));
+ ocfs_release_lockres_read(inode);
LOG_TRACE_STR("local cache lock\n");
goto bail;
}
- ocfs_release_lockres(GET_INODE_LOCKRES(inode));
+ ocfs_release_lockres_read(inode);
atomic_set(&OCFS_I(inode)->ip_needs_verification, 1);
status = ocfs_verify_update_inode(osb, inode);
@@ -1106,9 +1108,9 @@
goto leave;
}
- ocfs_acquire_lockres(lockres, 0);
+ ocfs_acquire_lockres_write(inode);
status = ocfs_update_lockres (osb, fe_bh, inode, 0);
- ocfs_release_lockres(lockres);
+ ocfs_release_lockres_write(inode);
status = 0;
leave:
Modified: trunk/src/journal.c
===================================================================
--- trunk/src/journal.c 2004-09-09 01:45:20 UTC (rev 1440)
+++ trunk/src/journal.c 2004-09-09 08:03:26 UTC (rev 1441)
@@ -279,10 +279,10 @@
if (lock->drop_holders) {
lockres = GET_INODE_LOCKRES(lock->inode);
- ocfs_acquire_lockres(lockres, 0);
+ ocfs_acquire_lockres_write(lock->inode);
OCFS_ASSERT(lockres->lock_holders >= lock->drop_holders);
lockres->lock_holders -= lock->drop_holders;
- ocfs_release_lockres(lockres);
+ ocfs_release_lockres_write(lock->inode);
}
iput(lock->inode);
Modified: trunk/src/lockres.c
===================================================================
--- trunk/src/lockres.c 2004-09-09 01:45:20 UTC (rev 1440)
+++ trunk/src/lockres.c 2004-09-09 08:03:26 UTC (rev 1441)
@@ -101,14 +101,6 @@
lockres->lock_type = DISK_LOCK(fe)->dl_level;
lockres->master_node_num = DISK_LOCK(fe)->dl_master;
- if (lockres->readonly_node != OCFS_INVALID_NODE_NUM &&
- lockres->readonly_node != lockres->master_node_num) {
- LOG_ERROR_ARGS("no longer readonly! ronode=%d, master=%d, lockid=%llu\n",
- lockres->readonly_node, lockres->master_node_num,
- OCFS_I(inode)->ip_blkno << inode->i_sb->s_blocksize_bits);
- lockres->readonly_node = OCFS_INVALID_NODE_NUM;
- }
-
out:
if (bh && drop_bh)
brelse(bh);
@@ -117,66 +109,8 @@
return status;
} /* ocfs_update_lockres */
-/*
- * ocfs_acquire_lockres()
- *
- * @lockres: lockres to acquire
- * @timeout: timeout in ms, 0 == no timeout
- */
-int ocfs_acquire_lockres (ocfs_lock_res * lockres, __u32 timeout)
-{
- unsigned long jif;
- int status;
- LOG_ENTRY_ARGS ("(0x%p, %u)\n", lockres, timeout);
-
- OCFS_ASSERT(lockres);
-
- if (!timeout) {
- down(&lockres->lock_mutex);
- status = 0;
- goto bail;
- }
-
- jif = jiffies + (timeout * HZ / 1000);
-
- while(1) {
- if (!down_trylock(&lockres->lock_mutex)) {
- status = 0;
- break;
- }
-
- if (jif < jiffies) {
- status = -ETIMEDOUT;
- break;
- }
-
- ocfs_sleep (OCFS_NM_HEARTBEAT_TIME / 10);
- }
-
-bail:
- LOG_EXIT_STATUS (status);
- return status;
-} /* ocfs_acquire_lockres */
-
/*
- * ocfs_release_lockres()
- *
- */
-void ocfs_release_lockres (ocfs_lock_res * lockres)
-{
- LOG_ENTRY_ARGS ("(0x%p)\n", lockres);
-
- OCFS_ASSERT(lockres);
-
- up(&lockres->lock_mutex);
-
- LOG_EXIT ();
- return;
-} /* ocfs_release_lockres */
-
-
-/*
* ocfs_init_lockres()
*
*/
@@ -187,12 +121,11 @@
LOG_ENTRY_ARGS ("(0x%p, 0x%p)\n", osb, lockres);
lockres->master_node_num = OCFS_INVALID_NODE_NUM;
- lockres->lock_state = 0;
lockres->lock_holders = 0;
+ lockres->readonly_state = 0;
lockres->uncommitted_holders = 0;
lockres->lock_type = OCFS_LKM_NLMODE;
- init_MUTEX(&lockres->lock_mutex);
- lockres->readonly_node = OCFS_INVALID_NODE_NUM;
+ init_rwsem(&lockres->lock);
ocfs_node_map_init(osb, &lockres->readonly_map);
LOG_EXIT ();
Modified: trunk/src/lockres.h
===================================================================
--- trunk/src/lockres.h 2004-09-09 01:45:20 UTC (rev 1440)
+++ trunk/src/lockres.h 2004-09-09 08:03:26 UTC (rev 1441)
@@ -29,11 +29,93 @@
#ifndef OCFS2_LOCKRES_H
#define OCFS2_LOCKRES_H
+#include "util.h"
+
/* lockres.c */
-int ocfs_acquire_lockres(ocfs_lock_res *lockres, __u32 timeout);
+
+
+/*
+ * ocfs_acquire_lockres_write_timeout() - poll for the write side of an inode's lockres rwsem
+ * Returns 0 once down_write_trylock() succeeds, -ETIMEDOUT if the deadline passes first.
+ * @inode: inode whose lockres (GET_INODE_LOCKRES(inode)) is to be write-locked
+ * @timeout: how long to keep retrying, in ms. 0 is not "wait forever" here (the deadline is "now"); use ocfs_acquire_lockres_write() to block.
+ */
+static inline int ocfs_acquire_lockres_write_timeout (struct inode *inode, __u32 timeout)
+{
+ unsigned long jif = jiffies + (timeout * HZ / 1000); /* deadline: ms converted to jiffies */
+ ocfs_lock_res * lockres = GET_INODE_LOCKRES(inode);
+
+ while(1) {
+ if (down_write_trylock(&lockres->lock)) /* non-zero is treated as "lock obtained" */
+ return 0;
+
+ if (jif < jiffies) /* NOTE(review): plain compare, not wrap-safe like time_after() -- confirm */
+ return -ETIMEDOUT;
+
+ ocfs_sleep (OCFS_NM_HEARTBEAT_TIME / 10); /* presumably sleeps between attempts -- helper not visible here */
+ }
+
+ return 0; /* not reached: the loop above only exits via return */
+}
+
+/*
+ * ocfs_acquire_lockres_write() - take the inode's lockres rwsem for writing via down_write(); always returns 0
+ */
+static inline int ocfs_acquire_lockres_write (struct inode *inode)
+{
+ ocfs_lock_res *lockres = GET_INODE_LOCKRES(inode);
+ down_write(&lockres->lock); /* no timeout on this path, unlike the _timeout variant */
+ return 0;
+}
+
+/*
+ * ocfs_acquire_lockres_read_timeout() - poll for the read side of an inode's lockres rwsem
+ * Returns 0 once down_read_trylock() succeeds, -ETIMEDOUT if the deadline passes first.
+ * @inode: inode whose lockres (GET_INODE_LOCKRES(inode)) is to be read-locked
+ * @timeout: how long to keep retrying, in ms. 0 is not "wait forever" here (the deadline is "now"); use ocfs_acquire_lockres_read() to block.
+ */
+static inline int ocfs_acquire_lockres_read_timeout (struct inode *inode, __u32 timeout)
+{
+ ocfs_lock_res *lockres = GET_INODE_LOCKRES(inode);
+ unsigned long jif = jiffies + (timeout * HZ / 1000); /* deadline: ms converted to jiffies */
+
+ while(1) {
+ if (down_read_trylock(&lockres->lock)) /* non-zero is treated as "lock obtained" */
+ return 0;
+
+ if (jif < jiffies) /* NOTE(review): plain compare, not wrap-safe like time_after() -- confirm */
+ return -ETIMEDOUT;
+
+ ocfs_sleep (OCFS_NM_HEARTBEAT_TIME / 10); /* presumably sleeps between attempts -- helper not visible here */
+ }
+
+ return 0; /* not reached: the loop above only exits via return */
+}
+
+/*
+ * ocfs_acquire_lockres_read() - take the inode's lockres rwsem for reading via down_read(); always returns 0
+ */
+static inline int ocfs_acquire_lockres_read (struct inode *inode)
+{
+ ocfs_lock_res *lockres = GET_INODE_LOCKRES(inode);
+ down_read(&lockres->lock); /* no timeout on this path, unlike the _timeout variant */
+ return 0;
+}
+
+static inline void ocfs_release_lockres_write(struct inode *inode) /* drop a write hold on the inode's lockres rwsem */
+{
+ ocfs_lock_res *lockres = GET_INODE_LOCKRES(inode);
+ up_write(&lockres->lock); /* pairs with ocfs_acquire_lockres_write() / _write_timeout() */
+}
+static inline void ocfs_release_lockres_read(struct inode *inode) /* drop a read hold on the inode's lockres rwsem */
+{
+ ocfs_lock_res *lockres = GET_INODE_LOCKRES(inode);
+ up_read(&lockres->lock); /* pairs with ocfs_acquire_lockres_read() / _read_timeout() */
+}
+
void ocfs_init_lockres(ocfs_super *osb, struct inode *inode);
-void ocfs_release_lockres(ocfs_lock_res *lockres);
int ocfs_update_lockres(ocfs_super *osb, struct buffer_head *bh,
struct inode *inode, int reread);
+
#endif /* OCFS2_LOCKRES_H */
Modified: trunk/src/nm.c
===================================================================
--- trunk/src/nm.c 2004-09-09 01:45:20 UTC (rev 1440)
+++ trunk/src/nm.c 2004-09-09 08:03:26 UTC (rev 1441)
@@ -60,8 +60,16 @@
static spinlock_t oin_num_ext_lock = SPIN_LOCK_UNLOCKED;
struct semaphore recovery_list_sem;
-static inline int get_process_vote_action(ocfs_super * osb, ocfs_lock_res *lockres, __u32 node_num, __u32 flags, int status, int *master_alive, struct inode *inode);
+static inline int need_write_lock(ocfs_super *osb, ocfs_lock_res *lockres, __u32 flags);
+static inline int get_process_vote_action(ocfs_super * osb, ocfs_lock_res *lockres, __u32 node_num,
+ __u32 flags, int *master_alive, int *write_lock,
+ int *change_master, struct inode *inode);
+static int ocfs_process_vote_pre_change_master(int vote_type, int flags, int *vote_response, int *status, struct inode *inode);
+static int ocfs_process_vote_change_master(ocfs_super *osb, int *vote_response, int *status, struct inode *inode, ocfs_lock_res *lockres, __s16 node_num, __u64 lock_id);
+static int ocfs_process_vote_post_change_master(ocfs_super *osb, int vote_type, int flags, int *vote_response, struct inode *inode, ocfs_lock_res *lockres, int *status, __s16 node_num, int *inc_seq);
+static int ocfs_lock_busy(ocfs_super *osb, struct inode *inode, ocfs_lock_res *lockres);
+
static int _ocfs_drop_readonly_cache_lock(void *arg);
typedef struct _ocfs_ro_cache_drop_ctxt
@@ -85,7 +93,6 @@
"INVALID_REQUEST", // reply with a NO vote
"UPDATE_OIN_INODE", // update both oin and inode
"DELETE_ACQUIRE",// delete or rename request
- "DELETE_RELEASE",// delete or rename release request
"CHANGE_MASTER", // request to change master to requestor
"NOT_MASTER", // I am not master, retry
"REMASTER_THIS", // remaster lock to me
@@ -290,42 +297,67 @@
return 0;
} /* ocfs_volume_thread */
+
+// Best guess, from an unlocked ("dirty") read of the lockres, of which side of
+// the lockres rwsem to take: returns 1 for the write lock, 0 for the read lock.
+// NOTE: always RECHECK after getting the lock and follow what
+// get_process_vote_action says (it reports the real answer via *write_lock)
+static inline int need_write_lock(ocfs_super *osb, ocfs_lock_res *lockres, __u32 flags)
+{
+ // always need write access to lockres if this node is not the master
+ if (lockres->master_node_num != osb->node_num)
+ return 1;
+ // these requests usually need write access, so just take it up front
+ if (flags & (FLAG_CHANGE_MASTER|FLAG_DROP_READONLY|FLAG_READONLY))
+ return 1;
+ // nothing else will need it, assuming the lockres didn't just change under us
+ return 0;
+}
+
static inline int get_process_vote_action(ocfs_super * osb, ocfs_lock_res *lockres, __u32 node_num,
- __u32 flags, int status, int *master_alive, struct inode *inode)
+ __u32 flags, int *master_alive, int *write_lock,
+ int *change_master, struct inode *inode)
{
int vote_type = INVALID_REQUEST;
int my_node_wins = 0;
+ int this_node_master = 0;
__u64 lockid = 0;
ocfs_vote_obj_lookup_data data;
- LOG_ENTRY_ARGS("(status=%d, lockid=%llu, node_num=%d, flags=%08x)\n", status,
+ LOG_ENTRY_ARGS("(lockid=%llu, node_num=%d, flags=%08x)\n",
lockid, node_num, flags);
- if (inode)
- lockid = OCFS_I(inode)->ip_blkno << inode->i_sb->s_blocksize_bits;
+ OCFS_ASSERT(inode);
+ OCFS_ASSERT(lockres);
- *master_alive = 1;
+ lockid = OCFS_I(inode)->ip_blkno << inode->i_sb->s_blocksize_bits;
- if (status < 0) {
- if (status == -ETIMEDOUT) {
- LOG_TRACE_STR("(INVALID_REQUEST) status == -ETIMEDOUT");
- return INVALID_REQUEST;
- }
- } else if (lockres) {
- *master_alive =
- (lockres->master_node_num !=
- OCFS_INVALID_NODE_NUM) &&
- ocfs_node_is_alive(&osb->publ_map,
- lockres->master_node_num);
+ *change_master = 0;
+ *write_lock = 0;
+ this_node_master = (lockres->master_node_num == osb->node_num);
+ *master_alive = (lockres->master_node_num != OCFS_INVALID_NODE_NUM) &&
+ ocfs_node_is_alive(&osb->publ_map, lockres->master_node_num);
- // if an outstanding vote request is found on this lockid
- // and this node number is higher, this node wins
- data.func = ocfs_lookup_obj_by_lockid;
- data.u.s.lock_id = lockid;
- data.ret = NULL;
- if (ocfs_lookup_vote_request_obj(osb, &data) == 0)
- my_node_wins = (node_num < osb->node_num);
+ // if an outstanding vote request is found on this lockid
+ // and this node number is higher, this node wins
+ data.func = ocfs_lookup_obj_by_lockid;
+ data.u.s.lock_id = lockid;
+ data.ret = NULL;
+ if (ocfs_lookup_vote_request_obj(osb, &data) == 0)
+ my_node_wins = (node_num < osb->node_num);
+
+ /* NOTE: FLAG_CHANGE_MASTER may be combined with
+ * other flags and result in a process_vote action
+ * other than CHANGE_MASTER. the change_master
+ * value returned here is independent of this action */
+ if (this_node_master && flags & FLAG_CHANGE_MASTER) {
+ *write_lock = 1;
+ *change_master = 1;
}
+
+ // if this node is not master, we will need to update the lockres
+ if (!this_node_master)
+ *write_lock = 1;
if (flags & (FLAG_RELEASE_DENTRY | FLAG_FILE_RENAME)) {
vote_type = RELEASE_DENTRY;
@@ -334,11 +366,12 @@
if (flags & FLAG_DROP_READONLY) {
vote_type = DROP_READONLY;
+ *write_lock = 1;
goto done;
} else if (flags & FLAG_READONLY) {
- if (lockres->master_node_num == osb->node_num &&
- lockres->lock_type == OCFS_LKM_EXMODE) {
+ if (this_node_master && lockres->lock_type == OCFS_LKM_EXMODE) {
vote_type = READONLY;
+ *write_lock = 1;
} else
vote_type = INVALID_REQUEST;
goto done;
@@ -346,7 +379,7 @@
if (flags & FLAG_FILE_DELETE) {
if (flags & FLAG_RELEASE_LOCK)
- vote_type = DELETE_RELEASE;
+ vote_type = INVALID_REQUEST;
else if (flags & FLAG_ACQUIRE_LOCK)
vote_type = DELETE_ACQUIRE;
else
@@ -359,7 +392,7 @@
vote_type = UPDATE_OIN_INODE;
} else if (flags & FLAG_TRUNCATE_PAGES) {
vote_type = TRUNCATE_PAGES;
- } else if (lockres->master_node_num == osb->node_num) {
+ } else if (this_node_master) {
if (flags & FLAG_CHANGE_MASTER)
vote_type = CHANGE_MASTER;
else {
@@ -375,12 +408,6 @@
vote_type = REMASTER_REQUESTOR;
}
- if (inode == NULL &&
- (vote_type != DELETE_RELEASE && vote_type != TRUNCATE_PAGES)) {
- printk("inode is null and it's not a delete release or a truncate pages!\n");
- vote_type = INVALID_REQUEST;
- }
-
done:
LOG_EXIT_STATUS(vote_type);
return vote_type;
@@ -435,10 +462,10 @@
OCFS_I(inode)->ip_num_extends++;
list_add_tail(&OCFS_I(inode)->ip_recovery_list,
&osb->lock_recovery_lists[node_num]);
-#ifdef VERBOSE_PROCESS_VOTE
- printk("ocfs_mark_inode_for_extend: inode %llu, num = %d\n",
+
+ LOG_TRACE_PROCESS_VOTE("inode %llu, num = %d\n",
OCFS_I(inode)->ip_blkno, OCFS_I(inode)->ip_num_extends);
-#endif
+
spin_unlock(&oin_num_ext_lock);
up(&recovery_list_sem);
@@ -459,22 +486,18 @@
* to get a release with a count > what we've had if
* we mount after the acquires have been sent. */
-#ifdef VERBOSE_PROCESS_VOTE
- printk("ocfs_clear_inode_for_extend: inode %llu, num_rel of "
+ LOG_TRACE_PROCESS_VOTE("inode %llu, num_rel of "
"%d would result in negative count (ip_num_extends "
"= %d)\n",
OCFS_I(inode)->ip_blkno, num_rel,
OCFS_I(inode)->ip_num_extends);
-#endif
OCFS_I(inode)->ip_num_extends = 0;
} else {
OCFS_I(inode)->ip_num_extends -= num_rel;
}
-#ifdef VERBOSE_PROCESS_VOTE
- printk("ocfs_clear_inode_for_extend: inode %llu, num = %d\n",
+ LOG_TRACE_PROCESS_VOTE("inode %llu, num = %d\n",
OCFS_I(inode)->ip_blkno, OCFS_I(inode)->ip_num_extends);
-#endif
if (!OCFS_I(inode)->ip_num_extends) {
list_del(&OCFS_I(inode)->ip_recovery_list);
@@ -495,9 +518,7 @@
return;
}
-#define PROCESS_VOTE_TRYLOCK
-#ifdef PROCESS_VOTE_TRYLOCK
static int ocfs_io_sem_read_trylock(struct inode *inode, u32 timeout)
{
unsigned long jif;
@@ -534,7 +555,6 @@
return status;
}
-#endif
static int ocfs_process_inode_delete(struct inode *inode)
{
@@ -550,10 +570,8 @@
down (&(OCFS_I(inode)->ip_sem));
/* vote no if the file is still open. */
if (OCFS_I(inode)->ip_open_cnt > 0) {
-#ifdef VERBOSE_PROCESS_VOTE
- printk("process_vote: (delete) open count = %u\n",
+ LOG_TRACE_PROCESS_VOTE("open count = %u\n",
OCFS_I(inode)->ip_open_cnt);
-#endif
up(&(OCFS_I(inode)->ip_sem));
status = 0;
goto done;
@@ -564,9 +582,7 @@
spin_lock(&oin_num_ext_lock);
if (OCFS_I(inode)->ip_num_extends) {
spin_unlock(&oin_num_ext_lock);
-#ifdef VERBOSE_PROCESS_VOTE
- printk("process_vote: (delete) extends pending\n");
-#endif
+ LOG_TRACE_PROCESS_VOTE("extends pending\n");
status = 0;
goto done;
}
@@ -576,10 +592,8 @@
* it? We want to make sure the inode is removed completely as
* a result of the iput in process_vote. */
if (S_ISDIR(inode->i_mode) && (atomic_read(&inode->i_count) != 1)) {
-#ifdef VERBOSE_PROCESS_VOTE
- printk("process_vote: (delete) i_count = %u\n",
+ LOG_TRACE_PROCESS_VOTE("i_count = %u\n",
atomic_read(&inode->i_count));
-#endif
status = 0;
goto done;
}
@@ -607,6 +621,7 @@
up(&OCFS_I(inode)->ip_sem);
}
+
/*
* ocfs_process_vote()
*
@@ -615,37 +630,28 @@
* @node_num: node asking for the vote
*
*/
-int ocfs_process_vote (ocfs_super * osb, ocfs_vote_request_ctxt *ctxt)
+int ocfs_process_vote (ocfs_super * osb, ocfs_dlm_msg *dlm_msg)
{
int status = 0;
int tmpstat = 0;
ocfs_lock_res *lockres = NULL;
__u32 flags, num_ident;
__u16 num_nodes;
- struct buffer_head *fe_bh = NULL;
int vote_type = INVALID_REQUEST, vote_response = 0;
struct inode *inode = NULL;
- int master_alive = 1;
+ int master_alive = 1, change_master = 0, write_lock = 0;
int inc_inode_seq = 0;
int have_io_sem = 0;
int change_master_succeeded = 0;
- ocfs_dlm_msg *dlm_msg = ctxt->dlm_msg;
- __s16 node_num = ctxt->node_num;
+ __s16 node_num = dlm_msg->src_node;
__u64 lock_id, seq_num;
ocfs_dlm_req_master *req_master = NULL;
+ int lockres_lock_held = NO_LOCK;
- LOG_ENTRY_ARGS ("(0x%p, 0x%p)\n", osb, ctxt);
+ LOG_ENTRY_ARGS ("(0x%p, 0x%p)\n", osb, dlm_msg);
down(&osb->vote_sem);
- if (!dlm_msg) {
- status = -EINVAL;
- LOG_ERROR_STR("invalid vote reply context!");
- LOG_ERROR_STATUS (status);
- LOG_EXIT_STATUS (status);
- return status;
- }
-
num_nodes = osb->max_nodes;
req_master = (ocfs_dlm_req_master *)dlm_msg->msg_buf;
@@ -667,50 +673,70 @@
* instead of letting the other guy's network timeout. */
vote_response = FLAG_VOTE_UPDATE_RETRY;
- /* delete / rename is slightly different -- we don't want to
- * look up the inode in the release case -- it should already
- * be gone. Eventually what we'll really want to do is get it
- * via the old offsets and set the new ones. */
- if ((flags & FLAG_FILE_DELETE) && (flags & FLAG_RELEASE_LOCK))
- inode = NULL;
- else {
- if(flags & FLAG_TRUNCATE_PAGES) {
- inode = ocfs_ilookup(osb,
- lock_id >> osb->sb->s_blocksize_bits);
- if(!inode)
- goto no_inode_ok;
- } else {
- inode = ocfs_iget(osb,
- lock_id >> osb->sb->s_blocksize_bits);
+ if (flags & FLAG_TRUNCATE_PAGES) {
+ inode = ocfs_ilookup(osb,
+ lock_id >> osb->sb->s_blocksize_bits);
+ if(!inode) {
+ vote_type = TRUNCATE_PAGES;
+ goto got_vote_type;
}
+ } else {
+ inode = ocfs_iget(osb,
+ lock_id >> osb->sb->s_blocksize_bits);
+ }
- if (!inode) {
- status = -EFAIL;
- LOG_ERROR_ARGS("Could not find inode: lock_id = %llu, "
- "node=%u, seq=%llu, flags=0x%x\n",
- lock_id, node_num, seq_num, flags);
- LOG_ERROR_STATUS(status);
- goto vote;
- }
+ if (!inode) {
+ status = -EFAIL;
+ LOG_ERROR_ARGS("Could not find inode: lock_id = %llu, "
+ "node=%u, seq=%llu, flags=0x%x\n",
+ lock_id, node_num, seq_num, flags);
+ LOG_ERROR_STATUS(status);
+ goto vote;
+ }
- /* ahh, so you find yourself asking "what the
- * heck is this?"
- * Please see the note in ocfs_delete_inode. */
- osb->voting_ino = inode->i_ino;
+ /* ahh, so you find yourself asking "what the
+ * heck is this?"
+ * Please see the note in ocfs_delete_inode. */
+ osb->voting_ino = inode->i_ino;
- lockres = GET_INODE_LOCKRES(inode);
+ lockres = GET_INODE_LOCKRES(inode);
- status = ocfs_acquire_lockres (lockres, (OCFS_NM_HEARTBEAT_TIME/2)); // ocfs_process_vote
- if (status < 0) {
- lockres = NULL;
- LOG_TRACE_ARGS("Timedout locking lockres for id: %llu\n",
- OCFS_I(inode)->ip_blkno << inode->i_sb->s_blocksize_bits);
- goto vote;
- }
+ // take a good guess...
+ // at worst, we will take 2 passes through
+ write_lock = need_write_lock(osb, lockres, flags);
+retake_lock:
+ OCFS_ASSERT(lockres_lock_held == NO_LOCK);
+ if (write_lock)
+ status = ocfs_acquire_lockres_write_timeout (inode, (OCFS_NM_HEARTBEAT_TIME/2));
+ else
+ status = ocfs_acquire_lockres_read_timeout (inode, (OCFS_NM_HEARTBEAT_TIME/2));
+
+ if (status < 0) {
+ LOG_TRACE_ARGS("Timedout locking lockres for id: %llu\n",
+ OCFS_I(inode)->ip_blkno << inode->i_sb->s_blocksize_bits);
+ goto vote;
+ } else
+ lockres_lock_held = (write_lock ? WRITE_LOCK : READ_LOCK);
+
+ // find out everything now that a lock is held
+ vote_type = get_process_vote_action(osb, lockres, node_num, flags,
+ &master_alive, &write_lock,
+ &change_master, inode);
+
+ // bummer. we got the wrong lock. get the write lock and start over.
+ if (write_lock && lockres_lock_held == READ_LOCK) {
+ ocfs_release_lockres_read(inode);
+ lockres_lock_held = NO_LOCK;
+ goto retake_lock;
+ }
+
+ if (lockres->master_node_num != osb->node_num) {
/* since we pass a NULL bh, this'll only do a read if
- * we're not the master. */
+ * we're not the master. */
+ OCFS_ASSERT(lockres_lock_held == WRITE_LOCK);
status = ocfs_update_lockres (osb, NULL, inode, 1);
+
if (status < 0) {
if (status != -ETIMEDOUT)
LOG_ERROR_STATUS (status);
@@ -718,50 +744,152 @@
}
}
-no_inode_ok:
- /* fail here if no inode, unless this is a delete/rename release */
- vote_type = get_process_vote_action(osb, lockres, node_num, flags,
- status, &master_alive, inode);
-
-#ifdef VERBOSE_PROCESS_VOTE
- printk("(%u) ocfs_process_vote: %s request for lockid: %llu, action: (%u) %s, num_ident = %u\n", current->pid,
+got_vote_type:
+
+ LOG_TRACE_PROCESS_VOTE("type: %s, lockid: %llu, action: (%u) %s, num_ident: %u, "
+ "alive: %d, write: %d, change: %d, held: %d\n",
flags & FLAG_RELEASE_LOCK ? "RELEASE" :
(flags & FLAG_ACQUIRE_LOCK ? "ACQUIRE" : "MODIFY"), lock_id,
- vote_type, process_vote_strings[vote_type], num_ident);
+ vote_type, process_vote_strings[vote_type], num_ident,
+ master_alive, write_lock, change_master, lockres_lock_held);
+
if (vote_type == INVALID_REQUEST)
- printk("Invalid request! flags = 0x%x master=%d, level=%d\n",
- flags, lockres->master_node_num, lockres->lock_state);
+ printk("Invalid request! flags = 0x%x master=%d, readonly=%s\n",
+ flags, lockres->master_node_num,
+ test_bit(LOCK_STATE_READONLY, &lockres->readonly_state) ? "yes" : "no");
-#endif
- /* get_process_vote_action will only allow CHANGE_MASTER and
- * RELEASE_CACHE, on a CACHE lock held by this node. the
- * CHANGE_MASTER/RELEASE_CACHE path needs to check the
- * readonly map to see if any nodes need to be updated. */
+ /* get_process_vote_action will only allow CHANGE_MASTER on a CACHE lock
+ * held by this node. the CHANGE_MASTER path needs to check the readonly
+ * map to see if any nodes need to be updated. */
- vote_response = 0;
+ vote_response = FLAG_VOTE_NODE;
- /* some lock requests need to be processed before a possible
- * change master. Beware however that the change_master might
- * very well send a no vote, so you can't do things here that
- * cannot be rolled back. */
- switch (vote_type) {
- case DELETE_ACQUIRE:
- LOG_TRACE_STR("DELETE_ACQUIRE (part one)");
- if (!ocfs_process_inode_delete(inode))
- vote_response = FLAG_VOTE_OIN_ALREADY_INUSE;
- else
- vote_response = FLAG_VOTE_NODE;
- break;
- case TRUNCATE_PAGES:
- LOG_TRACE_STR("TRUNCATE_PAGES");
- if(inode) {
- status = ocfs_sync_inode(inode);
- if (status < 0) {
- LOG_ERROR_STATUS(status);
- vote_response = FLAG_VOTE_UPDATE_RETRY;
- goto vote;
- }
+ if (ocfs_process_vote_pre_change_master(vote_type, flags, &vote_response, &status, inode))
+ goto vote;
+ if (change_master) {
+ tmpstat = ocfs_process_vote_change_master(osb, &vote_response, &status,
+ inode, lockres, node_num, lock_id);
+ if (tmpstat < 0)
+ goto leave;
+ else if (tmpstat == 1)
+ goto vote;
+ change_master_succeeded = 1;
+ inc_inode_seq = 1;
+ }
+
+ tmpstat = ocfs_process_vote_post_change_master(osb, vote_type, flags, &vote_response, inode,
+ lockres, &status, node_num, &inc_inode_seq);
+
+ /* if we made it this far, and change_master, then it had better be voting yes */
+ if (change_master && vote_response != FLAG_VOTE_NODE)
+ BUG();
+
+ if (have_io_sem && inode)
+ up_write(&OCFS_I(inode)->ip_io_sem);
+ have_io_sem = 0;
+
+ if (inode && (flags & (FLAG_FILE_EXTEND|FLAG_FILE_TRUNCATE)) &&
+ ((flags & FLAG_ACQUIRE_LOCK && vote_response==FLAG_VOTE_NODE) ||
+ (flags & FLAG_RELEASE_LOCK))) {
+ LOG_TRACE_ARGS("responding YES to %s %s request, inode=%p, node=%u\n", flags & FLAG_FILE_EXTEND ?
+ "extend" : "truncate", flags & FLAG_RELEASE_LOCK ?
+ "release" : "acquire", inode, node_num);
+
+ if (flags & FLAG_ACQUIRE_LOCK)
+ ocfs_mark_inode_for_extend(osb, inode, node_num);
+ else if (flags & FLAG_RELEASE_LOCK)
+ ocfs_clear_inode_for_extend(osb, inode, node_num,
+ num_ident);
+ else {
+ printk("uhoh, bad vote flags! 0x%x\n", flags);
+ BUG();
+ }
+ }
+
+vote:
+ status = ocfs_send_vote_reply(osb, dlm_msg, vote_response);
+
+ LOG_TRACE_PROCESS_VOTE("vote: lockid=%llu, node=%d, seqnum=%llu, response=%d\n",
+ lock_id, node_num, seq_num, vote_response);
+
+ if (status < 0)
+ LOG_ERROR_STATUS (status);
+ else {
+ ocfs_compute_dlm_stats (0, vote_response,
+ &(OcfsGlobalCtxt.net_reply_stats));
+ ocfs_compute_dlm_stats (0, vote_response,
+ &(osb->net_reply_stats));
+ }
+
+leave:
+ if (lockres_lock_held == READ_LOCK)
+ ocfs_release_lockres_read (inode);
+ else if (lockres_lock_held == WRITE_LOCK)
+ ocfs_release_lockres_write (inode);
+ lockres_lock_held = NO_LOCK;
+
+ if (!inode)
+ goto no_inode_leave;
+
+ if (have_io_sem) {
+ up_write(&OCFS_I(inode)->ip_io_sem);
+ have_io_sem = 0;
+ }
+
+ if (atomic_read(&OCFS_I(inode)->ip_needs_verification)
+ && vote_type == UPDATE_OIN_INODE) {
+ if (ocfs_io_sem_read_trylock(inode, (OCFS_NM_HEARTBEAT_TIME/2))) {
+ LOG_ERROR_ARGS("Could not verify_update on %llu\n",
+ OCFS_I(inode)->ip_blkno);
+ } else {
+ tmpstat = ocfs_verify_update_inode(osb, inode);
+ if (tmpstat < 0)
+ LOG_ERROR_STATUS(tmpstat);
+ up_read(&OCFS_I(inode)->ip_io_sem);
+ }
+ }
+
+ if (inc_inode_seq) {
+ ocfs_inc_inode_seq(osb, inode);
+ sync_mapping_buffers(inode->i_mapping);
+ }
+ iput(inode);
+
+no_inode_leave:
+ osb->voting_ino = 0;
+
+ up(&osb->vote_sem);
+
+ LOG_EXIT_STATUS (status);
+ return status;
+} /* ocfs_process_vote */
+
+
+/* some lock requests need to be processed before a possible
+ * change master. Beware however that the change_master might
+ * very well send a no vote, so you can't do things here that
+ * cannot be rolled back. */
+
+/* Returns: 1 if process_vote should vote immediately, 0 otherwise */
+
+static int ocfs_process_vote_pre_change_master(int vote_type, int flags, int *vote_response, int *status, struct inode *inode)
+{
+ if (vote_type == DELETE_ACQUIRE) {
+ LOG_TRACE_STR("DELETE_ACQUIRE (part one)");
+ if (!ocfs_process_inode_delete(inode)) {
+ *vote_response = FLAG_VOTE_OIN_ALREADY_INUSE;
+ return 1;
+ }
+ *vote_response = FLAG_VOTE_NODE;
+ return 0;
+ }
+ if (vote_type == TRUNCATE_PAGES) {
+ LOG_TRACE_STR("TRUNCATE_PAGES");
+ *vote_response = FLAG_VOTE_NODE;
+ if (inode) {
+ *status = ocfs_sync_inode(inode);
+ if (*status >= 0) {
ocfs_truncate_inode_pages(inode, 0);
down(&OCFS_I(inode)->ip_sem);
ocfs_extent_map_destroy(&OCFS_I(inode)->ip_ext_map);
@@ -770,119 +898,145 @@
if (flags & FLAG_FILE_UPDATE_OIN)
atomic_set(&OCFS_I(inode)->ip_needs_verification, 1);
up(&OCFS_I(inode)->ip_sem);
+ } else {
+ // if this fails, it's an EIO
+ // should probably BUG
+ LOG_ERROR_STATUS(*status);
+ *vote_response = FLAG_VOTE_UPDATE_RETRY;
+ return 1;
}
- vote_response = FLAG_VOTE_NODE;
- break;
- case INVALID_REQUEST:
- /* we catch INVALID_REQUEST up here now as we
- * don't want to do a change_master on a
- * messed up vote... */
- LOG_TRACE_STR("INVALID_REQUEST");
- goto vote;
- default:
- break;
+ }
+ return 0;
}
+
+ *vote_response = 0;
+ if (vote_type == INVALID_REQUEST) {
+ /* we catch INVALID_REQUEST up here now as we
+ * don't want to do a change_master on a
+ * messed up vote... */
+ LOG_TRACE_STR("INVALID_REQUEST");
+ return 1;
+ }
+ return 0;
+}
- if (vote_response > FLAG_VOTE_NODE) {
- /* we shouldn't even get to the other cases. */
- goto vote;
+
+
+
+static int ocfs_lock_busy(ocfs_super *osb, struct inode *inode, ocfs_lock_res *lockres)
+{
+ /* requestor will need to retry if anyone is using the lockres */
+ if (lockres->lock_holders > 0) {
+ LOG_TRACE_PROCESS_VOTE("Lock id (%llu) has %u holders\n",
+ OCFS_I(inode)->ip_blkno << inode->i_sb->s_blocksize_bits,
+ lockres->lock_holders);
+ // kick the commit thread
+ atomic_set(&osb->flush_event_woken, 1);
+ wake_up(&osb->flush_event);
+
+ return 1;
}
+ return 0;
+}
- if (lockres && (lockres->master_node_num == osb->node_num)
- && (flags & FLAG_CHANGE_MASTER)) {
- LOG_TRACE_STR("CHANGE_MASTER");
- status = -EFAIL;
-#ifdef VERBOSE_PROCESS_VOTE
- printk("process_vote: doing CHANGE_MASTER for this request\n");
-#endif
- /* requestor will need to retry if anyone is using the
- * lockres */
- if (lockres->lock_holders > 0) {
-#ifdef VERBOSE_PROCESS_VOTE
- printk("process_vote: Lock id (%llu) has %u "
- "holders\n",
- OCFS_I(inode)->ip_blkno << inode->i_sb->s_blocksize_bits,
- lockres->lock_holders);
-#endif
- // kick the commit thread
- atomic_set(&osb->flush_event_woken, 1);
- wake_up(&osb->flush_event);
+/* Returns: <0 if an I/O error occurred,
+ * 1 if process_vote should vote immediately,
+ * 0 if change master succeeded */
- vote_response = FLAG_VOTE_UPDATE_RETRY;
- status = 0;
- goto vote;
- }
+static int ocfs_process_vote_change_master(ocfs_super *osb, int *vote_response, int *status, struct inode *inode,
+ ocfs_lock_res *lockres, __s16 node_num, __u64 lock_id)
+{
+ struct buffer_head *fe_bh = NULL;
- /* this is currently a readonly cache lock.
- * need to communicate to all the nodes in the
- * map that lock will be changing to RW before we
- * continue. RETRY this request while we spawn
- * off a thread to collect up the communication */
+ /* lockres is held with down_write throughout this call */
+
+ LOG_TRACE_STR("CHANGE_MASTER");
+ LOG_TRACE_PROCESS_VOTE("doing CHANGE_MASTER for this request\n");
+
+ if (ocfs_lock_busy(osb, inode, lockres)) {
+ *vote_response = FLAG_VOTE_UPDATE_RETRY;
+ *status = 0;
+ return 1;
+ }
+
+ /* this is currently a readonly EX lock.
+ * need to communicate to all the nodes in the
+ * map that lock will be changing to RW before we
+ * continue. RETRY this request while we spawn
+ * off a thread to collect up the communication */
+ if (!ocfs_node_map_is_empty(&lockres->readonly_map)) {
+ // assumption: node asking for vote has already dropped readonly
+ ocfs_node_map_clear_bit(&lockres->readonly_map, node_num);
+ // should not be in there, but...
+ ocfs_node_map_clear_bit(&lockres->readonly_map, osb->node_num);
if (!ocfs_node_map_is_empty(&lockres->readonly_map)) {
- // assumption: node asking for vote has already dropped readonly_node
- ocfs_node_map_clear_bit(&lockres->readonly_map, node_num);
- // should not be in there, but...
- ocfs_node_map_clear_bit(&lockres->readonly_map, osb->node_num);
- if (!ocfs_node_map_is_empty(&lockres->readonly_map)) {
- OCFS_ASSERT(lockres->readonly_node == osb->node_num);
- OCFS_ASSERT(inode);
- status = ocfs_drop_readonly_cache_lock(osb, inode, 1);
- if (status < 0)
- LOG_ERROR_STATUS(status);
-#ifdef VERBOSE_PROCESS_VOTE
- printk("process_vote: node map is not "
- "empty on readonly drop "
- "request\n");
-#endif
- vote_response = FLAG_VOTE_UPDATE_RETRY;
- goto vote;
- }
- // noone left in map, so continue
- lockres->readonly_node = OCFS_INVALID_NODE_NUM;
+ OCFS_ASSERT(test_bit(LOCK_STATE_READONLY, &lockres->readonly_state) &&
+ lockres->master_node_num == osb->node_num);
+ OCFS_ASSERT(inode);
+ *status = ocfs_drop_readonly_cache_lock(osb, inode, 1);
+ if (*status < 0)
+ LOG_ERROR_STATUS(*status);
+ LOG_TRACE_PROCESS_VOTE("node map not empty on RO drop request\n");
+ *vote_response = FLAG_VOTE_UPDATE_RETRY;
+ // did not change master, send response
+ return 1;
}
+ // noone left in map, so continue
+ clear_bit(LOCK_STATE_READONLY, &lockres->readonly_state);
+ }
- sync_mapping_buffers(inode->i_mapping);
+ sync_mapping_buffers(inode->i_mapping);
#warning do we need a truncate_inode_pages here?
- if (lockres->master_node_num != osb->node_num) {
- printk("are we giving away a lock we don't own!?! "
- "inode %llu\n", OCFS_I(inode)->ip_blkno);
- BUG();
- }
+ if (lockres->master_node_num != osb->node_num) {
+ printk("are we giving away a lock we don't own!?! "
+ "inode %llu\n", OCFS_I(inode)->ip_blkno);
+ BUG();
+ }
- status = ocfs_read_bh(osb, lock_id, &fe_bh, OCFS_BH_CACHED,
- inode);
- if (status < 0) {
- LOG_ERROR_STATUS (status);
- goto leave;
- }
- lockres->master_node_num = node_num;
- lockres->lock_type = OCFS_LKM_NLMODE;
- ocfs_update_disk_lock(osb, fe_bh, inode);
-
- brelse(fe_bh);
- vote_response = FLAG_VOTE_NODE;
- inc_inode_seq = 1;
- status = 0;
- change_master_succeeded = 1;
+ *status = ocfs_read_bh(osb, lock_id, &fe_bh, OCFS_BH_CACHED,
+ inode);
+ if (*status < 0) {
+ LOG_ERROR_STATUS ((*status));
+ return *status;
}
+ lockres->master_node_num = node_num;
+ lockres->lock_type = OCFS_LKM_NLMODE;
+ ocfs_update_disk_lock(osb, fe_bh, inode);
+ brelse(fe_bh);
+ *vote_response = FLAG_VOTE_NODE;
+ *status = 0;
- /* Below here, we can't have any of these cases failing if
- * there was a successfull change master request. */
+ // master successfully changed
+ return 0;
+}
+
+
+
+/* Returns: 1 if process_vote should vote immediately,
+ * 0 on success */
+
+/* we can't have any of these cases failing if the change master already succeeded */
+static int ocfs_process_vote_post_change_master(ocfs_super *osb, int vote_type, int flags, int *vote_response, struct inode *inode, ocfs_lock_res *lockres, int *status, __s16 node_num, int *inc_seq)
+{
switch (vote_type) {
+ case TRUNCATE_PAGES:
+ case CHANGE_MASTER:
+ /* we dealt with this all above. */
+ break;
+
case UPDATE_OIN_INODE:
LOG_TRACE_STR("UPDATE_OIN_INODE");
atomic_set(&OCFS_I(inode)->ip_needs_verification, 1);
- vote_response = FLAG_VOTE_OIN_UPDATED;
+ *vote_response = FLAG_VOTE_OIN_UPDATED;
break;
case RELEASE_DENTRY:
- if (!inode)
- BUG();
+ OCFS_ASSERT(inode);
/* we always vote yes on this one. */
- vote_response = FLAG_VOTE_NODE;
+ *vote_response = FLAG_VOTE_NODE;
/* do nothing in the release case... hmm,
* perhaps we should just do a verify_update
@@ -900,21 +1054,16 @@
inode->i_nlink--;
}
- LOG_TRACE_ARGS("pruned dentries for inode %lu, nlink "
- "= %u\n", inode->i_ino, inode->i_nlink);
+ LOG_TRACE_ARGS("pruned dentries for inode %lu, nlink = %u\n",
+ inode->i_ino, inode->i_nlink);
break;
- case DELETE_RELEASE:
- /* ACK and done */
- vote_response = FLAG_VOTE_NODE;
- break;
-
case DELETE_ACQUIRE:
LOG_TRACE_STR("DELETE_ACQUIRE (part two)");
/* If we got this far, then we assume we've
* done the 1st part of the DELETE_ACQUIRE
* case and we just have to commit it. */
- if (vote_response != FLAG_VOTE_NODE)
+ if (*vote_response != FLAG_VOTE_NODE)
BUG();
ocfs_commit_inode_delete(inode);
@@ -922,23 +1071,14 @@
case READONLY:
LOG_TRACE_STR("READONLY");
- OCFS_ASSERT(lockres->readonly_node==osb->node_num ||
- lockres->readonly_node==OCFS_INVALID_NODE_NUM);
+ // WRITELOCK
+ OCFS_ASSERT(!(test_bit(LOCK_STATE_READONLY, &lockres->readonly_state)) ||
+ lockres->master_node_num == osb->node_num);
- if (lockres->lock_holders > 0) {
-#ifdef VERBOSE_PROCESS_VOTE
- printk("process_vote: (readonly) Lock id (%llu) has %u "
- "holders\n",
- OCFS_I(inode)->ip_blkno << inode->i_sb->s_blocksize_bits,
- lockres->lock_holders);
-#endif
- // kick the commit thread
- atomic_set(&osb->flush_event_woken, 1);
- wake_up(&osb->flush_event);
-
- vote_response = FLAG_VOTE_UPDATE_RETRY;
- status = 0;
- break;
+ if (ocfs_lock_busy(osb, inode, lockres)) {
+ *vote_response = FLAG_VOTE_UPDATE_RETRY;
+ *status = 0;
+ return 1;
}
// if the requestor just wants to do readonly, we
@@ -946,9 +1086,9 @@
sync_mapping_buffers(inode->i_mapping);
ocfs_node_map_set_bit(&lockres->readonly_map, node_num);
- lockres->readonly_node = osb->node_num;
- vote_response = FLAG_VOTE_NODE;
- status = 0;
+ set_bit(LOCK_STATE_READONLY, &lockres->readonly_state);
+ *vote_response = FLAG_VOTE_NODE;
+ *status = 0;
break;
case DROP_READONLY:
@@ -967,137 +1107,52 @@
/* this path should always succeed on the vote *
* even in the error case. do nothing for error. */
+
+ // WRITELOCK
if (lockres->master_node_num != node_num ||
lockres->lock_type != OCFS_LKM_EXMODE ||
!ocfs_node_map_is_empty(&lockres->readonly_map))
- LOG_ERROR_ARGS("(drop-ro) master=%d node_num=%d locktype=%d ronode=%d\n",
+ LOG_ERROR_ARGS("(drop-ro) master=%d node_num=%d locktype=%d readonly=%s\n",
lockres->master_node_num, node_num, lockres->lock_type,
- lockres->readonly_node);
+ test_bit(LOCK_STATE_READONLY, &lockres->readonly_state) ? "yes" : "no");
else
- lockres->readonly_node = OCFS_INVALID_NODE_NUM;
+ clear_bit(LOCK_STATE_READONLY, &lockres->readonly_state);
- status = 0;
- vote_response = FLAG_VOTE_NODE;
- inc_inode_seq = 1;
+ *status = 0;
+ *vote_response = FLAG_VOTE_NODE;
+ *inc_seq = 1;
break;
case NOT_MASTER:
LOG_TRACE_STR("NOT_MASTER");
- vote_response = FLAG_VOTE_UPDATE_RETRY;
+ *vote_response = FLAG_VOTE_UPDATE_RETRY;
if (flags & (FLAG_FILE_EXTEND|FLAG_FILE_TRUNCATE) &&
lockres->master_node_num == node_num) {
LOG_TRACE_STR("owner is requesting extend/truncate");
- vote_response = FLAG_VOTE_NODE;
+ *vote_response = FLAG_VOTE_NODE;
}
break;
case REMASTER_THIS:
LOG_TRACE_STR("REMASTER_THIS");
- vote_response = FLAG_VOTE_UPDATE_RETRY;
+ *vote_response = FLAG_VOTE_UPDATE_RETRY;
break;
case REMASTER_REQUESTOR:
LOG_TRACE_STR("REMASTER_REQUESTOR");
- vote_response = FLAG_VOTE_NODE;
+ *vote_response = FLAG_VOTE_NODE;
break;
- case TRUNCATE_PAGES:
- case CHANGE_MASTER:
- /* we dealt with this all above. */
- break;
-
case INVALID_REQUEST:
default:
LOG_TRACE_STR("INVALID_REQUEST");
- vote_response = 0;
+ *vote_response = 0;
break;
}
+ return 0;
+}
- if (change_master_succeeded &&
- ((vote_response == 0) || (vote_response > FLAG_VOTE_OIN_UPDATED)))
- BUG();
- if (inode && (flags & (FLAG_FILE_EXTEND|FLAG_FILE_TRUNCATE)) &&
- ((flags & FLAG_ACQUIRE_LOCK && vote_response==FLAG_VOTE_NODE) ||
- (flags & FLAG_RELEASE_LOCK))) {
-
- LOG_TRACE_ARGS("responding YES to %s %s request, inode=%p, node=%u\n", flags & FLAG_FILE_EXTEND ?
- "extend" : "truncate", flags & FLAG_RELEASE_LOCK ?
- "release" : "acquire", inode, node_num);
-
- if (flags & FLAG_ACQUIRE_LOCK)
- ocfs_mark_inode_for_extend(osb, inode, node_num);
- else if (flags & FLAG_RELEASE_LOCK)
- ocfs_clear_inode_for_extend(osb, inode, node_num,
- num_ident);
- else {
- printk("uhoh, bad vote flags! 0x%x\n", flags);
- BUG();
- }
- }
-
-vote:
- status = ocfs_send_vote_reply(osb, dlm_msg, vote_response);
-
-#ifdef VERBOSE_PROCESS_VOTE
- printk("(%u) vote: lockid=%llu, node=%d, seqnum=%llu, response=%d\n",current->pid, lock_id, node_num, seq_num, vote_response);
-#endif
-
- if (status < 0)
- LOG_ERROR_STATUS (status);
- else {
- ocfs_compute_dlm_stats (0, vote_response,
- &(OcfsGlobalCtxt.net_reply_stats));
- ocfs_compute_dlm_stats (0, vote_response,
- &(osb->net_reply_stats));
- }
-
-leave:
- if (lockres) {
- ocfs_release_lockres (lockres); // ocfs_process_vote
- lockres = NULL;
- }
-
- if (!inode)
- goto no_inode_leave;
-
- if (atomic_read(&OCFS_I(inode)->ip_needs_verification)
- && vote_type == UPDATE_OIN_INODE) {
- have_io_sem = 1;
-#ifdef PROCESS_VOTE_TRYLOCK
- if (ocfs_io_sem_read_trylock(inode, (OCFS_NM_HEARTBEAT_TIME/2))) {
- LOG_ERROR_ARGS("Could not verify_update on %llu\n",
- OCFS_I(inode)->ip_blkno);
- have_io_sem = 0;
- }
-#else
- down_read(&OCFS_I(inode)->ip_io_sem);
-#endif
- if (have_io_sem) {
- tmpstat = ocfs_verify_update_inode(osb, inode);
- if (tmpstat < 0)
- LOG_ERROR_STATUS(tmpstat);
- up_read(&OCFS_I(inode)->ip_io_sem);
- have_io_sem = 0;
- }
- }
-
- if (inc_inode_seq) {
- ocfs_inc_inode_seq(osb, inode);
- sync_mapping_buffers(inode->i_mapping);
- }
- iput(inode);
-
-no_inode_leave:
- osb->voting_ino = 0;
-
- up(&osb->vote_sem);
-
- LOG_EXIT_STATUS (status);
- return status;
-} /* ocfs_process_vote */
-
-
/* inode is definitely non NULL */
void ocfs_inc_inode_seq(ocfs_super *osb, struct inode *inode)
{
@@ -1169,7 +1224,7 @@
int status = 0;
ocfs_lock_res *lockres = GET_INODE_LOCKRES(inode);
- if (lockres->lock_state & FLAG_READONLY_DROPPING) {
+ if (test_bit(LOCK_STATE_READONLY_DROPPING, &lockres->readonly_state)) {
// if coming from process_vote, go about our merry way
if (yield)
return 0;
@@ -1209,25 +1264,25 @@
/* this will wait until process_vote gets to the release */
if (yield)
- ocfs_acquire_lockres(lockres, 0); // ocfs_process_vote ocfs_acquire_lock
-
+ ocfs_acquire_lockres_write(inode);
/* check these under the lock */
- if (lockres->readonly_node != osb->node_num ||
+ if (!(test_bit(LOCK_STATE_READONLY, &lockres->readonly_state)) ||
lockres->master_node_num != osb->node_num ||
lockres->lock_type != OCFS_LKM_EXMODE) {
- LOG_ERROR_ARGS("bad RO lockres! this=%d, ro_node=%d, master=%d, locktype=%u\n",
- osb->node_num, lockres->readonly_node,
+ LOG_ERROR_ARGS("bad RO lockres! this=%d, readonly=%s, master=%d, locktype=%u\n",
+ osb->node_num,
+ test_bit(LOCK_STATE_READONLY, &lockres->readonly_state) ? "yes" : "no",
lockres->master_node_num, lockres->lock_type);
status = -EINVAL;
goto leave;
}
- if (lockres->lock_state & FLAG_READONLY_DROPPING) {
+ if (test_bit(LOCK_STATE_READONLY_DROPPING, &lockres->readonly_state)) {
status = 0;
goto leave;
}
- lockres->lock_state |= FLAG_READONLY_DROPPING;
+ set_bit(LOCK_STATE_READONLY_DROPPING, &lockres->readonly_state);
/* remove this node */
ocfs_node_map_clear_bit(&lockres->readonly_map, osb->node_num);
@@ -1241,9 +1296,9 @@
status = 0;
if (yield) {
/* from nm thread, give some time to waiters */
- ocfs_release_lockres(lockres); // ocfs_process_vote ocfs_acquire_lock
+ ocfs_release_lockres_write(inode);
ocfs_sleep(50);
- ocfs_acquire_lockres(lockres, 0); // ocfs_process_vote ocfs_acquire_lock
+ ocfs_acquire_lockres_write(inode);
}
continue;
}
@@ -1253,14 +1308,15 @@
}
if (ocfs_node_map_is_empty(&lockres->readonly_map) &&
- lockres->readonly_node == osb->node_num)
- lockres->readonly_node = OCFS_INVALID_NODE_NUM;
+ test_bit(LOCK_STATE_READONLY, &lockres->readonly_state) &&
+ lockres->master_node_num == osb->node_num)
+ clear_bit(LOCK_STATE_READONLY, &lockres->readonly_state);
- lockres->lock_state &= ~FLAG_READONLY_DROPPING;
+ clear_bit(LOCK_STATE_READONLY_DROPPING, &lockres->readonly_state);
leave:
if (yield)
- ocfs_release_lockres(lockres); // ocfs_process_vote ocfs_acquire_lock
+ ocfs_release_lockres_write(inode); // ocfs_process_vote ocfs_acquire_lock
if (inode)
iput(inode);
Modified: trunk/src/nm.h
===================================================================
--- trunk/src/nm.h 2004-09-09 01:45:20 UTC (rev 1440)
+++ trunk/src/nm.h 2004-09-09 08:03:26 UTC (rev 1441)
@@ -32,7 +32,7 @@
int ocfs_drop_readonly_cache_lock(ocfs_super *osb, struct inode *inode,
int yield);
void ocfs_inc_inode_seq(ocfs_super *osb, struct inode *inode);
-int ocfs_process_vote(ocfs_super *osb, ocfs_vote_request_ctxt *ctxt);
+int ocfs_process_vote (ocfs_super * osb, ocfs_dlm_msg *dlm_msg);
int ocfs_recv_thread(void *unused);
void ocfs_recover_oin_locks(ocfs_super *osb, __u32 node_num);
int ocfs_volume_thread(void *arg);
Modified: trunk/src/ocfs.h
===================================================================
--- trunk/src/ocfs.h 2004-09-09 01:45:20 UTC (rev 1440)
+++ trunk/src/ocfs.h 2004-09-09 08:03:26 UTC (rev 1441)
@@ -192,9 +192,22 @@
#define OCFS_INVALID_NODE_NUM -1
-/* lockres->lock_state flags */
-#define FLAG_READONLY_DROPPING 0x00000008
+/* lockres->readonly_state bits */
+enum {
+ LOCK_STATE_READONLY,
+ LOCK_STATE_READONLY_DROPPING,
+ LOCK_STATE_BLOCK_EXCLUSIVE,
+ LOCK_STATE_BLOCK_READONLY
+};
+enum {
+ NO_LOCK=0,
+ READ_LOCK,
+ WRITE_LOCK
+};
+
+
+
/* osb->osb_flags flags */
#define OCFS_OSB_FLAGS_BEING_DISMOUNTED (0x00000004)
#define OCFS_OSB_FLAGS_SHUTDOWN (0x00000008)
@@ -343,12 +356,11 @@
struct _ocfs_lock_res
{
__s16 master_node_num; /* Master Node */
- __u32 lock_state;
__u32 lock_holders;
__u32 uncommitted_holders;
__u8 lock_type;
- struct semaphore lock_mutex;
- __s16 readonly_node;
+ struct rw_semaphore lock;
+ int readonly_state;
ocfs_node_map readonly_map;
};
Modified: trunk/src/ocfs_log.h
===================================================================
--- trunk/src/ocfs_log.h 2004-09-09 01:45:20 UTC (rev 1440)
+++ trunk/src/ocfs_log.h 2004-09-09 08:03:26 UTC (rev 1441)
@@ -269,6 +269,7 @@
# define LOG_TRACE_ARGS(fmt, arg...)
# define LOG_PID_PRINTK(fmt, arg...)
# define LOG_PID_STR(str)
+# define LOG_TRACE_PROCESS_VOTE(fmt, arg...)
#endif /* !defined(TRACE) */
@@ -354,6 +355,12 @@
#define LOG_TRACE_STR(str) LOG_TRACE_ARGS("%s\n", str)
#define LOG_TRACE_STATUS(val) LOG_TRACE_ARGS("%d\n", val);
+#ifdef VERBOSE_PROCESS_VOTE
+#define LOG_TRACE_PROCESS_VOTE LOG_TRACE_ARGS
+#else
+#define LOG_TRACE_PROCESS_VOTE(fmt, arg...)
+#endif
+
#endif /* TRACE */
Modified: trunk/src/vote.c
===================================================================
--- trunk/src/vote.c 2004-09-09 01:45:20 UTC (rev 1440)
+++ trunk/src/vote.c 2004-09-09 08:03:26 UTC (rev 1441)
@@ -655,7 +655,6 @@
ocfs_dlm_msg *dlm_msg;
ocfs_dlm_req_master *req_master;
struct list_head *iter_osb, *temp_iter;
- ocfs_vote_request_ctxt ctxt;
__s16 src_node;
LOG_ENTRY ();
@@ -690,10 +689,7 @@
switch (dlm_msg->msg_type) {
case OCFS_VOTE_REQUEST:
- ctxt.dlm_msg = dlm_msg;
- ctxt.node_num = dlm_msg->src_node;
- ctxt.status = 0;
- ocfs_process_vote (osb, &ctxt);
+ status = ocfs_process_vote (osb, dlm_msg);
break;
case OCFS_VOTE_REPLY:
@@ -893,7 +889,7 @@
OCFS_I(inode)->ip_blkno, obj->vote_status,
obj->vote_state, lock_id, flags, lock_type,
GET_INODE_LOCKRES(inode)->master_node_num,
- GET_INODE_LOCKRES(inode)->lock_state,
+ GET_INODE_LOCKRES(inode)->readonly_state,
GET_INODE_LOCKRES(inode)->lock_type);
}
*vote_status = obj->vote_status;
More information about the Ocfs2-commits
mailing list