[Ocfs2-commits] mfasheh commits r1351 - trunk/src
svn-commits at oss.oracle.com
svn-commits at oss.oracle.com
Thu Aug 12 15:39:11 CDT 2004
Author: mfasheh
Date: 2004-08-12 14:39:10 -0500 (Thu, 12 Aug 2004)
New Revision: 1351
Modified:
trunk/src/dlm.c
trunk/src/dlm.h
trunk/src/file.c
trunk/src/file.h
trunk/src/journal.c
trunk/src/lockres.c
trunk/src/namei.c
trunk/src/nm.c
trunk/src/ocfs.h
trunk/src/ocfs2_fs.h
trunk/src/super.c
trunk/src/vote.c
trunk/src/vote.h
Log:
* We no longer have an open_map in the on disk lock structure. This
takes the number of used fields in that structure down to basically
owner and type.
* During 1st open, we also no longer force a release_cache message
which was way too heavy as it required a lock level change, rather
than just syncing the inode. Instead we simply send the
TRUNCATE_INODE_PAGES message which does a sync anyway. The really
good thing about this is that there's no metadata I/O on the
receiving nodes now, and no locks need to be taken either.
* As a result, some changes had to be made in the extend counting - we
always count now, not just when we have the file open, as we no
longer force the owning node to finish all its extends before we can
open it.
* The open_map changes fix the deadlock described in bugzilla #118
* In ocfs_notify_on_open we were doing an ocfs_sleep somewhat
prematurely.
* There was an erroneous up_write in the error handling in
ocfs_file_open.
IMPORTANT NOTE:
We will be incompatible with older revisions as they
still think there's an open map, so you'd better
upgrade all your nodes :)
Modified: trunk/src/dlm.c
===================================================================
--- trunk/src/dlm.c 2004-08-11 18:27:57 UTC (rev 1350)
+++ trunk/src/dlm.c 2004-08-12 19:39:10 UTC (rev 1351)
@@ -57,10 +57,10 @@
static inline int ocfs_wait_for_readonly_drop(ocfs_super *osb, struct inode *inode);
-static int ocfs_disk_request_vote (ocfs_super * osb, __u64 lock_id, __u32 lock_type, __u32 flags, ocfs_node_map * vote_map, __u64 * lock_seq_num, __u32 time_to_wait, struct inode *inode, __u32 num_ident, ocfs_node_map *open_map);
-static int ocfs_wait_for_vote (ocfs_super * osb, __u64 lock_id, __u32 lock_type, __u32 flags, ocfs_node_map *vote_map, __u32 time_to_wait, __u64 lock_seq_num, ocfs_node_map *open_map);
+static int ocfs_disk_request_vote (ocfs_super * osb, __u64 lock_id, __u32 lock_type, __u32 flags, ocfs_node_map * vote_map, __u64 * lock_seq_num, __u32 time_to_wait, struct inode *inode, __u32 num_ident);
+static int ocfs_wait_for_vote (ocfs_super * osb, __u64 lock_id, __u32 lock_type, __u32 flags, ocfs_node_map *vote_map, __u32 time_to_wait, __u64 lock_seq_num);
static int ocfs_reset_voting (ocfs_super * osb);
-static int ocfs_get_vote_on_disk (ocfs_super * osb, __u64 lock_id, __u32 lock_type, __u32 flags, ocfs_node_map * got_vote_map, ocfs_node_map * vote_map, __u64 lock_seq_num, ocfs_node_map * oin_open_map);
+static int ocfs_get_vote_on_disk (ocfs_super * osb, __u64 lock_id, __u32 lock_type, __u32 flags, ocfs_node_map * got_vote_map, ocfs_node_map * vote_map, __u64 lock_seq_num);
static int ocfs_disk_release_lock (ocfs_super * osb, __u64 lock_id, __u32 lock_type, __u32 flags, struct inode *inode, __u32 num_ident);
@@ -75,22 +75,7 @@
ocfs_node_map_set_from_disk(vote_map, publish->vote_map);
}
-void ocfs_set_disk_lock_open_map(ocfs_super *osb,
- ocfs2_disk_lock *lock,
- ocfs_node_map *open_map)
-{
- ocfs_node_map_set_to_disk(lock->dl_node_map, open_map);
-}
-void ocfs_get_disk_lock_open_map(ocfs_super *osb,
- ocfs2_disk_lock *lock,
- ocfs_node_map *open_map)
-{
- ocfs_node_map_init(osb, open_map);
- ocfs_node_map_set_from_disk(open_map, lock->dl_node_map);
-}
-
-
/* inode is definitely non NULL */
static int ocfs_disk_request_vote(ocfs_super *osb, __u64 lock_id,
__u32 lock_type, __u32 flags,
@@ -98,8 +83,7 @@
__u64 *lock_seq_num,
__u32 time_to_wait,
struct inode *inode,
- __u32 num_ident,
- ocfs_node_map *open_map)
+ __u32 num_ident)
{
int status = 0;
__u64 pub_off;
@@ -203,7 +187,7 @@
if (status >= 0) {
status = ocfs_wait_for_vote (osb, lock_id, lock_type, flags, vote_map,
- time_to_wait, *lock_seq_num, open_map);
+ time_to_wait, *lock_seq_num);
if (status < 0 && status != -EAGAIN)
LOG_ERROR_STATUS (status);
}
@@ -213,165 +197,6 @@
} /* ocfs_disk_request_vote */
/*
- * ocfs_wait_for_lock_release()
- * inode is definitely non NULL
- */
-int ocfs_wait_for_lock_release (ocfs_super * osb, __u64 offset, __u32 time_to_wait, __u32 lock_type, struct inode *inode)
-{
- int status = -ETIMEDOUT;
- int tmpstat = -ETIMEDOUT;
- __u32 timewaited = 0;
- ocfs2_dinode *fe = NULL;
- struct buffer_head *bh = NULL;
- __s16 curr_master;
- __u8 lock_level;
- int disk_vote = 0;
- ocfs_lock_res *lockres = GET_INODE_LOCKRES(inode);
-
- LOG_ENTRY_ARGS ("(0x%p, %llu, %u, 0x%p, %u)\n",
- osb, offset, time_to_wait,
- lockres, lock_type);
-
- while (time_to_wait > timewaited) {
- bh = NULL;
- fe = NULL;
-
- tmpstat = ocfs_read_bh (osb, offset, &bh, 0, inode);
- if (tmpstat < 0) {
- LOG_ERROR_STATUS (status = tmpstat);
- goto finally;
- }
- fe = (ocfs2_dinode *) bh->b_data;
- curr_master = DISK_LOCK(fe)->dl_master;
- lock_level = DISK_LOCK(fe)->dl_level;
-
- if ((curr_master == OCFS_INVALID_NODE_NUM) ||
- (curr_master == osb->node_num)) {
- goto got_it;
- }
-
- if ((!ocfs_node_is_alive(&osb->publ_map, curr_master)) &&
- (!ocfs_node_is_recovering(osb, curr_master))) {
- /* Reset the lock as not owned and return success?? */
- /* This needs to be under some sort of cluster wide lock, */
- DISK_LOCK(fe)->dl_master = OCFS_INVALID_NODE_NUM;
- DISK_LOCK(fe)->dl_level = OCFS_LKM_NLMODE;
- tmpstat = ocfs_write_bh (osb, bh, 0, NULL);
- if (tmpstat < 0) {
- LOG_ERROR_STATUS (status = tmpstat);
- }
- goto got_it;
- }
-
- /* The local node is not the master */
- if (lock_level == OCFS_LKM_EXMODE) {
- ocfs_acquire_lockres(lockres, 0); // ocfs_file_open ocfs_symlink
- lockres->lock_type = lock_level;
- lockres->master_node_num = curr_master;
-
- if (S_ISDIR(fe->i_mode)) {
- if (lockres->readonly_node != OCFS_INVALID_NODE_NUM) {
- if (lockres->readonly_node == curr_master) {
- // readonly cachelock already on this dir
-#ifdef VERBOSE_LOCKING_TRACE
- printk("ocfs_wait_for_lock_release: ronode=master=%d\n", curr_master);
-#endif
- ocfs_release_lockres(lockres); // ocfs_file_open ocfs_symlink
- goto got_it;
- } else {
- LOG_ERROR_ARGS("(1) readonly node changed! was %d, now master is %d\n",
- lockres->readonly_node, curr_master);
- lockres->readonly_node = OCFS_INVALID_NODE_NUM;
- }
- }
-
- // no readonly node, need to alert owner to get readonly access
- status = new_lock_function(osb, lockres->lock_type,
- FLAG_DIR | FLAG_READDIR | FLAG_ACQUIRE_LOCK,
- bh, &disk_vote, inode);
- if (status < 0) {
- ocfs_release_lockres(lockres); // ocfs_file_open ocfs_symlink
- if (status == -EAGAIN) {
- if (ocfs_task_interruptible ()) {
- LOG_TRACE_ARGS("interrupted... lockid=%llu\n",
- offset);
- status = -EINTR;
- goto finally;
- }
- goto again;
- }
- goto finally;
- }
-
-#ifdef VERBOSE_LOCKING_TRACE
- printk("waitforlockrelease: setting ronode, was=%d, now=%d\n", lockres->readonly_node, lockres->master_node_num);
- printk("ocfs_wait_for_lock_release: cache->readonly ronode=master=%d\n", curr_master);
-#endif
- lockres->readonly_node = lockres->master_node_num;
- ocfs_release_lockres(lockres); // ocfs_file_open ocfs_symlink
- goto got_it;
- } else {
- for (status = -EAGAIN; status==-EAGAIN; ) {
- status = new_lock_function(osb, OCFS_LKM_NLMODE,
- FLAG_ACQUIRE_LOCK|FLAG_FILE_RELEASE_CACHE,
- NULL, &disk_vote, inode);
- if (status == -EAGAIN)
- ocfs_sleep(500);
- }
- ocfs_release_lockres(lockres); // ocfs_file_open ocfs_symlink
- if (status < 0) {
- if (status != -EINTR)
- LOG_ERROR_STATUS (status);
- goto finally;
- }
- tmpstat = ocfs_read_bh (osb, offset, &bh, 0, inode);
- if (tmpstat < 0) {
- LOG_ERROR_STATUS (tmpstat);
- status = tmpstat;
- goto finally;
- }
-#warning "isn't this broken!?!"
- LOG_TRACE_ARGS("broke cache lock, setting to NO_LOCK\n");
- fe = (ocfs2_dinode *) bh->b_data;
- DISK_LOCK(fe)->dl_level = OCFS_LKM_NLMODE;
- lock_level = OCFS_LKM_NLMODE;
- tmpstat = ocfs_write_bh (osb, bh, 0, inode);
- if (tmpstat < 0) {
- LOG_ERROR_STATUS (tmpstat);
- status = tmpstat;
- goto finally;
- }
- }
- }
-
-
- if (lock_level <= lock_type)
- goto got_it;
-
-again:
- brelse(bh);
- ocfs_sleep (WAIT_FOR_VOTE_INCREMENT);
- timewaited += WAIT_FOR_VOTE_INCREMENT;
- continue;
-got_it:
- status = 0;
- break;
- }
-
-finally:
- if (lockres && status >= 0)
- status = ocfs_update_lockres(osb, &bh, NULL, 0, inode,
- 0, 0);
-
- if (bh)
- brelse(bh);
-
- LOG_EXIT_STATUS (status);
- return status;
-} /* ocfs_wait_for_lock_release */
-
-
-/*
* ocfs_get_vote_on_disk()
*
*/
@@ -379,8 +204,7 @@
__u32 lock_type, __u32 flags,
ocfs_node_map *got_vote_map,
ocfs_node_map *vote_map,
- __u64 lock_seq_num,
- ocfs_node_map *oin_open_map)
+ __u64 lock_seq_num)
{
int status = 0;
__u32 i;
@@ -426,7 +250,6 @@
ocfs_vote_reply_ctxt ctxt;
ctxt.reply_method = DISK_VOTE;
ctxt.got_vote_map = got_vote_map;
- ctxt.open_map = oin_open_map;
ctxt.status = &status;
ctxt.flags = flags;
ctxt.u.vote = vote;
@@ -455,7 +278,7 @@
* ocfs_wait_for_vote()
*
*/
-static int ocfs_wait_for_vote (ocfs_super * osb, __u64 lock_id, __u32 lock_type, __u32 flags, ocfs_node_map *vote_map, __u32 time_to_wait, __u64 lock_seq_num, ocfs_node_map *open_map)
+static int ocfs_wait_for_vote (ocfs_super * osb, __u64 lock_id, __u32 lock_type, __u32 flags, ocfs_node_map *vote_map, __u32 time_to_wait, __u64 lock_seq_num)
{
int status = -EAGAIN;
__u32 timewaited = 0;
@@ -478,7 +301,7 @@
}
status = ocfs_get_vote_on_disk (osb, lock_id, lock_type, flags,
- &gotvotemap, vote_map, lock_seq_num, open_map);
+ &gotvotemap, vote_map, lock_seq_num);
if (status < 0) {
if (status != -EAGAIN)
LOG_ERROR_STATUS (status);
@@ -611,24 +434,6 @@
DISK_LOCK(fe)->dl_level = lockres->lock_type;
}
- if (flags & DLOCK_FLAG_OPEN_MAP) {
- ocfs_node_map disk_map;
-
- ocfs_node_map_init(osb, &disk_map);
- ocfs_get_disk_lock_open_map(osb, DISK_LOCK(fe), &disk_map);
-
- if (flags & DLOCK_FLAG_ADD_SELF)
- ocfs_node_map_set_bit(&lockres->oin_openmap, osb->node_num);
-
- if (!ocfs_node_map_is_equal(&disk_map, &lockres->oin_openmap)) {
- if (!changed) {
- fe = (ocfs2_dinode *) (*bh)->b_data;
- }
- changed = 1;
- ocfs_set_disk_lock_open_map(osb, DISK_LOCK(fe), &lockres->oin_openmap);
- }
- }
-
if (flags & DLOCK_FLAG_SEQ_NUM &&
DISK_LOCK(fe)->dl_seq_num != lockres->last_upd_seq_num) {
if (!changed) {
@@ -656,89 +461,17 @@
} /* ocfs_update_disk_lock */
/*
- * ocfs_update_master_on_open()
- * inode is definitely non NULL
+ * ocfs_notify_on_open()
+ *
+ * TODO: if we're not opening for write then lets send an additional
+ * flag over to tell the other node it's not necessary to do the
+ * truncate_inode_pages (he just has to sync).
*/
-int ocfs_update_master_on_open (ocfs_super * osb, struct inode *inode, ocfs_journal_handle *handle)
+int ocfs_notify_on_open (ocfs_super * osb, struct inode *inode)
{
int status = -EAGAIN;
int disk_vote = 0;
- struct buffer_head *bh = NULL;
ocfs_lock_res *lockres = GET_INODE_LOCKRES(inode);
-
- LOG_ENTRY ();
-
- /* no need to alert master for these cases */
- if (S_ISDIR (inode->i_mode) ||
- lockres->master_node_num == OCFS_INVALID_NODE_NUM ||
- ocfs_node_is_alive(&lockres->oin_openmap,
- osb->node_num)) {
- status = 0;
- goto bail;
- }
-
- while (status == -EAGAIN) {
- if (!ocfs_node_is_alive(&osb->publ_map,
- lockres->master_node_num)) {
- LOG_TRACE_ARGS ("Master (%u) dead, lockid %llu\n",
- lockres->master_node_num,
- OCFS_I(inode)->ip_blkno << inode->i_sb->s_blocksize_bits);
- status = 0;
- goto bail;
- }
-
- ocfs_acquire_lockres(lockres, 0); // ocfs_file_open ocfs_symlink
-
- if (lockres->master_node_num == osb->node_num) {
- ocfs_node_map_set_bit(&lockres->oin_openmap, osb->node_num);
- status = ocfs_update_disk_lock (osb, DLOCK_FLAG_OPEN_MAP,
- &bh, inode, handle);
- if (bh)
- brelse(bh);
-
- if (status < 0) {
- LOG_ERROR_STATUS (status);
- goto bail;
- }
- ocfs_release_lockres (lockres); // ocfs_file_open ocfs_symlink
- } else {
- status = new_lock_function(osb, lockres->lock_type, FLAG_ADD_OIN_MAP,
- NULL, &disk_vote, inode);
- if (status < 0) {
- if (status != -EAGAIN)
- LOG_ERROR_STATUS (status);
- ocfs_release_lockres (lockres); // ocfs_file_open ocfs_symlink
- if (status == -EAGAIN) {
- ocfs_sleep (500);
- if (ocfs_task_interruptible ()) {
- LOG_TRACE_ARGS("interrupted... lockid=%llu\n",
- OCFS_I(inode)->ip_blkno << inode->i_sb->s_blocksize_bits);
- status = -EINTR;
- goto bail;
- }
- continue;
- }
-
- goto bail;
- }
- ocfs_release_lockres (lockres); // ocfs_file_open ocfs_symlink
- }
- }
-
-bail:
- LOG_EXIT_STATUS (status);
- return status;
-} /* ocfs_update_master_on_open */
-
-/*
- * ocfs_notify_truncate_on_wopen()
- * inode is definitely non NULL
- */
-int ocfs_notify_truncate_on_wopen (ocfs_super * osb, struct inode *inode)
-{
- int status = -EAGAIN;
- int disk_vote = 0;
- ocfs_lock_res *lockres = GET_INODE_LOCKRES(inode);
u32 flags;
LOG_ENTRY ();
@@ -757,13 +490,13 @@
LOG_ERROR_STATUS (status);
ocfs_release_lockres (lockres); // ocfs_file_open ocfs_symlink
if (status == -EAGAIN) {
- ocfs_sleep (50);
if (ocfs_task_interruptible ()) {
LOG_TRACE_ARGS("interrupted... lockid=%llu\n",
OCFS_I(inode)->ip_blkno << inode->i_sb->s_blocksize_bits);
status = -EINTR;
goto bail;
}
+ ocfs_sleep (50);
continue;
}
@@ -775,7 +508,7 @@
bail:
LOG_EXIT_STATUS (status);
return status;
-} /* ocfs_notify_truncate_on_wopen */
+} /* ocfs_notify_on_open */
enum {
invalid_path = 0,
@@ -867,8 +600,6 @@
lockres->master_node_num != DISK_LOCK(disklock)->dl_master) {
lockres->master_node_num = DISK_LOCK(disklock)->dl_master;
lockres->lock_type = DISK_LOCK(disklock)->dl_level;
- ocfs_get_disk_lock_open_map(osb, DISK_LOCK(disklock),
- &lockres->oin_openmap);
}
updated = 1;
}
@@ -1011,14 +742,19 @@
}
goto finally;
}
-
+
/* We got the lock */
status = 0;
skip_lock_write:
OCFS_ASSERT(status == 0);
lockres->lock_holders++;
+ if (extra_lock_flags == FLAG_FAST_PATH_LOCK
+ && ((flags & FLAG_FILE_EXTEND) || (flags & FLAG_FILE_TRUNCATE)))
+ lockres->uncommitted_holders++;
LOG_TRACE_ARGS("lockres->lock_holders = %u\n", lockres->lock_holders);
+ LOG_TRACE_ARGS("lockres->uncommitted_holders = %u\n",
+ lockres->uncommitted_holders);
ocfs_release_lockres (lockres); // ocfs_acquire_lock
finally:
@@ -1044,32 +780,11 @@
int disk_vote = 0;
ocfs_lock_res *lockres = GET_INODE_LOCKRES(inode);
- /* TODO: the ifdefed out code here used to be used
- * right now, we *broadcast* on lock releases
- * because of the issues surrounding lookup_file_allocation
- * need to restrict this to only the openmap
- * at some later date */
-#if 0
- ocfs_node_map openmap;
-#endif
-
LOG_ENTRY_ARGS ("(0x%p, %llu, %u, %u, 0x%p)\n",
osb, lock_id, lock_type, flags, lockres);
/* TODO: figure out how to properly handle inode updates w/no oin */
- ocfs_node_map_dup(osb, &votemap, &osb->publ_map); // temporary hack, forces broadcast
-
-#if 0
- ocfs_node_map_init(osb, &openmap);
- ocfs_get_disk_lock_open_map(osb, DISK_LOCK(fe), &openmap);
-
- /* publish_map & openmap for operations that dont need broadcast */
- if (!(flags & (FLAG_FILE_RENAME | FLAG_FILE_DELETE |
- FLAG_FILE_RELEASE_DENTRY | FLAG_FILE_TRUNCATE |
- FLAG_FILE_EXTEND)))
- ocfs_node_map_and(&votemap, &openmap);
-#endif
-
+ ocfs_node_map_dup(osb, &votemap, &osb->publ_map);
ocfs_node_map_clear_bit(&votemap, osb->node_num);
if (ocfs_node_map_is_empty(&votemap))
goto finally;
@@ -1089,7 +804,7 @@
if (!disk_vote) {
LOG_TRACE_STR ("Network vote");
status = ocfs_send_dlm_request_msg (osb, lock_id, lock_type, flags,
- &votemap, NULL, inode, num_ident, &vote_status);
+ &votemap, inode, num_ident, &vote_status);
if (status >= 0 || status == -EAGAIN) {
if (status != -EAGAIN)
status = vote_status;
@@ -1133,7 +848,7 @@
LOG_TRACE_STR ("Disk vote");
disk_vote = 1;
- status = ocfs_disk_request_vote (osb, lock_id, lock_type, flags, &votemap, &lockseqno, 5000, inode, num_ident, NULL);
+ status = ocfs_disk_request_vote (osb, lock_id, lock_type, flags, &votemap, &lockseqno, 5000, inode, num_ident);
tmpstat = ocfs_reset_voting (osb);
if (status >=0) {
@@ -1167,6 +882,7 @@
int status = 0;
ocfs_lock_res *lockres = GET_INODE_LOCKRES(inode);
__u64 lock_id;
+ __u32 num_to_send = num_ident;
LOG_ENTRY_ARGS ("(0x%p, %u, %u, 0x%p)\n",
osb, lock_type, flags, lockres);
@@ -1201,14 +917,28 @@
if (flags & FLAG_FILE_RELEASE_MASTER)
lockres->master_node_num = OCFS_INVALID_NODE_NUM;
- status = ocfs_disk_release_lock (osb, lock_id, lock_type, flags, inode, num_ident);
+ OCFS_ASSERT(lockres->uncommitted_holders <= num_ident);
+
+ /* we don't want to send over a count for any size change
+ * which includes acquires which we also didn't broadcast. */
+ if ((flags & FLAG_FILE_EXTEND) || (flags & FLAG_FILE_TRUNCATE)) {
+ num_to_send -= lockres->uncommitted_holders;
+ lockres->uncommitted_holders = 0;
+ if (!num_to_send) {
+ /* it breaks the rules to send zero lock releases! */
+ status = 0;
+ goto finally;
+ }
+ }
+
+ status = ocfs_disk_release_lock (osb, lock_id, lock_type, flags, inode, num_to_send);
if (status < 0) {
/* Disable the vol */
LOG_ERROR_STATUS (status);
}
finally:
- if (!lockres->lock_holders) {
+ if (lockres->lock_holders - num_ident < 0) {
printk("About to decrement lock_holders one too many! lockid "
"= %llu\n", lock_id);
BUG();
@@ -1225,10 +955,9 @@
int new_lock_function(ocfs_super * osb, __u32 requested_lock, __u32 flags, struct buffer_head *bh, int *disk_vote, struct inode *inode)
{
ocfs_node_map vote_map;
- ocfs_node_map open_map;
ocfs2_dinode *fe = NULL;
__u64 lock_id;
- __u32 lock_write_flags = DLOCK_FLAG_MASTER | DLOCK_FLAG_LOCK | DLOCK_FLAG_OPEN_MAP;
+ __u32 lock_write_flags = DLOCK_FLAG_MASTER | DLOCK_FLAG_LOCK;
__u32 lock_type = requested_lock;
int need_to_zap_buffers = 0, need_lock_write = 1;
int is_readdir = (flags & FLAG_READDIR) ? 1 : 0;
@@ -1256,8 +985,7 @@
/* on a master change... */
need_to_zap_buffers = 1; /* need to dump local buffers */
need_lock_write = 1; /* and rewrite the lock */
- } else if (flags & (FLAG_ADD_OIN_MAP | FLAG_DROP_READONLY |
- FLAG_TRUNCATE_PAGES)) {
+ } else if (flags & (FLAG_DROP_READONLY | FLAG_TRUNCATE_PAGES)) {
need_lock_write = 0;
need_to_zap_buffers = 0;
} else if (flags & (FLAG_READDIR | FLAG_FILE_RELEASE_CACHE)) {
@@ -1274,8 +1002,6 @@
need_to_zap_buffers = 0;
}
- ocfs_node_map_init(osb, &open_map);
-
/* that's why it's called fast path */
if (flags & FLAG_FAST_PATH_LOCK)
goto vote_success;
@@ -1302,14 +1028,12 @@
if (ocfs_node_map_is_empty(&vote_map)) {
/* As this is the only node alive, make it master of the lock */
- /* no need to update open map */
- lock_write_flags &= ~(DLOCK_FLAG_OPEN_MAP);
goto vote_success;
}
status = ocfs_send_dlm_request_msg (osb, lock_id, lock_type,
flags, &vote_map,
- &open_map, inode, 1,
+ inode, 1,
&vote_status);
if (status >= 0) {
status = vote_status;
@@ -1324,14 +1048,11 @@
}
vote_success:
- if (status >= 0 && flags & (FLAG_FILE_EXTEND | FLAG_FILE_UPDATE))
- ocfs_node_map_set(&lockres->oin_openmap, &open_map);
-
if (need_to_zap_buffers)
ocfs_inc_inode_seq(osb, inode);
/* just alerting owner on open */
- if (flags & (FLAG_ADD_OIN_MAP | FLAG_TRUNCATE_PAGES))
+ if (flags & FLAG_TRUNCATE_PAGES)
goto bail;
/* drop readonly should remove anyone who has responded */
@@ -1358,12 +1079,6 @@
LOG_ERROR_STATUS (status);
}
- /* pick up any change to the open map */
- if (bh) {
- fe = (ocfs2_dinode *) bh->b_data;
- ocfs_get_disk_lock_open_map(osb, DISK_LOCK(fe),
- &lockres->oin_openmap);
- }
bail:
/* if we removed FLAG_READDIR above, set the readonly_node now */
if (is_readdir && !(flags & FLAG_READDIR)) {
Modified: trunk/src/dlm.h
===================================================================
--- trunk/src/dlm.h 2004-08-11 18:27:57 UTC (rev 1350)
+++ trunk/src/dlm.h 2004-08-12 19:39:10 UTC (rev 1351)
@@ -44,17 +44,8 @@
int ocfs_update_disk_lock(ocfs_super *osb, __u32 flags,
struct buffer_head **bh, struct inode *inode,
ocfs_journal_handle *handle);
-int ocfs_update_master_on_open(ocfs_super *osb, struct inode *inode,
- ocfs_journal_handle *handle);
-int ocfs_notify_truncate_on_wopen(ocfs_super *osb, struct inode *inode);
-int ocfs_wait_for_lock_release(ocfs_super *osb, __u64 offset,
- __u32 time_to_wait, __u32 lock_type,
- struct inode *inode);
+int ocfs_notify_on_open(ocfs_super *osb, struct inode *inode);
void ocfs_get_publish_vote_map(ocfs_super *osb, ocfs_publish *publish,
ocfs_node_map *vote_map);
-void ocfs_set_disk_lock_open_map(ocfs_super *osb, ocfs2_disk_lock *lock,
- ocfs_node_map *open_map);
-void ocfs_get_disk_lock_open_map(ocfs_super *osb, ocfs2_disk_lock *lock,
- ocfs_node_map *open_map);
#endif /* OCFS2_DLM_H */
Modified: trunk/src/file.c
===================================================================
--- trunk/src/file.c 2004-08-11 18:27:57 UTC (rev 1350)
+++ trunk/src/file.c 2004-08-12 19:39:10 UTC (rev 1351)
@@ -60,7 +60,6 @@
static int ocfs_truncate_file(ocfs_super *osb, __u64 file_size,
struct inode *inode);
-
static void ocfs_fe_set_attributes(ocfs2_dinode *fe, struct iattr *attr)
{
if (attr->ia_valid & ATTR_SIZE)
@@ -89,92 +88,91 @@
return sync_mapping_buffers(inode->i_mapping);
}
-/*
- * ocfs_inode_notify_open()
- *
- * you should be holding io_sem and ip_sem in this function.
- * If needed add ourselves to the open map. Only call this
- * on 1st open of a file. Marks the oin as "in use"
- */
-int ocfs_inode_notify_open(ocfs_super *osb, struct buffer_head *fe_bh,
- ocfs_journal_handle *handle, struct inode *inode)
+static void ocfs_block_sigs(sigset_t *oldsigs)
{
- int status = 0;
- ocfs2_dinode *fe = NULL;
- int local_handle = 1;
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
+ sigset_t tmpsig;
- LOG_ENTRY ();
+ siginitsetinv(&tmpsig, SHUTDOWN_SIGS);
+ sigprocmask(SIG_BLOCK, &tmpsig, oldsigs);
+#else
+#ifdef HAVE_NPTL
+ spin_lock_irq (&current->sighand->siglock);
+ *oldsigs = current->blocked;
+ siginitsetinv (&current->blocked, SHUTDOWN_SIGS);
+ recalc_sigpending ();
+ spin_unlock_irq (&current->sighand->siglock);
+#else
+ spin_lock_irq (&current->sigmask_lock);
+ *oldsigs = current->blocked;
+ siginitsetinv (&current->blocked, SHUTDOWN_SIGS);
+ recalc_sigpending (current);
+ spin_unlock_irq (&current->sigmask_lock);
+#endif
+#endif
+}
- if (handle)
- local_handle = 0;
+static void ocfs_unblock_sigs(sigset_t newsig)
+{
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
+ sigprocmask(SIG_SETMASK, newsig, NULL);
+#else
+#ifdef HAVE_NPTL
+ spin_lock_irq (&current->sighand->siglock);
+ current->blocked = newsig;
+ recalc_sigpending ();
+ spin_unlock_irq (&current->sighand->siglock);
+#else
+ spin_lock_irq (&current->sigmask_lock);
+ current->blocked = newsig;
+ recalc_sigpending (current);
+ spin_unlock_irq (&current->sigmask_lock);
+#endif
+#endif
+}
- fe = (ocfs2_dinode *) fe_bh->b_data;
- if (!IS_VALID_FILE_ENTRY(fe)) {
- status = -EINVAL;
- LOG_ERROR_STATUS(status);
- goto leave;
- }
+static inline int ocfs_wait_on_first_open(ocfs_super *osb,
+ struct inode *inode)
+{
+ int status = 0;
+ sigset_t tmpsig;
- /* why do we update these here? */
- OCFS_I(inode)->ip_alloc_size =
- (u64)fe->i_clusters << osb->s_clustersize_bits;
- OCFS_I(inode)->ip_chng_seq_num = DISK_LOCK(fe)->dl_seq_num;
- if (fe->i_blkno == 0) {
- LOG_ERROR_STR ("this_sector=0");
- goto leave;
+ ocfs_block_sigs(&tmpsig);
+again:
+ if (signal_pending(current)) {
+ status = -EINTR;
+ goto bail;
}
- if (inode == osb->root_inode)
- goto leave;
+ down(&(OCFS_I(inode)->ip_sem));
+ if (!(OCFS_I(inode)->ip_open_flags & OCFS_IN_FIRST_OPEN))
+ goto bail;
+ up(&(OCFS_I(inode)->ip_sem));
- if (local_handle) {
- handle = ocfs_start_trans(osb, OCFS_OPEN_CREDITS);
- if (!handle) {
- LOG_ERROR_STATUS(status = -ENOMEM);
- goto leave;
- }
- ocfs_handle_set_always_commits(handle, 1);
- }
+ interruptible_sleep_on(&osb->open_event);
+ goto again;
- /* this will update the lock info from disk while also
- * ensuring that the lock level drops to something
- * compatible with a first-open of the file */
- status = ocfs_wait_for_lock_release(osb,
- OCFS_I(inode)->ip_blkno << inode->i_sb->s_blocksize_bits,
- 30000,
- (S_ISDIR (inode->i_mode) ?
- OCFS_LKM_EXMODE :
- OCFS_LKM_NLMODE), inode);
- if (status < 0) {
- if (status != -EINTR) {
- LOG_ERROR_STR ("Lock owner is alive and taking too much time");
- LOG_ERROR_STATUS(status);
- }
- goto leave;
- }
+bail:
+ up(&(OCFS_I(inode)->ip_sem));
+ ocfs_unblock_sigs(tmpsig);
- status = ocfs_update_master_on_open (osb, inode, handle);
- if (status < 0 && status != -EINTR)
- LOG_ERROR_STATUS (status);
+ return(status);
+}
-leave:
- if (local_handle && handle)
- ocfs_commit_trans(handle);
+static inline void ocfs_notify_openers(ocfs_super *osb)
+{
+ wake_up(&osb->open_event);
+}
- LOG_EXIT_STATUS (status);
- return status;
-} /* ocfs_inode_notify_open */
-
/*
* ocfs_file_open()
*
*/
static int ocfs_file_open(struct inode *inode, struct file *file)
{
- int ret =0, err = 0, status = 0;
+ int ret =0, err = 0, status = 0, first_open = 0;
int mode = file->f_flags;
ocfs_super *osb = NULL;
- int have_oin_sem = 0;
struct buffer_head *fe_bh = NULL;
LOG_ENTRY_ARGS ("(0x%p, 0x%p, '%*s')\n", inode, file,
@@ -194,86 +192,71 @@
status = ocfs_verify_update_inode (osb, inode, 0);
up_read (&(OCFS_I(inode)->ip_io_sem));
if (status < 0) {
- up_write(&OCFS_I(inode)->ip_io_sem);
LOG_ERROR_STATUS (status);
goto leave;
}
}
- down_write (&(OCFS_I(inode)->ip_io_sem));
- down (&(OCFS_I(inode)->ip_sem));
- have_oin_sem = 1;
-
- /* first open, we've gotta update the lock state. */
+ down(&(OCFS_I(inode)->ip_sem));
if (!OCFS_I(inode)->ip_open_cnt) {
- status = ocfs_read_bh(osb,
- OCFS_I(inode)->ip_blkno << inode->i_sb->s_blocksize_bits,
- &fe_bh, OCFS_BH_CACHED, inode);
- if (status < 0) {
- up_write(&OCFS_I(inode)->ip_io_sem);
- LOG_ERROR_STATUS(status);
- goto leave;
- }
+ first_open = 1;
+ OCFS_I(inode)->ip_open_flags |= OCFS_IN_FIRST_OPEN;
+ }
+ OCFS_I(inode)->ip_open_cnt++;
+ up(&(OCFS_I(inode)->ip_sem));
- status = ocfs_inode_notify_open(osb, fe_bh, NULL, inode);
- if (status < 0) {
- up_write(&OCFS_I(inode)->ip_io_sem);
+ if (!first_open)
+ status = ocfs_wait_on_first_open(osb, inode);
+
+ if (status < 0) {
+ if (status != -EINTR)
LOG_ERROR_STATUS(status);
- if (status != -EINTR) {
- LOG_ERROR_ARGS("Status returned %d trying to "
- "notify open on file '%*s'\n",
- status,
- file->f_dentry->d_name.len,
- file->f_dentry->d_name.name);
- status = -ENOENT;
- }
- goto leave;
- }
+ down(&(OCFS_I(inode)->ip_sem));
+ OCFS_I(inode)->ip_open_cnt--;
+ goto leave_unlock;
+ }
- if(OCFS_I(inode)->ip_ext_map.initialized &&
- OCFS_I(inode)->ip_ext_map.count == 0) {
- status = ocfs_inode_fill_ext_map (osb, fe_bh, inode);
- if (status < 0) {
- up_write(&OCFS_I(inode)->ip_io_sem);
- LOG_ERROR_STATUS(status);
- goto leave;
- }
- }
+ status = 0;
+ if (!(mode & O_DIRECT)
+ && (first_open || (mode & (O_WRONLY|O_RDWR))))
+ status = ocfs_notify_on_open(osb, inode);
+
+ down (&(OCFS_I(inode)->ip_sem));
+ if (first_open) {
+ OCFS_I(inode)->ip_open_flags &= ~OCFS_IN_FIRST_OPEN;
+ ocfs_notify_openers(osb);
}
- if((mode & (O_WRONLY|O_RDWR)) && !(mode & O_DIRECT)) {
- status = ocfs_notify_truncate_on_wopen(osb, inode);
- if (status < 0) {
- up_write(&OCFS_I(inode)->ip_io_sem);
- LOG_ERROR_STATUS(status);
- goto leave;
- }
+ if (status < 0) {
+ OCFS_I(inode)->ip_open_cnt--;
+ LOG_ERROR_STATUS(status);
+ goto leave_unlock;
}
- up_write(&OCFS_I(inode)->ip_io_sem);
- /* yes, hold onto ip_sem. */
+ if (OCFS_I(inode)->ip_open_cnt > 1) {
+ /* We're not the only person who has it open right
+ * now so lets check whether the requested
+ * access/share access conflicts with the existing
+ * open operations. */
- if (OCFS_I(inode)->ip_open_cnt > 0) {
- /* The OIN is currently in use by some thread. */
- /* We must check whether the requested access/share access */
- /* conflicts with the existing open operations. */
-
LOG_TRACE_ARGS ("oin->ip_open_cnt > 0! : %u\n",
OCFS_I(inode)->ip_open_cnt);
if (!(mode & O_DIRECT)) {
if ((OCFS_I(inode)->ip_open_flags & OCFS_OIN_OPEN_FOR_DIRECTIO) && !(mode & O_RDONLY)) {
+ OCFS_I(inode)->ip_open_cnt--;
status = -EACCES;
LOG_TRACE_STR("file is already open O_DIRECT, "
"cannot open non O_DIRECT");
- goto leave;
+ goto leave_unlock;
}
} else if (mode & O_DIRECT) {
if (!(OCFS_I(inode)->ip_open_flags & OCFS_OIN_OPEN_FOR_DIRECTIO)) {
+ OCFS_I(inode)->ip_open_cnt--;
status = -EACCES;
LOG_TRACE_STR("file is already open non " \
"O_DIRECT, cannot open " \
"O_DIRECT");
- goto leave;
+ goto leave_unlock;
}
}
status = 0;
@@ -284,18 +267,9 @@
OCFS_CLEAR_FLAG(OCFS_I(inode)->ip_open_flags, OCFS_OIN_OPEN_FOR_DIRECTIO);
}
- OCFS_I(inode)->ip_open_cnt++;
-
- if (have_oin_sem) {
- up (&(OCFS_I(inode)->ip_sem));
- have_oin_sem = 0;
- }
-
+leave_unlock:
+ up (&(OCFS_I(inode)->ip_sem));
leave:
- if (have_oin_sem) {
- up (&(OCFS_I(inode)->ip_sem));
- have_oin_sem = 0;
- }
if (fe_bh)
brelse(fe_bh);
@@ -358,29 +332,10 @@
OCFS_I(inode)->ip_open_cnt, osb->file_open_cnt,
atomic_read(&dentry->d_count));
- if (last_close) {
- if (OCFS_I(inode)->ip_open_flags & OCFS_OIN_OPEN_FOR_DIRECTIO)
- OCFS_CLEAR_FLAG(OCFS_I(inode)->ip_open_flags, OCFS_OIN_OPEN_FOR_DIRECTIO);
-
- /* we might still be holding ip_extend_sem on
- * behalf of another node, so release it here. */
- down(&recovery_list_sem);
- spin_lock(&oin_num_ext_lock);
- if (OCFS_I(inode)->ip_num_extends) {
- LOG_TRACE_ARGS("ocfs2: closing inode %llu -- had %u "
- "extends pending from another node!\n",
- OCFS_I(inode)->ip_blkno,
- OCFS_I(inode)->ip_num_extends);
- OCFS_I(inode)->ip_num_extends = 0;
- list_del(&OCFS_I(inode)->ip_recovery_list);
- INIT_LIST_HEAD(&OCFS_I(inode)->ip_recovery_list);
- up_write(&OCFS_I(inode)->ip_extend_sem);
-
- dec = 1;
- }
- spin_unlock(&oin_num_ext_lock);
- up(&recovery_list_sem);
- }
+ if (last_close
+ && (OCFS_I(inode)->ip_open_flags & OCFS_OIN_OPEN_FOR_DIRECTIO))
+ OCFS_CLEAR_FLAG(OCFS_I(inode)->ip_open_flags,
+ OCFS_OIN_OPEN_FOR_DIRECTIO);
up (&(OCFS_I(inode)->ip_sem));
bail:
Modified: trunk/src/file.h
===================================================================
--- trunk/src/file.h 2004-08-11 18:27:57 UTC (rev 1350)
+++ trunk/src/file.h 2004-08-12 19:39:10 UTC (rev 1351)
@@ -39,9 +39,6 @@
int system_file, struct buffer_head *fe_bh);
int ocfs_inode_fill_ext_map(ocfs_super *osb, struct buffer_head *fe_bh,
struct inode *inode);
-int ocfs_inode_notify_open(ocfs_super *osb, struct buffer_head *fe_bh,
- ocfs_journal_handle *handle,
- struct inode *inode);
int ocfs_setattr(struct dentry *dentry, struct iattr *attr);
int ocfs_sync_inode(struct inode *inode);
int ocfs2_zero_extend(struct inode *inode);
Modified: trunk/src/journal.c
===================================================================
--- trunk/src/journal.c 2004-08-11 18:27:57 UTC (rev 1350)
+++ trunk/src/journal.c 2004-08-12 19:39:10 UTC (rev 1351)
@@ -1090,16 +1090,6 @@
LOG_ERROR_ARGS("inode and fe i_size's differ! (%llu != %llu",
inode->i_size, fe->i_size);
- /* add this node to openmap and update disk lock */
- status = ocfs_update_disk_lock (osb,
- DLOCK_FLAG_OPEN_MAP|DLOCK_FLAG_ADD_SELF,
- &bh, inode, NULL);
- if (status < 0) {
- up_write(&OCFS_I(inode)->ip_io_sem);
-
- LOG_ERROR_STATUS(status);
- goto done;
- }
OCFS_I(inode)->ip_open_cnt++;
up_write(&OCFS_I(inode)->ip_io_sem);
@@ -1532,18 +1522,6 @@
OCFS_I(inode)->ip_alloc_size = alloc_size;
- /* add this node to openmap and update disk lock */
- down_write(&OCFS_I(inode)->ip_io_sem);
-
- status = ocfs_update_disk_lock (osb,
- DLOCK_FLAG_OPEN_MAP|DLOCK_FLAG_ADD_SELF,
- &bh, inode, NULL);
- up_write(&OCFS_I(inode)->ip_io_sem);
- if (status < 0) {
- LOG_ERROR_STATUS(status);
- goto done;
- }
-
status = ocfs_force_read_journal(osb, inode->i_size, inode);
if (status < 0) {
LOG_ERROR_STATUS(status);
Modified: trunk/src/lockres.c
===================================================================
--- trunk/src/lockres.c 2004-08-11 18:27:57 UTC (rev 1350)
+++ trunk/src/lockres.c 2004-08-12 19:39:10 UTC (rev 1351)
@@ -103,7 +103,6 @@
lockres->lock_type = DISK_LOCK(fe)->dl_level;
lockres->master_node_num = DISK_LOCK(fe)->dl_master;
- ocfs_get_disk_lock_open_map(osb, DISK_LOCK(fe), &lockres->oin_openmap);
if (lockres->readonly_node != OCFS_INVALID_NODE_NUM &&
lockres->readonly_node != lockres->master_node_num) {
@@ -205,7 +204,6 @@
lockres->lock_type = OCFS_LKM_NLMODE;
lockres->master_node_num = OCFS_INVALID_NODE_NUM;
lockres->last_upd_seq_num = 0;
- ocfs_node_map_init(osb, &lockres->oin_openmap);
lockres->lock_state = 0;
init_MUTEX(&lockres->lock_mutex);
@@ -214,7 +212,7 @@
lockres->readonly_node = OCFS_INVALID_NODE_NUM;
lockres->lock_holders = 0;
- LOG_TRACE_ARGS("lockres->lock_holders = %u\n", lockres->lock_holders);
+ lockres->uncommitted_holders = 0;
LOG_EXIT ();
return;
Modified: trunk/src/namei.c
===================================================================
--- trunk/src/namei.c 2004-08-11 18:27:57 UTC (rev 1350)
+++ trunk/src/namei.c 2004-08-12 19:39:10 UTC (rev 1351)
@@ -375,7 +375,6 @@
u64 fe_blkno = 0;
__u64 fileOffset = 0;
struct inode *inode_alloc_inode = NULL;
- ocfs_node_map just_me;
LOG_ENTRY_ARGS ("(0x%p, 0x%p, %d, %lu, '%*s')\n", dir, dentry, mode,
(unsigned long)dev, dentry->d_name.len, dentry->d_name.name);
@@ -383,9 +382,6 @@
OCFS_ASSERT(new_fe_bh);
*new_fe_bh = NULL;
- ocfs_node_map_init(osb, &just_me);
- ocfs_node_map_set_bit(&just_me, osb->node_num);
-
inode_alloc_inode = ocfs_get_system_file_inode(osb, INODE_ALLOC_BITMAP_SYSTEM_INODE, osb->node_num);
if (!inode_alloc_inode) {
status = -EFAIL;
@@ -452,7 +448,6 @@
DISK_LOCK(fe)->dl_seq_num = 0;
DISK_LOCK(fe)->dl_master = osb->node_num;
DISK_LOCK(fe)->dl_level = OCFS_LKM_EXMODE;
- ocfs_set_disk_lock_open_map(osb, DISK_LOCK(fe), &just_me);
fe->i_atime = fe->i_ctime = fe->i_mtime = OCFS_CURRENT_TIME;
fe->i_dtime = 0;
@@ -1512,12 +1507,6 @@
inode->i_size = newsize;
inode->i_blocks = (newsize + sb->s_blocksize - 1) >> sb->s_blocksize_bits;
- status = ocfs_inode_notify_open(osb, new_fe_bh, handle, inode);
- if (status < 0) {
- LOG_ERROR_STATUS(status);
- goto abort_trans;
- }
-
status = ocfs_inode_fill_ext_map(osb, new_fe_bh, inode);
if (status < 0) {
LOG_ERROR_STATUS(status);
Modified: trunk/src/nm.c
===================================================================
--- trunk/src/nm.c 2004-08-11 18:27:57 UTC (rev 1350)
+++ trunk/src/nm.c 2004-08-12 19:39:10 UTC (rev 1351)
@@ -88,7 +88,6 @@
"DELETE_RELEASE",// delete or rename release request
"RELEASE_CACHE", // release a cache lock I hold
"CHANGE_MASTER", // request to change master to requestor
- "ADD_OIN_MAP", // add requestor into oin map
"NOT_MASTER", // I am not master, retry
"REMASTER_THIS", // remaster lock to me
"REMASTER_REQUESTOR", // remaster lock to requestor
@@ -527,8 +526,6 @@
} else if (lockres->master_node_num == osb->node_num) {
if (flags & FLAG_CHANGE_MASTER)
vote_type = CHANGE_MASTER;
- else if (flags & FLAG_ADD_OIN_MAP)
- vote_type = ADD_OIN_MAP;
else {
LOG_TRACE_STR("(INVALID_REQUEST) am master, but no more types");
vote_type = INVALID_REQUEST;
@@ -550,18 +547,11 @@
return vote_type;
}
+/* this function requires that its callers be serialized (this isn't
+ * really a problem, as vote_sem does that for us). */
static void ocfs_mark_inode_for_extend(ocfs_super *osb, struct inode *inode,
__u32 node_num)
{
-
- down(&OCFS_I(inode)->ip_sem);
-
- /* if we don't have it open, then don't bother with this. */
- if (!OCFS_I(inode)->ip_open_cnt) {
- up(&OCFS_I(inode)->ip_sem);
- return;
- }
-
spin_lock(&oin_num_ext_lock);
if (OCFS_I(inode)->ip_num_extends < 0)
@@ -571,8 +561,11 @@
* the counter. */
if (OCFS_I(inode)->ip_num_extends > 0) {
OCFS_I(inode)->ip_num_extends++;
+
+ printk("ocfs_mark_inode_for_extend: inode %llu, num = %d\n",
+ OCFS_I(inode)->ip_blkno, OCFS_I(inode)->ip_num_extends);
+
spin_unlock(&oin_num_ext_lock);
- up(&OCFS_I(inode)->ip_sem);
return;
}
@@ -580,7 +573,6 @@
* this holding ip_sem so we drop it and recheck after we've
* got it. */
spin_unlock(&oin_num_ext_lock);
- up(&OCFS_I(inode)->ip_sem);
/* take the extend_sem on behalf of
* this other node. It won't be
@@ -591,26 +583,26 @@
* inode. */
down_write(&OCFS_I(inode)->ip_extend_sem);
- down(&OCFS_I(inode)->ip_sem);
- if (!OCFS_I(inode)->ip_open_cnt) {
- up_write(&OCFS_I(inode)->ip_extend_sem);
- up(&OCFS_I(inode)->ip_sem);
- return;
- }
-
atomic_inc(&inode->i_count);
/* Ok, we've still got it open. Put this guy on the recovery
* list in case the extending node dies. */
down(&recovery_list_sem);
spin_lock(&oin_num_ext_lock);
+
+ if (OCFS_I(inode)->ip_num_extends < 0)
+ BUG();
+
OCFS_I(inode)->ip_num_extends++;
list_add_tail(&OCFS_I(inode)->ip_recovery_list,
&osb->lock_recovery_lists[node_num]);
+#ifdef VERBOSE_PROCESS_VOTE
+ printk("ocfs_mark_inode_for_extend: inode %llu, num = %d\n",
+ OCFS_I(inode)->ip_blkno, OCFS_I(inode)->ip_num_extends);
+#endif
spin_unlock(&oin_num_ext_lock);
up(&recovery_list_sem);
- up(&OCFS_I(inode)->ip_sem);
return;
}
@@ -619,21 +611,32 @@
{
int dec = 0;
- down(&OCFS_I(inode)->ip_sem);
-
- /* if we no longer have it open, then the close path has dealt
- * with this. */
- if (!OCFS_I(inode)->ip_open_cnt)
- goto done;
-
down(&recovery_list_sem);
spin_lock(&oin_num_ext_lock);
- OCFS_I(inode)->ip_num_extends -= num_rel;
+ if ((OCFS_I(inode)->ip_num_extends - (s32) num_rel) < 0) {
+ /* We don't force to zero here in order to cover up a
+ * bug, but rather because it's perfectly valid for us
+ * to get a release with a count > what we've had if
+ * we mount after the acquires have been sent. */
- if (OCFS_I(inode)->ip_num_extends < 0)
- BUG();
+#ifdef VERBOSE_PROCESS_VOTE
+ printk("ocfs_clear_inode_for_extend: inode %llu, num_rel of "
+ "%d would result in negative count (ip_num_extends "
+ "= %d)\n",
+ OCFS_I(inode)->ip_blkno, num_rel,
+ OCFS_I(inode)->ip_num_extends);
+#endif
+ OCFS_I(inode)->ip_num_extends = 0;
+ } else {
+ OCFS_I(inode)->ip_num_extends -= num_rel;
+ }
+#ifdef VERBOSE_PROCESS_VOTE
+ printk("ocfs_clear_inode_for_extend: inode %llu, num = %d\n",
+ OCFS_I(inode)->ip_blkno, OCFS_I(inode)->ip_num_extends);
+#endif
+
if (!OCFS_I(inode)->ip_num_extends) {
list_del(&OCFS_I(inode)->ip_recovery_list);
INIT_LIST_HEAD(&OCFS_I(inode)->ip_recovery_list);
@@ -646,9 +649,6 @@
spin_unlock(&oin_num_ext_lock);
up(&recovery_list_sem);
-done:
- up(&OCFS_I(inode)->ip_sem);
-
/* we want iputs to happen outside of as many locks as possible. */
if (dec)
iput(inode);
@@ -763,7 +763,6 @@
ocfs_dlm_msg *dlm_msg = (comm_vote ? ctxt->u.dlm_msg : NULL);
__s16 node_num = ctxt->node_num;
__u64 lock_id, seq_num;
- ocfs_node_map disk_map;
LOG_ENTRY_ARGS ("(0x%p, 0x%p)\n", osb, ctxt);
@@ -841,24 +840,18 @@
* Please see the note in ocfs_delete_inode. */
osb->voting_ino = inode->i_ino;
-#warning "FLAG_ADD_OIN_MAP shouldn't do io anyways!"
#ifdef PROCESS_VOTE_TRYLOCK
- if (flags & (FLAG_FILE_RELEASE_CACHE | FLAG_CHANGE_MASTER
- | FLAG_ADD_OIN_MAP)) {
-
+ if (flags & (FLAG_FILE_RELEASE_CACHE|FLAG_CHANGE_MASTER)) {
if (ocfs_io_sem_write_trylock(inode,
(OCFS_NM_HEARTBEAT_TIME/2))) {
LOG_ERROR_ARGS("Could not get i/o lock on "
- "inode %llu\n",
- OCFS_I(inode)->ip_blkno);
- if (flags & FLAG_ADD_OIN_MAP)
- goto leave;
+ "inode %llu, flags 0x%x\n",
+ OCFS_I(inode)->ip_blkno, flags);
} else
have_io_sem = 1;
}
#else
- if (flags & (FLAG_FILE_RELEASE_CACHE | FLAG_CHANGE_MASTER
- | FLAG_ADD_OIN_MAP)) {
+ if (flags & (FLAG_FILE_RELEASE_CACHE|FLAG_CHANGE_MASTER)) {
down_write(&OCFS_I(inode)->ip_io_sem);
have_io_sem = 1;
}
@@ -899,13 +892,10 @@
printk("Invalid request! flags = 0x%x\n", flags);
#endif
- /* get_process_vote_action will only allow CHANGE_MASTER,
- * RELEASE_CACHE, and ADD_OIN_MAP on a CACHE lock held by this
- * node. the CHANGE_MASTER/RELEASE_CACHE path needs to check
- * the readonly map to see if any nodes need to be updated.
- * this is not necessary for the ADD_OIN_MAP path since it
- * cannot actually modify any data or metadata under the lock.
- */
+ /* get_process_vote_action will only allow CHANGE_MASTER and
+ * RELEASE_CACHE on a CACHE lock held by this node. The
+ * CHANGE_MASTER/RELEASE_CACHE path needs to check the
+ * readonly map to see if any nodes need to be updated. */
if (disk_vote) {
/* Zero out the vote for everybody, if any already set
@@ -1135,12 +1125,6 @@
is_locked = DISK_LOCK(fe)->dl_level > OCFS_LKM_NLMODE;
if (vote_type == CHANGE_MASTER) {
fe = (ocfs2_dinode *) fe_bh->b_data;
- if (OCFS_I(inode)->ip_open_cnt) {
- ocfs_node_map_init(osb, &disk_map);
- ocfs_get_disk_lock_open_map(osb, DISK_LOCK(fe), &disk_map);
- ocfs_node_map_set_bit(&disk_map, osb->node_num);
- ocfs_set_disk_lock_open_map(osb, DISK_LOCK(fe), &disk_map);
- }
DISK_LOCK(fe)->dl_master = node_num;
}
@@ -1172,39 +1156,6 @@
status = 0;
break;
- case ADD_OIN_MAP:
- LOG_TRACE_STR("ADD_OIN_MAP");
- status = ocfs_read_bh(osb, lock_id, &fe_bh, lockflags, inode);
- if (status < 0) {
- LOG_ERROR_STATUS (status);
- break;
- }
- fe = (ocfs2_dinode *) fe_bh->b_data;
-
- if (!(fe->i_flags & OCFS2_VALID_FL)) {
- vote_response = FLAG_VOTE_FILE_DEL;
- } else {
- ocfs_node_map_init(osb, &disk_map);
- fe = (ocfs2_dinode *) fe_bh->b_data;
- ocfs_get_disk_lock_open_map(osb, DISK_LOCK(fe), &disk_map);
- ocfs_node_map_set_bit(&disk_map, node_num);
- ocfs_set_disk_lock_open_map(osb, DISK_LOCK(fe), &disk_map);
-
- /* Write new map on the disk */
- status = ocfs_write_bh(osb, fe_bh, 0, inode);
- if (status < 0) {
- brelse(fe_bh);
- LOG_ERROR_STATUS (status);
- break;
- }
-
- /* Add this node to the oin map on the file entry */
- ocfs_node_map_set(&lockres->oin_openmap, &disk_map);
- vote_response = FLAG_VOTE_NODE;
- }
- brelse(fe_bh);
- break;
-
case DROP_READONLY:
/* TODO: may need locking in here to lock out
* the actual IO that a readdir may have in
Modified: trunk/src/ocfs.h
===================================================================
--- trunk/src/ocfs.h 2004-08-11 18:27:57 UTC (rev 1350)
+++ trunk/src/ocfs.h 2004-08-12 19:39:10 UTC (rev 1351)
@@ -83,7 +83,6 @@
DELETE_RELEASE,// delete or rename release request
RELEASE_CACHE, // release a cache lock I hold
CHANGE_MASTER, // request to change master to requestor
- ADD_OIN_MAP, // add requestor into oin map
NOT_MASTER, // I am not master, retry
REMASTER_THIS, // remaster lock to me
REMASTER_REQUESTOR, // remaster lock to requestor
@@ -132,7 +131,7 @@
#define FLAG_FILE_RELEASE_MASTER 0x00000100
#define FLAG_RELEASE_DENTRY 0x00000200
#define FLAG_CHANGE_MASTER 0x00000400
-#define FLAG_ADD_OIN_MAP 0x00000800
+#define FLAG_FILE_UNUSED5 0x00000800
#define FLAG_DIR 0x00001000
#define FLAG_REMASTER 0x00002000
#define FLAG_FAST_PATH_LOCK 0x00004000
@@ -315,13 +314,13 @@
#define OCFS2_SB(sb) ((ocfs_super *)OCFS_GENERIC_SB_MEMBER(sb))
-#define DLOCK_FLAG_OPEN_MAP (0x1)
+#define DLOCK_FLAG_UNUSED1 (0x1)
#define DLOCK_FLAG_LOCK (0x2)
#define DLOCK_FLAG_SEQ_NUM (0x4)
#define DLOCK_FLAG_MASTER (0x8)
#define DLOCK_FLAG_LAST_UPDATE (0x10)
-#define DLOCK_FLAG_ADD_SELF (0x20)
-#define DLOCK_FLAG_ALL (DLOCK_FLAG_OPEN_MAP | DLOCK_FLAG_LOCK | \
+#define DLOCK_FLAG_UNUSED2 (0x20)
+#define DLOCK_FLAG_ALL (DLOCK_FLAG_LOCK | \
DLOCK_FLAG_SEQ_NUM | DLOCK_FLAG_MASTER | \
DLOCK_FLAG_LAST_UPDATE)
@@ -390,11 +389,12 @@
__s16 master_node_num; /* Master Node */
__u32 lock_state;
__u32 lock_holders;
+ __u32 uncommitted_holders;
__u8 lock_type;
struct semaphore lock_mutex;
__s16 readonly_node;
ocfs_node_map readonly_map;
- ocfs_node_map oin_openmap;
+ //ocfs_node_map oin_openmap;
__u64 last_upd_seq_num;
};
@@ -436,7 +436,7 @@
struct rw_semaphore ip_extend_sem;
struct list_head ip_recovery_list;/* protected by recovery_list_sem */
- __u32 ip_num_extends; /* protected by oin_num_ext_lock */
+ __s32 ip_num_extends; /* protected by oin_num_ext_lock */
atomic_t ip_clean_buffer_seq;
__u32 ip_flags; /* see below */
@@ -472,7 +472,8 @@
/* Eventually, the 'flags' and 'open_flags' fields need to be
* merged. */
/* open flags */
-#define OCFS_OIN_OPEN_FOR_DIRECTIO (0x00000008)
+#define OCFS_OIN_OPEN_FOR_DIRECTIO (0x00000001)
+#define OCFS_IN_FIRST_OPEN (0x00000002)
/* 'flags' flags. */
/* has this inode been deleted, either from this node or from another node. */
@@ -655,6 +656,7 @@
struct list_head vote_obj_queue;
spinlock_t vote_obj_queue_lock;
unsigned long voting_ino; /* only safe from the process_vote pid */
+ wait_queue_head_t open_event;
};
typedef struct _ocfs_comm_info
@@ -773,7 +775,7 @@
int vote_status;
ocfs_node_map req_vote_map;
ocfs_node_map got_vote_map;
- ocfs_node_map tmp_openmap;
+ //ocfs_node_map tmp_openmap;
__u64 seq_num;
pid_t pid;
ocfs_dlm_msg m;
@@ -853,7 +855,6 @@
int reply_method;
int *status;
ocfs_node_map *got_vote_map;
- ocfs_node_map *open_map;
__u32 flags;
union {
ocfs_dlm_reply_master *reply;
Modified: trunk/src/ocfs2_fs.h
===================================================================
--- trunk/src/ocfs2_fs.h 2004-08-11 18:27:57 UTC (rev 1350)
+++ trunk/src/ocfs2_fs.h 2004-08-12 19:39:10 UTC (rev 1351)
@@ -277,8 +277,7 @@
__u8 dl_level; /* Lock level */
__u8 dl_reserved2[3]; /* Pad to u64 */
__u64 dl_seq_num; /* Lock transaction seqnum */
-/*10*/ __u32 dl_node_map[8]; /* Bitmap of interested nodes,
- was __u32 */
+/*10*/ __u32 dl_reserved3[8]; /* was dl_node_map */
/*30*/
} ocfs2_disk_lock;
Modified: trunk/src/super.c
===================================================================
--- trunk/src/super.c 2004-08-11 18:27:57 UTC (rev 1350)
+++ trunk/src/super.c 2004-08-12 19:39:10 UTC (rev 1351)
@@ -1510,6 +1510,7 @@
osb->have_local_alloc = 0;
osb->local_alloc_bh = NULL;
+ init_waitqueue_head (&osb->open_event);
/* not using any of these sb fields yet */
#if 0
di->i_ctime = cpu_to_le64(format_time); // use this as s_wtime (write time)
Modified: trunk/src/vote.c
===================================================================
--- trunk/src/vote.c 2004-08-11 18:27:57 UTC (rev 1350)
+++ trunk/src/vote.c 2004-08-12 19:39:10 UTC (rev 1351)
@@ -418,7 +418,7 @@
int max = data->u.proc.max - *len;
char *p = data->u.proc.page + *len;
int ret = 0;
- char *reqstr=NULL, *gotstr=NULL, *openstr=NULL;
+ char *reqstr=NULL, *gotstr=NULL;
/* just run thru everything to populate /proc */
/* return -ENOENT to keep going */
@@ -432,17 +432,14 @@
break;
if (ocfs_node_map_stringify(&obj->got_vote_map, &gotstr) < 0)
break;
- if (ocfs_node_map_stringify(&obj->tmp_openmap, &openstr) < 0)
- break;
- ret = snprintf(p, max, "REQST: %d %c %3d %s %21llu %21llu %08x | %s %s\n",
+ ret = snprintf(p, max, "REQST: %d %c %3d %s %21llu %21llu %08x | %s\n",
obj->pid,
vote_state_str[obj->vote_state],
obj->vote_status,
reqstr,
request->lock_id,
request->lock_seq_num,
- request->flags,
- gotstr, openstr);
+ request->flags, gotstr);
break;
case OCFS_VOTE_REPLY:
reply = (ocfs_dlm_reply_master *) dlm_msg->msg_buf;
@@ -474,8 +471,6 @@
kfree(reqstr);
if (gotstr)
kfree(gotstr);
- if (openstr)
- kfree(openstr);
return status;
}
@@ -608,7 +603,6 @@
ctxt.reply_method = COMM_VOTE;
ctxt.got_vote_map = &(obj->got_vote_map);
- ctxt.open_map = &(obj->tmp_openmap);
ctxt.status = &(obj->vote_status);
ctxt.flags = reply_msg->flags;
ctxt.u.reply = reply;
@@ -837,7 +831,6 @@
else
ocfs_node_map_init(osb, &obj->req_vote_map);
ocfs_node_map_init(osb, &obj->got_vote_map);
- ocfs_node_map_init(osb, &obj->tmp_openmap);
obj->seq_num = 0ULL;
obj->req_lock_type = reqlock;
@@ -853,7 +846,7 @@
* ocfs_send_dlm_request_msg()
* inode is definitely non NULL
*/
-int ocfs_send_dlm_request_msg (ocfs_super * osb, __u64 lock_id, __u32 lock_type, __u32 flags, ocfs_node_map *votemap, ocfs_node_map *openmap, struct inode *inode, __u32 num_ident, int *vote_status)
+int ocfs_send_dlm_request_msg (ocfs_super * osb, __u64 lock_id, __u32 lock_type, __u32 flags, ocfs_node_map *votemap, struct inode *inode, __u32 num_ident, int *vote_status)
{
int status = 0;
ocfs_dlm_msg *dlm_msg = NULL;
@@ -904,10 +897,6 @@
spin_lock (&obj->lock);
if (obj->vote_status >= 0 && obj->vote_state == VOTE_OBJ_STATE_FULL_REPLY) {
- // extend and update need to update the openmap
- if (flags & (FLAG_FILE_EXTEND | FLAG_FILE_UPDATE))
- if (openmap)
- ocfs_node_map_set(openmap, &obj->tmp_openmap);
LOG_TRACE_ARGS ("OK vote, lockid=%llu\n", lock_id);
} else if (obj->vote_status != -EAGAIN || obj->vote_state != VOTE_OBJ_STATE_FULL_REPLY) {
#warning "should we even be erroring here at all!"
@@ -966,9 +955,6 @@
switch (reply_status) {
case FLAG_VOTE_NODE:
ocfs_node_map_set_bit(ctxt->got_vote_map, node_num);
- if (ctxt->flags & (FLAG_FILE_EXTEND|FLAG_FILE_UPDATE) &&
- open_handle && ctxt->open_map)
- ocfs_node_map_set_bit(ctxt->open_map, node_num);
break;
case FLAG_VOTE_OIN_ALREADY_INUSE:
ocfs_node_map_set_bit(ctxt->got_vote_map, node_num);
Modified: trunk/src/vote.h
===================================================================
--- trunk/src/vote.h 2004-08-11 18:27:57 UTC (rev 1350)
+++ trunk/src/vote.h 2004-08-12 19:39:10 UTC (rev 1351)
@@ -44,7 +44,7 @@
int ocfs_send_dismount_msg(ocfs_super *osb);
int ocfs_send_dlm_request_msg (ocfs_super * osb, __u64 lock_id,
__u32 lock_type, __u32 flags,
- ocfs_node_map *votemap, ocfs_node_map *openmap,
+ ocfs_node_map *votemap,
struct inode *inode, __u32 num_ident,
int *vote_status);
int ocfs_send_vote_reply(ocfs_super *osb, ocfs_dlm_msg *dlm_msg,
More information about the Ocfs2-commits
mailing list