[Ocfs2-commits] mfasheh commits r1294 - trunk/src
svn-commits at oss.oracle.com
svn-commits at oss.oracle.com
Wed Jul 21 17:38:12 CDT 2004
Author: mfasheh
Date: 2004-07-21 16:38:10 -0500 (Wed, 21 Jul 2004)
New Revision: 1294
Modified:
trunk/src/dlm.c
trunk/src/inode.c
trunk/src/journal.c
trunk/src/namei.c
trunk/src/ocfs.h
trunk/src/ocfs_journal.h
trunk/src/proc.c
trunk/src/super.c
Log:
* better lock release statistics in
/proc/fs/ocfs2/nodenum/statistics. I think this ought to go in a
different file though.
* change over to using transaction IDs in the journal code. This is
faster and makes the next set of changes possible:
* Nothing is optimized yet, but all the locks for ocfs_commit_cache
are now in one big list, with all locks for any given inode right
next to each other, so optimizing should just be a matter of
scanning the list.
Modified: trunk/src/dlm.c
===================================================================
--- trunk/src/dlm.c 2004-07-20 23:18:27 UTC (rev 1293)
+++ trunk/src/dlm.c 2004-07-21 21:38:10 UTC (rev 1294)
@@ -737,7 +737,6 @@
int disk_vote = 0;
ocfs_lock_res *lockres = GET_INODE_LOCKRES(inode);
u32 flags;
- u64 lock_id = OCFS_I(inode)->ip_blkno << inode->i_sb->s_blocksize_bits;
LOG_ENTRY ();
@@ -745,7 +744,7 @@
ocfs_acquire_lockres(lockres, 0); // ocfs_file_open
flags = FLAG_TRUNCATE_PAGES;
- if (!ocfs_journal_new_file_search(osb, lock_id))
+ if (!ocfs_inode_is_new(osb, inode))
flags |= FLAG_FAST_PATH_LOCK;
status = new_lock_function(osb, lockres->lock_type,
@@ -949,7 +948,7 @@
extra_lock_flags = FLAG_REMASTER;
} else if (flags & (FLAG_FILE_DELETE | FLAG_FILE_RENAME
| FLAG_RELEASE_DENTRY)) {
- if (ocfs_journal_new_file_search(osb, lock_id)!=0) {
+ if (ocfs_inode_is_new(osb, inode)!=0) {
extra_lock_flags = 0;
} else if (lockres->lock_type == OCFS_LKM_EXMODE)
extra_lock_flags = FLAG_FAST_PATH_LOCK;
@@ -957,7 +956,7 @@
extra_lock_flags = FLAG_CHANGE_MASTER;
} else if (lockres->master_node_num == osb->node_num) {
if (flags & (FLAG_FILE_EXTEND | FLAG_FILE_TRUNCATE) &&
- ocfs_journal_new_file_search(osb, lock_id)!=0)
+ ocfs_inode_is_new(osb, inode)!=0)
extra_lock_flags = FLAG_REMASTER;
else if (lockres->lock_type == OCFS_LKM_EXMODE)
extra_lock_flags = FLAG_FAST_PATH_LOCK;
Modified: trunk/src/inode.c
===================================================================
--- trunk/src/inode.c 2004-07-20 23:18:27 UTC (rev 1293)
+++ trunk/src/inode.c 2004-07-21 21:38:10 UTC (rev 1294)
@@ -347,6 +347,8 @@
init_rwsem(&i->ip_io_sem);
atomic_set(&i->ip_needs_verification, 0);
+ INIT_LIST_HEAD(&i->ip_pending_locks);
+ INIT_LIST_HEAD(&i->ip_j_inode);
/* These should be set in read_inode2. */
i->ip_alloc_size = 0ULL;
Modified: trunk/src/journal.c
===================================================================
--- trunk/src/journal.c 2004-07-20 23:18:27 UTC (rev 1293)
+++ trunk/src/journal.c 2004-07-21 21:38:10 UTC (rev 1294)
@@ -53,6 +53,8 @@
#define OCFS_DEBUG_CONTEXT OCFS_DEBUG_CONTEXT_JOURNAL
+spinlock_t trans_inc_lock = SPIN_LOCK_UNLOCKED;
+
typedef enum _release_locks_action {
TRANS_COMMIT,
TRANS_ABORT,
@@ -68,6 +70,8 @@
static int ocfs_commit_cache (ocfs_super * osb, int data_flush);
static int ocfs_wait_on_mount(ocfs_super *osb);
static int ocfs_journal_flush(ocfs_journal *journal);
+static void ocfs_handle_move_locks(ocfs_journal *journal,
+ ocfs_journal_handle *handle);
static int ocfs_journal_flush(ocfs_journal *journal)
{
@@ -78,6 +82,8 @@
retval = journal_flush(journal->k_journal);
+ ocfs_inc_trans_id(journal);
+
journal_unlock_updates(journal->k_journal);
up_write(&journal->trans_barrier);
@@ -95,17 +101,17 @@
int status = 0, tmpstat;
ocfs_journal * journal = NULL;
struct list_head *p, *n;
- ocfs_journal_handle *handle = NULL;
- ocfs_commit_task *commit = osb->commit;
+ ocfs_journal_lock *lock = NULL;
+ struct inode *inode;
+ ocfs_inode_private *ip;
LOG_ENTRY_ARGS("(data_flush = %u)\n", data_flush);
journal = osb->journal;
- /* flush all pending commits and checkpoint the journal. */
+ /* Step 1: flush all pending commits and checkpoint the journal. */
down_write(&journal->trans_barrier);
- /* check again, this time locked :) */
if (atomic_read(&journal->num_trans) == 0) {
up_write(&journal->trans_barrier);
LOG_TRACE_STR("No transactions for me to flush!");
@@ -114,48 +120,99 @@
journal_lock_updates(journal->k_journal);
status = journal_flush(journal->k_journal);
-
-#ifdef VERBOSE_COMMIT_THREAD
- printk("(%u) commit_thread: flushing %u transactions, %u locks\n",
- current->pid,
- atomic_read(&journal->num_trans),
- atomic_read(&journal->j_locks));
-#endif
- atomic_set(&journal->num_trans, 0);
- up_write(&journal->trans_barrier);
if (status < 0) {
journal_unlock_updates(journal->k_journal);
-
+ up_write(&journal->trans_barrier);
LOG_ERROR_STATUS(status);
goto finally;
}
- /* now we can run an unlock against any pending handles and
- * release them. */
+ ocfs_inc_trans_id(journal);
+
+#ifdef VERBOSE_COMMIT_THREAD
+ printk("(%u) commit_thread: flushed %u transactions, releasing %u "
+ "locks\n", current->pid, atomic_read(&journal->num_trans),
+ atomic_read(&journal->num_cmt_locks));
+#endif
+ atomic_set(&journal->num_trans, 0);
+
+ /* Step 2: Drop any locks acquired during transactions which
+ * have just been checkpointed. */
spin_lock(&journal->cmt_lock);
- /* Once we've got cmt_lock, we can let transactions start
- * again -- it should protect us against people mucking with
- * the committed list... */
- journal_unlock_updates(journal->k_journal);
+ atomic_add(atomic_read(&journal->num_cmt_locks),
+ &journal->num_chkpt_locks);
+ atomic_set(&journal->num_cmt_locks, 0);
- /* we want to take everything off the committed list and
- * process it independently, so we can drop the trans_lock
- * earlier. */
- if (!list_empty(&journal->committed)) {
- list_splice(&journal->committed , &commit->c_list);
- INIT_LIST_HEAD(&journal->committed);
+ /* move the locks off each inode onto the commit threads list. */
+ list_for_each_safe(p, n, &journal->committing_inodes) {
+ ip = list_entry(p, ocfs_inode_private, ip_j_inode);
+ inode = ip->ip_inode;
+
+ if (!list_empty(&OCFS_I(inode)->ip_pending_locks))
+ list_splice_init(&OCFS_I(inode)->ip_pending_locks,
+ (&journal->checkpointing_locks)->prev);
+
+ /* we can now remove the inode from the committing
+ * list. */
+ list_del_init(&OCFS_I(inode)->ip_j_inode);
}
osb->needs_flush = 0;
+
spin_unlock(&journal->cmt_lock);
- list_for_each_safe(p, n, &commit->c_list) {
- handle = list_entry(p, ocfs_journal_handle, h_list);
- tmpstat = ocfs_handle_release_locks(handle, TRANS_CACHE);
+ /* TODO: Can we assert this anymore and move these lock
+ * releases back up?
+ *
+ * Once we've got cmt_lock, we can let
+ * transactions start again -- it should protect us against
+ * people mucking with the committed list... */
+ journal_unlock_updates(journal->k_journal);
+ up_write(&journal->trans_barrier);
+
+ p = n = NULL;
+ list_for_each_safe(p, n, &journal->checkpointing_locks) {
+ if (!atomic_read(&journal->num_chkpt_locks))
+ BUG();
+
+ lock = list_entry(p, ocfs_journal_lock, lock_list);
+
+ list_del(&(lock->lock_list));
+
+ if (!lock->inode)
+ BUG();
+
+#if 0
+ /* enable this for tons of output, which will likely
+ * hang your box :) */
+ printk("commit_thread: release lock %u (inode %llu)\n",
+ atomic_read(&journal->num_chkpt_locks),
+ OCFS_I(lock->inode)->ip_blkno);
+#endif
+ down_write(&OCFS_I(lock->inode)->ip_io_sem);
+ tmpstat = 0;
+ if (!INODE_DELETED(lock->inode))
+ tmpstat = ocfs_release_lock(osb,
+ lock->type,
+ lock->flags,
+ lock->bh,
+ lock->inode);
+ else
+ LOG_ERROR_ARGS("commit_thread: Skipping delete for "
+ "inode %llu!\n",
+ OCFS_I(lock->inode)->ip_blkno);
+ up_write(&OCFS_I(lock->inode)->ip_io_sem);
if (tmpstat < 0)
- LOG_ERROR_STATUS((status = tmpstat));
- list_del(&(handle->h_list));
- kfree(handle);
+ LOG_ERROR_ARGS("commit_thread: release_lock status is"
+ " %d releasing lock on inode %llu!\n",
+ tmpstat, OCFS_I(lock->inode)->ip_blkno);
+
+ if (lock->bh != NULL)
+ brelse(lock->bh);
+ iput(lock->inode);
+
+ atomic_dec(&journal->num_chkpt_locks);
+ kmem_cache_free(OcfsGlobalCtxt.lock_cache, lock);
}
flush_data:
@@ -405,9 +462,10 @@
return;
}
-/* This does no locking of the handle. If the handle is on
- * journal->committed, then you want to be holding the cmt_lock
- * before calling this. */
+/*
+ * Called from commit / abort only for the checkpointing case. When we
+ * stop doing that there, we can kill this function.
+ */
static int ocfs_handle_release_locks(ocfs_journal_handle *handle,
release_locks_action action)
{
@@ -461,7 +519,7 @@
iput(lock->inode);
list_del(&(lock->lock_list));
handle->num_locks--;
- atomic_dec(&journal->j_locks);
+ atomic_dec(&journal->num_cmt_locks);
kmem_cache_free(OcfsGlobalCtxt.lock_cache, lock);
}
@@ -469,31 +527,6 @@
return(status);
}
-int ocfs_journal_new_file_search(ocfs_super *osb, __u64 lockid)
-{
- ocfs_journal_handle *handle=NULL;
- ocfs_journal *journal=NULL;
- int status = -ENOENT;
- struct list_head *p1;
-
- LOG_ENTRY_ARGS("(%llu)\n", lockid);
-
- journal = osb->journal;
- spin_lock(&journal->cmt_lock);
- list_for_each(p1, &(journal->committed)) {
- handle = list_entry(p1, ocfs_journal_handle, h_list);
- if (handle->new_file_lockid == lockid) {
- status = 0;
- break;
- }
- }
- spin_unlock(&journal->cmt_lock);
-
- LOG_EXIT_STATUS(status);
- return status;
-}
-
-
/* This for loop is for debug purposes. Basically we want to check the
* BH_JBD bit on our buffers. If the handle was checkpointed, then
* none of them should have that bit set after the revoke
@@ -562,7 +595,12 @@
* the commit thread to do on our behalf. */
if (checkpoint)
atomic_dec(&(osb->journal->num_trans));
-
+ else {
+ ocfs_handle_move_locks(osb->journal, handle);
+ spin_lock(&osb->journal->cmt_lock);
+ osb->needs_flush = 1;
+ spin_unlock(&osb->journal->cmt_lock);
+ }
up_read(&journal->trans_barrier);
handle->k_handle = NULL; /* it's been free'd in journal_stop */
@@ -577,8 +615,6 @@
LOG_ERROR_STATUS(retval);
}
- /* Do the next few steps before we put the handle on any lists
- * where it might be freed! */
for(i = 0; i < handle->num_buffs; i++) {
brelse(handle->buffs[i]);
handle->buffs[i] = NULL;
@@ -601,25 +637,13 @@
retval = ocfs_handle_release_locks(handle, TRANS_COMMIT);
if (retval < 0)
LOG_ERROR_STATUS(retval);
- } else {
- /* If we're not going to checkpoint the handle on
- * commit then we need to add it to our journals list
- * so it can be done later */
- spin_lock(&journal->cmt_lock);
- list_add_tail(&(handle->h_list), &(journal->committed));
- osb->needs_flush = 1;
- spin_unlock(&journal->cmt_lock);
- /* Ok, any references to the handle after this are
- * unsafe as it might be processed (and free'd from
- * memory) by the commit thread! */
}
if (commit_head && (retval == 0))
ocfs_process_bitmap_free_head(osb, commit_head);
ocfs_free_bitmap_free_head(commit_head);
- if (checkpoint)
- kfree(handle);
+ kfree(handle);
LOG_EXIT();
@@ -978,12 +1002,50 @@
list_add_tail(&(lock->lock_list), &(handle->locks));
handle->num_locks++;
- atomic_inc(&handle->journal->j_locks);
+ atomic_inc(&handle->journal->num_cmt_locks);
LOG_EXIT();
return;
}
+/* move the locks off a journal handle and onto the
+ * inode->ip_pending_locks. makes sure the inodes are on
+ * journal->committing_inodes so that the commit thread can get them
+ * after checkpoint.
+ *
+ * You want to do this before dropping trans_barrier to prevent the commit
+ * thread from missing the locks.
+ *
+ * TODO: When we get rid of the last checkpointed transactions, we can
+ * just put locks right on the inode in ocfs_handle_add_lock...
+ */
+static void ocfs_handle_move_locks(ocfs_journal *journal,
+ ocfs_journal_handle *handle)
+{
+ struct list_head *p, *n;
+ ocfs_journal_lock *lock;
+ struct inode *inode;
+
+ list_for_each_safe(p, n, &(handle->locks)) {
+ lock = list_entry(p, ocfs_journal_lock, lock_list);
+ list_del(&lock->lock_list);
+ handle->num_locks--;
+
+ inode = lock->inode;
+
+ spin_lock(&journal->cmt_lock);
+ /* add the lock to the inode */
+ list_add_tail(&lock->lock_list,
+ &OCFS_I(inode)->ip_pending_locks);
+ /* and make sure the inode is on the journals list */
+ if (list_empty(&OCFS_I(inode)->ip_j_inode))
+ list_add_tail(&OCFS_I(inode)->ip_j_inode,
+ &journal->committing_inodes);
+ spin_unlock(&journal->cmt_lock);
+ }
+ return;
+}
+
#define OCFS_DEFAULT_COMMIT_INTERVAL (HZ * 5)
/*
* Setup the journal using the journal system file
@@ -1105,9 +1167,13 @@
osb->journal->lockbh = bh;
osb->journal->lock_id = lock_id;
atomic_set(&(osb->journal->num_trans), 0);
- atomic_set(&(osb->journal->j_locks), 0);
+ atomic_set(&(osb->journal->num_cmt_locks), 0);
+ atomic_set(&(osb->journal->num_chkpt_locks), 0);
init_rwsem(&(osb->journal->trans_barrier));
osb->journal->state = OCFS_JOURNAL_LOADED;
+ osb->journal->trans_id = (unsigned long) 1;
+ INIT_LIST_HEAD(&(osb->journal->committing_inodes));
+ INIT_LIST_HEAD(&(osb->journal->checkpointing_locks));
status = 0;
done:
Modified: trunk/src/namei.c
===================================================================
--- trunk/src/namei.c 2004-07-20 23:18:27 UTC (rev 1293)
+++ trunk/src/namei.c 2004-07-21 21:38:10 UTC (rev 1294)
@@ -267,7 +267,7 @@
}
file_off = fe->i_blkno << dir->i_sb->s_blocksize_bits;
- handle->new_file_lockid = file_off;
+ ocfs_inode_set_new(osb, inode);
ocfs_init_lockres(osb, inode);
@@ -1556,6 +1556,7 @@
BUG();
}
+ ocfs_inode_set_new(osb, inode);
ocfs_init_lockres(osb, inode);
status = ocfs_extend_file(osb, newsize, handle, inode, NULL, 0,
Modified: trunk/src/ocfs.h
===================================================================
--- trunk/src/ocfs.h 2004-07-20 23:18:27 UTC (rev 1293)
+++ trunk/src/ocfs.h 2004-07-21 21:38:10 UTC (rev 1294)
@@ -449,6 +449,18 @@
ocfs_lock_res ip_lockres;
__u32 ip_dir_start_lookup;
+
+ /* ip_pending_locks and ip_j_inode are protected by the
+ * journals cmt_lock.
+ * ip_pending_locks: disk locks for this inode which have to be
+ * released once their transaction checkpoints
+ * ip_j_inode: list_head for journal->committing_inodes. */
+ struct list_head ip_pending_locks;
+ struct list_head ip_j_inode;
+
+ /* protected by trans_inc_lock, which transaction were we
+ * created on? Zero if none. */
+ unsigned long ip_created_trans;
} ocfs_inode_private;
/* Eventually, the 'flags' and 'open_flags' fields need to be
@@ -506,7 +518,6 @@
{
struct completion c_complete;
struct task_struct *c_task;
- struct list_head c_list;
} ocfs_commit_task;
typedef struct _ocfs_dlm_stats
Modified: trunk/src/ocfs_journal.h
===================================================================
--- trunk/src/ocfs_journal.h 2004-07-20 23:18:27 UTC (rev 1293)
+++ trunk/src/ocfs_journal.h 2004-07-21 21:38:10 UTC (rev 1294)
@@ -69,18 +69,82 @@
to access file entry */
atomic_t num_trans; /* Number of transactions
* currently in the system. */
+ unsigned long trans_id;
/* locking order: trans_lock -> cmt_lock */
spinlock_t cmt_lock; /* protects the committed list */
struct list_head committed; /* doubly linked list of all
* committed handles awaiting
* checkpointing. */
- atomic_t j_locks; /* number of delayed
- * locks, used only for
- * statistics -- don't
- * trust this! */
+ atomic_t num_cmt_locks; /* number of delayed
+ * locks */
+ atomic_t num_chkpt_locks;
struct rw_semaphore trans_barrier;
+
+ struct list_head committing_inodes; /* list of all
+ * inodes that
+ * have committed
+ * and are
+ * awaiting a
+ * checkpoint. Protected
+ * by cmt_lock. */
+ struct list_head checkpointing_locks; /* locks
+ * pending release
+ * after a checkpoint
+ * -- this variable
+ * is unlocked as
+ * commit_thread is
+ * the only guy who
+ * looks at it! */
};
+extern spinlock_t trans_inc_lock;
+
+/* wrap trans_id so we never have it equal to zero. */
+static inline void ocfs_inc_trans_id(ocfs_journal *j)
+{
+ spin_lock(&trans_inc_lock);
+ j->trans_id++;
+ if (!j->trans_id)
+ j->trans_id = 1;
+ spin_unlock(&trans_inc_lock);
+}
+
+static inline int ocfs_trans_checkpointed(ocfs_journal *j,
+ unsigned long trans_id)
+{
+ int ret;
+ spin_lock(&trans_inc_lock);
+ ret = time_after(trans_id, j->trans_id);
+ spin_unlock(&trans_inc_lock);
+ return ret;
+}
+
+/* convenience function to check if an inode has been checkpointed
+ * yet. Replaces ocfs_journal_new_file_search. Will do you a favor and
+ * set created_trans = 0 when you've been checkpointed.
+ * returns '0' if the inode hasn't been checkpointed yet, 1 otherwise
+ */
+static inline int ocfs_inode_is_new(ocfs_super *osb,
+ struct inode *inode)
+{
+ int ret;
+ spin_lock(&trans_inc_lock);
+ ret = time_after(osb->journal->trans_id,
+ OCFS_I(inode)->ip_created_trans);
+ if (ret)
+ OCFS_I(inode)->ip_created_trans = 0;
+ spin_unlock(&trans_inc_lock);
+ return ret;
+}
+
+static inline void ocfs_inode_set_new(ocfs_super *osb,
+ struct inode *inode)
+{
+ spin_lock(&trans_inc_lock);
+ OCFS_I(inode)->ip_created_trans = osb->journal->trans_id;
+ spin_unlock(&trans_inc_lock);
+}
+
typedef struct _ocfs_journal_lock ocfs_journal_lock;
struct _ocfs_journal_lock {
__u32 type;
@@ -88,6 +152,11 @@
struct buffer_head *bh;
struct inode *inode;
int req_io_sem;
+ /* lock_list: we are either on
+ * - handle->locks: if still running
+ * - inode->ip_pending_locks: if waiting for checkpoint
+ * - journal->checkpointing_locks: awaiting release after checkpoint
+ */
struct list_head lock_list;
};
@@ -133,11 +202,6 @@
* freed ONLY if
* we commit the
* handle. */
- __u64 new_file_lockid; /* offset for the
- * most recently
- * created file
- * sitting on this
- * journal handle */
struct list_head inode_list;
};
@@ -212,7 +276,6 @@
int ocfs_journal_wipe(ocfs_journal *journal, int full);
int ocfs_journal_load(ocfs_journal *journal);
void ocfs_recovery_thread(struct _ocfs_super *osb, int node_num);
-int ocfs_journal_new_file_search(struct _ocfs_super *osb, __u64 lockid);
/*
* Transaction Handling:
Modified: trunk/src/proc.c
===================================================================
--- trunk/src/proc.c 2004-07-20 23:18:27 UTC (rev 1293)
+++ trunk/src/proc.c 2004-07-21 21:38:10 UTC (rev 1294)
@@ -511,7 +511,8 @@
"Failed Large Allocs : %u\n" \
"Retry Large Allocs : %u\n" \
"Open Transactions: : %u\n" \
- "Delayed Journal Locks : %u\n"
+ "Delayed Locks : %u\n" \
+ "Checkpointing Locks : %u\n"
len = sprintf (page, PROC_STATS, osb->file_open_cnt, pubmap,
osb->num_nodes, osb->s_clustersize,
@@ -519,7 +520,8 @@
osb->cluster_bitmap.failed,
osb->cluster_bitmap.ok_retries,
atomic_read(&osb->journal->num_trans),
- atomic_read(&osb->journal->j_locks));
+ atomic_read(&osb->journal->num_cmt_locks),
+ atomic_read(&osb->journal->num_chkpt_locks));
ret = ocfs_proc_calc_metrics (page, start, off, count, eof, len);
Modified: trunk/src/super.c
===================================================================
--- trunk/src/super.c 2004-07-20 23:18:27 UTC (rev 1293)
+++ trunk/src/super.c 2004-07-21 21:38:10 UTC (rev 1294)
@@ -1029,7 +1029,6 @@
goto leave;
}
memset(osb->commit, 0, sizeof(ocfs_commit_task));
- INIT_LIST_HEAD(&osb->commit->c_list);
child_pid = kernel_thread (ocfs_commit_thread, osb,
CLONE_FS | CLONE_FILES | CLONE_SIGHAND);
if (child_pid < 0) {
More information about the Ocfs2-commits
mailing list