[Ocfs2-commits] mfasheh commits r1296 - trunk/src
svn-commits at oss.oracle.com
svn-commits at oss.oracle.com
Thu Jul 22 16:55:03 CDT 2004
Author: mfasheh
Date: 2004-07-22 15:55:01 -0500 (Thu, 22 Jul 2004)
New Revision: 1296
Modified:
trunk/src/dlm.c
trunk/src/dlm.h
trunk/src/journal.c
trunk/src/nm.c
trunk/src/ocfs.h
trunk/src/ocfs2_disk_dlm.h
trunk/src/ocfs_journal.h
trunk/src/vote.c
trunk/src/vote.h
Log:
* optimize the lock list in commit_cache so that we send significantly
fewer messages than before. We only optimize UPDATE_OIN messages today
but can plug in more later. This required a change to voting, so older
svn revisions of the code will no longer work with this one.
Modified: trunk/src/dlm.c
===================================================================
--- trunk/src/dlm.c 2004-07-22 20:49:45 UTC (rev 1295)
+++ trunk/src/dlm.c 2004-07-22 20:55:01 UTC (rev 1296)
@@ -57,11 +57,11 @@
static inline int ocfs_wait_for_readonly_drop(ocfs_super *osb, struct inode *inode);
-static int ocfs_disk_request_vote (ocfs_super * osb, __u64 lock_id, __u32 lock_type, __u32 flags, ocfs_node_map * vote_map, __u64 * lock_seq_num, __u32 time_to_wait, struct inode *inode, ocfs_node_map *open_map);
+static int ocfs_disk_request_vote (ocfs_super * osb, __u64 lock_id, __u32 lock_type, __u32 flags, ocfs_node_map * vote_map, __u64 * lock_seq_num, __u32 time_to_wait, struct inode *inode, __u32 num_ident, ocfs_node_map *open_map);
static int ocfs_wait_for_vote (ocfs_super * osb, __u64 lock_id, __u32 lock_type, __u32 flags, ocfs_node_map *vote_map, __u32 time_to_wait, __u64 lock_seq_num, ocfs_node_map *open_map);
static int ocfs_reset_voting (ocfs_super * osb);
static int ocfs_get_vote_on_disk (ocfs_super * osb, __u64 lock_id, __u32 lock_type, __u32 flags, ocfs_node_map * got_vote_map, ocfs_node_map * vote_map, __u64 lock_seq_num, ocfs_node_map * oin_open_map);
-static int ocfs_disk_release_lock (ocfs_super * osb, __u64 lock_id, __u32 lock_type, __u32 flags, struct buffer_head *bh, struct inode *inode);
+static int ocfs_disk_release_lock (ocfs_super * osb, __u64 lock_id, __u32 lock_type, __u32 flags, struct buffer_head *bh, struct inode *inode, __u32 num_ident);
static void ocfs_set_publish_vote_map(ocfs_super *osb, ocfs_publish *publish, ocfs_node_map *vote_map)
@@ -98,6 +98,7 @@
__u64 *lock_seq_num,
__u32 time_to_wait,
struct inode *inode,
+ __u32 num_ident,
ocfs_node_map *open_map)
{
int status = 0;
@@ -171,6 +172,7 @@
ocfs_set_publish_vote_map(osb, pubsect, vote_map);
pubsect->vote_type = flags;
pubsect->lock_id = lock_id;
+ pubsect->num_ident = num_ident;
#ifdef VERBOSE_LOCKING_TRACE
printk("ocfs_disk_request_vote: lockid = %llu\n",lock_id);
@@ -1029,7 +1031,7 @@
*/
static int ocfs_disk_release_lock(ocfs_super * osb, __u64 lock_id,
__u32 lock_type, __u32 flags, struct buffer_head *bh,
- struct inode *inode)
+ struct inode *inode, __u32 num_ident)
{
ocfs_node_map votemap;
int status = 0, vote_status = 0;
@@ -1117,7 +1119,7 @@
if (!disk_vote) {
LOG_TRACE_STR ("Network vote");
status = ocfs_send_dlm_request_msg (osb, lock_id, lock_type, flags,
- &votemap, NULL, inode, &vote_status);
+ &votemap, NULL, inode, num_ident, &vote_status);
if (status >= 0 || status == -EAGAIN) {
if (status != -EAGAIN)
status = vote_status;
@@ -1156,7 +1158,7 @@
LOG_TRACE_STR ("Disk vote");
disk_vote = 1;
- status = ocfs_disk_request_vote (osb, lock_id, lock_type, flags, &votemap, &lockseqno, 5000, inode, NULL);
+ status = ocfs_disk_request_vote (osb, lock_id, lock_type, flags, &votemap, &lockseqno, 5000, inode, num_ident, NULL);
tmpstat = ocfs_reset_voting (osb);
if (status >=0) {
@@ -1216,7 +1218,7 @@
* ocfs_release_lock()
* inode is definitely non NULL
*/
-int ocfs_release_lock (ocfs_super * osb, __u32 lock_type, __u32 flags, struct buffer_head *bh, struct inode *inode)
+int ocfs_release_lock_full (ocfs_super * osb, __u32 lock_type, __u32 flags, struct buffer_head *bh, struct inode *inode, __u32 num_ident)
{
int status = 0;
ocfs_lock_res *lockres = GET_INODE_LOCKRES(inode);
@@ -1226,6 +1228,7 @@
osb, lock_type, flags, lockres);
OCFS_ASSERT(inode);
+ OCFS_ASSERT(num_ident);
lock_id = OCFS_I(inode)->ip_blkno << inode->i_sb->s_blocksize_bits;
LOG_TRACE_ARGS("lock_id = %llu", lock_id);
@@ -1255,7 +1258,7 @@
if (flags & FLAG_FILE_RELEASE_MASTER)
lockres->master_node_num = OCFS_INVALID_NODE_NUM;
- status = ocfs_disk_release_lock (osb, lock_id, lock_type, flags, bh, inode);
+ status = ocfs_disk_release_lock (osb, lock_id, lock_type, flags, bh, inode, num_ident);
if (status < 0) {
/* Disable the vol */
LOG_ERROR_STATUS (status);
@@ -1267,7 +1270,7 @@
"= %llu\n", lock_id);
BUG();
}
- lockres->lock_holders--;
+ lockres->lock_holders -= num_ident;
LOG_TRACE_ARGS("lockres->lock_holders = %u\n", lockres->lock_holders);
ocfs_release_lockres (lockres); // ocfs_release_lock
LOG_EXIT_STATUS (status);
@@ -1366,8 +1369,10 @@
if (!*disk_vote) {
LOG_TRACE_STR ("Network vote");
- status = ocfs_send_dlm_request_msg (osb, lock_id, lock_type, flags,
- &vote_map, &open_map, inode, &vote_status);
+ status = ocfs_send_dlm_request_msg (osb, lock_id, lock_type,
+ flags, &vote_map,
+ &open_map, inode, 1,
+ &vote_status);
if (status >= 0) {
status = vote_status;
if (status >= 0)
@@ -1403,7 +1408,7 @@
/* disk voting */
LOG_TRACE_STR ("Disk vote");
*disk_vote = 1;
- status = ocfs_disk_request_vote (osb, lock_id, lock_type, flags, &vote_map, &lockseqnum, 5000, inode, &open_map);
+ status = ocfs_disk_request_vote (osb, lock_id, lock_type, flags, &vote_map, &lockseqnum, 5000, inode, 1, &open_map);
if (status < 0) {
if (status != -EAGAIN)
LOG_ERROR_STATUS (status);
Modified: trunk/src/dlm.h
===================================================================
--- trunk/src/dlm.h 2004-07-22 20:49:45 UTC (rev 1295)
+++ trunk/src/dlm.h 2004-07-22 20:55:01 UTC (rev 1296)
@@ -37,9 +37,11 @@
struct inode *inode);
void ocfs_compute_dlm_stats(int status, int vote_status,
ocfs_dlm_stats *stats);
-int ocfs_release_lock(ocfs_super *osb, __u32 lock_type,
- __u32 flags, struct buffer_head *bh,
- struct inode *inode);
+#define ocfs_release_lock(osb, lock_type, flags, bh, inode) \
+ ocfs_release_lock_full(osb, lock_type, flags, bh, inode, 1)
+int ocfs_release_lock_full(ocfs_super *osb, __u32 lock_type,
+ __u32 flags, struct buffer_head *bh,
+ struct inode *inode, __u32 num_ident);
int ocfs_update_disk_lock(ocfs_super *osb, __u32 flags,
struct buffer_head **bh, struct inode *inode,
ocfs_journal_handle *handle);
Modified: trunk/src/journal.c
===================================================================
--- trunk/src/journal.c 2004-07-22 20:49:45 UTC (rev 1295)
+++ trunk/src/journal.c 2004-07-22 20:55:01 UTC (rev 1296)
@@ -41,6 +41,7 @@
#include "extmap.h"
#include "inode.h"
#include "journal.h"
+#include "lockres.h"
#include "namei.h"
#include "nm.h"
#include "super.h"
@@ -72,6 +73,7 @@
static int ocfs_journal_flush(ocfs_journal *journal);
static void ocfs_handle_move_locks(ocfs_journal *journal,
ocfs_journal_handle *handle);
+static void ocfs_journal_optimize_lock_list(ocfs_journal *journal);
static int ocfs_journal_flush(ocfs_journal *journal)
{
@@ -90,6 +92,84 @@
return(retval);
}
+static void ocfs_journal_optimize_lock_list(ocfs_journal *journal)
+{
+ ocfs_journal_lock *lock = NULL;
+ ocfs_journal_lock *first = NULL; /* 1st update_oin release for
+ * an inode. */
+ struct inode *curr_inode = NULL;
+ struct list_head *p, *n;
+
+ /* 1st pass: optimize away all UPDATE_OIN messages into one.
+ * 2 types of UPDATE_OIN messages as of now: with size change
+ * (truncate / extend) or without size change. We will take
+ * *all* of them (regardless of type) and cram it into one
+ * message. */
+ curr_inode = NULL;
+ list_for_each_safe(p, n, &journal->checkpointing_locks) {
+ lock = list_entry(p, ocfs_journal_lock, lock_list);
+
+ if (lock->inode != curr_inode) {
+ /* Ok, new inode. */
+ first = NULL;
+
+ curr_inode = lock->inode;
+ }
+
+ /* if it's not an update oin then just continue */
+ if (!(lock->flags & FLAG_FILE_UPDATE_OIN))
+ continue;
+
+ if (!first) {
+ first = lock;
+ /* nothing else to do to the 1st update release. */
+ continue;
+ }
+
+ /* only the 1st update_oin lock stays -- the
+ * rest are either num_ident into it or
+ * dropped */
+ list_del(&lock->lock_list);
+ atomic_dec(&journal->num_chkpt_locks);
+
+ /* we only incement first->num_ident if the current
+ * lock has a size change AND first has recorded at
+ * least one size change (which can include
+ * itself). Otherwise we'll have an off by one as a
+ * first with no size change recorded will keep at
+ * least 1 reference for itself. */
+ if ((lock->flags & FLAG_FILE_EXTEND)
+ || (lock->flags & FLAG_FILE_TRUNCATE)) {
+ if ((first->flags & FLAG_FILE_EXTEND)
+ || (first->flags & FLAG_FILE_TRUNCATE))
+ first->num_ident++;
+ } else {
+ /* Ok, no size change on this particular lock,
+ * so we're discarding it without updating the
+ * num_ident value -- that's fine but we need
+ * to manually drop the lockres->lock_holders
+ * value on it's behalf */
+ first->drop_holders++;
+ }
+
+ /* record a size change in first if there was one. */
+ if (lock->flags & FLAG_FILE_EXTEND)
+ first->flags = first->flags | FLAG_FILE_EXTEND;
+ if (lock->flags & FLAG_FILE_TRUNCATE)
+ first->flags = first->flags | FLAG_FILE_TRUNCATE;
+
+ if (!first->bh)
+ first->bh = lock->bh;
+ else
+ brelse(lock->bh);
+ iput(lock->inode);
+
+ kmem_cache_free(OcfsGlobalCtxt.lock_cache, lock);
+ }
+
+ return;
+}
+
/*
* ocfs_commit_cache()
*
@@ -104,6 +184,7 @@
ocfs_journal_lock *lock = NULL;
struct inode *inode;
ocfs_inode_private *ip;
+ ocfs_lock_res *lockres;
LOG_ENTRY_ARGS("(data_flush = %u)\n", data_flush);
@@ -170,6 +251,12 @@
journal_unlock_updates(journal->k_journal);
up_write(&journal->trans_barrier);
+ ocfs_journal_optimize_lock_list(journal);
+#ifdef VERBOSE_COMMIT_THREAD
+ printk("(%u) commit_thread: after optimization, %u locks to release\n",
+ current->pid, atomic_read(&journal->num_chkpt_locks));
+#endif
+
p = n = NULL;
list_for_each_safe(p, n, &journal->checkpointing_locks) {
if (!atomic_read(&journal->num_chkpt_locks))
@@ -192,11 +279,12 @@
down_write(&OCFS_I(lock->inode)->ip_io_sem);
tmpstat = 0;
if (!INODE_DELETED(lock->inode))
- tmpstat = ocfs_release_lock(osb,
- lock->type,
- lock->flags,
- lock->bh,
- lock->inode);
+ tmpstat = ocfs_release_lock_full(osb,
+ lock->type,
+ lock->flags,
+ lock->bh,
+ lock->inode,
+ lock->num_ident);
else
LOG_ERROR_ARGS("commit_thread: Skipping delete for "
"inode %llu!\n",
@@ -207,6 +295,14 @@
" %d releasing lock on inode %llu!\n",
tmpstat, OCFS_I(lock->inode)->ip_blkno);
+ if (lock->drop_holders) {
+ lockres = GET_INODE_LOCKRES(lock->inode);
+ ocfs_acquire_lockres(lockres, 0);
+ OCFS_ASSERT(lockres->lock_holders >= lock->drop_holders);
+ lockres->lock_holders -= lock->drop_holders;
+ ocfs_release_lockres(lockres);
+ }
+
if (lock->bh != NULL)
brelse(lock->bh);
iput(lock->inode);
@@ -994,6 +1090,12 @@
lock->inode = inode;
lock->req_io_sem = req_io_sem;
+ /* stuff for commit thread optimization. */
+ lock->num_ident = 1;
+ /* this is for *additional* decrements of lock_holders, not
+ * the one given by ocfs_release_lock... */
+ lock->drop_holders = 0;
+
if (bh)
get_bh(bh);
Modified: trunk/src/nm.c
===================================================================
--- trunk/src/nm.c 2004-07-22 20:49:45 UTC (rev 1295)
+++ trunk/src/nm.c 2004-07-22 20:55:01 UTC (rev 1296)
@@ -77,7 +77,7 @@
static void ocfs_mark_inode_for_extend(ocfs_super *osb, struct inode *inode,
__u32 node_num);
static void ocfs_clear_inode_for_extend(ocfs_super *osb, struct inode *inode,
- __u32 node_num);
+ __u32 node_num, u32 num_rel);
static void ocfs_process_vote_worker(void *val);
@@ -616,7 +616,7 @@
}
static void ocfs_clear_inode_for_extend(ocfs_super *osb, struct inode *inode,
- __u32 node_num)
+ __u32 node_num, u32 num_rel)
{
int dec = 0;
@@ -630,7 +630,7 @@
down(&recovery_list_sem);
spin_lock(&oin_num_ext_lock);
- OCFS_I(inode)->ip_num_extends--;
+ OCFS_I(inode)->ip_num_extends -= num_rel;
if (OCFS_I(inode)->ip_num_extends < 0)
BUG();
@@ -704,7 +704,7 @@
int status = 0;
int tmpstat = 0;
ocfs_lock_res *lockres = NULL;
- __u32 flags;
+ __u32 flags, num_ident;
__u16 num_nodes;
ocfs2_dinode *fe = NULL;
ocfs_vote *vote = NULL;
@@ -741,13 +741,21 @@
flags = publish->vote_type;
lock_id = publish->lock_id;
seq_num = publish->publ_seq_num;
+ num_ident = publish->num_ident;
} else {
ocfs_dlm_req_master *req_master = (ocfs_dlm_req_master *)dlm_msg->msg_buf;
flags = req_master->flags;
lock_id = req_master->lock_id;
seq_num = req_master->lock_seq_num;
+ num_ident = req_master->num_ident;
}
-
+
+ if (!num_ident) {
+ printk("flags = 0x%x, lock_id = %llu, node_num = %u\n",
+ flags, lock_id, node_num);
+ BUG();
+ }
+
LOG_TRACE_ARGS ("node=%u, id=%llu, seq=%llu\n", node_num,
lock_id, seq_num);
@@ -822,10 +830,10 @@
status, &master_alive, inode);
#ifdef VERBOSE_PROCESS_VOTE
- printk("(%u) ocfs_process_vote: %s request for lockid: %llu, action: (%u) %s, type: %s\n", current->pid,
+ printk("(%u) ocfs_process_vote: %s request for lockid: %llu, action: (%u) %s, type: %s, num_ident = %u\n", current->pid,
flags & FLAG_RELEASE_LOCK ? "RELEASE" :
(flags & FLAG_ACQUIRE_LOCK ? "ACQUIRE" : "MODIFY"), lock_id,
- vote_type, process_vote_strings[vote_type], disk_vote ? "disk vote" : "net vote" );
+ vote_type, process_vote_strings[vote_type], disk_vote ? "disk vote" : "net vote", num_ident);
if (vote_type == INVALID_REQUEST)
printk("Invalid request! flags = 0x%x\n", flags);
@@ -1254,7 +1262,8 @@
if (flags & FLAG_ACQUIRE_LOCK)
ocfs_mark_inode_for_extend(osb, inode, node_num);
else if (flags & FLAG_RELEASE_LOCK)
- ocfs_clear_inode_for_extend(osb, inode, node_num);
+ ocfs_clear_inode_for_extend(osb, inode, node_num,
+ num_ident);
else {
printk("uhoh, bad vote flags! 0x%x\n", flags);
BUG();
Modified: trunk/src/ocfs.h
===================================================================
--- trunk/src/ocfs.h 2004-07-22 20:49:45 UTC (rev 1295)
+++ trunk/src/ocfs.h 2004-07-22 20:55:01 UTC (rev 1296)
@@ -723,6 +723,7 @@
__u32 flags;
__u8 open_handle;
__u8 odmh_pad[3];
+ __u32 num_ident; /* number of identical messages, always >= 1 */
} ocfs_dlm_msg_hdr;
typedef ocfs_dlm_msg_hdr ocfs_dlm_req_master;
Modified: trunk/src/ocfs2_disk_dlm.h
===================================================================
--- trunk/src/ocfs2_disk_dlm.h 2004-07-22 20:49:45 UTC (rev 1295)
+++ trunk/src/ocfs2_disk_dlm.h 2004-07-22 20:55:01 UTC (rev 1296)
@@ -114,7 +114,8 @@
__u64 lock_id; /* Lock vote is requested for */
/* last seq num used in comm voting */
/*60*/ __u64 comm_seq_num;
-/*68*/
+ __u32 num_ident;
+/*72*/
} ocfs_publish;
typedef struct _ocfs_vote
Modified: trunk/src/ocfs_journal.h
===================================================================
--- trunk/src/ocfs_journal.h 2004-07-22 20:49:45 UTC (rev 1295)
+++ trunk/src/ocfs_journal.h 2004-07-22 20:55:01 UTC (rev 1296)
@@ -147,11 +147,17 @@
typedef struct _ocfs_journal_lock ocfs_journal_lock;
struct _ocfs_journal_lock {
+ /* release_lock arguments. */
__u32 type;
__u32 flags;
struct buffer_head *bh;
struct inode *inode;
+ unsigned int num_ident;
+
+ /* used only for immediate checkpointing transactions */
int req_io_sem;
+ /* used by commit_cache */
+ unsigned int drop_holders;
/* lock_list: we are either on
* - handle->locks: if still running
* - inode->ip_pending_locks: if waiting for checkpoint
Modified: trunk/src/vote.c
===================================================================
--- trunk/src/vote.c 2004-07-22 20:49:45 UTC (rev 1295)
+++ trunk/src/vote.c 2004-07-22 20:55:01 UTC (rev 1296)
@@ -853,7 +853,7 @@
* ocfs_send_dlm_request_msg()
* inode is definitely non NULL
*/
-int ocfs_send_dlm_request_msg (ocfs_super * osb, __u64 lock_id, __u32 lock_type, __u32 flags, ocfs_node_map *votemap, ocfs_node_map *openmap, struct inode *inode, int *vote_status)
+int ocfs_send_dlm_request_msg (ocfs_super * osb, __u64 lock_id, __u32 lock_type, __u32 flags, ocfs_node_map *votemap, ocfs_node_map *openmap, struct inode *inode, __u32 num_ident, int *vote_status)
{
int status = 0;
ocfs_dlm_msg *dlm_msg = NULL;
@@ -883,6 +883,7 @@
req->lock_id = lock_id;
req->flags = flags;
+ req->num_ident = num_ident;
#ifdef VERBOSE_LOCKING_TRACE
printk("ocfs_send_dlm_request_msg: inode=%p, lockid = %llu\n",
Modified: trunk/src/vote.h
===================================================================
--- trunk/src/vote.h 2004-07-22 20:49:45 UTC (rev 1295)
+++ trunk/src/vote.h 2004-07-22 20:55:01 UTC (rev 1296)
@@ -45,7 +45,8 @@
int ocfs_send_dlm_request_msg (ocfs_super * osb, __u64 lock_id,
__u32 lock_type, __u32 flags,
ocfs_node_map *votemap, ocfs_node_map *openmap,
- struct inode *inode, int *vote_status);
+ struct inode *inode, __u32 num_ident,
+ int *vote_status);
int ocfs_send_vote_reply(ocfs_super *osb, ocfs_dlm_msg *dlm_msg,
__u32 vote_status, int inode_open);
int ocfs_lookup_vote_request_obj (ocfs_super *osb,
More information about the Ocfs2-commits
mailing list