[Ocfs2-commits] mfasheh commits r1296 - trunk/src

svn-commits at oss.oracle.com svn-commits at oss.oracle.com
Thu Jul 22 16:55:03 CDT 2004


Author: mfasheh
Date: 2004-07-22 15:55:01 -0500 (Thu, 22 Jul 2004)
New Revision: 1296

Modified:
   trunk/src/dlm.c
   trunk/src/dlm.h
   trunk/src/journal.c
   trunk/src/nm.c
   trunk/src/ocfs.h
   trunk/src/ocfs2_disk_dlm.h
   trunk/src/ocfs_journal.h
   trunk/src/vote.c
   trunk/src/vote.h
Log:
* optimize the lock list in commit_cache and send over significantly
  fewer messages than before. We only optimize UPDATE_OIN messages today
  but can plug in more later. This required a change to voting, so older
  svn revisions of the code will no longer work with this one.



Modified: trunk/src/dlm.c
===================================================================
--- trunk/src/dlm.c	2004-07-22 20:49:45 UTC (rev 1295)
+++ trunk/src/dlm.c	2004-07-22 20:55:01 UTC (rev 1296)
@@ -57,11 +57,11 @@
 
 static inline int ocfs_wait_for_readonly_drop(ocfs_super *osb, struct inode *inode);
 
-static int ocfs_disk_request_vote (ocfs_super * osb, __u64 lock_id, __u32 lock_type, __u32 flags, ocfs_node_map * vote_map, __u64 * lock_seq_num, __u32 time_to_wait, struct inode *inode, ocfs_node_map *open_map);
+static int ocfs_disk_request_vote (ocfs_super * osb, __u64 lock_id, __u32 lock_type, __u32 flags, ocfs_node_map * vote_map, __u64 * lock_seq_num, __u32 time_to_wait, struct inode *inode, __u32 num_ident, ocfs_node_map *open_map);
 static int ocfs_wait_for_vote (ocfs_super * osb, __u64 lock_id, __u32 lock_type, __u32 flags, ocfs_node_map *vote_map, __u32 time_to_wait, __u64 lock_seq_num, ocfs_node_map *open_map);
 static int ocfs_reset_voting (ocfs_super * osb);
 static int ocfs_get_vote_on_disk (ocfs_super * osb, __u64 lock_id, __u32 lock_type, __u32 flags, ocfs_node_map * got_vote_map, ocfs_node_map * vote_map, __u64 lock_seq_num, ocfs_node_map * oin_open_map);
-static int ocfs_disk_release_lock (ocfs_super * osb, __u64 lock_id, __u32 lock_type, __u32 flags, struct buffer_head *bh, struct inode *inode);
+static int ocfs_disk_release_lock (ocfs_super * osb, __u64 lock_id, __u32 lock_type, __u32 flags, struct buffer_head *bh, struct inode *inode, __u32 num_ident);
 
 
 static void ocfs_set_publish_vote_map(ocfs_super *osb, ocfs_publish *publish, ocfs_node_map *vote_map)
@@ -98,6 +98,7 @@
 				  __u64 *lock_seq_num,
 				  __u32 time_to_wait,
 				  struct inode *inode,
+				  __u32 num_ident,
 				  ocfs_node_map *open_map)
 {
 	int status = 0;
@@ -171,6 +172,7 @@
 	ocfs_set_publish_vote_map(osb, pubsect, vote_map);
 	pubsect->vote_type = flags;
 	pubsect->lock_id = lock_id;
+	pubsect->num_ident = num_ident;
 
 #ifdef VERBOSE_LOCKING_TRACE
 	printk("ocfs_disk_request_vote: lockid = %llu\n",lock_id);
@@ -1029,7 +1031,7 @@
  */
 static int ocfs_disk_release_lock(ocfs_super * osb, __u64 lock_id,
 		__u32 lock_type, __u32 flags, struct buffer_head *bh,
-		struct inode *inode)
+		struct inode *inode, __u32 num_ident)
 {
 	ocfs_node_map votemap;
 	int status = 0, vote_status = 0;
@@ -1117,7 +1119,7 @@
 		if (!disk_vote) {
 			LOG_TRACE_STR ("Network vote");
 			status = ocfs_send_dlm_request_msg (osb, lock_id, lock_type, flags, 
-							    &votemap, NULL, inode, &vote_status);
+							    &votemap, NULL, inode, num_ident, &vote_status);
 			if (status >= 0 || status == -EAGAIN) {
 				if (status != -EAGAIN)
 					status = vote_status;
@@ -1156,7 +1158,7 @@
 		LOG_TRACE_STR ("Disk vote");
 		disk_vote = 1;
 
-		status = ocfs_disk_request_vote (osb, lock_id, lock_type, flags, &votemap, &lockseqno, 5000, inode, NULL);
+		status = ocfs_disk_request_vote (osb, lock_id, lock_type, flags, &votemap, &lockseqno, 5000, inode, num_ident, NULL);
 		tmpstat = ocfs_reset_voting (osb);
 
 		if (status >=0) {
@@ -1216,7 +1218,7 @@
  * ocfs_release_lock()
  * inode is definitely non NULL
  */
-int ocfs_release_lock (ocfs_super * osb, __u32 lock_type, __u32 flags, struct buffer_head *bh, struct inode *inode)
+int ocfs_release_lock_full (ocfs_super * osb, __u32 lock_type, __u32 flags, struct buffer_head *bh, struct inode *inode, __u32 num_ident)
 {
 	int status = 0;
 	ocfs_lock_res *lockres = GET_INODE_LOCKRES(inode);
@@ -1226,6 +1228,7 @@
 			osb, lock_type, flags, lockres);
 
 	OCFS_ASSERT(inode);
+	OCFS_ASSERT(num_ident);
 
 	lock_id = OCFS_I(inode)->ip_blkno << inode->i_sb->s_blocksize_bits;
 	LOG_TRACE_ARGS("lock_id = %llu", lock_id);
@@ -1255,7 +1258,7 @@
 	if (flags & FLAG_FILE_RELEASE_MASTER)
 		lockres->master_node_num = OCFS_INVALID_NODE_NUM;
 
-	status = ocfs_disk_release_lock (osb, lock_id, lock_type, flags, bh, inode);
+	status = ocfs_disk_release_lock (osb, lock_id, lock_type, flags, bh, inode, num_ident);
 	if (status < 0) {
 		/* Disable the vol */
 		LOG_ERROR_STATUS (status);
@@ -1267,7 +1270,7 @@
 		       "= %llu\n", lock_id);
 		BUG();
 	}
-	lockres->lock_holders--;
+	lockres->lock_holders -= num_ident;
 	LOG_TRACE_ARGS("lockres->lock_holders = %u\n", lockres->lock_holders);
 	ocfs_release_lockres (lockres); // ocfs_release_lock
 	LOG_EXIT_STATUS (status);
@@ -1366,8 +1369,10 @@
 
 	if (!*disk_vote) {
 		LOG_TRACE_STR ("Network vote");
-		status = ocfs_send_dlm_request_msg (osb, lock_id, lock_type, flags, 
-						    &vote_map, &open_map, inode, &vote_status);
+		status = ocfs_send_dlm_request_msg (osb, lock_id, lock_type, 
+						    flags, &vote_map, 
+						    &open_map, inode, 1, 
+						    &vote_status);
 		if (status >= 0) {
 			status = vote_status;
 			if (status >= 0)
@@ -1403,7 +1408,7 @@
 	/* disk voting */
 	LOG_TRACE_STR ("Disk vote");
 	*disk_vote = 1;
-	status = ocfs_disk_request_vote (osb, lock_id, lock_type, flags, &vote_map, &lockseqnum, 5000, inode, &open_map);
+	status = ocfs_disk_request_vote (osb, lock_id, lock_type, flags, &vote_map, &lockseqnum, 5000, inode, 1, &open_map);
 	if (status < 0) {
 		if (status != -EAGAIN)
 			LOG_ERROR_STATUS (status);

Modified: trunk/src/dlm.h
===================================================================
--- trunk/src/dlm.h	2004-07-22 20:49:45 UTC (rev 1295)
+++ trunk/src/dlm.h	2004-07-22 20:55:01 UTC (rev 1296)
@@ -37,9 +37,11 @@
 		      struct inode *inode);
 void ocfs_compute_dlm_stats(int status, int vote_status,
 			    ocfs_dlm_stats *stats);
-int ocfs_release_lock(ocfs_super *osb, __u32 lock_type,
-		      __u32 flags, struct buffer_head *bh,
-		      struct inode *inode);
+#define ocfs_release_lock(osb, lock_type, flags, bh, inode)  \
+	ocfs_release_lock_full(osb, lock_type, flags, bh, inode, 1)
+int ocfs_release_lock_full(ocfs_super *osb, __u32 lock_type,
+			   __u32 flags, struct buffer_head *bh,
+			   struct inode *inode, __u32 num_ident);
 int ocfs_update_disk_lock(ocfs_super *osb, __u32 flags,
 			  struct buffer_head **bh, struct inode *inode,
 			  ocfs_journal_handle *handle);

Modified: trunk/src/journal.c
===================================================================
--- trunk/src/journal.c	2004-07-22 20:49:45 UTC (rev 1295)
+++ trunk/src/journal.c	2004-07-22 20:55:01 UTC (rev 1296)
@@ -41,6 +41,7 @@
 #include "extmap.h"
 #include "inode.h"
 #include "journal.h"
+#include "lockres.h"
 #include "namei.h"
 #include "nm.h"
 #include "super.h"
@@ -72,6 +73,7 @@
 static int ocfs_journal_flush(ocfs_journal *journal);
 static void ocfs_handle_move_locks(ocfs_journal *journal, 
 				   ocfs_journal_handle *handle);
+static void ocfs_journal_optimize_lock_list(ocfs_journal *journal);
 
 static int ocfs_journal_flush(ocfs_journal *journal) 
 {
@@ -90,6 +92,84 @@
 	return(retval);
 }
 
+static void ocfs_journal_optimize_lock_list(ocfs_journal *journal)
+{
+	ocfs_journal_lock *lock = NULL;
+	ocfs_journal_lock *first = NULL; /* 1st update_oin release for
+					  * an inode. */
+	struct inode *curr_inode = NULL;
+	struct list_head *p, *n;
+
+	/* 1st pass: fold each inode's consecutive UPDATE_OIN releases
+	 * into the first one. 2 types of UPDATE_OIN messages as of
+	 * now: with size change (truncate / extend) or without size
+	 * change. We take *all* of them (regardless of type) and cram
+	 * them into one message. */
+	curr_inode = NULL;
+	list_for_each_safe(p, n, &journal->checkpointing_locks) {
+		lock = list_entry(p, ocfs_journal_lock, lock_list);
+
+		if (lock->inode != curr_inode) {
+			/* Ok, new inode. */
+			first = NULL;
+
+			curr_inode = lock->inode;
+		}
+
+		/* if it's not an update oin then just continue */
+		if (!(lock->flags & FLAG_FILE_UPDATE_OIN))
+			continue;
+
+		if (!first) {
+			first = lock;
+			/* nothing else to do to the 1st update release. */
+			continue;
+		}
+
+		/* only the 1st update_oin lock stays -- the
+		 * rest are either counted into its num_ident
+		 * or dropped */
+		list_del(&lock->lock_list);
+		atomic_dec(&journal->num_chkpt_locks);
+
+		/* we only increment first->num_ident if the current
+		 * lock has a size change AND first has recorded at
+		 * least one size change (which can include
+		 * itself). Otherwise we'll have an off by one as a
+		 * first with no size change recorded will keep at
+		 * least 1 reference for itself. */
+		if ((lock->flags & FLAG_FILE_EXTEND) 
+		    || (lock->flags & FLAG_FILE_TRUNCATE)) {
+			if ((first->flags & FLAG_FILE_EXTEND) 
+			    || (first->flags & FLAG_FILE_TRUNCATE))
+				first->num_ident++;
+		} else {
+			/* Ok, no size change on this particular lock,
+			 * so we're discarding it without updating the
+			 * num_ident value -- that's fine but we need
+			 * to manually drop the lockres->lock_holders
+			 * value on its behalf */
+			first->drop_holders++;
+		}
+
+		/* record a size change in first if there was one. */
+		if (lock->flags & FLAG_FILE_EXTEND)
+			first->flags = first->flags | FLAG_FILE_EXTEND;
+		if (lock->flags & FLAG_FILE_TRUNCATE)
+			first->flags = first->flags | FLAG_FILE_TRUNCATE;
+
+		if (!first->bh)
+			first->bh = lock->bh; /* first takes over lock's bh */
+		else
+			brelse(lock->bh);
+		iput(lock->inode);
+
+		kmem_cache_free(OcfsGlobalCtxt.lock_cache, lock);
+	}
+
+	return;
+}
+
 /* 
  * ocfs_commit_cache()
  *
@@ -104,6 +184,7 @@
 	ocfs_journal_lock *lock = NULL;
 	struct inode *inode;
 	ocfs_inode_private *ip;
+	ocfs_lock_res *lockres;
 
 	LOG_ENTRY_ARGS("(data_flush = %u)\n", data_flush);
 
@@ -170,6 +251,12 @@
 	journal_unlock_updates(journal->k_journal);
 	up_write(&journal->trans_barrier);
 
+	ocfs_journal_optimize_lock_list(journal);
+#ifdef VERBOSE_COMMIT_THREAD
+	printk("(%u) commit_thread: after optimization, %u locks to release\n",
+	       current->pid, atomic_read(&journal->num_chkpt_locks));
+#endif
+
 	p = n = NULL;
 	list_for_each_safe(p, n, &journal->checkpointing_locks) {
 		if (!atomic_read(&journal->num_chkpt_locks))
@@ -192,11 +279,12 @@
 		down_write(&OCFS_I(lock->inode)->ip_io_sem);
 		tmpstat = 0;
 		if (!INODE_DELETED(lock->inode))
-			tmpstat = ocfs_release_lock(osb, 
-						    lock->type,
-						    lock->flags,
-						    lock->bh,
-						    lock->inode);
+			tmpstat = ocfs_release_lock_full(osb, 
+							 lock->type,
+							 lock->flags,
+							 lock->bh,
+							 lock->inode,
+							 lock->num_ident);
 		else
 			LOG_ERROR_ARGS("commit_thread: Skipping delete for "
 				       "inode %llu!\n", 
@@ -207,6 +295,14 @@
 				       " %d releasing lock on inode %llu!\n",
 				       tmpstat, OCFS_I(lock->inode)->ip_blkno);
 
+		if (lock->drop_holders) {
+			lockres = GET_INODE_LOCKRES(lock->inode);
+			ocfs_acquire_lockres(lockres, 0);
+			OCFS_ASSERT(lockres->lock_holders >= lock->drop_holders);
+			lockres->lock_holders -= lock->drop_holders;
+			ocfs_release_lockres(lockres);
+		}
+
 		if (lock->bh != NULL)
 			brelse(lock->bh);
 		iput(lock->inode);
@@ -994,6 +1090,12 @@
 	lock->inode = inode;
 	lock->req_io_sem  = req_io_sem;
 
+	/* stuff for commit thread optimization. */
+	lock->num_ident = 1;
+	/* this is for *additional* decrements of lock_holders, not
+	 * the one given by ocfs_release_lock... */
+	lock->drop_holders = 0;
+
 	if (bh)
 		get_bh(bh);
 

Modified: trunk/src/nm.c
===================================================================
--- trunk/src/nm.c	2004-07-22 20:49:45 UTC (rev 1295)
+++ trunk/src/nm.c	2004-07-22 20:55:01 UTC (rev 1296)
@@ -77,7 +77,7 @@
 static void ocfs_mark_inode_for_extend(ocfs_super *osb, struct inode *inode,
 				       __u32 node_num);
 static void ocfs_clear_inode_for_extend(ocfs_super *osb, struct inode *inode,
-					__u32 node_num);
+					__u32 node_num, u32 num_rel);
 
 static void ocfs_process_vote_worker(void *val);
 
@@ -616,7 +616,7 @@
 }
 
 static void ocfs_clear_inode_for_extend(ocfs_super *osb, struct inode *inode,
-					__u32 node_num)
+					__u32 node_num, u32 num_rel)
 {
 	int dec = 0;
 
@@ -630,7 +630,7 @@
 	down(&recovery_list_sem);
 	spin_lock(&oin_num_ext_lock);
 
-	OCFS_I(inode)->ip_num_extends--;
+	OCFS_I(inode)->ip_num_extends -= num_rel;
 
 	if (OCFS_I(inode)->ip_num_extends < 0)
 		BUG();
@@ -704,7 +704,7 @@
 	int status = 0;
 	int tmpstat = 0;
 	ocfs_lock_res *lockres = NULL;
-	__u32 flags;
+	__u32 flags, num_ident;
 	__u16 num_nodes;
 	ocfs2_dinode *fe = NULL;
 	ocfs_vote *vote = NULL;
@@ -741,13 +741,21 @@
 		flags = publish->vote_type;
 		lock_id = publish->lock_id;
 		seq_num = publish->publ_seq_num;
+		num_ident = publish->num_ident;
 	} else {
 		ocfs_dlm_req_master *req_master = (ocfs_dlm_req_master *)dlm_msg->msg_buf;
 		flags = req_master->flags;
 		lock_id = req_master->lock_id;
 		seq_num = req_master->lock_seq_num;
+		num_ident = req_master->num_ident;
 	}
-	
+
+	if (!num_ident) {
+		printk("flags = 0x%x, lock_id = %llu, node_num = %u\n",
+		       flags, lock_id, node_num);
+		BUG();
+	}
+
 	LOG_TRACE_ARGS ("node=%u, id=%llu, seq=%llu\n", node_num,
 			lock_id, seq_num);
 
@@ -822,10 +830,10 @@
 					    status, &master_alive, inode);
 
 #ifdef VERBOSE_PROCESS_VOTE
-	printk("(%u) ocfs_process_vote: %s request for lockid: %llu, action: (%u) %s, type: %s\n", current->pid,
+	printk("(%u) ocfs_process_vote: %s request for lockid: %llu, action: (%u) %s, type: %s, num_ident = %u\n", current->pid,
 	       flags & FLAG_RELEASE_LOCK ? "RELEASE" : 
 	       (flags & FLAG_ACQUIRE_LOCK ? "ACQUIRE" : "MODIFY"), lock_id,
- 	       vote_type, process_vote_strings[vote_type], disk_vote ? "disk vote" : "net vote" );
+ 	       vote_type, process_vote_strings[vote_type], disk_vote ? "disk vote" : "net vote", num_ident);
 	if (vote_type == INVALID_REQUEST)
 		printk("Invalid request! flags = 0x%x\n", flags);
 
@@ -1254,7 +1262,8 @@
 		if (flags & FLAG_ACQUIRE_LOCK)
 			ocfs_mark_inode_for_extend(osb, inode, node_num);
 		else if (flags & FLAG_RELEASE_LOCK)
-			ocfs_clear_inode_for_extend(osb, inode, node_num);
+			ocfs_clear_inode_for_extend(osb, inode, node_num, 
+						    num_ident);
 		else {
 			printk("uhoh, bad vote flags! 0x%x\n", flags);
 			BUG();

Modified: trunk/src/ocfs.h
===================================================================
--- trunk/src/ocfs.h	2004-07-22 20:49:45 UTC (rev 1295)
+++ trunk/src/ocfs.h	2004-07-22 20:55:01 UTC (rev 1296)
@@ -723,6 +723,7 @@
 	__u32 flags;
 	__u8 open_handle;
 	__u8 odmh_pad[3];
+	__u32 num_ident; /* number of identical messages, always >= 1 */
 } ocfs_dlm_msg_hdr;
 
 typedef ocfs_dlm_msg_hdr ocfs_dlm_req_master;

Modified: trunk/src/ocfs2_disk_dlm.h
===================================================================
--- trunk/src/ocfs2_disk_dlm.h	2004-07-22 20:49:45 UTC (rev 1295)
+++ trunk/src/ocfs2_disk_dlm.h	2004-07-22 20:55:01 UTC (rev 1296)
@@ -114,7 +114,8 @@
 	__u64 lock_id;		/* Lock vote is requested for */
 	/* last seq num used in comm voting */
 /*60*/	__u64 comm_seq_num;
-/*68*/
+	__u32 num_ident;
+/*72*/
 } ocfs_publish;
 
 typedef struct _ocfs_vote

Modified: trunk/src/ocfs_journal.h
===================================================================
--- trunk/src/ocfs_journal.h	2004-07-22 20:49:45 UTC (rev 1295)
+++ trunk/src/ocfs_journal.h	2004-07-22 20:55:01 UTC (rev 1296)
@@ -147,11 +147,17 @@
 
 typedef struct _ocfs_journal_lock ocfs_journal_lock;
 struct _ocfs_journal_lock {
+	/* release_lock arguments. */
 	__u32 type;
 	__u32 flags;
 	struct buffer_head *bh;
 	struct inode *inode;
+	unsigned int num_ident;
+
+	/* used only for immediate checkpointing transactions */
 	int req_io_sem;
+	/* used by commit_cache */
+	unsigned int drop_holders;
 	/* lock_list: we are either on 
 	 *  - handle->locks: if still running
 	 *  - inode->ip_pending_locks: if waiting for checkpoint

Modified: trunk/src/vote.c
===================================================================
--- trunk/src/vote.c	2004-07-22 20:49:45 UTC (rev 1295)
+++ trunk/src/vote.c	2004-07-22 20:55:01 UTC (rev 1296)
@@ -853,7 +853,7 @@
  * ocfs_send_dlm_request_msg()
  * inode is definitely non NULL
  */
-int ocfs_send_dlm_request_msg (ocfs_super * osb, __u64 lock_id, __u32 lock_type, __u32 flags, ocfs_node_map *votemap, ocfs_node_map *openmap, struct inode *inode, int *vote_status)
+int ocfs_send_dlm_request_msg (ocfs_super * osb, __u64 lock_id, __u32 lock_type, __u32 flags, ocfs_node_map *votemap, ocfs_node_map *openmap, struct inode *inode, __u32 num_ident, int *vote_status)
 {
 	int status = 0;
 	ocfs_dlm_msg *dlm_msg = NULL;
@@ -883,6 +883,7 @@
 
 	req->lock_id = lock_id;
 	req->flags = flags;
+	req->num_ident = num_ident;
 
 #ifdef VERBOSE_LOCKING_TRACE
 	printk("ocfs_send_dlm_request_msg: inode=%p, lockid = %llu\n",

Modified: trunk/src/vote.h
===================================================================
--- trunk/src/vote.h	2004-07-22 20:49:45 UTC (rev 1295)
+++ trunk/src/vote.h	2004-07-22 20:55:01 UTC (rev 1296)
@@ -45,7 +45,8 @@
 int ocfs_send_dlm_request_msg (ocfs_super * osb, __u64 lock_id, 
 			       __u32 lock_type, __u32 flags, 
 			       ocfs_node_map *votemap, ocfs_node_map *openmap, 
-			       struct inode *inode, int *vote_status);
+			       struct inode *inode, __u32 num_ident, 
+			       int *vote_status);
 int ocfs_send_vote_reply(ocfs_super *osb, ocfs_dlm_msg *dlm_msg,
 			 __u32 vote_status, int inode_open);
 int ocfs_lookup_vote_request_obj (ocfs_super *osb, 



More information about the Ocfs2-commits mailing list