[Ocfs2-commits] khackel commits r2209 - trunk/fs/ocfs2/dlm

svn-commits at oss.oracle.com
Sun May 1 17:14:23 CDT 2005


Author: khackel
Signed-off-by: manish
Date: 2005-05-01 17:14:21 -0500 (Sun, 01 May 2005)
New Revision: 2209

Modified:
   trunk/fs/ocfs2/dlm/dlmcommon.h
   trunk/fs/ocfs2/dlm/dlmmaster.c
   trunk/fs/ocfs2/dlm/dlmrecovery.c
Log:
* fix a bug in dlm_assert_master_handler that prevented mounting on more than two nodes

Signed-off-by: manish

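In short, the patch threads a new u32 flags argument through dlm_do_assert_master() and
dlm_dispatch_assert_master(), and when an assert arrives with DLM_ASSERT_MASTER_MLE_CLEANUP
set, a mismatch between the asserting node and the node expected from the MLE maybe_map now
makes the local node back off instead of hitting BUG(). Below is a minimal userspace sketch
of that decision only; the kernel context (the mle, lock resource, mlog(), BUG()) is omitted,
and check_assert() with its arguments is an illustrative helper, not code from the patch.

/*
 * Sketch of the new check in dlm_assert_master_handler(): with
 * DLM_ASSERT_MASTER_MLE_CLEANUP set, a master/asserting-node mismatch
 * is tolerated (the local node backs off) instead of being fatal.
 */
#include <stdio.h>
#include <stdint.h>

#define DLM_ASSERT_MASTER_MLE_CLEANUP      0x00000001
#define DLM_ASSERT_MASTER_REQUERY          0x00000002
#define DLM_ASSERT_MASTER_FINISH_MIGRATION 0x00000004

/* returns 0 if the assert is acceptable, -1 where the old code would BUG() */
static int check_assert(uint8_t expected_master, uint8_t asserting_node,
			uint32_t flags)
{
	if (asserting_node == expected_master)
		return 0;

	if (flags & DLM_ASSERT_MASTER_MLE_CLEANUP) {
		/* another node already mastered the resource during the
		 * mount race; the expected node should simply back off */
		printf("master %u was found, %u should back off\n",
		       asserting_node, expected_master);
		return 0;
	}

	/* without the cleanup flag the mismatch is still treated as fatal */
	fprintf(stderr, "expected %u to be the master, but %u is asserting!\n",
		expected_master, asserting_node);
	return -1;
}

int main(void)
{
	/* e.g. a third node joining: node 2 asserts while node 0 was expected */
	return check_assert(0, 2, DLM_ASSERT_MASTER_MLE_CLEANUP) ? 1 : 0;
}

The other two flags follow the same plumbing: DLM_ASSERT_MASTER_REQUERY is set on the
recovery requery path in dlmrecovery.c, and DLM_ASSERT_MASTER_FINISH_MIGRATION on the
migration asserts in dlmmaster.c, as shown in the diff below.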


Modified: trunk/fs/ocfs2/dlm/dlmcommon.h
===================================================================
--- trunk/fs/ocfs2/dlm/dlmcommon.h	2005-04-30 02:45:31 UTC (rev 2208)
+++ trunk/fs/ocfs2/dlm/dlmcommon.h	2005-05-01 22:14:21 UTC (rev 2209)
@@ -159,6 +159,7 @@
 {
 	dlm_lock_resource *lockres;
 	u8 request_from;
+	u32 flags;
 	unsigned ignore_higher:1;
 } dlm_assert_master_priv;
 
@@ -387,6 +388,9 @@
 	u8 name[NM_MAX_NAME_LEN];
 } dlm_master_request;
 
+#define DLM_ASSERT_MASTER_MLE_CLEANUP      0x00000001
+#define DLM_ASSERT_MASTER_REQUERY          0x00000002
+#define DLM_ASSERT_MASTER_FINISH_MIGRATION 0x00000004
 typedef struct _dlm_assert_master
 {
 	u8 node_idx;
@@ -881,7 +885,8 @@
 int dlm_finalize_reco_handler(net_msg *msg, u32 len, void *data);
 
 int dlm_dispatch_assert_master(dlm_ctxt *dlm, dlm_lock_resource *res,
-			       int ignore_higher, u8 request_from);
+			       int ignore_higher, u8 request_from, 
+			       u32 flags);
 void dlm_assert_master_worker(dlm_work_item *item, void *data);
 
 
@@ -911,7 +916,8 @@
 void dlm_mle_node_up(dlm_ctxt *dlm, dlm_master_list_entry *mle,
 		       struct nm_node *node, int idx);
 int dlm_do_assert_master(dlm_ctxt *dlm, const char *lockname, 
-			 unsigned int namelen, void *nodemap);
+			 unsigned int namelen, void *nodemap,
+			 u32 flags);
 int dlm_do_migrate_request(dlm_ctxt *dlm, dlm_lock_resource *res, 
 			   u8 master, u8 new_master, dlm_node_iter *iter);
 void dlm_clean_master_list(dlm_ctxt *dlm, u8 dead_node);

Modified: trunk/fs/ocfs2/dlm/dlmmaster.c
===================================================================
--- trunk/fs/ocfs2/dlm/dlmmaster.c	2005-04-30 02:45:31 UTC (rev 2208)
+++ trunk/fs/ocfs2/dlm/dlmmaster.c	2005-05-01 22:14:21 UTC (rev 2209)
@@ -358,6 +358,14 @@
 	DLM_ASSERT(mle->dlm);
 	dlm = mle->dlm;
 
+	if (mle->type == DLM_MLE_BLOCK) {
+		mlog(0, "calling mle_release for %.*s, type %d\n", 
+		     mle->u.name.len, mle->u.name.name, mle->type);
+	} else {
+		mlog(0, "calling mle_release for %.*s, type %d\n", 
+		     mle->u.res->lockname.len, 
+		     mle->u.res->lockname.name, mle->type);
+	}
 	assert_spin_locked(&dlm->spinlock);
 	assert_spin_locked(&dlm->master_lock);
 
@@ -785,7 +793,7 @@
 	if (assert) {
 		m = dlm->node_num;
 		ret = dlm_do_assert_master(dlm, res->lockname.name, 
-					   res->lockname.len, mle->vote_map);
+					   res->lockname.len, mle->vote_map, 0);
 		if (ret) {
 			mlog_errno(ret);
 
@@ -938,6 +946,7 @@
 		}
 
 		if (res->owner == dlm->node_num) {
+			u32 flags = DLM_ASSERT_MASTER_MLE_CLEANUP;
 			spin_unlock(&res->spinlock);
 			// mlog(0, "this node is the master\n");
 			response = DLM_MASTER_RESP_YES;
@@ -951,7 +960,8 @@
 			 * create mles.  this node now needs to
 			 * go back and clean those up. */
 			ret = dlm_dispatch_assert_master(dlm, res, 1, 
-							 request->node_idx);
+							 request->node_idx,
+							 flags);
 			if (ret < 0) {
 				mlog(ML_ERROR, "failed to dispatch assert "
 				     "master work\n");
@@ -1096,7 +1106,8 @@
  * and re-assert across the cluster...
  */
 int dlm_do_assert_master(dlm_ctxt *dlm, const char *lockname, 
-			 unsigned int namelen, void *nodemap)
+			 unsigned int namelen, void *nodemap,
+			 u32 flags)
 {
 	dlm_assert_master assert;
 	int to, tmpret;
@@ -1112,11 +1123,13 @@
 	dlm_node_iter_init(nodemap, &iter);
 	while ((to = dlm_node_iter_next(&iter)) >= 0) {
 		int r = 0;
-		// mlog(0, "sending assert master to %d\n", to);
+		mlog(0, "sending assert master to %d (%.*s)\n", to,
+		     namelen, lockname);
 		memset(&assert, 0, sizeof(assert));
 		assert.node_idx = dlm->node_num;
 		assert.namelen = namelen;
 		strncpy(assert.name, lockname, namelen);
+		assert.flags = flags;
 
 		dlm_assert_master_to_net(&assert);
 		tmpret = net_send_message(DLM_ASSERT_MASTER_MSG, dlm->key, 
@@ -1155,6 +1168,7 @@
 	dlm_lock_resource *res;
 	char *name;
 	unsigned int namelen;
+	u32 flags;
 
 	if (!dlm_grab(dlm))
 		return 0;
@@ -1162,6 +1176,7 @@
 	dlm_assert_master_to_host(assert);	
 	name = assert->name;
 	namelen = assert->namelen;
+	flags = assert->flags;
 
 	if (namelen > DLM_LOCKID_NAME_MAX) {
 		mlog(ML_ERROR, "Invalid name length!");
@@ -1170,22 +1185,32 @@
 
 	spin_lock(&dlm->spinlock);
 
+	if (flags)
+		mlog(0, "assert_master with flags: %u\n", flags);
+
 	/* find the MLE */
 	spin_lock(&dlm->master_lock);
 	if (!dlm_find_mle(dlm, &mle, name, namelen)) {
 		mlog(0, "just got an assert_master from %u, but no "
-		     "MLE for it!\n", assert->node_idx);
+		     "MLE for it! (%.*s)\n", assert->node_idx,
+		     namelen, name);
 	} else {
 		int bit = find_next_bit (mle->maybe_map, NM_MAX_NODES, 0);
 		if (bit >= NM_MAX_NODES) {
 			mlog(ML_ERROR, "no bits set in the maybe_map, but %u "
-			     "is asserting!\n", assert->node_idx);
+			     "is asserting! (%.*s)\n", assert->node_idx,
+			     namelen, name);
 			BUG();
 		} else if (bit != assert->node_idx) {
-			/* TODO: is this ok?  */
-			mlog(ML_ERROR, "expected %u to be the master, but %u "
-			     "is asserting!\n", bit, assert->node_idx);
-			BUG();
+			if (flags & DLM_ASSERT_MASTER_MLE_CLEANUP) {
+				mlog(0, "master %u was found, %u should "
+				     "back off\n", assert->node_idx, bit);
+			} else {
+				mlog(ML_ERROR, "expected %u to be the master, "
+				     "but %u is asserting! (%.*s)\n", bit, 
+				     assert->node_idx, namelen, name);
+				BUG();
+			}
 		}
 	}
 	spin_unlock(&dlm->master_lock);
@@ -1199,21 +1224,26 @@
 		if (!mle) {
 			if (res->owner != assert->node_idx) {
 				mlog(ML_ERROR, "assert_master from "
-					  "%u, but current owner is %u!\n",
-				       assert->node_idx, res->owner);
+					  "%u, but current owner is "
+					  "%u! (%.*s)\n",
+				       assert->node_idx, res->owner,
+				       namelen, name);
 				BUG();
 			}
 		} else if (mle->type != DLM_MLE_MIGRATION) {
 			if (res->owner != DLM_LOCK_RES_OWNER_UNKNOWN) {
 				mlog(ML_ERROR, "got assert_master from "
-				     "node %u, but %u is the owner!\n",
-				     assert->node_idx, res->owner);
+				     "node %u, but %u is the owner! "
+				     "(%.*s)\n", assert->node_idx, 
+				     res->owner, namelen, name);
 				BUG();
 			}
 			if (!(res->state & DLM_LOCK_RES_IN_PROGRESS)) {
 				mlog(ML_ERROR, "got assert from %u, but lock "
 				     "with no owner should be "
-				     "in-progress!\n", assert->node_idx);
+				     "in-progress! (%.*s)\n", 
+				     assert->node_idx,
+				     namelen, name);
 				BUG();
 			}
 		} else /* mle->type == DLM_MLE_MIGRATION */ {
@@ -1221,8 +1251,9 @@
 			if (assert->node_idx != mle->new_master) {
 				mlog(ML_ERROR, "got assert from %u, but "
 				     "new master is %u, and old master "
-				     "was %u\n", assert->node_idx, 
-				     mle->new_master, mle->master);
+				     "was %u (%.*s)\n", 
+				     assert->node_idx, mle->new_master,
+				     mle->master, namelen, name);
 				BUG();
 			}
 
@@ -1263,7 +1294,7 @@
 }
 
 int dlm_dispatch_assert_master(dlm_ctxt *dlm, dlm_lock_resource *res, 
-			       int ignore_higher, u8 request_from)
+			       int ignore_higher, u8 request_from, u32 flags)
 {
 	dlm_work_item *item;
 	item = (dlm_work_item *)kmalloc(sizeof(dlm_work_item), GFP_KERNEL);
@@ -1279,6 +1310,7 @@
 	/* can optionally ignore node numbers higher than this node */
 	item->u.am.ignore_higher = ignore_higher;
 	item->u.am.request_from = request_from;
+	item->u.am.flags = flags;
 	
 	spin_lock(&dlm->work_lock);
 	list_add_tail(&item->list, &dlm->work_list);
@@ -1297,6 +1329,7 @@
 	int ignore_higher;
 	int bit;
 	u8 request_from;
+	u32 flags;
 
 	DLM_ASSERT(item);
 	dlm = item->dlm;
@@ -1307,6 +1340,7 @@
 
 	ignore_higher = item->u.am.ignore_higher;
 	request_from = item->u.am.request_from;
+	flags = item->u.am.flags;
 
 	do {
 		spin_lock(&dlm->spinlock);
@@ -1331,7 +1365,7 @@
 
 		ret = dlm_do_assert_master(dlm, res->lockname.name,
 					   res->lockname.len, 
-					   nodemap);
+					   nodemap, flags);
 		if (ret < 0) {
 			/* no choice but to try again.
 			 * maybe a node died. */ 
@@ -2015,7 +2049,8 @@
 retry:
 	mlog(0, "doing assert master to all except the original node\n");
 	ret = dlm_do_assert_master(dlm, res->lockname.name, 
-				   res->lockname.len, iter.node_map);
+				   res->lockname.len, iter.node_map,
+				   DLM_ASSERT_MASTER_FINISH_MIGRATION);
 	if (ret < 0) {
 		mlog_errno(ret);
 
@@ -2032,7 +2067,8 @@
 	set_bit(old_master, iter.node_map);
 	mlog(0, "doing assert master back to %u\n", old_master);
 	ret = dlm_do_assert_master(dlm, res->lockname.name,
-				   res->lockname.len, iter.node_map);
+				   res->lockname.len, iter.node_map,
+				   DLM_ASSERT_MASTER_FINISH_MIGRATION);
 	if (ret < 0) {
 		mlog(0, "assert master to original master failed "
 		     "with %d.\n", ret);

Modified: trunk/fs/ocfs2/dlm/dlmrecovery.c
===================================================================
--- trunk/fs/ocfs2/dlm/dlmrecovery.c	2005-04-30 02:45:31 UTC (rev 2208)
+++ trunk/fs/ocfs2/dlm/dlmrecovery.c	2005-05-01 22:14:21 UTC (rev 2209)
@@ -1253,6 +1253,7 @@
 	dlm_master_requery *req = (dlm_master_requery *)msg->buf;
 	dlm_lock_resource *res = NULL;
 	int master = DLM_LOCK_RES_OWNER_UNKNOWN;
+	u32 flags = DLM_ASSERT_MASTER_REQUERY;
 	
 	if (!dlm_grab(dlm)) {
 		/* since the domain has gone away on this 
@@ -1268,7 +1269,8 @@
 		spin_lock(&res->spinlock);
 		master = res->owner;
 		if (master == dlm->node_num) {
-			int ret = dlm_dispatch_assert_master(dlm, res, 0, 0);
+			int ret = dlm_dispatch_assert_master(dlm, res, 
+							     0, 0, flags);
 			if (ret < 0) {
 				mlog_errno(-ENOMEM);
 				/* retry!? */
