[Ocfs2-commits] khackel commits r2730 - branches/ocfs2-1.2/fs/ocfs2/dlm

svn-commits at oss.oracle.com svn-commits at oss.oracle.com
Fri Dec 9 16:40:10 CST 2005


Author: khackel
Signed-off-by: mfasheh
Date: 2005-12-09 16:40:09 -0600 (Fri, 09 Dec 2005)
New Revision: 2730

Modified:
   branches/ocfs2-1.2/fs/ocfs2/dlm/dlmmaster.c
Log:
* Merge revision 2729 from trunk
	- fixes a hang in lock mastery that occurs when the "expected master" goes
	  down during mastery

Signed-off-by: mfasheh



Modified: branches/ocfs2-1.2/fs/ocfs2/dlm/dlmmaster.c
===================================================================
--- branches/ocfs2-1.2/fs/ocfs2/dlm/dlmmaster.c	2005-12-09 22:39:03 UTC (rev 2729)
+++ branches/ocfs2-1.2/fs/ocfs2/dlm/dlmmaster.c	2005-12-09 22:40:09 UTC (rev 2730)
@@ -217,7 +217,7 @@
 static int dlm_wait_for_lock_mastery(struct dlm_ctxt *dlm,
 				     struct dlm_lock_resource *res,
 				     struct dlm_master_list_entry *mle,
-				     int blocked);
+				     int *blocked);
 static int dlm_restart_lock_mastery(struct dlm_ctxt *dlm,
 				    struct dlm_lock_resource *res,
 				    struct dlm_master_list_entry *mle,
@@ -670,6 +670,7 @@
 	int ret, nodenum;
 	struct dlm_node_iter iter;
 	unsigned int namelen;
+	int tries = 0;
 
 	BUG_ON(!lockid);
 
@@ -791,20 +792,21 @@
 
 wait:
 	/* keep going until the response map includes all nodes */
-	ret = dlm_wait_for_lock_mastery(dlm, res, mle, blocked);
+	ret = dlm_wait_for_lock_mastery(dlm, res, mle, &blocked);
 	if (ret < 0) {
-		if (blocked) {
-		       	if (mle->type == DLM_MLE_MASTER) {
-				mlog(0, "mle changed to a MASTER due "
-				     "to node death. restart.\n");
-				goto redo_request;
-			}
-			/* should never happen for a BLOCK */
-			mlog(ML_ERROR, "mle type=%d\n", mle->type);
-			BUG();
+		mlog(0, "%s:%.*s: node map changed, redo the "
+		     "master request now, blocked=%d\n",
+		     dlm->name, res->lockname.len,
+		     res->lockname.name, blocked);
+		if (++tries > 20) {
+			mlog(ML_ERROR, "%s:%.*s: spinning on "
+			     "dlm_wait_for_lock_mastery, blocked=%d\n", 
+			     dlm->name, res->lockname.len, 
+			     res->lockname.name, blocked);
+			dlm_print_one_lock_resource(res);
+			dlm_print_one_mle(mle);
+			tries = 0;
 		}
-		mlog(0, "node map changed, redo the "
-		     "master request now\n");
 		goto redo_request;
 	}
 
@@ -838,7 +840,7 @@
 static int dlm_wait_for_lock_mastery(struct dlm_ctxt *dlm,
 				     struct dlm_lock_resource *res,
 				     struct dlm_master_list_entry *mle,
-				     int blocked)
+				     int *blocked)
 {
 	u8 m;
 	int ret, bit;
@@ -867,21 +869,24 @@
 	/* restart if we hit any errors */
 	if (map_changed) {
 		int b;
-		mlog(0, "node map changed, restarting\n");
-		ret = dlm_restart_lock_mastery(dlm, res, mle, blocked);
+		mlog(0, "%s: %.*s: node map changed, restarting\n",
+		     dlm->name, res->lockname.len, res->lockname.name);
+		ret = dlm_restart_lock_mastery(dlm, res, mle, *blocked);
 		b = (mle->type == DLM_MLE_BLOCK);
-		if ((blocked && !b) || (!blocked && b)) {
+		if ((*blocked && !b) || (!*blocked && b)) {
 			mlog(0, "%s:%.*s: status change: old=%d new=%d\n", 
 			     dlm->name, res->lockname.len, res->lockname.name,
-			     blocked, b);
-			blocked = b;
+			     *blocked, b);
+			*blocked = b;
 		}
 		spin_unlock(&mle->spinlock);
 		if (ret < 0) {
 			mlog_errno(ret);
 			goto leave;
 		}
-		mlog(0, "restart lock mastery succeeded, rechecking now\n");
+		mlog(0, "%s:%.*s: restart lock mastery succeeded, "
+		     "rechecking now\n", dlm->name, res->lockname.len,
+		     res->lockname.name);
 		goto recheck;
 	}
 
@@ -892,7 +897,7 @@
 	} else {
 		sleep = 1;
 		/* have all nodes responded? */
-		if (voting_done && !blocked) {
+		if (voting_done && !*blocked) {
 			bit = find_next_bit(mle->maybe_map, O2NM_MAX_NODES, 0);
 			if (dlm->node_num <= bit) {
 				/* my node number is lowest.



More information about the Ocfs2-commits mailing list