[Ocfs2-commits] khackel commits r2428 - trunk/fs/ocfs2/dlm

svn-commits at oss.oracle.com svn-commits at oss.oracle.com
Tue Jun 28 16:07:48 CDT 2005


Author: khackel
Signed-off-by: mfasheh
Date: 2005-06-24 16:56:03 -0500 (Fri, 24 Jun 2005)
New Revision: 2428

Modified:
   trunk/fs/ocfs2/dlm/dlmmaster.c
   trunk/fs/ocfs2/dlm/dlmrecovery.c
Log:
* adds -EINVAL (returned by the net code when the socket is gone) to
  the error codes recognized by dlm_is_host_down
* fixes __dlm_hb_node_down: make sure dead node hasn't already been
  removed from live_nodes_map, make sure dead node is in domain_map
* adds some extra debugging output on the non-master end of the
  begin-recovery message

Signed-off-by: mfasheh



Modified: trunk/fs/ocfs2/dlm/dlmmaster.c
===================================================================
--- trunk/fs/ocfs2/dlm/dlmmaster.c	2005-06-24 18:31:18 UTC (rev 2427)
+++ trunk/fs/ocfs2/dlm/dlmmaster.c	2005-06-24 21:56:03 UTC (rev 2428)
@@ -166,6 +166,8 @@
 		case -ENETUNREACH:
 		case -ENETRESET:
 		case -ESHUTDOWN:
+		case -EINVAL:   /* if returned from our tcp code,
+				   this means there is no socket */
 			return 1;
 	}
 	return 0;

Modified: trunk/fs/ocfs2/dlm/dlmrecovery.c
===================================================================
--- trunk/fs/ocfs2/dlm/dlmrecovery.c	2005-06-24 18:31:18 UTC (rev 2427)
+++ trunk/fs/ocfs2/dlm/dlmrecovery.c	2005-06-24 21:56:03 UTC (rev 2428)
@@ -1697,6 +1697,22 @@
 
 	assert_spin_locked(&dlm->spinlock);
 
+	/* check to see if the node is already considered dead */
+	if (!test_bit(idx, dlm->live_nodes_map)) {
+		mlog(0, "for domain %s, node %d is already dead. "
+		     "another node likely did recovery already.\n",
+		     dlm->name, idx);
+		return;
+	}
+
+	/* check to see if we do not care about this node */
+	if (!test_bit(idx, dlm->domain_map)) {
+		/* This also catches the case that we get a node down
+		 * but haven't joined the domain yet. */
+		mlog(0, "node %u already removed from domain!\n", idx);
+		return;
+	}
+
 	clear_bit(idx, dlm->live_nodes_map);
 
 	/* Clean up join state on node death. */
@@ -1715,18 +1731,13 @@
 		dlm_mle_node_down(dlm, mle, NULL, idx);
 	}
 
-	if (!test_bit(idx, dlm->domain_map)) {
-		/* This also catches the case that we get a node down
-		 * but haven't joined the domain yet. */
-		mlog(0, "node %u already removed from domain!\n", idx);
-		return;
-	}
 
 	mlog(0, "node %u being removed from domain map!\n", idx);
 	clear_bit(idx, dlm->domain_map);
 
 	if (test_bit(idx, dlm->recovery_map))
-		mlog(0, "node %u already added to recovery map!\n", idx);
+		mlog(0, "domain %s, node %u already added "
+		     "to recovery map!\n", dlm->name, idx);
 	else
 		set_bit(idx, dlm->recovery_map);
 }
@@ -1897,7 +1908,18 @@
 		if (ret >= 0)
 			ret = status;
 		if (ret < 0) {
+			dlm_lock_resource *res;
 			mlog_errno(ret);
+			mlog(ML_ERROR, "begin reco of dlm %s to node %u "
+			    " returned %d\n", dlm->name, nodenum, ret);
+			res = dlm_lookup_lockres(dlm, DLM_RECOVERY_LOCK_NAME,
+						 DLM_RECOVERY_LOCK_NAME_LEN);
+			if (res) {
+				dlm_print_one_lock_resource(res);
+				dlm_lockres_put(res);
+			} else {
+				mlog(ML_ERROR, "recovery lock not found\n");
+			}
 			break;
 		}
 	}



More information about the Ocfs2-commits mailing list