[Ocfs2-commits] khackel commits r2718 - trunk/fs/ocfs2/dlm

svn-commits at oss.oracle.com svn-commits at oss.oracle.com
Wed Dec 7 01:40:00 CST 2005


Author: khackel
Signed-off-by: mfasheh
Date: 2005-12-07 01:39:58 -0600 (Wed, 07 Dec 2005)
New Revision: 2718

Modified:
   trunk/fs/ocfs2/dlm/dlmrecovery.c
Log:
* fixes Bug 577 - System crash at dlm_reco_data_done_handler 	
* in dlm_remaster_locks, make sure to handle node-down cases of 
  dlm_request_all_locks separately from other errors
* node death during recovery is recoverable, unlike other, more serious errors

Signed-off-by: mfasheh



Modified: trunk/fs/ocfs2/dlm/dlmrecovery.c
===================================================================
--- trunk/fs/ocfs2/dlm/dlmrecovery.c	2005-12-07 07:34:21 UTC (rev 2717)
+++ trunk/fs/ocfs2/dlm/dlmrecovery.c	2005-12-07 07:39:58 UTC (rev 2718)
@@ -411,8 +411,13 @@
 
 		status = dlm_request_all_locks(dlm, ndata->node_num, dead_node);
 		if (status < 0) {
-			destroy = 1;
-			goto leave;
+			mlog_errno(status);
+			if (dlm_is_host_down(status))
+				ndata->state = DLM_RECO_NODE_DATA_DEAD;
+			else {
+				destroy = 1;
+				goto leave;
+			}
 		}
 
 		switch (ndata->state) {
@@ -609,6 +614,8 @@
 	ret = DLM_NOLOCKMGR;
 	ret = o2net_send_message(DLM_LOCK_REQUEST_MSG, dlm->key,
 				 &lr, sizeof(lr), request_from, NULL);
+
+	/* negative status is handled by caller */
 	if (ret < 0)
 		mlog_errno(ret);
 
@@ -720,6 +727,7 @@
 
 	ret = o2net_send_message(DLM_RECO_DATA_DONE_MSG, dlm->key, &done_msg,
 				 sizeof(done_msg), send_to, &tmpret);
+	/* negative status is ignored by the caller */
 	if (ret >= 0)
 		ret = tmpret;
 	return ret;
@@ -858,6 +866,8 @@
 	ret = o2net_send_message(DLM_MIG_LOCKRES_MSG, dlm->key, mres,
 				 sz, send_to, &status);
 	if (ret < 0) {
+		/* XXX: negative status is not handled.
+		 * this will end up killing this node. */
 		mlog_errno(ret);
 	} else {
 		/* might get an -ENOMEM back here */
@@ -1278,6 +1288,7 @@
 
 	ret = o2net_send_message(DLM_MASTER_REQUERY_MSG, dlm->key,
 				 &req, sizeof(req), nodenum, &status);
+	/* XXX: negative status not handled properly here. */
 	if (ret < 0)
 		mlog_errno(ret);
 	else {
@@ -1977,6 +1988,7 @@
 			  nodenum);
 		ret = o2net_send_message(DLM_BEGIN_RECO_MSG, dlm->key,
 					 &br, sizeof(br), nodenum, &status);
+		/* negative status is handled ok by caller here */
 		if (ret >= 0)
 			ret = status;
 		if (ret < 0) {
@@ -2061,8 +2073,17 @@
 			continue;
 		ret = o2net_send_message(DLM_FINALIZE_RECO_MSG, dlm->key,
 					 &fr, sizeof(fr), nodenum, &status);
-		if (ret >= 0)
+		if (ret >= 0) {
 			ret = status;
+			if (dlm_is_host_down(ret)) {
+				/* this has no effect on this recovery 
+				 * session, so set the status to zero to 
+				 * finish out the last recovery */
+				mlog(ML_ERROR, "node %u went down after this "
+				     "node finished recovery.\n", nodenum);
+				ret = 0;
+			}
+		}
 		if (ret < 0) {
 			mlog_errno(ret);
 			break;



More information about the Ocfs2-commits mailing list