[Ocfs2-commits] khackel commits r2719 - branches/ocfs2-1.2/fs/ocfs2/dlm

svn-commits at oss.oracle.com svn-commits at oss.oracle.com
Wed Dec 7 01:53:59 CST 2005


Author: khackel
Signed-off-by: mfasheh
Date: 2005-12-07 01:53:57 -0600 (Wed, 07 Dec 2005)
New Revision: 2719

Modified:
   branches/ocfs2-1.2/fs/ocfs2/dlm/dlmdomain.c
   branches/ocfs2-1.2/fs/ocfs2/dlm/dlmmaster.c
   branches/ocfs2-1.2/fs/ocfs2/dlm/dlmrecovery.c
Log:
* Merge r2714:2718 from trunk
	- 2718: fixes Bug 577 - System crash at dlm_reco_data_done_handler
	- 2717: grab a reference to each lockres to avoid null pointer deref
	- 2716: fix bug 572, hang on umount
	- 2715: fixes crash in debug mle printing code
	- 2714: fixes bug 621, do not send message to self to avoid -ELOOP

Signed-off-by: mfasheh



Modified: branches/ocfs2-1.2/fs/ocfs2/dlm/dlmdomain.c
===================================================================
--- branches/ocfs2-1.2/fs/ocfs2/dlm/dlmdomain.c	2005-12-07 07:39:58 UTC (rev 2718)
+++ branches/ocfs2-1.2/fs/ocfs2/dlm/dlmdomain.c	2005-12-07 07:53:57 UTC (rev 2719)
@@ -314,6 +314,8 @@
 		while (!list_empty(&dlm->resources[i])) {
 			res = list_entry(dlm->resources[i].next,
 				     struct dlm_lock_resource, list);
+			/* need reference when manually grabbing lockres */
+			dlm_lockres_get(res);
 			/* this should unhash the lockres
 			 * and exit with dlm->spinlock */
 			mlog(0, "purging res=%p\n", res);
@@ -327,9 +329,11 @@
 				spin_unlock(&dlm->spinlock);
 				dlm_kick_thread(dlm, res);
 				wait_event(dlm->ast_wq, !dlm_lockres_is_dirty(dlm, res));
+				dlm_lockres_put(res);
 				goto restart;
 			}
 			dlm_purge_lockres(dlm, res);
+			dlm_lockres_put(res);
 		}
 	}
 	spin_unlock(&dlm->spinlock);

Modified: branches/ocfs2-1.2/fs/ocfs2/dlm/dlmmaster.c
===================================================================
--- branches/ocfs2-1.2/fs/ocfs2/dlm/dlmmaster.c	2005-12-07 07:39:58 UTC (rev 2718)
+++ branches/ocfs2-1.2/fs/ocfs2/dlm/dlmmaster.c	2005-12-07 07:53:57 UTC (rev 2719)
@@ -134,12 +134,17 @@
 	struct kref *k;
 
 	k = &mle->mle_refs;
-	type = (mle->type == DLM_MLE_BLOCK ? "BLK" : "MAS");
+	if (mle->type == DLM_MLE_BLOCK)
+		type = "BLK";
+	else if (mle->type == DLM_MLE_MASTER)
+		type = "MAS";
+	else
+		type = "MIG";
 	refs = atomic_read(&k->refcount);
 	master = mle->master;
 	attached = (list_empty(&mle->hb_events) ? 'N' : 'Y');
 
-	if (mle->type == DLM_MLE_BLOCK) {
+	if (mle->type != DLM_MLE_MASTER) {
 		namelen = mle->u.name.len;
 		name = mle->u.name.name;
 	} else {
@@ -474,7 +479,7 @@
 	mle = container_of(kref, struct dlm_master_list_entry, mle_refs);
 	dlm = mle->dlm;
 
-	if (mle->type == DLM_MLE_BLOCK) {
+	if (mle->type != DLM_MLE_MASTER) {
 		mlog(0, "calling mle_release for %.*s, type %d\n",
 		     mle->u.name.len, mle->u.name.name, mle->type);
 	} else {
@@ -748,6 +753,7 @@
 		/* make sure this does not get freed below */
 		alloc_mle = NULL;
 		dlm_init_mle(mle, DLM_MLE_MASTER, dlm, res, NULL, 0);
+		set_bit(dlm->node_num, mle->maybe_map);
 		list_add(&mle->list, &dlm->master_list);
 	}
 
@@ -860,8 +866,16 @@
 
 	/* restart if we hit any errors */
 	if (map_changed) {
+		int b;
 		mlog(0, "node map changed, restarting\n");
 		ret = dlm_restart_lock_mastery(dlm, res, mle, blocked);
+		b = (mle->type == DLM_MLE_BLOCK);
+		if ((blocked && !b) || (!blocked && b)) {
+			mlog(0, "%s:%.*s: status change: old=%d new=%d\n", 
+			     dlm->name, res->lockname.len, res->lockname.name,
+			     blocked, b);
+			blocked = b;
+		}
 		spin_unlock(&mle->spinlock);
 		if (ret < 0) {
 			mlog_errno(ret);
@@ -1092,6 +1106,7 @@
 				memcpy(mle->vote_map, mle->node_map,
 				       sizeof(mle->node_map));
 				mle->u.res = res;
+				set_bit(dlm->node_num, mle->maybe_map);
 
 				ret = -EAGAIN;
 				goto next;
@@ -1136,7 +1151,7 @@
 
 	BUG_ON(mle->type == DLM_MLE_MIGRATION);
 
-	if (mle->type == DLM_MLE_BLOCK) {
+	if (mle->type != DLM_MLE_MASTER) {
 		request.namelen = mle->u.name.len;
 		memcpy(request.name, mle->u.name.name, request.namelen);
 	} else {

Modified: branches/ocfs2-1.2/fs/ocfs2/dlm/dlmrecovery.c
===================================================================
--- branches/ocfs2-1.2/fs/ocfs2/dlm/dlmrecovery.c	2005-12-07 07:39:58 UTC (rev 2718)
+++ branches/ocfs2-1.2/fs/ocfs2/dlm/dlmrecovery.c	2005-12-07 07:53:57 UTC (rev 2719)
@@ -411,8 +411,13 @@
 
 		status = dlm_request_all_locks(dlm, ndata->node_num, dead_node);
 		if (status < 0) {
-			destroy = 1;
-			goto leave;
+			mlog_errno(status);
+			if (dlm_is_host_down(status))
+				ndata->state = DLM_RECO_NODE_DATA_DEAD;
+			else {
+				destroy = 1;
+				goto leave;
+			}
 		}
 
 		switch (ndata->state) {
@@ -608,6 +613,8 @@
 	ret = DLM_NOLOCKMGR;
 	ret = o2net_send_message(DLM_LOCK_REQUEST_MSG, dlm->key,
 				 &lr, sizeof(lr), request_from, NULL);
+
+	/* negative status is handled by caller */
 	if (ret < 0)
 		mlog_errno(ret);
 
@@ -719,6 +726,7 @@
 
 	ret = o2net_send_message(DLM_RECO_DATA_DONE_MSG, dlm->key, &done_msg,
 				 sizeof(done_msg), send_to, &tmpret);
+	/* negative status is ignored by the caller */
 	if (ret >= 0)
 		ret = tmpret;
 	return ret;
@@ -857,6 +865,8 @@
 	ret = o2net_send_message(DLM_MIG_LOCKRES_MSG, dlm->key, mres,
 				 sz, send_to, &status);
 	if (ret < 0) {
+		/* XXX: negative status is not handled.
+		 * this will end up killing this node. */
 		mlog_errno(ret);
 	} else {
 		/* might get an -ENOMEM back here */
@@ -1244,6 +1254,9 @@
 	spin_unlock(&dlm->spinlock);
 
 	while ((nodenum = dlm_node_iter_next(&iter)) >= 0) {
+		/* do not send to self */
+		if (nodenum == dlm->node_num)
+			continue;
 		ret = dlm_do_master_requery(dlm, res, nodenum, real_master);
 		if (ret < 0) {
 			mlog_errno(ret);
@@ -1274,6 +1287,7 @@
 
 	ret = o2net_send_message(DLM_MASTER_REQUERY_MSG, dlm->key,
 				 &req, sizeof(req), nodenum, &status);
+	/* XXX: negative status not handled properly here. */
 	if (ret < 0)
 		mlog_errno(ret);
 	else {
@@ -1989,6 +2003,7 @@
 			  nodenum);
 		ret = o2net_send_message(DLM_BEGIN_RECO_MSG, dlm->key,
 					 &br, sizeof(br), nodenum, &status);
+		/* negative status is handled ok by caller here */
 		if (ret >= 0)
 			ret = status;
 		if (ret < 0) {
@@ -2073,8 +2088,17 @@
 			continue;
 		ret = o2net_send_message(DLM_FINALIZE_RECO_MSG, dlm->key,
 					 &fr, sizeof(fr), nodenum, &status);
-		if (ret >= 0)
+		if (ret >= 0) {
 			ret = status;
+			if (dlm_is_host_down(ret)) {
+				/* this has no effect on this recovery 
+				 * session, so set the status to zero to 
+				 * finish out the last recovery */
+				mlog(ML_ERROR, "node %u went down after this "
+				     "node finished recovery.\n", nodenum);
+				ret = 0;
+			}
+		}
 		if (ret < 0) {
 			mlog_errno(ret);
 			break;



More information about the Ocfs2-commits mailing list