[Ocfs2-commits] khackel commits r2720 - branches/ocfs2-1.0/fs/ocfs2/dlm

svn-commits at oss.oracle.com svn-commits at oss.oracle.com
Wed Dec 7 02:00:32 CST 2005


Author: khackel
Signed-off-by: mfasheh
Date: 2005-12-07 02:00:30 -0600 (Wed, 07 Dec 2005)
New Revision: 2720

Modified:
   branches/ocfs2-1.0/fs/ocfs2/dlm/dlmdomain.c
   branches/ocfs2-1.0/fs/ocfs2/dlm/dlmmaster.c
   branches/ocfs2-1.0/fs/ocfs2/dlm/dlmrecovery.c
Log:
* Merge r2714:2718 from trunk
        - 2718: fixes bug 577 - system crash at dlm_reco_data_done_handler
        - 2717: grab a reference to each lockres to avoid null pointer deref
        - 2716: fix bug 572, hang on umount
        - 2715: fixes crash in debug mle printing code
        - 2714: fixes bug 621, do not send message to self to avoid -ELOOP

Signed-off-by: mfasheh



Modified: branches/ocfs2-1.0/fs/ocfs2/dlm/dlmdomain.c
===================================================================
--- branches/ocfs2-1.0/fs/ocfs2/dlm/dlmdomain.c	2005-12-07 07:53:57 UTC (rev 2719)
+++ branches/ocfs2-1.0/fs/ocfs2/dlm/dlmdomain.c	2005-12-07 08:00:30 UTC (rev 2720)
@@ -314,6 +314,8 @@
 		while (!list_empty(&dlm->resources[i])) {
 			res = list_entry(dlm->resources[i].next,
 				     struct dlm_lock_resource, list);
+			/* need reference when manually grabbing lockres */
+			dlm_lockres_get(res);
 			/* this should unhash the lockres
 			 * and exit with dlm->spinlock */
 			mlog(0, "purging res=%p\n", res);
@@ -327,9 +329,11 @@
 				spin_unlock(&dlm->spinlock);
 				dlm_kick_thread(dlm, res);
 				wait_event(dlm->ast_wq, !dlm_lockres_is_dirty(dlm, res));
+				dlm_lockres_put(res);
 				goto restart;
 			}
 			dlm_purge_lockres(dlm, res);
+			dlm_lockres_put(res);
 		}
 	}
 	spin_unlock(&dlm->spinlock);

Modified: branches/ocfs2-1.0/fs/ocfs2/dlm/dlmmaster.c
===================================================================
--- branches/ocfs2-1.0/fs/ocfs2/dlm/dlmmaster.c	2005-12-07 07:53:57 UTC (rev 2719)
+++ branches/ocfs2-1.0/fs/ocfs2/dlm/dlmmaster.c	2005-12-07 08:00:30 UTC (rev 2720)
@@ -64,12 +64,17 @@
 	struct kref *k;
 
 	k = &mle->mle_refs;
-	type = (mle->type == DLM_MLE_BLOCK ? "BLK" : "MAS");
+	if (mle->type == DLM_MLE_BLOCK)
+		type = "BLK";
+	else if (mle->type == DLM_MLE_MASTER)
+		type = "MAS";
+	else
+		type = "MIG";
 	refs = atomic_read(&k->refcount);
 	master = mle->master;
 	attached = (list_empty(&mle->hb_events) ? 'N' : 'Y');
 
-	if (mle->type == DLM_MLE_BLOCK) {
+	if (mle->type != DLM_MLE_MASTER) {
 		namelen = mle->u.name.len;
 		name = mle->u.name.name;
 	} else {
@@ -402,7 +407,7 @@
 	mle = container_of(kref, struct dlm_master_list_entry, mle_refs);
 	dlm = mle->dlm;
 
-	if (mle->type == DLM_MLE_BLOCK) {
+	if (mle->type != DLM_MLE_MASTER) {
 		mlog(0, "calling mle_release for %.*s, type %d\n",
 		     mle->u.name.len, mle->u.name.name, mle->type);
 	} else {
@@ -676,6 +681,7 @@
 		/* make sure this does not get freed below */
 		alloc_mle = NULL;
 		dlm_init_mle(mle, DLM_MLE_MASTER, dlm, res, NULL, 0);
+		set_bit(dlm->node_num, mle->maybe_map);
 		list_add(&mle->list, &dlm->master_list);
 	}
 
@@ -788,8 +794,16 @@
 
 	/* restart if we hit any errors */
 	if (map_changed) {
+		int b;
 		mlog(0, "node map changed, restarting\n");
 		ret = dlm_restart_lock_mastery(dlm, res, mle, blocked);
+		b = (mle->type == DLM_MLE_BLOCK);
+		if ((blocked && !b) || (!blocked && b)) {
+			mlog(0, "%s:%.*s: status change: old=%d new=%d\n", 
+			     dlm->name, res->lockname.len, res->lockname.name,
+			     blocked, b);
+			blocked = b;
+		}
 		spin_unlock(&mle->spinlock);
 		if (ret < 0) {
 			mlog_errno(ret);
@@ -1020,6 +1034,7 @@
 				memcpy(mle->vote_map, mle->node_map,
 				       sizeof(mle->node_map));
 				mle->u.res = res;
+				set_bit(dlm->node_num, mle->maybe_map);
 
 				ret = -EAGAIN;
 				goto next;
@@ -1064,7 +1079,7 @@
 
 	BUG_ON(mle->type == DLM_MLE_MIGRATION);
 
-	if (mle->type == DLM_MLE_BLOCK) {
+	if (mle->type != DLM_MLE_MASTER) {
 		request.namelen = mle->u.name.len;
 		memcpy(request.name, mle->u.name.name, request.namelen);
 	} else {

Modified: branches/ocfs2-1.0/fs/ocfs2/dlm/dlmrecovery.c
===================================================================
--- branches/ocfs2-1.0/fs/ocfs2/dlm/dlmrecovery.c	2005-12-07 07:53:57 UTC (rev 2719)
+++ branches/ocfs2-1.0/fs/ocfs2/dlm/dlmrecovery.c	2005-12-07 08:00:30 UTC (rev 2720)
@@ -411,8 +411,13 @@
 
 		status = dlm_request_all_locks(dlm, ndata->node_num, dead_node);
 		if (status < 0) {
-			destroy = 1;
-			goto leave;
+			mlog_errno(status);
+			if (dlm_is_host_down(status))
+				ndata->state = DLM_RECO_NODE_DATA_DEAD;
+			else {
+				destroy = 1;
+				goto leave;
+			}
 		}
 
 		switch (ndata->state) {
@@ -609,6 +614,8 @@
 	dlm_lock_request_to_net(&lr);
 	ret = o2net_send_message(DLM_LOCK_REQUEST_MSG, dlm->key,
 				 &lr, sizeof(lr), request_from, NULL);
+
+	/* negative status is handled by caller */
 	if (ret < 0)
 		mlog_errno(ret);
 
@@ -722,6 +729,7 @@
 
 	ret = o2net_send_message(DLM_RECO_DATA_DONE_MSG, dlm->key, &done_msg,
 				 sizeof(done_msg), send_to, &tmpret);
+	/* negative status is ignored by the caller */
 	if (ret >= 0)
 		ret = tmpret;
 	return ret;
@@ -864,6 +872,8 @@
 	ret = o2net_send_message(DLM_MIG_LOCKRES_MSG, dlm->key, mres,
 				 sz, send_to, &status);
 	if (ret < 0) {
+		/* XXX: negative status is not handled.
+		 * this will end up killing this node. */
 		mlog_errno(ret);
 	} else {
 		/* might get an -ENOMEM back here */
@@ -1253,6 +1263,9 @@
 	spin_unlock(&dlm->spinlock);
 
 	while ((nodenum = dlm_node_iter_next(&iter)) >= 0) {
+		/* do not send to self */
+		if (nodenum == dlm->node_num)
+			continue;
 		ret = dlm_do_master_requery(dlm, res, nodenum, real_master);
 		if (ret < 0) {
 			mlog_errno(ret);
@@ -1284,6 +1297,7 @@
 	dlm_master_requery_to_net(&req);
 	ret = o2net_send_message(DLM_MASTER_REQUERY_MSG, dlm->key,
 				 &req, sizeof(req), nodenum, &status);
+	/* XXX: negative status not handled properly here. */
 	if (ret < 0)
 		mlog_errno(ret);
 	else {
@@ -2013,6 +2027,7 @@
 			  nodenum);
 		ret = o2net_send_message(DLM_BEGIN_RECO_MSG, dlm->key,
 					 &br, sizeof(br), nodenum, &status);
+		/* negative status is handled ok by caller here */
 		if (ret >= 0)
 			ret = status;
 		if (ret < 0) {
@@ -2100,8 +2115,17 @@
 			continue;
 		ret = o2net_send_message(DLM_FINALIZE_RECO_MSG, dlm->key,
 					 &fr, sizeof(fr), nodenum, &status);
-		if (ret >= 0)
+		if (ret >= 0) {
 			ret = status;
+			if (dlm_is_host_down(ret)) {
+				/* this has no effect on this recovery 
+				 * session, so set the status to zero to 
+				 * finish out the last recovery */
+				mlog(ML_ERROR, "node %u went down after this "
+				     "node finished recovery.\n", nodenum);
+				ret = 0;
+			}
+		}
 		if (ret < 0) {
 			mlog_errno(ret);
 			break;



More information about the Ocfs2-commits mailing list