[Ocfs2-commits] khackel commits r2720 - branches/ocfs2-1.0/fs/ocfs2/dlm
svn-commits at oss.oracle.com
Wed Dec 7 02:00:32 CST 2005
Author: khackel
Date: 2005-12-07 02:00:30 -0600 (Wed, 07 Dec 2005)
New Revision: 2720
Modified:
branches/ocfs2-1.0/fs/ocfs2/dlm/dlmdomain.c
branches/ocfs2-1.0/fs/ocfs2/dlm/dlmmaster.c
branches/ocfs2-1.0/fs/ocfs2/dlm/dlmrecovery.c
Log:
* Merge r2714:2718 from trunk
- 2718: fixes Bug 577 - System crash at dlm_reco_data_done_handler
- 2717: grab a reference to each lockres to avoid null pointer deref
- 2716: fix bug 572, hang on umount
- 2715: fixes crash in debug mle printing code
- 2714: fixes bug 621, do not send message to self to avoid -ELOOP
Signed-off-by: mfasheh
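
For anyone skimming the r2716/r2717 refcount fixes below, here is a minimal
stand-alone sketch of the pattern they apply: take your own reference on the
lockres before dropping the spinlock and waiting, and put it afterwards so the
object cannot be freed underneath you. struct resource_t, res_get() and
res_put() are hypothetical stand-ins for struct dlm_lock_resource and its
kref; this is not code from the commit.

/* illustrative user-space sketch, not part of the patch */
#include <stdio.h>
#include <stdlib.h>

struct resource_t {
        int refcount;           /* stands in for the kref on the lockres */
        int dirty;
};

static void res_get(struct resource_t *r) { r->refcount++; }

static void res_put(struct resource_t *r)
{
        if (--r->refcount == 0)
                free(r);        /* last reference frees the object */
}

int main(void)
{
        struct resource_t *res = calloc(1, sizeof(*res));

        res->refcount = 1;      /* reference held by the hash/list */
        res->dirty = 1;

        /* grab our own reference before "dropping the spinlock" and
         * waiting; otherwise a concurrent purge could free res and the
         * later dereference would be a use-after-free */
        res_get(res);

        /* ... lock dropped, wait for the dirty state to clear ... */
        res->dirty = 0;
        printf("dirty=%d refcount=%d\n", res->dirty, res->refcount);

        res_put(res);           /* drop our temporary reference */
        res_put(res);           /* drop the list's reference; frees res */
        return 0;
}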
Modified: branches/ocfs2-1.0/fs/ocfs2/dlm/dlmdomain.c
===================================================================
--- branches/ocfs2-1.0/fs/ocfs2/dlm/dlmdomain.c 2005-12-07 07:53:57 UTC (rev 2719)
+++ branches/ocfs2-1.0/fs/ocfs2/dlm/dlmdomain.c 2005-12-07 08:00:30 UTC (rev 2720)
@@ -314,6 +314,8 @@
while (!list_empty(&dlm->resources[i])) {
res = list_entry(dlm->resources[i].next,
struct dlm_lock_resource, list);
+ /* need reference when manually grabbing lockres */
+ dlm_lockres_get(res);
/* this should unhash the lockres
* and exit with dlm->spinlock */
mlog(0, "purging res=%p\n", res);
@@ -327,9 +329,11 @@
spin_unlock(&dlm->spinlock);
dlm_kick_thread(dlm, res);
wait_event(dlm->ast_wq, !dlm_lockres_is_dirty(dlm, res));
+ dlm_lockres_put(res);
goto restart;
}
dlm_purge_lockres(dlm, res);
+ dlm_lockres_put(res);
}
}
spin_unlock(&dlm->spinlock);
Modified: branches/ocfs2-1.0/fs/ocfs2/dlm/dlmmaster.c
===================================================================
--- branches/ocfs2-1.0/fs/ocfs2/dlm/dlmmaster.c 2005-12-07 07:53:57 UTC (rev 2719)
+++ branches/ocfs2-1.0/fs/ocfs2/dlm/dlmmaster.c 2005-12-07 08:00:30 UTC (rev 2720)
@@ -64,12 +64,17 @@
struct kref *k;
k = &mle->mle_refs;
- type = (mle->type == DLM_MLE_BLOCK ? "BLK" : "MAS");
+ if (mle->type == DLM_MLE_BLOCK)
+ type = "BLK";
+ else if (mle->type == DLM_MLE_MASTER)
+ type = "MAS";
+ else
+ type = "MIG";
refs = atomic_read(&k->refcount);
master = mle->master;
attached = (list_empty(&mle->hb_events) ? 'N' : 'Y');
- if (mle->type == DLM_MLE_BLOCK) {
+ if (mle->type != DLM_MLE_MASTER) {
namelen = mle->u.name.len;
name = mle->u.name.name;
} else {
@@ -402,7 +407,7 @@
mle = container_of(kref, struct dlm_master_list_entry, mle_refs);
dlm = mle->dlm;
- if (mle->type == DLM_MLE_BLOCK) {
+ if (mle->type != DLM_MLE_MASTER) {
mlog(0, "calling mle_release for %.*s, type %d\n",
mle->u.name.len, mle->u.name.name, mle->type);
} else {
@@ -676,6 +681,7 @@
/* make sure this does not get freed below */
alloc_mle = NULL;
dlm_init_mle(mle, DLM_MLE_MASTER, dlm, res, NULL, 0);
+ set_bit(dlm->node_num, mle->maybe_map);
list_add(&mle->list, &dlm->master_list);
}
@@ -788,8 +794,16 @@
/* restart if we hit any errors */
if (map_changed) {
+ int b;
mlog(0, "node map changed, restarting\n");
ret = dlm_restart_lock_mastery(dlm, res, mle, blocked);
+ b = (mle->type == DLM_MLE_BLOCK);
+ if ((blocked && !b) || (!blocked && b)) {
+ mlog(0, "%s:%.*s: status change: old=%d new=%d\n",
+ dlm->name, res->lockname.len, res->lockname.name,
+ blocked, b);
+ blocked = b;
+ }
spin_unlock(&mle->spinlock);
if (ret < 0) {
mlog_errno(ret);
@@ -1020,6 +1034,7 @@
memcpy(mle->vote_map, mle->node_map,
sizeof(mle->node_map));
mle->u.res = res;
+ set_bit(dlm->node_num, mle->maybe_map);
ret = -EAGAIN;
goto next;
@@ -1064,7 +1079,7 @@
BUG_ON(mle->type == DLM_MLE_MIGRATION);
- if (mle->type == DLM_MLE_BLOCK) {
+ if (mle->type != DLM_MLE_MASTER) {
request.namelen = mle->u.name.len;
memcpy(request.name, mle->u.name.name, request.namelen);
} else {
Modified: branches/ocfs2-1.0/fs/ocfs2/dlm/dlmrecovery.c
===================================================================
--- branches/ocfs2-1.0/fs/ocfs2/dlm/dlmrecovery.c 2005-12-07 07:53:57 UTC (rev 2719)
+++ branches/ocfs2-1.0/fs/ocfs2/dlm/dlmrecovery.c 2005-12-07 08:00:30 UTC (rev 2720)
@@ -411,8 +411,13 @@
status = dlm_request_all_locks(dlm, ndata->node_num, dead_node);
if (status < 0) {
- destroy = 1;
- goto leave;
+ mlog_errno(status);
+ if (dlm_is_host_down(status))
+ ndata->state = DLM_RECO_NODE_DATA_DEAD;
+ else {
+ destroy = 1;
+ goto leave;
+ }
}
switch (ndata->state) {
@@ -609,6 +614,8 @@
dlm_lock_request_to_net(&lr);
ret = o2net_send_message(DLM_LOCK_REQUEST_MSG, dlm->key,
&lr, sizeof(lr), request_from, NULL);
+
+ /* negative status is handled by caller */
if (ret < 0)
mlog_errno(ret);
@@ -722,6 +729,7 @@
ret = o2net_send_message(DLM_RECO_DATA_DONE_MSG, dlm->key, &done_msg,
sizeof(done_msg), send_to, &tmpret);
+ /* negative status is ignored by the caller */
if (ret >= 0)
ret = tmpret;
return ret;
@@ -864,6 +872,8 @@
ret = o2net_send_message(DLM_MIG_LOCKRES_MSG, dlm->key, mres,
sz, send_to, &status);
if (ret < 0) {
+ /* XXX: negative status is not handled.
+ * this will end up killing this node. */
mlog_errno(ret);
} else {
/* might get an -ENOMEM back here */
@@ -1253,6 +1263,9 @@
spin_unlock(&dlm->spinlock);
while ((nodenum = dlm_node_iter_next(&iter)) >= 0) {
+ /* do not send to self */
+ if (nodenum == dlm->node_num)
+ continue;
ret = dlm_do_master_requery(dlm, res, nodenum, real_master);
if (ret < 0) {
mlog_errno(ret);
@@ -1284,6 +1297,7 @@
dlm_master_requery_to_net(&req);
ret = o2net_send_message(DLM_MASTER_REQUERY_MSG, dlm->key,
&req, sizeof(req), nodenum, &status);
+ /* XXX: negative status not handled properly here. */
if (ret < 0)
mlog_errno(ret);
else {
@@ -2013,6 +2027,7 @@
nodenum);
ret = o2net_send_message(DLM_BEGIN_RECO_MSG, dlm->key,
&br, sizeof(br), nodenum, &status);
+ /* negative status is handled ok by caller here */
if (ret >= 0)
ret = status;
if (ret < 0) {
@@ -2100,8 +2115,17 @@
continue;
ret = o2net_send_message(DLM_FINALIZE_RECO_MSG, dlm->key,
&fr, sizeof(fr), nodenum, &status);
- if (ret >= 0)
+ if (ret >= 0) {
ret = status;
+ if (dlm_is_host_down(ret)) {
+ /* this has no effect on this recovery
+ * session, so set the status to zero to
+ * finish out the last recovery */
+ mlog(ML_ERROR, "node %u went down after this "
+ "node finished recovery.\n", nodenum);
+ ret = 0;
+ }
+ }
if (ret < 0) {
mlog_errno(ret);
break;
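
The last hunk above tolerates a peer that dies after this node has finished
recovery: a dlm_is_host_down() status on DLM_FINALIZE_RECO_MSG is logged and
squashed to zero instead of aborting the loop. A minimal stand-alone sketch
of that idea follows; is_host_down() and send_finalize() are hypothetical
stand-ins for dlm_is_host_down() and o2net_send_message(), and -ENOTCONN is
just one example of a dead-peer error.

/* illustrative user-space sketch, not part of the patch */
#include <errno.h>
#include <stdio.h>

static int is_host_down(int err)
{
        return err == -ENOTCONN;        /* one of several dead-peer errors */
}

static int send_finalize(int node)
{
        return (node == 3) ? -ENOTCONN : 0;     /* pretend node 3 just died */
}

int main(void)
{
        for (int node = 0; node < 4; node++) {
                int ret = send_finalize(node);

                if (ret < 0 && is_host_down(ret)) {
                        /* the dead node no longer matters to this recovery
                         * session, so finish out the loop */
                        fprintf(stderr, "node %d went down after recovery\n",
                                node);
                        ret = 0;
                }
                if (ret < 0)
                        return 1;       /* any other error is fatal */
        }
        return 0;
}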