<div dir="ltr">I see no need for a separate function. Just do....<br><div><br> } else if (res->owner == DLM_LOCK_RES_OWNER_UNKNOWN) {<br> if (test_bit(node, res->refmap))<br> dlm_lockres_clear_refmap_bit(dlm, res, node);<br>
}<br><br></div></div><div class="gmail_extra"><br><br><div class="gmail_quote">On Thu, Aug 1, 2013 at 5:05 AM, Xue jiufei <span dir="ltr"><<a href="mailto:xuejiufei@huawei.com" target="_blank">xuejiufei@huawei.com</a>></span> wrote:<br>
<blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">Function dlm_do_local_recovery_cleanup() should force clean refmap if<br>
the owner of lockres is UNKNOWN. Otherwise node may hang when umounting<br>
filesystems.<br>
Here's the situation:<br>
<br>
Node1 Node2<br>
dlmlock()<br>
-> dlm_get_lock_resource()<br>
send DLM_MASTER_REQUEST_MSG to<br>
other nodes.<br>
<br>
trying to master this lockres,<br>
return MAYBE.<br>
<br>
selected as the master of lockresA,<br>
set mle->master to Node1,<br>
and do assert_master,<br>
send DLM_ASSERT_MASTER_MSG to Node2.<br>
Node 2 has interest on lockresA<br>
and return<br>
DLM_ASSERT_RESPONSE_MASTERY_REF<br>
then something happened and<br>
Node2 crashed.<br>
<br>
receiving DLM_ASSERT_RESPONSE_MASTERY_REF,<br>
set Node2 into refmap, and keep sending<br>
DLM_ASSERT_MASTER_MSG to other nodes<br>
<br>
o2hb found node2 down, calling<br>
dlm_hb_node_down()<br>
--> dlm_do_local_recovery_cleanup()<br>
the master of lockresA is still UNKNOWN,<br>
no need to call dlm_free_dead_locks().<br>
<br>
set the master of lockresA to Node1, but<br>
Node2 still remains in refmap.<br>
<br>
when Node1 umounts, it finds that the refmap of lockresA is not empty<br>
and attempts to migrate it to Node2, but Node2 is already down,<br>
so umount hangs, trying to migrate lockresA again and again.<br>
<br>
Signed-off-by: joyce <<a href="mailto:xuejiufei@huawei.com">xuejiufei@huawei.com</a>><br>
---<br>
fs/ocfs2/dlm/dlmrecovery.c | 18 +++++++++++++++++-<br>
1 file changed, 17 insertions(+), 1 deletion(-)<br>
<br>
diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c<br>
index 773bd32..7b4413d 100644<br>
--- a/fs/ocfs2/dlm/dlmrecovery.c<br>
+++ b/fs/ocfs2/dlm/dlmrecovery.c<br>
@@ -2191,6 +2191,21 @@ static void dlm_revalidate_lvb(struct dlm_ctxt *dlm,<br>
}<br>
}<br>
<br>
+static void dlm_force_clean_refmap(struct dlm_ctxt *dlm,<br>
+ struct dlm_lock_resource *res, u8 dead_node)<br>
+{<br>
+ assert_spin_locked(&dlm->spinlock);<br>
+ assert_spin_locked(&res->spinlock);<br>
+<br>
+ if (test_bit(dead_node, res->refmap)) {<br>
+ mlog(0, "%s:%.*s: dead node %u had a ref, but had "<br>
+ "no locks and had not purged before dying\n",<br>
+ dlm->name, res->lockname.len,<br>
+ res-&gt;lockname.name, dead_node);<br>
+ dlm_lockres_clear_refmap_bit(dlm, res, dead_node);<br>
+ }<br>
+}<br>
+<br>
static void dlm_free_dead_locks(struct dlm_ctxt *dlm,<br>
struct dlm_lock_resource *res, u8 dead_node)<br>
{<br>
@@ -2328,7 +2343,8 @@ static void dlm_do_local_recovery_cleanup(struct dlm_ctxt *dlm, u8 dead_node)<br>
} else if (res->owner == dlm->node_num) {<br>
dlm_free_dead_locks(dlm, res, dead_node);<br>
__dlm_lockres_calc_usage(dlm, res);<br>
- }<br>
+ } else if (res->owner == DLM_LOCK_RES_OWNER_UNKNOWN)<br>
+ dlm_force_clean_refmap(dlm, res, dead_node);<br>
spin_unlock(&res->spinlock);<br>
}<br>
}<br>
<span class="HOEnZb"><font color="#888888">--<br>
1.7.9.7<br>
<br>
<br>
_______________________________________________<br>
Ocfs2-devel mailing list<br>
<a href="mailto:Ocfs2-devel@oss.oracle.com">Ocfs2-devel@oss.oracle.com</a><br>
<a href="https://oss.oracle.com/mailman/listinfo/ocfs2-devel" target="_blank">https://oss.oracle.com/mailman/listinfo/ocfs2-devel</a><br>
</font></span></blockquote></div><br></div>