<div dir="ltr">I see no need for a separate function. Just do....<br><div><br> } else if (res-&gt;owner == DLM_LOCK_RES_OWNER_UNKNOWN) {<br>    if (test_bit(node, res-&gt;refmap))<br>          dlm_lockres_clear_refmap_bit(dlm, res, node);<br>
}<br><br></div></div><div class="gmail_extra"><br><br><div class="gmail_quote">On Thu, Aug 1, 2013 at 5:05 AM, Xue jiufei <span dir="ltr">&lt;<a href="mailto:xuejiufei@huawei.com" target="_blank">xuejiufei@huawei.com</a>&gt;</span> wrote:<br>
<blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">Function dlm_do_local_recovery_cleanup() should force clean refmap if<br>
the owner of the lockres is UNKNOWN. Otherwise a node may hang when unmounting<br>
filesystems.<br>
Here&#39;s the situation:<br>
<br>
        Node1                                    Node2<br>
dlmlock()<br>
  -&gt; dlm_get_lock_resource()<br>
send DLM_MASTER_REQUEST_MSG to<br>
other nodes.<br>
<br>
                                       trying to master this lockres,<br>
                                       return MAYBE.<br>
<br>
selected as the master of lockresA,<br>
set mle-&gt;master to Node1,<br>
and do assert_master,<br>
send DLM_ASSERT_MASTER_MSG to Node2.<br>
                                       Node2 has an interest in lockresA<br>
                                       and return<br>
                                       DLM_ASSERT_RESPONSE_MASTERY_REF<br>
                                       then something happened and<br>
                                       Node2 crashed.<br>
<br>
receiving DLM_ASSERT_RESPONSE_MASTERY_REF,<br>
set Node2 into refmap, and keep sending<br>
DLM_ASSERT_MASTER_MSG to other nodes<br>
<br>
o2hb found node2 down, calling<br>
dlm_hb_node_down()<br>
--&gt; dlm_do_local_recovery_cleanup()<br>
the master of lockresA is still UNKNOWN,<br>
no need to call dlm_free_dead_locks().<br>
<br>
set the master of lockresA to Node1, but<br>
Node2 still remains in refmap.<br>
<br>
When Node1 unmounts, it finds that the refmap of lockresA is not empty<br>
and attempts to migrate it to Node2. But Node2 is already down,<br>
so the umount hangs, trying to migrate lockresA again and again.<br>
<br>
Signed-off-by: joyce &lt;<a href="mailto:xuejiufei@huawei.com">xuejiufei@huawei.com</a>&gt;<br>
---<br>
 fs/ocfs2/dlm/dlmrecovery.c |   18 +++++++++++++++++-<br>
 1 file changed, 17 insertions(+), 1 deletion(-)<br>
<br>
diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c<br>
index 773bd32..7b4413d 100644<br>
--- a/fs/ocfs2/dlm/dlmrecovery.c<br>
+++ b/fs/ocfs2/dlm/dlmrecovery.c<br>
@@ -2191,6 +2191,21 @@ static void dlm_revalidate_lvb(struct dlm_ctxt *dlm,<br>
        }<br>
 }<br>
<br>
+static void dlm_force_clean_refmap(struct dlm_ctxt *dlm,<br>
+               struct dlm_lock_resource *res, u16 dead_node)<br>
+{<br>
+       assert_spin_locked(&amp;dlm-&gt;spinlock);<br>
+       assert_spin_locked(&amp;res-&gt;spinlock);<br>
+<br>
+       if (test_bit(dead_node, res-&gt;refmap)) {<br>
+               mlog(0, &quot;%s:%.*s: dead node %u had a ref, but had &quot;<br>
+                               &quot;no locks and had not purged before dying\n&quot;,<br>
+                               dlm-&gt;name, res-&gt;lockname.len,<br>
+                               res-&gt;lockname.name, dead_node);<br>
+               dlm_lockres_clear_refmap_bit(dlm, res, dead_node);<br>
+       }<br>
+}<br>
+<br>
 static void dlm_free_dead_locks(struct dlm_ctxt *dlm,<br>
                                struct dlm_lock_resource *res, u8 dead_node)<br>
 {<br>
@@ -2328,7 +2343,8 @@ static void dlm_do_local_recovery_cleanup(struct dlm_ctxt *dlm, u8 dead_node)<br>
                        } else if (res-&gt;owner == dlm-&gt;node_num) {<br>
                                dlm_free_dead_locks(dlm, res, dead_node);<br>
                                __dlm_lockres_calc_usage(dlm, res);<br>
-                       }<br>
+                       } else if (res-&gt;owner == DLM_LOCK_RES_OWNER_UNKNOWN)<br>
+                               dlm_force_clean_refmap(dlm, res, dead_node);<br>
                        spin_unlock(&amp;res-&gt;spinlock);<br>
                }<br>
        }<br>
<span class="HOEnZb"><font color="#888888">--<br>
1.7.9.7<br>
<br>
<br>
_______________________________________________<br>
Ocfs2-devel mailing list<br>
<a href="mailto:Ocfs2-devel@oss.oracle.com">Ocfs2-devel@oss.oracle.com</a><br>
<a href="https://oss.oracle.com/mailman/listinfo/ocfs2-devel" target="_blank">https://oss.oracle.com/mailman/listinfo/ocfs2-devel</a><br>
</font></span></blockquote></div><br></div>