On Thu, May 24, 2012 at 10:53 PM,  <span dir="ltr">&lt;<a href="mailto:xiaowei.hu@oracle.com" target="_blank">xiaowei.hu@oracle.com</a>&gt;</span> wrote:<br><div class="gmail_quote"><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
<br>
diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c<br>
index 01ebfd0..62659e8 100644<br>
--- a/fs/ocfs2/dlm/dlmrecovery.c<br>
+++ b/fs/ocfs2/dlm/dlmrecovery.c<br>
@@ -555,6 +555,7 @@ static int dlm_remaster_locks(struct dlm_ctxt *dlm, u8 dead_node)<br>
        int all_nodes_done;<br>
        int destroy = 0;<br>
        int pass = 0;<br>
+       int dying = 0;<br>
<br>
        do {<br>
                /* we have become recovery master.  there is no escaping<br>
@@ -659,6 +660,7 @@ static int dlm_remaster_locks(struct dlm_ctxt *dlm, u8 dead_node)<br>
                list_for_each_entry(ndata, &amp;dlm-&gt;reco.node_data, list) {<br>
                        mlog(0, &quot;checking recovery state of node %u\n&quot;,<br>
                             ndata-&gt;node_num);<br>
+                       dying = 0;<br>
                        switch (ndata-&gt;state) {<br>
                                case DLM_RECO_NODE_DATA_INIT:<br>
                                case DLM_RECO_NODE_DATA_REQUESTING:<br>
@@ -679,6 +681,13 @@ static int dlm_remaster_locks(struct dlm_ctxt *dlm, u8 dead_node)<br>
                                             dlm-&gt;name, ndata-&gt;node_num,<br>
                                             ndata-&gt;state==DLM_RECO_NODE_DATA_RECEIVING ?<br>
                                             &quot;receiving&quot; : &quot;requested&quot;);<br>
+                                       spin_lock(&amp;dlm-&gt;spinlock);<br>
+                                       dying = !test_bit(ndata-&gt;node_num, dlm-&gt;live_nodes_map);<br>
+                                       spin_unlock(&amp;dlm-&gt;spinlock);<br>
+                                       if (dying) {<br>
+                                               ndata-&gt;state = DLM_RECO_NODE_DATA_DEAD;<br>
+                                               break;<br>
+                                       }<br></blockquote><div><br><br><br><br>I would suggest exploring adding this to the dlm hb down event handler. Checking the live map all<br>over the place is hacky. We already do it more than we should right now. Let&#39;s not add to the<br>
mess.<br><br><br><br> </div><blockquote class="gmail_quote" style="margin:0pt 0pt 0pt 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex">
                                        all_nodes_done = 0;<br>
                                        break;<br>
                                case DLM_RECO_NODE_DATA_DONE:<br>
<span class="HOEnZb"><font color="#888888">--<br>
1.7.7.6<br>
<br>
<br>
_______________________________________________<br>
Ocfs2-devel mailing list<br>
<a href="mailto:Ocfs2-devel@oss.oracle.com">Ocfs2-devel@oss.oracle.com</a><br>
<a href="http://oss.oracle.com/mailman/listinfo/ocfs2-devel" target="_blank">http://oss.oracle.com/mailman/listinfo/ocfs2-devel</a><br>
</font></span></blockquote></div><br>