[Ocfs2-commits] jlbec commits r2396 - in trunk/fs/ocfs2: cluster dlm

Wed Jun 15 18:25:05 CDT 2005

Author: jlbec
Signed-off-by: mfasheh
Date: 2005-06-15 18:25:03 -0500 (Wed, 15 Jun 2005)
New Revision: 2396

Modified:
   trunk/fs/ocfs2/cluster/heartbeat.c
   trunk/fs/ocfs2/dlm/dlmdomain.c
   trunk/fs/ocfs2/dlm/dlmdomain.h
   trunk/fs/ocfs2/dlm/dlmrecovery.c
Log:

o Make sure writes are on disk before checking steady_iterations
o Steady state is LIVE_THRESHOLD+1, so as to make sure we catch anyone
  coming up at the same time.
o Reject join requests from nodes we don't consider live.
o Set JOINED at the same time we clear joining node, so we can properly
  advertise our state in query_join_handler().
o Launch the recovery thread early, only doing work after JOINED.

Signed-off-by: mfasheh



Modified: trunk/fs/ocfs2/cluster/heartbeat.c
===================================================================

--- trunk/fs/ocfs2/cluster/heartbeat.c	2005-06-15 22:18:12 UTC (rev 2395)
+++ trunk/fs/ocfs2/cluster/heartbeat.c	2005-06-15 23:25:03 UTC (rev 2396)
@@ -675,15 +675,19 @@
 		change |= o2hb_check_slot(reg, &reg->hr_slots[i]);
 	}
 
+	/*
+	 * We have to be sure we've advertised ourselves on disk
+	 * before we can go to steady state.  This ensures that
+	 * people we find in our steady state have seen us.
+	 */
+	o2hb_wait_on_io(reg, &write_wc);
+	bio_put(write_bio);
+
 	/* let the person who launched us know when things are steady */
 	if (!change && (atomic_read(&reg->hr_steady_iterations) != 0)) {
 		if (atomic_dec_and_test(&reg->hr_steady_iterations))
 			wake_up(&o2hb_steady_queue);
 	}
-
-	/* Make sure the write hits disk before we return. */
-	o2hb_wait_on_io(reg, &write_wc);
-	bio_put(write_bio);
 }
 
 /*
@@ -1102,7 +1106,12 @@
 		goto out;
 	}
 
-	atomic_set(&reg->hr_steady_iterations, 2);
+	/*
+	 * A node is considered live after it has beat LIVE_THRESHOLD
+	 * times.  We're not steady until we've given them a chance
+	 * _after_ our first read.
+	 */
+	atomic_set(&reg->hr_steady_iterations, O2HB_LIVE_THRESHOLD + 1);
 
 	reg->hr_task = kthread_run(o2hb_thread, reg, "o2hb-%s",
 				   reg->hr_item.ci_name);

Modified: trunk/fs/ocfs2/dlm/dlmdomain.c
===================================================================
--- trunk/fs/ocfs2/dlm/dlmdomain.c	2005-06-15 22:18:12 UTC (rev 2395)
+++ trunk/fs/ocfs2/dlm/dlmdomain.c	2005-06-15 23:25:03 UTC (rev 2396)
@@ -482,6 +482,20 @@
 	spin_unlock(&dlm->spinlock);
 }
 
+int dlm_joined(dlm_ctxt *dlm)
+{
+	int ret = 0;
+
+	spin_lock(&dlm_domain_lock);
+
+	if (dlm->dlm_state == DLM_CTXT_JOINED)
+		ret = 1;
+
+	spin_unlock(&dlm_domain_lock);
+
+	return ret;
+}
+
 int dlm_shutting_down(dlm_ctxt *dlm)
 {
 	int ret = 0;
@@ -537,6 +551,7 @@
 	dlm_query_join_request *query;
 	enum dlm_query_join_response response;
 	dlm_ctxt *dlm = NULL;
+	unsigned long hb_node_map[BITS_TO_LONGS(O2NM_MAX_NODES)];
 
 	query = (dlm_query_join_request *) msg->buf;
 	dlm_query_join_request_to_host(query);
@@ -544,6 +559,20 @@
 	mlog(0, "node %u wants to join domain %s\n", query->node_idx,
 		  query->domain);
 
+	/* 
+	 * If heartbeat doesn't consider the node live, tell it
+	 * to back off and try again.  This gives heartbeat a chance
+	 * to catch up.
+	 */
+	o2hb_fill_node_map(hb_node_map, sizeof(hb_node_map));
+	if (!test_bit(query->node_idx, hb_node_map)) {
+		mlog(0, "node %u is not in our live map yet\n",
+		     query->node_idx);
+
+		response = JOIN_DISALLOW;
+		goto respond;
+	}
+
 	response = JOIN_OK_NO_MAP;
 
 	spin_lock(&dlm_domain_lock);
@@ -575,6 +604,7 @@
 	}
 	spin_unlock(&dlm_domain_lock);
 
+respond:
 	mlog(0, "We respond with %u\n", response);
 
 	return response;
@@ -906,6 +936,15 @@
 
 	dlm_send_join_asserts(dlm, ctxt->yes_resp_map);
 
+	/* Joined state *must* be set before the joining node
+	 * information is cleared, otherwise the query_join handler may
+	 * read no current joiner but a state of NEW and tell joining
+	 * nodes we're not in the domain. */
+	spin_lock(&dlm_domain_lock);
+	dlm->dlm_state = DLM_CTXT_JOINED;
+	dlm->num_joins++;
+	spin_unlock(&dlm_domain_lock);
+
 bail:
 	spin_lock(&dlm->spinlock);
 	__dlm_set_joining_node(dlm, DLM_LOCK_RES_OWNER_UNKNOWN);
@@ -1079,6 +1118,12 @@
 		goto bail;
 	}
 
+	status = dlm_launch_recovery_thread(dlm);
+	if (status < 0) {
+		mlog_errno(status);
+		goto bail;
+	}
+
 	do {
 		unsigned int backoff;
 		status = dlm_try_to_join_domain(dlm);
@@ -1111,17 +1156,6 @@
 		goto bail;
 	}
 
-	status = dlm_launch_recovery_thread(dlm);
-	if (status < 0) {
-		mlog_errno(status);
-		goto bail;
-	}
-
-	spin_lock(&dlm_domain_lock);
-	dlm->num_joins++;
-	dlm->dlm_state = DLM_CTXT_JOINED;
-	spin_unlock(&dlm_domain_lock);
-
 	status = 0;
 bail:
 	wake_up(&dlm_domain_events);
@@ -1129,6 +1163,7 @@
 	if (status) {
 		dlm_unregister_domain_handlers(dlm);
 		dlm_complete_thread(dlm);
+		dlm_complete_recovery_thread(dlm);
 	}
 
 	return status;
@@ -1239,7 +1274,7 @@
 	}
 
 	if (!o2hb_check_local_node_heartbeating()) {
-		mlog(ML_ERROR, "the local node has not ben configured, or is "
+		mlog(ML_ERROR, "the local node has not been configured, or is "
 		     "not heartbeating\n");
 		goto leave;
 	}

Modified: trunk/fs/ocfs2/dlm/dlmdomain.h
===================================================================
--- trunk/fs/ocfs2/dlm/dlmdomain.h	2005-06-15 22:18:12 UTC (rev 2395)
+++ trunk/fs/ocfs2/dlm/dlmdomain.h	2005-06-15 23:25:03 UTC (rev 2396)
@@ -29,6 +29,7 @@
 extern struct list_head dlm_domains;
 
 dlm_ctxt * __dlm_lookup_domain(const char *domain);
+int dlm_joined(dlm_ctxt *dlm);
 int dlm_shutting_down(dlm_ctxt *dlm);
 
 #endif

Modified: trunk/fs/ocfs2/dlm/dlmrecovery.c
===================================================================
--- trunk/fs/ocfs2/dlm/dlmrecovery.c	2005-06-15 22:18:12 UTC (rev 2395)
+++ trunk/fs/ocfs2/dlm/dlmrecovery.c	2005-06-15 23:25:03 UTC (rev 2396)
@@ -47,6 +47,7 @@
 
 #include "dlmapi.h"
 #include "dlmcommon.h"
+#include "dlmdomain.h"
 
 #define MLOG_MASK_PREFIX (ML_DLM|ML_DLM_RECOVERY)
 #include "cluster/masklog.h"
@@ -230,13 +231,15 @@
 	mlog(0, "dlm thread running for %s...\n", dlm->name);
 
 	while (!kthread_should_stop()) {
-		status = dlm_do_recovery(dlm);
-		if (status == -EAGAIN) {
-			/* do not sleep, recheck immediately. */
-			continue;
+		if (dlm_joined(dlm)) {
+			status = dlm_do_recovery(dlm);
+			if (status == -EAGAIN) {
+				/* do not sleep, recheck immediately. */
+				continue;
+			}
+			if (status < 0)
+				mlog_errno(status);
 		}
-		if (status < 0)
-			mlog_errno(status);
 
 		wait_event_interruptible_timeout(dlm->dlm_reco_thread_wq,
 						 kthread_should_stop(),