[Ocfs2-commits] zab commits r2457 - trunk/fs/ocfs2/cluster

svn-commits at oss.oracle.com svn-commits at oss.oracle.com
Fri Jul 1 20:21:22 CDT 2005


Author: zab
Signed-off-by: mfasheh
Date: 2005-07-01 20:21:20 -0500 (Fri, 01 Jul 2005)
New Revision: 2457

Modified:
   trunk/fs/ocfs2/cluster/heartbeat.c
   trunk/fs/ocfs2/cluster/quorum.c
Log:
Fix the mount failure that occurs immediately after an unmount, as described in bug 510.

o o2quo_conn_err() can be called before the connection is ever brought up.
  Replace a bogus assertion with a check that the node's connected bit is
  actually set before decrementing the count and clearing the bit.
o Rapidly restarting heartbeat was overwriting an explicit down, leaving other
  nodes thinking that the node had stayed up.  Randomize the generation on each
  region start so that other nodes see the generation change in this case.

Signed-off-by: mfasheh


Modified: trunk/fs/ocfs2/cluster/heartbeat.c
===================================================================
--- trunk/fs/ocfs2/cluster/heartbeat.c	2005-07-02 00:18:30 UTC (rev 2456)
+++ trunk/fs/ocfs2/cluster/heartbeat.c	2005-07-02 01:21:20 UTC (rev 2457)
@@ -55,7 +55,6 @@
 static unsigned long o2hb_live_node_bitmap[BITS_TO_LONGS(O2NM_MAX_NODES)];
 static LIST_HEAD(o2hb_node_events);
 static DECLARE_WAIT_QUEUE_HEAD(o2hb_steady_queue);
-static u64 o2hb_generation;
 
 static LIST_HEAD(o2hb_all_regions);
 
@@ -117,6 +116,10 @@
 
 	unsigned int		hr_timeout_ms;
 	unsigned int		hr_dead_iter;
+
+	/* randomized as the region goes up and down so that a node
+	 * recognizes a node going up and down in one iteration */
+	u64			hr_generation;
 };
 
 struct o2hb_bio_wait_ctxt {
@@ -748,7 +751,7 @@
 		     "in our slot!\n", reg->hr_dev_name);
 
 	/* fill in the proper info for our next heartbeat */
-	o2hb_prepare_block(reg, o2hb_generation);
+	o2hb_prepare_block(reg, reg->hr_generation);
 
 	/* And fire off the write. Note that we don't wait on this I/O
 	 * until later. */
@@ -838,11 +841,6 @@
 	INIT_LIST_HEAD(&o2hb_node_events);
 
 	memset(o2hb_live_node_bitmap, 0, sizeof(o2hb_live_node_bitmap));
-
-	o2hb_generation = 0;
-	/* Generation of zero is invalid */
-	while (!o2hb_generation)
-		get_random_bytes(&o2hb_generation, sizeof(o2hb_generation));
 }
 
 /* if we're already in a callback then we're already serialized by the sem */
@@ -1203,6 +1201,12 @@
 
 	o2hb_init_region_params(reg);
 
+	/* Generation of zero is invalid */
+	do {
+		get_random_bytes(&reg->hr_generation,
+				 sizeof(reg->hr_generation));
+	} while (reg->hr_generation == 0);
+
 	ret = o2hb_map_slot_data(reg);
 	if (ret) {
 		mlog_errno(ret);

Modified: trunk/fs/ocfs2/cluster/quorum.c
===================================================================
--- trunk/fs/ocfs2/cluster/quorum.c	2005-07-02 00:18:30 UTC (rev 2456)
+++ trunk/fs/ocfs2/cluster/quorum.c	2005-07-02 01:21:20 UTC (rev 2457)
@@ -266,13 +266,15 @@
 
 	spin_lock(&qs->qs_lock);
 
-	qs->qs_connected--;
-	mlog_bug_on_msg(qs->qs_connected < 0,
-			"node %u, connected %d\n",
-			node, qs->qs_connected);
-	mlog_bug_on_msg(!test_bit(node, qs->qs_conn_bm), "node %u\n", node);
-	clear_bit(node, qs->qs_conn_bm);
+	if (test_bit(node, qs->qs_conn_bm)) {
+		qs->qs_connected--;
+		mlog_bug_on_msg(qs->qs_connected < 0,
+				"node %u, connected %d\n",
+				node, qs->qs_connected);
 
+		clear_bit(node, qs->qs_conn_bm);
+	}
+
 	mlog(0, "node %u, %d total\n", node, qs->qs_connected);
 
 	if (test_bit(node, qs->qs_hb_bm))



More information about the Ocfs2-commits mailing list