[Ocfs2-commits] mfasheh commits r2475 - trunk/fs/ocfs2/cluster

Mon Jul 25 17:54:06 CDT 2005

Author: mfasheh
Signed-off-by: zab
Date: 2005-07-25 17:54:05 -0500 (Mon, 25 Jul 2005)
New Revision: 2475

Modified:
   trunk/fs/ocfs2/cluster/heartbeat.c
   trunk/fs/ocfs2/cluster/heartbeat.h
   trunk/fs/ocfs2/cluster/quorum.c
   trunk/fs/ocfs2/cluster/quorum.h
Log:
* Teach heartbeat how to call into the quorum / fencing code when it times out
  writing to a region.

Signed-off-by: zab



Modified: trunk/fs/ocfs2/cluster/heartbeat.c
===================================================================

--- trunk/fs/ocfs2/cluster/heartbeat.c	2005-07-25 18:46:51 UTC (rev 2474)
+++ trunk/fs/ocfs2/cluster/heartbeat.c	2005-07-25 22:54:05 UTC (rev 2475)
@@ -34,6 +34,7 @@
 #include "heartbeat.h"
 #include "tcp.h"
 #include "nodemanager.h"
+#include "quorum.h"
 
 #include "masklog.h"
 
@@ -120,6 +121,8 @@
 	/* randomized as the region goes up and down so that a node
 	 * recognizes a node going up and down in one iteration */
 	u64			hr_generation;
+
+	struct work_struct	hr_write_timeout_work;
 };
 
 struct o2hb_bio_wait_ctxt {
@@ -127,6 +130,31 @@
 	struct completion wc_io_complete;
 };
 
+static void o2hb_write_timeout(void *arg)
+{
+	struct o2hb_region *reg = arg;
+
+	mlog(ML_ERROR, "Heartbeat write timeout to device %s after %u "
+	     "milliseconds\n", reg->hr_dev_name, O2HB_MAX_WRITE_TIMEOUT_MS);
+
+	o2quo_disk_timeout();
+}
+
+static void o2hb_arm_write_timeout(struct o2hb_region *reg)
+{
+	mlog(0, "Queue write timeout for %u ms\n", O2HB_MAX_WRITE_TIMEOUT_MS);
+	/* restart the timer; the workqueue delay is in jiffies, not ms */
+	cancel_delayed_work(&reg->hr_write_timeout_work);
+	schedule_delayed_work(&reg->hr_write_timeout_work,
+			      msecs_to_jiffies(O2HB_MAX_WRITE_TIMEOUT_MS));
+}
+
+static void o2hb_disarm_write_timeout(struct o2hb_region *reg)
+{
+	cancel_delayed_work(&reg->hr_write_timeout_work);
+	flush_scheduled_work();
+}
+
 static inline void o2hb_bio_wait_init(struct o2hb_bio_wait_ctxt *wc,
 				      unsigned int num_ios)
 {
@@ -774,6 +802,7 @@
 	 */
 	o2hb_wait_on_io(reg, &write_wc);
 	bio_put(write_bio);
+	o2hb_arm_write_timeout(reg);
 
 	/* let the person who launched us know when things are steady */
 	if (!change && (atomic_read(&reg->hr_steady_iterations) != 0)) {
@@ -804,6 +833,8 @@
 		msleep_interruptible(reg->hr_timeout_ms);
 	}
 
+	o2hb_disarm_write_timeout(reg);
+
 	/* unclean stop is only used in very bad situation */
 	for(i = 0; !reg->hr_unclean_stop && i < reg->hr_blocks; i++)
 		o2hb_shutdown_slot(&reg->hr_slots[i]);
@@ -1219,6 +1250,8 @@
 		goto out;
 	}
 
+	INIT_WORK(&reg->hr_write_timeout_work, o2hb_write_timeout, reg);
+
 	/*
 	 * A node is considered live after it has beat LIVE_THRESHOLD
 	 * times.  We're not steady until we've given them a chance

Modified: trunk/fs/ocfs2/cluster/heartbeat.h
===================================================================
--- trunk/fs/ocfs2/cluster/heartbeat.h	2005-07-25 18:46:51 UTC (rev 2474)
+++ trunk/fs/ocfs2/cluster/heartbeat.h	2005-07-25 22:54:05 UTC (rev 2475)
@@ -36,6 +36,8 @@
 /* number of equal samples to be seen as dead */
 #define O2HB_DEAD_THRESHOLD	   7
 
+#define O2HB_MAX_WRITE_TIMEOUT_MS (O2HB_REGION_TIMEOUT_MS * (O2HB_DEAD_THRESHOLD - 1))
+
 #define O2HB_CB_MAGIC		0x51d1e4ec
 
 /* callback stuff */

Modified: trunk/fs/ocfs2/cluster/quorum.c
===================================================================
--- trunk/fs/ocfs2/cluster/quorum.c	2005-07-25 18:46:51 UTC (rev 2474)
+++ trunk/fs/ocfs2/cluster/quorum.c	2005-07-25 22:54:05 UTC (rev 2475)
@@ -64,6 +64,29 @@
 	unsigned long		qs_hold_bm[BITS_TO_LONGS(O2NM_MAX_NODES)];
 } o2quo_state;
 
+/* this is horribly heavy-handed.  It should instead flip the file
+ * system RO and call some userspace script. */
+static void o2quo_fence_self(void)
+{
+	/* panic spins with interrupts enabled.  with preempt
+	 * threads can still schedule, etc, etc */
+	o2hb_stop_all_regions();
+	panic("ocfs2 is very sorry to be fencing this system by panicing\n");
+}
+
+/* Indicate that a timeout occurred on a heartbeat region write. The
+ * other nodes in the cluster may consider us dead at that time so we
+ * want to "fence" ourselves so that we don't scribble on the disk
+ * after they think they've recovered us. This can't solve all
+ * problems related to writeout after recovery but this hack can at
+ * least close some of those gaps. When we have real fencing, this can
+ * go away as our node would be fenced externally before other nodes
+ * begin recovery. */
+void o2quo_disk_timeout(void)
+{
+	o2quo_fence_self();
+}
+
 static void o2quo_make_decision(void *arg)
 {
 	int quorum;
@@ -122,15 +145,8 @@
 
 out:
 	spin_unlock(&qs->qs_lock);
-	/* this is horribly heavy-handed.  It should instead flip the file
-	 * system RO and call some userspace script */
-	if (fence) {
-		/* panic spins with interrupts enabled.  with preempt
-		 * threads can still schedule, etc, etc */
-		o2hb_stop_all_regions();
-		panic("ocfs2 is very sorry to be fencing this system by "
-		      "panicing\n");
-	}
+	if (fence)
+		o2quo_fence_self();
 }
 
 static void o2quo_set_hold(struct o2quo_state *qs, u8 node)

Modified: trunk/fs/ocfs2/cluster/quorum.h
===================================================================
--- trunk/fs/ocfs2/cluster/quorum.h	2005-07-25 18:46:51 UTC (rev 2474)
+++ trunk/fs/ocfs2/cluster/quorum.h	2005-07-25 22:54:05 UTC (rev 2475)
@@ -31,5 +31,6 @@
 void o2quo_hb_still_up(u8 node);
 void o2quo_conn_up(u8 node);
 void o2quo_conn_err(u8 node);
+void o2quo_disk_timeout(void);
 
 #endif /* O2CLUSTER_QUORUM_H */