[Ocfs2-commits] mfasheh commits r2475 - trunk/fs/ocfs2/cluster
svn-commits at oss.oracle.com
svn-commits at oss.oracle.com
Mon Jul 25 17:54:06 CDT 2005
Author: mfasheh
Signed-off-by: zab
Date: 2005-07-25 17:54:05 -0500 (Mon, 25 Jul 2005)
New Revision: 2475
Modified:
trunk/fs/ocfs2/cluster/heartbeat.c
trunk/fs/ocfs2/cluster/heartbeat.h
trunk/fs/ocfs2/cluster/quorum.c
trunk/fs/ocfs2/cluster/quorum.h
Log:
* Teach heartbeat how to call into the quorum / fencing code when it times out
writing to a region.
Signed-off-by: zab
Modified: trunk/fs/ocfs2/cluster/heartbeat.c
===================================================================
--- trunk/fs/ocfs2/cluster/heartbeat.c 2005-07-25 18:46:51 UTC (rev 2474)
+++ trunk/fs/ocfs2/cluster/heartbeat.c 2005-07-25 22:54:05 UTC (rev 2475)
@@ -34,6 +34,7 @@
+#include <linux/jiffies.h>
#include "heartbeat.h"
#include "tcp.h"
#include "nodemanager.h"
+#include "quorum.h"
#include "masklog.h"
@@ -120,6 +121,8 @@
/* randomized as the region goes up and down so that a node
* recognizes a node going up and down in one iteration */
u64 hr_generation;
+
+ struct work_struct hr_write_timeout_work;
};
struct o2hb_bio_wait_ctxt {
@@ -127,6 +130,31 @@
struct completion wc_io_complete;
};
+/*
+ * Work handler run when a heartbeat region's write timeout fires.
+ * @arg is the struct o2hb_region whose timeout expired.  The device
+ * name and the timeout length are logged, then the timeout is handed
+ * to the quorum code via o2quo_disk_timeout().
+ */
+static void o2hb_write_timeout(void *arg)
+{
+	struct o2hb_region *region = arg;
+
+	mlog(ML_ERROR, "Heartbeat write timeout to device %s after %u "
+	     "milliseconds\n", region->hr_dev_name,
+	     O2HB_MAX_WRITE_TIMEOUT_MS);
+
+	o2quo_disk_timeout();
+}
+
+/*
+ * (Re)arm the write timeout for @reg: drop any timeout work that is
+ * still pending and queue it again so that it runs
+ * O2HB_MAX_WRITE_TIMEOUT_MS milliseconds from now.
+ */
+static void o2hb_arm_write_timeout(struct o2hb_region *reg)
+{
+	mlog(0, "Queue write timeout for %u ms\n", O2HB_MAX_WRITE_TIMEOUT_MS);
+
+	cancel_delayed_work(&reg->hr_write_timeout_work);
+	/* schedule_delayed_work() takes its delay in jiffies, not in
+	 * milliseconds, so convert; otherwise the real timeout would
+	 * depend on HZ. */
+	schedule_delayed_work(&reg->hr_write_timeout_work,
+			      msecs_to_jiffies(O2HB_MAX_WRITE_TIMEOUT_MS));
+}
+
+/*
+ * Disarm the write timeout for @reg: cancel the timeout work if it is
+ * still pending, then flush the shared workqueue so that a handler
+ * which was already queued to run has finished before we return.
+ */
+static void o2hb_disarm_write_timeout(struct o2hb_region *reg)
+{
+	cancel_delayed_work(&reg->hr_write_timeout_work);
+	flush_scheduled_work();
+}
+
static inline void o2hb_bio_wait_init(struct o2hb_bio_wait_ctxt *wc,
unsigned int num_ios)
{
@@ -774,6 +802,7 @@
*/
o2hb_wait_on_io(reg, &write_wc);
bio_put(write_bio);
+ o2hb_arm_write_timeout(reg);
/* let the person who launched us know when things are steady */
if (!change && (atomic_read(&reg->hr_steady_iterations) != 0)) {
@@ -804,6 +833,8 @@
msleep_interruptible(reg->hr_timeout_ms);
}
+ o2hb_disarm_write_timeout(reg);
+
/* unclean stop is only used in very bad situation */
for(i = 0; !reg->hr_unclean_stop && i < reg->hr_blocks; i++)
o2hb_shutdown_slot(&reg->hr_slots[i]);
@@ -1219,6 +1250,8 @@
goto out;
}
+ INIT_WORK(&reg->hr_write_timeout_work, o2hb_write_timeout, reg);
+
/*
* A node is considered live after it has beat LIVE_THRESHOLD
* times. We're not steady until we've given them a chance
Modified: trunk/fs/ocfs2/cluster/heartbeat.h
===================================================================
--- trunk/fs/ocfs2/cluster/heartbeat.h 2005-07-25 18:46:51 UTC (rev 2474)
+++ trunk/fs/ocfs2/cluster/heartbeat.h 2005-07-25 22:54:05 UTC (rev 2475)
@@ -36,6 +36,8 @@
/* number of equal samples to be seen as dead */
#define O2HB_DEAD_THRESHOLD 7
+/* longest a heartbeat write may be outstanding, in milliseconds: one
+ * region timeout fewer than O2HB_DEAD_THRESHOLD of them.
+ * NOTE(review): presumably chosen so we notice the stall before other
+ * nodes have seen enough equal samples to declare us dead -- confirm. */
+#define O2HB_MAX_WRITE_TIMEOUT_MS (O2HB_REGION_TIMEOUT_MS * (O2HB_DEAD_THRESHOLD - 1))
+
#define O2HB_CB_MAGIC 0x51d1e4ec
/* callback stuff */
Modified: trunk/fs/ocfs2/cluster/quorum.c
===================================================================
--- trunk/fs/ocfs2/cluster/quorum.c 2005-07-25 18:46:51 UTC (rev 2474)
+++ trunk/fs/ocfs2/cluster/quorum.c 2005-07-25 22:54:05 UTC (rev 2475)
@@ -64,6 +64,29 @@
unsigned long qs_hold_bm[BITS_TO_LONGS(O2NM_MAX_NODES)];
} o2quo_state;
+/* Fence this node by stopping all heartbeat regions and then
+ * panicking.
+ *
+ * This is horribly heavy-handed. It should instead flip the file
+ * system RO and call some userspace script. */
+static void o2quo_fence_self(void)
+{
+ /* panic() spins with interrupts enabled, and with preempt other
+ * threads can still schedule, etc, etc, so the heartbeat regions
+ * are stopped before panicking.
+ * NOTE(review): presumably this keeps the heartbeat threads from
+ * doing further I/O once we have panicked -- confirm against
+ * o2hb_stop_all_regions(). */
+ o2hb_stop_all_regions();
+ panic("ocfs2 is very sorry to be fencing this system by panicing\n");
+}
+
+/* Indicate that a timeout occurred on a heartbeat region write. The
+ * other nodes in the cluster may consider us dead at that time so we
+ * want to "fence" ourselves so that we don't scribble on the disk
+ * after they think they've recovered us. This can't solve all
+ * problems related to writeout after recovery but this hack can at
+ * least close some of those gaps. When we have real fencing, this can
+ * go away as our node would be fenced externally before other nodes
+ * begin recovery.
+ *
+ * Takes no arguments and unconditionally calls o2quo_fence_self(). */
+void o2quo_disk_timeout(void)
+{
+ o2quo_fence_self();
+}
+
static void o2quo_make_decision(void *arg)
{
int quorum;
@@ -122,15 +145,8 @@
out:
spin_unlock(&qs->qs_lock);
- /* this is horribly heavy-handed. It should instead flip the file
- * system RO and call some userspace script */
- if (fence) {
- /* panic spins with interrupts enabled. with preempt
- * threads can still schedule, etc, etc */
- o2hb_stop_all_regions();
- panic("ocfs2 is very sorry to be fencing this system by "
- "panicing\n");
- }
+ if (fence)
+ o2quo_fence_self();
}
static void o2quo_set_hold(struct o2quo_state *qs, u8 node)
Modified: trunk/fs/ocfs2/cluster/quorum.h
===================================================================
--- trunk/fs/ocfs2/cluster/quorum.h 2005-07-25 18:46:51 UTC (rev 2474)
+++ trunk/fs/ocfs2/cluster/quorum.h 2005-07-25 22:54:05 UTC (rev 2475)
@@ -31,5 +31,6 @@
void o2quo_hb_still_up(u8 node);
void o2quo_conn_up(u8 node);
void o2quo_conn_err(u8 node);
+void o2quo_disk_timeout(void);
#endif /* O2CLUSTER_QUORUM_H */
More information about the Ocfs2-commits
mailing list