[Ocfs2-commits] zab commits r2024 - trunk/fs/ocfs2/cluster
svn-commits at oss.oracle.com
svn-commits at oss.oracle.com
Mon Mar 21 15:11:00 CST 2005
Author: zab
Signed-off-by: khackel
Date: 2005-03-21 15:10:58 -0600 (Mon, 21 Mar 2005)
New Revision: 2024
Modified:
trunk/fs/ocfs2/cluster/heartbeat.c
trunk/fs/ocfs2/cluster/heartbeat.h
Log:
o simplify the hb state transitions. This fixes the case where a node
comes live when stale hb data is seen and immediately ticks down to death.
Signed-off-by: khackel
Modified: trunk/fs/ocfs2/cluster/heartbeat.c
===================================================================
--- trunk/fs/ocfs2/cluster/heartbeat.c 2005-03-21 20:05:03 UTC (rev 2023)
+++ trunk/fs/ocfs2/cluster/heartbeat.c 2005-03-21 21:10:58 UTC (rev 2024)
@@ -107,16 +107,17 @@
u64 ds_block;
u8 ds_node_num;
unsigned long ds_last_time;
- u16 ds_margin;
- /* the single hb-thread only ever touches these items, no locking */
- struct list_head ds_dead_item;
- struct list_head ds_alive_item;
+ u16 ds_equal_samples;
+ u16 ds_changed_samples;
+ /* protected by the hr_slot_list_lock */
+ struct list_head ds_live_item; /* on alive_list when live */
};
/* each thread owns a region.. when we're asked to tear down the region
* we ask the thread to stop, who cleans up the region */
struct hb_region {
struct kobject hr_kobj;
+ /* protected by the hr_callback_sem */
struct list_head hr_active_item;
struct task_struct *hr_task;
u64 hr_block_bytes;
@@ -128,8 +129,7 @@
struct hb_disk_slot *hr_slots;
/* a single hb-thread writer and many fill_node readers are protected */
rwlock_t hr_slot_list_lock;
- struct list_head hr_alive_list;
- struct list_head hr_dead_list;
+ struct list_head hr_live_list;
};
static void hb_end_buffer_io_sync(struct buffer_head *bh, int uptodate)
@@ -292,45 +292,42 @@
wait_on_buffer(bh);
hb_block = (hb_disk_heartbeat_block *)bh->b_data;
+ /* we don't care if these wrap.. the state transitions below
+ * clear at the right places */
cputime = le64_to_cpu(hb_block->time);
- if (slot->ds_last_time != cputime) {
- /* the node is active */
- if (!list_empty(&slot->ds_dead_item))
- list_del_init(&slot->ds_dead_item);
+ if (slot->ds_last_time != cputime)
+ slot->ds_changed_samples++;
+ else
+ slot->ds_equal_samples++;
+ slot->ds_last_time = cputime;
- if (list_empty(&slot->ds_alive_item))
- list_add_tail(&slot->ds_alive_item, &newborn);
-
- slot->ds_last_time = cputime;
- slot->ds_margin = HB_DISK_MARGIN;
+ /* dead nodes only come to life after some number of
+ * changes at any time during their dead time */
+ if (list_empty(&slot->ds_live_item) &&
+ slot->ds_changed_samples >= HB_LIVE_THRESHOLD) {
+ list_add_tail(&slot->ds_live_item, &newborn);
+ slot->ds_equal_samples = 0;
continue;
}
- /* only tick down on idlw nodes that we think are alive.
- * this stops us from getting a sea of node down events for
- * nodes that have never been active. */
- if (!list_empty(&slot->ds_dead_item) ||
- list_empty(&slot->ds_alive_item))
- continue;
-
- /* decrease slot margin to zero as long as we don't
- * see any updates */
- if (slot->ds_margin) {
- if (--slot->ds_margin)
+ /* live nodes only go dead after enough consequtive missed
+ * samples.. reset the missed counter whenever we see
+ * activity */
+ if (!list_empty(&slot->ds_live_item)) {
+ if (slot->ds_equal_samples >= HB_DEAD_THRESHOLD) {
+ hbprintk("node %d JUST DIED!!!!\n", i);
+ list_move(&slot->ds_live_item, &deceased);
+ slot->ds_changed_samples = 0;
continue;
+ }
+ if (slot->ds_changed_samples) {
+ slot->ds_changed_samples = 0;
+ slot->ds_equal_samples = 0;
+ }
}
-
- /* ok, margin is 0, it's really dead */
- if (list_empty(&slot->ds_dead_item)) {
- hbprintk("node %d JUST DIED!!!!\n", i);
- list_add_tail(&slot->ds_dead_item, &deceased);
- }
}
- /* we're the only thing that modifies the lists, we don't have to lock
- * while we're just reading them. the write locks protect the
- * fill_node_map readers. */
- list_for_each_entry(slot, &newborn, ds_alive_item) {
+ list_for_each_entry(slot, &newborn, ds_live_item) {
node = nm_get_node_by_num(slot->ds_node_num);
if (node == NULL) {
hbprintk("saw hb for node %d but don't have a node\n",
@@ -340,7 +337,7 @@
hb_do_node_up(node, slot->ds_node_num);
nm_node_put(node);
}
- list_for_each_entry(slot, &deceased, ds_dead_item) {
+ list_for_each_entry(slot, &deceased, ds_live_item) {
node = nm_get_node_by_num(slot->ds_node_num);
if (node == NULL) {
hbprintk("node %d went down but don't have a node\n",
@@ -351,10 +348,11 @@
nm_node_put(node);
}
- write_lock(®->hr_slot_list_lock);
- list_splice_init(&newborn, ®->hr_alive_list);
- list_splice_init(&deceased, ®->hr_dead_list);
- write_unlock(®->hr_slot_list_lock);
+ if (!list_empty(&newborn)) {
+ write_lock(®->hr_slot_list_lock);
+ list_splice_init(&newborn, ®->hr_live_list);
+ write_unlock(®->hr_slot_list_lock);
+ }
}
/*
@@ -407,7 +405,7 @@
list_for_each_entry(reg, &hb_active_regions, hr_active_item) {
read_lock(®->hr_slot_list_lock);
- list_for_each_entry(slot, ®->hr_alive_list, ds_alive_item)
+ list_for_each_entry(slot, ®->hr_live_list, ds_live_item)
set_bit(slot->ds_node_num, map);
read_unlock(®->hr_slot_list_lock);
}
@@ -569,9 +567,7 @@
slot = ®->hr_slots[i];
slot->ds_block = reg->hr_start_block + i;
slot->ds_node_num = i;
- slot->ds_margin = HB_INITIAL_DISK_MARGIN;
- INIT_LIST_HEAD(&slot->ds_alive_item);
- INIT_LIST_HEAD(&slot->ds_dead_item);
+ INIT_LIST_HEAD(&slot->ds_live_item);
}
reg->hr_task = kthread_run(hb_thread, reg, "hb-%s",
@@ -704,8 +700,7 @@
INIT_LIST_HEAD(®->hr_active_item);
rwlock_init(®->hr_slot_list_lock);
- INIT_LIST_HEAD(®->hr_alive_list);
- INIT_LIST_HEAD(®->hr_dead_list);
+ INIT_LIST_HEAD(®->hr_live_list);
kobject_set_name(®->hr_kobj, name);
reg->hr_kobj.ktype = &hb_region_type.ktype;
Modified: trunk/fs/ocfs2/cluster/heartbeat.h
===================================================================
--- trunk/fs/ocfs2/cluster/heartbeat.h 2005-03-21 20:05:03 UTC (rev 2023)
+++ trunk/fs/ocfs2/cluster/heartbeat.h 2005-03-21 21:10:58 UTC (rev 2024)
@@ -48,9 +48,10 @@
int hc_type;
};
-// number of allowed misses in steady state
-#define HB_INITIAL_DISK_MARGIN 60
-#define HB_DISK_MARGIN 30
+/* number of changes to be seen as live */
+#define HB_LIVE_THRESHOLD 2
+/* number of missed changes to be seen as dead */
+#define HB_DEAD_THRESHOLD 30
struct kset *hb_alloc_hb_set(void);
void hb_free_hb_set(struct kset *kset);
More information about the Ocfs2-commits
mailing list