[Ocfs2-commits] zab commits r2024 - trunk/fs/ocfs2/cluster

svn-commits at oss.oracle.com
Mon Mar 21 15:11:00 CST 2005


Author: zab
Date: 2005-03-21 15:10:58 -0600 (Mon, 21 Mar 2005)
New Revision: 2024

Modified:
   trunk/fs/ocfs2/cluster/heartbeat.c
   trunk/fs/ocfs2/cluster/heartbeat.h
Log:
o simplify the hb state transitions.  This fixes the case where a node
  comes live when stale hb data is seen and immediately ticks down to death.
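
The new scheme replaces the countdown margin with two per-slot counters:
samples where the on-disk timestamp changed and samples where it stayed
equal.  A slot only joins the live list after HB_LIVE_THRESHOLD changed
samples, and only leaves it after HB_DEAD_THRESHOLD consecutive equal
samples.  Below is a minimal standalone sketch of that state machine; the
counter names and thresholds come from the patch, while the slot_sketch
struct, the hb_sample() helper, and the test harness are invented for
illustration and are not the kernel code.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define HB_LIVE_THRESHOLD	2	/* changed samples before "live" */
#define HB_DEAD_THRESHOLD	30	/* equal samples before "dead" */

struct slot_sketch {
	uint64_t	last_time;	/* stamp seen on the last read */
	uint16_t	equal_samples;	/* consecutive unchanged reads */
	uint16_t	changed_samples;/* reads where the stamp moved */
	bool		live;		/* stands in for ds_live_item */
};

/* feed one heartbeat read; returns +1 on node-up, -1 on node-down */
static int hb_sample(struct slot_sketch *s, uint64_t cputime)
{
	if (s->last_time != cputime)
		s->changed_samples++;
	else
		s->equal_samples++;
	s->last_time = cputime;

	/* dead -> live only after enough distinct writes; one read of
	 * a stale block counts as a single change and is not enough */
	if (!s->live && s->changed_samples >= HB_LIVE_THRESHOLD) {
		s->live = true;
		s->equal_samples = 0;
		return 1;
	}

	if (s->live) {
		/* live -> dead after enough consecutive misses */
		if (s->equal_samples >= HB_DEAD_THRESHOLD) {
			s->live = false;
			s->changed_samples = 0;
			return -1;
		}
		/* any activity resets the missed counter */
		if (s->changed_samples) {
			s->changed_samples = 0;
			s->equal_samples = 0;
		}
	}
	return 0;
}

int main(void)
{
	struct slot_sketch s = {0};

	/* first read of a stale block: one change, still dead */
	hb_sample(&s, 42);
	printf("after stale read: live=%d\n", s.live);	/* live=0 */

	/* a second, different stamp crosses HB_LIVE_THRESHOLD */
	hb_sample(&s, 43);
	printf("after 2 changes:  live=%d\n", s.live);	/* live=1 */
	return 0;
}

Under the old scheme a single differing stamp was enough to put a slot on
the alive list, so a slot written once with stale data went alive and then
just ticked ds_margin down to zero and died, which is the bounce described
above.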

Signed-off-by: khackel


Modified: trunk/fs/ocfs2/cluster/heartbeat.c
===================================================================
--- trunk/fs/ocfs2/cluster/heartbeat.c	2005-03-21 20:05:03 UTC (rev 2023)
+++ trunk/fs/ocfs2/cluster/heartbeat.c	2005-03-21 21:10:58 UTC (rev 2024)
@@ -107,16 +107,17 @@
 	u64			ds_block;
 	u8			ds_node_num;
 	unsigned long		ds_last_time;
-	u16			ds_margin;
-	/* the single hb-thread only ever touches these items, no locking */
-	struct list_head	ds_dead_item;
-	struct list_head	ds_alive_item;
+	u16			ds_equal_samples;
+	u16			ds_changed_samples;
+	/* protected by the hr_slot_list_lock */ 
+	struct list_head	ds_live_item; /* on alive_list when live */
 };
 
 /* each thread owns a region.. when we're asked to tear down the region
  * we ask the thread to stop, who cleans up the region */
 struct hb_region {
 	struct kobject		hr_kobj;
+	/* protected by the hr_callback_sem */
 	struct list_head	hr_active_item;
 	struct task_struct 	*hr_task;
 	u64			hr_block_bytes;
@@ -128,8 +129,7 @@
 	struct hb_disk_slot	*hr_slots;
 	/* a single hb-thread writer and many fill_node readers are protected */
 	rwlock_t		hr_slot_list_lock;
-	struct list_head	hr_alive_list;
-	struct list_head	hr_dead_list;
+	struct list_head	hr_live_list;
 };
 
 static void hb_end_buffer_io_sync(struct buffer_head *bh, int uptodate)
@@ -292,45 +292,42 @@
 		wait_on_buffer(bh);
 		hb_block = (hb_disk_heartbeat_block *)bh->b_data;
 
+		/* we don't care if these wrap.. the state transitions below
+		 * clear the counters at the right places */
 		cputime = le64_to_cpu(hb_block->time);
-		if (slot->ds_last_time != cputime) {
-			/* the node is active */
-			if (!list_empty(&slot->ds_dead_item))
-				list_del_init(&slot->ds_dead_item);
+		if (slot->ds_last_time != cputime)
+			slot->ds_changed_samples++;
+		else
+			slot->ds_equal_samples++;
+		slot->ds_last_time = cputime;
 
-			if (list_empty(&slot->ds_alive_item))
-				list_add_tail(&slot->ds_alive_item, &newborn);
-
-			slot->ds_last_time = cputime;
-			slot->ds_margin = HB_DISK_MARGIN;
+		/* dead nodes only come to life after HB_LIVE_THRESHOLD changed
+		 * samples, seen at any time during their dead time */
+		if (list_empty(&slot->ds_live_item) &&
+		    slot->ds_changed_samples >= HB_LIVE_THRESHOLD) {
+			list_add_tail(&slot->ds_live_item, &newborn);
+			slot->ds_equal_samples = 0;
 			continue;
 		} 
 
-		/* only tick down on idle nodes that we think are alive.
-		 * this stops us from getting a sea of node down events for
-		 * nodes that have never been active. */
-		if (!list_empty(&slot->ds_dead_item) ||
-		    list_empty(&slot->ds_alive_item))
-			continue;
-
-		/* decrease slot margin to zero as long as we don't
-		 * see any updates */
-		if (slot->ds_margin) {
-			if (--slot->ds_margin)
+		/* live nodes only go dead after enough consecutive missed
+		 * samples..  reset the missed counter whenever we see
+		 * activity */
+		if (!list_empty(&slot->ds_live_item)) {
+			if (slot->ds_equal_samples >= HB_DEAD_THRESHOLD) {
+				hbprintk("node %d JUST DIED!!!!\n", i);
+				list_move(&slot->ds_live_item, &deceased);
+				slot->ds_changed_samples = 0;
 				continue;
+			}
+			if (slot->ds_changed_samples) {
+				slot->ds_changed_samples = 0;
+				slot->ds_equal_samples = 0;
+			}
 		}
-
-		/* ok, margin is 0, it's really dead */ 
-		if (list_empty(&slot->ds_dead_item)) {
-			hbprintk("node %d JUST DIED!!!!\n", i);
-			list_add_tail(&slot->ds_dead_item, &deceased);
-		}
 	}
 
-	/* we're the only thing that modifies the lists, we don't have to lock
-	 * while we're just reading them.  the write locks protect the
-	 * fill_node_map readers.  */
-	list_for_each_entry(slot, &newborn, ds_alive_item) {
+	list_for_each_entry(slot, &newborn, ds_live_item) {
 		node = nm_get_node_by_num(slot->ds_node_num);
 		if (node == NULL) {
 			hbprintk("saw hb for node %d but don't have a node\n",
@@ -340,7 +337,7 @@
 		hb_do_node_up(node, slot->ds_node_num);
 		nm_node_put(node);
 	}
-	list_for_each_entry(slot, &deceased, ds_dead_item) {
+	list_for_each_entry(slot, &deceased, ds_live_item) {
 		node = nm_get_node_by_num(slot->ds_node_num);
 		if (node == NULL) {
 			hbprintk("node %d went down but don't have a node\n",
@@ -351,10 +348,11 @@
 		nm_node_put(node);
 	}
 
-	write_lock(&reg->hr_slot_list_lock);
-	list_splice_init(&newborn, &reg->hr_alive_list);
-	list_splice_init(&deceased, &reg->hr_dead_list);
-	write_unlock(&reg->hr_slot_list_lock);
+	if (!list_empty(&newborn)) {
+		write_lock(&reg->hr_slot_list_lock);
+		list_splice_init(&newborn, &reg->hr_live_list);
+		write_unlock(&reg->hr_slot_list_lock);
+	}
 }
 
 /*
@@ -407,7 +405,7 @@
 
 	list_for_each_entry(reg, &hb_active_regions, hr_active_item) {
 		read_lock(&reg->hr_slot_list_lock);
-		list_for_each_entry(slot, &reg->hr_alive_list, ds_alive_item)
+		list_for_each_entry(slot, &reg->hr_live_list, ds_live_item)
 			set_bit(slot->ds_node_num, map);
 		read_unlock(&reg->hr_slot_list_lock);
 	}
@@ -569,9 +567,7 @@
 		slot = &reg->hr_slots[i];
 		slot->ds_block = reg->hr_start_block + i;
 		slot->ds_node_num = i;
-		slot->ds_margin = HB_INITIAL_DISK_MARGIN;
-		INIT_LIST_HEAD(&slot->ds_alive_item);
-		INIT_LIST_HEAD(&slot->ds_dead_item);
+		INIT_LIST_HEAD(&slot->ds_live_item);
 	}
 
 	reg->hr_task = kthread_run(hb_thread, reg, "hb-%s",
@@ -704,8 +700,7 @@
 
 	INIT_LIST_HEAD(&reg->hr_active_item);
 	rwlock_init(&reg->hr_slot_list_lock);
-	INIT_LIST_HEAD(&reg->hr_alive_list);
-	INIT_LIST_HEAD(&reg->hr_dead_list);
+	INIT_LIST_HEAD(&reg->hr_live_list);
 
 	kobject_set_name(&reg->hr_kobj, name);
 	reg->hr_kobj.ktype = &hb_region_type.ktype;

Modified: trunk/fs/ocfs2/cluster/heartbeat.h
===================================================================
--- trunk/fs/ocfs2/cluster/heartbeat.h	2005-03-21 20:05:03 UTC (rev 2023)
+++ trunk/fs/ocfs2/cluster/heartbeat.h	2005-03-21 21:10:58 UTC (rev 2024)
@@ -48,9 +48,10 @@
 	int			hc_type;
 };
 
-// number of allowed misses in steady state
-#define HB_INITIAL_DISK_MARGIN     60
-#define HB_DISK_MARGIN             30
+/* number of changed samples required before a node is seen as live */
+#define HB_LIVE_THRESHOLD	   2
+/* number of consecutive equal samples before a live node is seen as dead */
+#define HB_DEAD_THRESHOLD	   30
 
 struct kset *hb_alloc_hb_set(void);
 void hb_free_hb_set(struct kset *kset);
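
Note the asymmetry in the new thresholds: a dead node comes up after just
two changed samples once it really starts writing, while a live node must
miss thirty consecutive samples before it is declared dead.  That
presumably biases the detector against spurious node-down events, which
are more disruptive to the cluster than a slightly late node-up.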


