[Ocfs2-commits] smushran commits r2794 - branches/ocfs2-1.2-cert/patches
svn-commits@oss.oracle.com
svn-commits at oss.oracle.com
Mon Mar 27 18:55:08 CST 2006
Author: smushran
Signed-off-by: mfasheh
Date: 2006-03-27 18:55:07 -0600 (Mon, 27 Mar 2006)
New Revision: 2794
Added:
branches/ocfs2-1.2-cert/patches/ocfs2_heartbeat-better_I_O_error_handling.patch
Modified:
branches/ocfs2-1.2-cert/patches/series
Log:
ocfs2_heartbeat-better_I_O_error_handling.patch added
Signed-off-by: mfasheh
Added: branches/ocfs2-1.2-cert/patches/ocfs2_heartbeat-better_I_O_error_handling.patch
===================================================================
--- branches/ocfs2-1.2-cert/patches/ocfs2_heartbeat-better_I_O_error_handling.patch 2006-03-28 00:45:22 UTC (rev 2793)
+++ branches/ocfs2-1.2-cert/patches/ocfs2_heartbeat-better_I_O_error_handling.patch 2006-03-28 00:55:07 UTC (rev 2794)
@@ -0,0 +1,129 @@
+Propagate errors received in o2hb_bio_end_io() back to the heartbeat thread
+so it can skip re-arming the timer.
+
+Index: fs/ocfs2/cluster/heartbeat.c
+===================================================================
+--- fs/ocfs2/cluster/heartbeat.c (revision 2788)
++++ fs/ocfs2/cluster/heartbeat.c (working copy)
+@@ -165,6 +165,7 @@ static spinlock_t o2hb_blocker_lock = SP
+ struct o2hb_bio_wait_ctxt {
+ atomic_t wc_num_reqs;
+ struct completion wc_io_complete;
++ int wc_error;
+ };
+
+ static unsigned int o2hb_elapsed_msecs(struct timeval *start,
+@@ -293,6 +294,7 @@ static inline void o2hb_bio_wait_init(st
+ {
+ atomic_set(&wc->wc_num_reqs, num_ios);
+ init_completion(&wc->wc_io_complete);
++ wc->wc_error = 0;
+ }
+
+ /* Used in error paths too */
+@@ -325,8 +327,10 @@ static int o2hb_bio_end_io(struct bio *b
+ {
+ struct o2hb_bio_wait_ctxt *wc = bio->bi_private;
+
+- if (error)
++ if (error) {
+ mlog(ML_ERROR, "IO Error %d\n", error);
++ wc->wc_error = error;
++ }
+
+ if (bio->bi_size)
+ return 1;
+@@ -515,6 +519,8 @@ static int o2hb_read_slots(struct o2hb_r
+ bail_and_wait:
+ o2hb_mlog_blocking(reg, &start, "waiting for read completion");
+ o2hb_wait_on_io(reg, &wc);
++ if (wc.wc_error && !status)
++ status = wc.wc_error;
+ o2hb_mlog_blocking_done(reg, &start);
+
+ if (bios) {
+@@ -917,7 +923,7 @@ static int o2hb_highest_node(unsigned lo
+ return highest;
+ }
+
+-static void o2hb_do_disk_heartbeat(struct o2hb_region *reg)
++static int o2hb_do_disk_heartbeat(struct o2hb_region *reg)
+ {
+ int i, ret, highest_node, change = 0;
+ unsigned long configured_nodes[BITS_TO_LONGS(O2NM_MAX_NODES)];
+@@ -925,13 +931,17 @@ static void o2hb_do_disk_heartbeat(struc
+ struct o2hb_bio_wait_ctxt write_wc;
+ struct timeval start;
+
+- if (o2nm_configured_node_map(configured_nodes, sizeof(configured_nodes)))
+- return;
++ ret = o2nm_configured_node_map(configured_nodes,
++ sizeof(configured_nodes));
++ if (ret) {
++ mlog_errno(ret);
++ return ret;
++ }
+
+ highest_node = o2hb_highest_node(configured_nodes, O2NM_MAX_NODES);
+ if (highest_node >= O2NM_MAX_NODES) {
+ mlog(ML_NOTICE, "ocfs2_heartbeat: no configured nodes found!\n");
+- return;
++ return -EINVAL;
+ }
+
+ /* No sense in reading the slots of nodes that don't exist
+@@ -941,7 +951,7 @@ static void o2hb_do_disk_heartbeat(struc
+ ret = o2hb_read_slots(reg, highest_node + 1);
+ if (ret < 0) {
+ mlog_errno(ret);
+- return;
++ return ret;
+ }
+
+ /* With an up to date view of the slots, we can check that no
+@@ -959,7 +969,7 @@ static void o2hb_do_disk_heartbeat(struc
+ ret = o2hb_issue_node_write(reg, &write_bio, &write_wc);
+ if (ret < 0) {
+ mlog_errno(ret);
+- return;
++ return ret;
+ }
+
+ o2hb_mlog_blocking(reg, &start, "checking slots");
+@@ -979,6 +989,15 @@ static void o2hb_do_disk_heartbeat(struc
+ o2hb_wait_on_io(reg, &write_wc);
+ o2hb_mlog_blocking_done(reg, &start);
+ bio_put(write_bio);
++ if (write_wc.wc_error) {
++ /* Do not re-arm the write timeout on I/O error - we
++ * can't be sure that the new block ever made it to
++ * disk */
++ mlog(ML_ERROR, "Write error %d on device \"%s\"\n",
++ write_wc.wc_error, reg->hr_dev_name);
++ return write_wc.wc_error;
++ }
++
+ o2hb_arm_write_timeout(reg);
+
+ /* let the person who launched us know when things are steady */
+@@ -986,6 +1005,8 @@ static void o2hb_do_disk_heartbeat(struc
+ if (atomic_dec_and_test(&reg->hr_steady_iterations))
+ wake_up(&o2hb_steady_queue);
+ }
++
++ return 0;
+ }
+
+ /* Subtract b from a, storing the result in a. a *must* have a larger
+@@ -1045,7 +1066,10 @@ static int o2hb_thread(void *data)
+ * likely to time itself out. */
+ do_gettimeofday(&before_hb);
+
+- o2hb_do_disk_heartbeat(reg);
++ i = 0;
++ do {
++ ret = o2hb_do_disk_heartbeat(reg);
++ } while (ret && ++i < 2);
+
+ do_gettimeofday(&after_hb);
+ elapsed_msec = o2hb_elapsed_msecs(&before_hb, &after_hb);
Modified: branches/ocfs2-1.2-cert/patches/series
===================================================================
--- branches/ocfs2-1.2-cert/patches/series 2006-03-28 00:45:22 UTC (rev 2793)
+++ branches/ocfs2-1.2-cert/patches/series 2006-03-28 00:55:07 UTC (rev 2794)
@@ -17,3 +17,4 @@
hold-dirty-ref.patch
fix-recovery-spin.patch
fix-dlmlock_remote.patch
+ocfs2_heartbeat-better_I_O_error_handling.patch -p0
More information about the Ocfs2-commits
mailing list