[Ocfs2-commits] mfasheh commits r2125 - trunk/fs/ocfs2/cluster

svn-commits at oss.oracle.com svn-commits at oss.oracle.com
Thu Apr 7 20:39:10 CDT 2005


Author: mfasheh
Signed-off-by: jlbec
Date: 2005-04-07 20:39:08 -0500 (Thu, 07 Apr 2005)
New Revision: 2125

Modified:
   trunk/fs/ocfs2/cluster/heartbeat.c
Log:
* Rework the heartbeat code to allow for smaller request sizes.

Signed-off-by: jlbec



Modified: trunk/fs/ocfs2/cluster/heartbeat.c
===================================================================
--- trunk/fs/ocfs2/cluster/heartbeat.c	2005-04-08 01:29:59 UTC (rev 2124)
+++ trunk/fs/ocfs2/cluster/heartbeat.c	2005-04-08 01:39:08 UTC (rev 2125)
@@ -166,9 +166,35 @@
 }
 
 struct hb_bio_wait_ctxt {
+	atomic_t          wc_num_reqs;
 	struct completion wc_io_complete;
 };
 
+static inline void hb_bio_wait_init(struct hb_bio_wait_ctxt *wc,
+				    unsigned int num_ios)
+{
+	atomic_set(&wc->wc_num_reqs, num_ios);
+	init_completion(&wc->wc_io_complete);
+}
+
+/* Used in error paths too */
+static inline void hb_bio_wait_dec(struct hb_bio_wait_ctxt *wc,
+				   unsigned int num)
+{
+	if (atomic_sub_and_test(num, &wc->wc_num_reqs))
+		complete(&wc->wc_io_complete);
+}
+
+static void hb_wait_on_io(struct hb_region *reg,
+			  struct hb_bio_wait_ctxt *wc)
+{
+	struct address_space *mapping = reg->hr_bdev->bd_inode->i_mapping;
+
+	blk_run_address_space(mapping);
+
+	wait_for_completion(&wc->wc_io_complete);
+}
+
 static int hb_bio_end_io(struct bio *bio,
 			 unsigned int bytes_done,
 			 int error)
@@ -181,103 +207,209 @@
 	if (bio->bi_size)
 		return 1;
 
-	complete(&wc->wc_io_complete);
+	hb_bio_wait_dec(wc, 1);
 	return 0;
 }
 
-static int hb_setup_read_bio(struct hb_region *reg,
-			     struct bio **read_bio,
-			     struct hb_bio_wait_ctxt *bio_wait)
+/* Set up a bio to cover I/O against num_slots slots starting at
+ * start_slot. */
+static struct bio * hb_setup_one_bio(struct hb_region *reg,
+				     struct hb_bio_wait_ctxt *wc,
+				     unsigned int start_slot,
+				     unsigned int num_slots)
 {
-	int status, i, num_pages, len;
-	unsigned int vec_len;
+	int i, nr_vecs, len, first_page, last_page;
+	unsigned int vec_len, vec_start;
 	unsigned int bits = reg->hr_hb_block_bits;
 	unsigned int spp = reg->hr_hb_slots_per_page;
-	struct bio *bio = NULL;
+	struct bio *bio;
 	struct page *page;
 
-	num_pages = reg->hr_num_pages;
+	nr_vecs = (num_slots + spp - 1) / spp;
 
-	bio = bio_alloc(GFP_KERNEL, num_pages);
+	bio = bio_alloc(GFP_KERNEL, nr_vecs);
 	if (!bio) {
 		hbprintk("Could not alloc slots BIO!\n");
-		status = -ENOMEM;
+		bio = ERR_PTR(-ENOMEM);
 		goto bail;
 	}
 
 	/* Must put everything in 512 byte sectors for the bio... */
-	bio->bi_sector = reg->hr_start_block << (bits - 9);
+	bio->bi_sector = (reg->hr_start_block + start_slot) << (bits - 9);
 	bio->bi_bdev = reg->hr_bdev;
-	bio->bi_private = bio_wait;
+	bio->bi_private = wc;
 	bio->bi_end_io = hb_bio_end_io;
 
-	vec_len = PAGE_CACHE_SIZE;
-	for(i = 0; i < num_pages; i++) {
+	first_page = start_slot / spp;
+	last_page = first_page + nr_vecs;
+	vec_start = (start_slot << bits) % PAGE_CACHE_SIZE;
+	for(i = first_page; i < last_page; i++) {
 		page = reg->hr_slot_data[i];
 
+		vec_len = PAGE_CACHE_SIZE;
 		/* last page might be short */
-		if (i == (num_pages - 1))
-			vec_len = (reg->hr_blocks % spp) << bits;
+		if (((i + 1) * spp) > (start_slot + num_slots))
+			vec_len = ((num_slots + start_slot) % spp) << bits;
+		vec_len -=  vec_start;
 
-		len = bio_add_page(bio, page, vec_len, 0);
+#if 0
+		/* Enable this for tons of output */
+		hbprintk("page %d, vec_len = %u, vec_start = %u\n", i,
+			 vec_len, vec_start);
+#endif
+
+		len = bio_add_page(bio, page, vec_len, vec_start);
 		if (len != vec_len) {
 			bio_put(bio);
+			bio = ERR_PTR(-EIO);
 
 			hbprintk("Error adding page to bio i = %d, vec_len = "
-				 "%u, len = %d\n", i, vec_len, len);
-			status = -EIO;
+				 "%u, len = %d\n, start = %u", i, vec_len,
+				 len, vec_start);
 			goto bail;
 		}
+
+		vec_start = 0;
 	}
 
-	*read_bio = bio;
-	status = 0;
 bail:
-	return status;
+	return bio;
 }
 
-static int hb_setup_write_bio(struct hb_region *reg,
-			      struct bio **write_bio,
-			      struct hb_bio_wait_ctxt *bio_wait)
+/*
+ * Compute the maximum number of sectors the bdev can handle in one bio,
+ * as a power of two.
+ *
+ * Stolen from oracleasm, thanks Joel!
+ */
+static int compute_max_sectors(struct block_device *bdev)
 {
-	int i, len, status;
-	unsigned int bits = reg->hr_hb_block_bits;
-	u64 write_block;
-	unsigned int start;
-	struct bio *bio = NULL;
-	struct page *page;
+	int max_pages, max_sectors, pow_two_sectors;
 
-	bio = bio_alloc(GFP_KERNEL, 1);
-	if (!bio) {
-		hbprintk("Could not alloc node BIO!\n");
+	struct request_queue *q;
+
+	q = bdev_get_queue(bdev);
+	max_pages = q->max_sectors >> (PAGE_SHIFT - 9);
+	if (max_pages > BIO_MAX_PAGES)
+		max_pages = BIO_MAX_PAGES;
+	if (max_pages > q->max_phys_segments)
+		max_pages = q->max_phys_segments;
+	if (max_pages > q->max_hw_segments)
+		max_pages = q->max_hw_segments;
+	max_pages--; /* Handle I/Os that straddle a page */
+
+	max_sectors = max_pages << (PAGE_SHIFT - 9);
+
+	/* Why is fls() 1-based???? */
+	pow_two_sectors = 1 << (fls(max_sectors) - 1);
+
+	return pow_two_sectors;
+}
+
+static inline void hb_compute_request_limits(struct hb_region *reg,
+					     unsigned int num_slots,
+					     unsigned int *num_bios,
+					     unsigned int *slots_per_bio)
+{
+	unsigned int max_sectors, io_sectors;
+
+	max_sectors = compute_max_sectors(reg->hr_bdev);
+
+	io_sectors = num_slots << (reg->hr_hb_block_bits - 9);
+
+	*num_bios = (io_sectors + max_sectors - 1) / max_sectors;
+	*slots_per_bio = max_sectors >> (reg->hr_hb_block_bits - 9);
+
+#if 0
+	hbprintk("My io size is %u sectors for %u slots. This device can "
+		 "handle %u sectors of I/O\n", io_sectors, num_slots,
+		 max_sectors);
+	hbprintk("Will need %u bios holding %u slots each\n", *num_bios,
+		 *slots_per_bio);
+#endif
+}
+
+static int hb_read_slots(struct hb_region *reg,
+			 unsigned int max_slots)
+{
+	unsigned int num_bios, slots_per_bio, start_slot, num_slots;
+	int i, status;
+	struct hb_bio_wait_ctxt wc;
+	struct bio **bios;
+	struct bio *bio;
+
+	hb_compute_request_limits(reg, max_slots, &num_bios, &slots_per_bio);
+
+	bios = kcalloc(num_bios, sizeof(struct bio *), GFP_KERNEL);
+	if (!bios) {
 		status = -ENOMEM;
-		goto bail;
+		hbprintk("%d\n", status);
+		return status;
 	}
 
-	/* find the page, and then the offset within */
-	i = nm_this_node() / reg->hr_hb_slots_per_page;
-	page = reg->hr_slot_data[i];
-	start = (nm_this_node() % reg->hr_hb_slots_per_page) << bits;
+	hb_bio_wait_init(&wc, num_bios);
 
-	write_block = reg->hr_start_block + nm_this_node();
-	bio->bi_sector = write_block << (bits - 9);
-	bio->bi_bdev = reg->hr_bdev;
-	bio->bi_private = bio_wait;
-	bio->bi_end_io = hb_bio_end_io;
+	num_slots = slots_per_bio;
+	for(i = 0; i < num_bios; i++) {
+		start_slot = i * slots_per_bio;
 
-	len = bio_add_page(bio, page, reg->hr_hb_block_bytes, start);
-	if (len != reg->hr_hb_block_bytes) {
-		bio_put(bio);
+		/* adjust num_slots at last bio */
+		if (max_slots < (start_slot + num_slots))
+			num_slots = max_slots - start_slot;
 
-		hbprintk("Error adding my node bio len = %d\n", len);
-		status = -EIO;
+		bio = hb_setup_one_bio(reg, &wc, start_slot, num_slots);
+		if (IS_ERR(bio)) {
+			hb_bio_wait_dec(&wc, num_bios - i);
+
+			status = PTR_ERR(bio);
+			hbprintk("%d\n", status);
+			goto bail_and_wait;
+		}
+		bios[i] = bio;
+
+		bio_get(bio);
+		submit_bio(READ, bio);
+	}
+
+	status = 0;
+bail_and_wait:
+	hb_wait_on_io(reg, &wc);
+
+	if (bios) {
+		for(i = 0; i < num_bios; i++)
+			if (bios[i])
+				bio_put(bios[i]);
+		kfree(bios);
+	}
+
+	return status;
+}
+
+static int hb_issue_node_write(struct hb_region *reg,
+			       struct bio **write_bio,
+			       struct hb_bio_wait_ctxt *write_wc)
+{
+	int status;
+	unsigned int slot;
+	struct bio *bio;
+
+	hb_bio_wait_init(write_wc, 1);
+
+	slot = nm_this_node();
+
+	bio = hb_setup_one_bio(reg, write_wc, slot, 1);
+	if (IS_ERR(bio)) {
+		status = PTR_ERR(bio);
+		hbprintk("%d\n", status);
 		goto bail;
 	}
 
+	bio_get(bio);
+	submit_bio(WRITE, bio);
+
 	*write_bio = bio;
 	status = 0;
 bail:
-
 	return status;
 }
 
@@ -323,20 +455,10 @@
 	hb_block->time = cpu_to_le64(cputime);
 }
 
-static void hb_wait_on_io(struct hb_region *reg,
-			  struct hb_bio_wait_ctxt *wc)
-{
-	struct address_space *mapping = reg->hr_bdev->bd_inode->i_mapping;
-
-	blk_run_address_space(mapping);
-
-	wait_for_completion(&wc->wc_io_complete);
-}
-
 static void hb_do_disk_heartbeat(struct hb_region *reg)
 {
 	u64 cputime;
-	int i, ret, wrote = 0;
+	int i, ret;
 	struct nm_node *node;
 	struct hb_disk_slot *slot;
 	struct list_head *pos, *tmp;
@@ -347,33 +469,18 @@
 	LIST_HEAD(newborn);
 	LIST_HEAD(deceased);
 	unsigned long configured_nodes[BITS_TO_LONGS(NM_MAX_NODES)];
-	struct bio *read_bio = NULL;
-	struct bio *write_bio = NULL;
-	struct hb_bio_wait_ctxt read_wc;
+	struct bio *write_bio;
 	struct hb_bio_wait_ctxt write_wc;
 
 	if (nm_configured_node_map(configured_nodes, sizeof(configured_nodes)))
 		return;
 
-	init_completion(&read_wc.wc_io_complete);
-	ret = hb_setup_read_bio(reg, &read_bio, &read_wc);
+	ret = hb_read_slots(reg, NM_MAX_NODES - 1);
 	if (ret < 0) {
-		hbprintk("ret = %d setting up read io\n", ret);
-		goto bail;
+		hbprintk("ret = %d reading slots\n", ret);
+		return;
 	}
 
-	init_completion(&write_wc.wc_io_complete);
-	ret = hb_setup_write_bio(reg, &write_bio, &write_wc);
-	if (ret < 0) {
-		hbprintk("ret = %d setting up read io\n", ret);
-		goto bail;
-	}
-
-	/* We 1st do a sync read of all slots */
-	bio_get(read_bio);
-	submit_bio(READ, read_bio);
-	hb_wait_on_io(reg, &read_wc);
-
 	/* With an up to date view of the slots, we can check that no
 	 * other node has been improperly configured to heartbeat in
 	 * our slot. */
@@ -386,9 +493,11 @@
 
 	/* And fire off the write. Note that we don't wait on this I/O
 	 * until later. */
-	bio_get(write_bio);
-	submit_bio(WRITE, write_bio);
-	wrote = 1;
+	ret = hb_issue_node_write(reg, &write_bio, &write_wc);
+	if (ret < 0) {
+		hbprintk("ret = %d setting up read io\n", ret);
+		return;
+	}
 
 	i = -1;
 	while((i = find_next_bit(configured_nodes, NM_MAX_NODES, i + 1)) < NM_MAX_NODES) {
@@ -471,14 +580,9 @@
 		nm_node_put(node);
 	}
 
-bail:
-	if (wrote) /* Make sure the write hits disk before we return. */
-		hb_wait_on_io(reg, &write_wc);
-
-	if (read_bio)
-		bio_put(read_bio);
-	if (write_bio)
-		bio_put(write_bio);
+	/* Make sure the write hits disk before we return. */
+	hb_wait_on_io(reg, &write_wc);
+	bio_put(write_bio);
 }
 
 /*



More information about the Ocfs2-commits mailing list