[Ocfs2-commits] mfasheh commits r2125 - trunk/fs/ocfs2/cluster
svn-commits at oss.oracle.com
svn-commits at oss.oracle.com
Thu Apr 7 20:39:10 CDT 2005
Author: mfasheh
Signed-off-by: jlbec
Date: 2005-04-07 20:39:08 -0500 (Thu, 07 Apr 2005)
New Revision: 2125
Modified:
trunk/fs/ocfs2/cluster/heartbeat.c
Log:
* Rework the heartbeat code to allow for smaller request sizes.
Signed-off-by: jlbec
Modified: trunk/fs/ocfs2/cluster/heartbeat.c
===================================================================
--- trunk/fs/ocfs2/cluster/heartbeat.c 2005-04-08 01:29:59 UTC (rev 2124)
+++ trunk/fs/ocfs2/cluster/heartbeat.c 2005-04-08 01:39:08 UTC (rev 2125)
@@ -166,9 +166,35 @@
}
struct hb_bio_wait_ctxt {
+ atomic_t wc_num_reqs;
struct completion wc_io_complete;
};
+static inline void hb_bio_wait_init(struct hb_bio_wait_ctxt *wc,
+ unsigned int num_ios)
+{
+ atomic_set(&wc->wc_num_reqs, num_ios);
+ init_completion(&wc->wc_io_complete);
+}
+
+/* Used in error paths too */
+static inline void hb_bio_wait_dec(struct hb_bio_wait_ctxt *wc,
+ unsigned int num)
+{
+ if (atomic_sub_and_test(num, &wc->wc_num_reqs))
+ complete(&wc->wc_io_complete);
+}
+
+static void hb_wait_on_io(struct hb_region *reg,
+ struct hb_bio_wait_ctxt *wc)
+{
+ struct address_space *mapping = reg->hr_bdev->bd_inode->i_mapping;
+
+ blk_run_address_space(mapping);
+
+ wait_for_completion(&wc->wc_io_complete);
+}
+
static int hb_bio_end_io(struct bio *bio,
unsigned int bytes_done,
int error)
@@ -181,103 +207,209 @@
if (bio->bi_size)
return 1;
- complete(&wc->wc_io_complete);
+ hb_bio_wait_dec(wc, 1);
return 0;
}
-static int hb_setup_read_bio(struct hb_region *reg,
- struct bio **read_bio,
- struct hb_bio_wait_ctxt *bio_wait)
+/* Setup a Bio to cover I/O against num_slots slots starting at
+ * start_slot. */
+static struct bio * hb_setup_one_bio(struct hb_region *reg,
+ struct hb_bio_wait_ctxt *wc,
+ unsigned int start_slot,
+ unsigned int num_slots)
{
- int status, i, num_pages, len;
- unsigned int vec_len;
+ int i, nr_vecs, len, first_page, last_page;
+ unsigned int vec_len, vec_start;
unsigned int bits = reg->hr_hb_block_bits;
unsigned int spp = reg->hr_hb_slots_per_page;
- struct bio *bio = NULL;
+ struct bio *bio;
struct page *page;
- num_pages = reg->hr_num_pages;
+ nr_vecs = (num_slots + spp - 1) / spp;
- bio = bio_alloc(GFP_KERNEL, num_pages);
+ bio = bio_alloc(GFP_KERNEL, nr_vecs);
if (!bio) {
hbprintk("Could not alloc slots BIO!\n");
- status = -ENOMEM;
+ bio = ERR_PTR(-ENOMEM);
goto bail;
}
/* Must put everything in 512 byte sectors for the bio... */
- bio->bi_sector = reg->hr_start_block << (bits - 9);
+ bio->bi_sector = (reg->hr_start_block + start_slot) << (bits - 9);
bio->bi_bdev = reg->hr_bdev;
- bio->bi_private = bio_wait;
+ bio->bi_private = wc;
bio->bi_end_io = hb_bio_end_io;
- vec_len = PAGE_CACHE_SIZE;
- for(i = 0; i < num_pages; i++) {
+ first_page = start_slot / spp;
+ last_page = first_page + nr_vecs;
+ vec_start = (start_slot << bits) % PAGE_CACHE_SIZE;
+ for(i = first_page; i < last_page; i++) {
page = reg->hr_slot_data[i];
+ vec_len = PAGE_CACHE_SIZE;
/* last page might be short */
- if (i == (num_pages - 1))
- vec_len = (reg->hr_blocks % spp) << bits;
+ if (((i + 1) * spp) > (start_slot + num_slots))
+ vec_len = ((num_slots + start_slot) % spp) << bits;
+ vec_len -= vec_start;
- len = bio_add_page(bio, page, vec_len, 0);
+#if 0
+ /* Enable this for tons of output */
+ hbprintk("page %d, vec_len = %u, vec_start = %u\n", i,
+ vec_len, vec_start);
+#endif
+
+ len = bio_add_page(bio, page, vec_len, vec_start);
if (len != vec_len) {
bio_put(bio);
+ bio = ERR_PTR(-EIO);
hbprintk("Error adding page to bio i = %d, vec_len = "
- "%u, len = %d\n", i, vec_len, len);
- status = -EIO;
+ "%u, len = %d\n, start = %u", i, vec_len,
+ len, vec_start);
goto bail;
}
+
+ vec_start = 0;
}
- *read_bio = bio;
- status = 0;
bail:
- return status;
+ return bio;
}
-static int hb_setup_write_bio(struct hb_region *reg,
- struct bio **write_bio,
- struct hb_bio_wait_ctxt *bio_wait)
+/*
+ * Compute the maximum number of sectors the bdev can handle in one bio,
+ * as a power of two.
+ *
+ * Stolen from oracleasm, thanks Joel!
+ */
+static int compute_max_sectors(struct block_device *bdev)
{
- int i, len, status;
- unsigned int bits = reg->hr_hb_block_bits;
- u64 write_block;
- unsigned int start;
- struct bio *bio = NULL;
- struct page *page;
+ int max_pages, max_sectors, pow_two_sectors;
- bio = bio_alloc(GFP_KERNEL, 1);
- if (!bio) {
- hbprintk("Could not alloc node BIO!\n");
+ struct request_queue *q;
+
+ q = bdev_get_queue(bdev);
+ max_pages = q->max_sectors >> (PAGE_SHIFT - 9);
+ if (max_pages > BIO_MAX_PAGES)
+ max_pages = BIO_MAX_PAGES;
+ if (max_pages > q->max_phys_segments)
+ max_pages = q->max_phys_segments;
+ if (max_pages > q->max_hw_segments)
+ max_pages = q->max_hw_segments;
+ max_pages--; /* Handle I/Os that straddle a page */
+
+ max_sectors = max_pages << (PAGE_SHIFT - 9);
+
+ /* Why is fls() 1-based???? */
+ pow_two_sectors = 1 << (fls(max_sectors) - 1);
+
+ return pow_two_sectors;
+}
+
+static inline void hb_compute_request_limits(struct hb_region *reg,
+ unsigned int num_slots,
+ unsigned int *num_bios,
+ unsigned int *slots_per_bio)
+{
+ unsigned int max_sectors, io_sectors;
+
+ max_sectors = compute_max_sectors(reg->hr_bdev);
+
+ io_sectors = num_slots << (reg->hr_hb_block_bits - 9);
+
+ *num_bios = (io_sectors + max_sectors - 1) / max_sectors;
+ *slots_per_bio = max_sectors >> (reg->hr_hb_block_bits - 9);
+
+#if 0
+ hbprintk("My io size is %u sectors for %u slots. This device can "
+ "handle %u sectors of I/O\n", io_sectors, num_slots,
+ max_sectors);
+ hbprintk("Will need %u bios holding %u slots each\n", *num_bios,
+ *slots_per_bio);
+#endif
+}
+
+static int hb_read_slots(struct hb_region *reg,
+ unsigned int max_slots)
+{
+ unsigned int num_bios, slots_per_bio, start_slot, num_slots;
+ int i, status;
+ struct hb_bio_wait_ctxt wc;
+ struct bio **bios;
+ struct bio *bio;
+
+ hb_compute_request_limits(reg, max_slots, &num_bios, &slots_per_bio);
+
+ bios = kcalloc(num_bios, sizeof(struct bio *), GFP_KERNEL);
+ if (!bios) {
status = -ENOMEM;
- goto bail;
+ hbprintk("%d\n", status);
+ return status;
}
- /* find the page, and then the offset within */
- i = nm_this_node() / reg->hr_hb_slots_per_page;
- page = reg->hr_slot_data[i];
- start = (nm_this_node() % reg->hr_hb_slots_per_page) << bits;
+ hb_bio_wait_init(&wc, num_bios);
- write_block = reg->hr_start_block + nm_this_node();
- bio->bi_sector = write_block << (bits - 9);
- bio->bi_bdev = reg->hr_bdev;
- bio->bi_private = bio_wait;
- bio->bi_end_io = hb_bio_end_io;
+ num_slots = slots_per_bio;
+ for(i = 0; i < num_bios; i++) {
+ start_slot = i * slots_per_bio;
- len = bio_add_page(bio, page, reg->hr_hb_block_bytes, start);
- if (len != reg->hr_hb_block_bytes) {
- bio_put(bio);
+ /* adjust num_slots at last bio */
+ if (max_slots < (start_slot + num_slots))
+ num_slots = max_slots - start_slot;
- hbprintk("Error adding my node bio len = %d\n", len);
- status = -EIO;
+ bio = hb_setup_one_bio(reg, &wc, start_slot, num_slots);
+ if (IS_ERR(bio)) {
+ hb_bio_wait_dec(&wc, num_bios - i);
+
+ status = PTR_ERR(bio);
+ hbprintk("%d\n", status);
+ goto bail_and_wait;
+ }
+ bios[i] = bio;
+
+ bio_get(bio);
+ submit_bio(READ, bio);
+ }
+
+ status = 0;
+bail_and_wait:
+ hb_wait_on_io(reg, &wc);
+
+ if (bios) {
+ for(i = 0; i < num_bios; i++)
+ if (bios[i])
+ bio_put(bios[i]);
+ kfree(bios);
+ }
+
+ return status;
+}
+
+static int hb_issue_node_write(struct hb_region *reg,
+ struct bio **write_bio,
+ struct hb_bio_wait_ctxt *write_wc)
+{
+ int status;
+ unsigned int slot;
+ struct bio *bio;
+
+ hb_bio_wait_init(write_wc, 1);
+
+ slot = nm_this_node();
+
+ bio = hb_setup_one_bio(reg, write_wc, slot, 1);
+ if (IS_ERR(bio)) {
+ status = PTR_ERR(bio);
+ hbprintk("%d\n", status);
goto bail;
}
+ bio_get(bio);
+ submit_bio(WRITE, bio);
+
*write_bio = bio;
status = 0;
bail:
-
return status;
}
@@ -323,20 +455,10 @@
hb_block->time = cpu_to_le64(cputime);
}
-static void hb_wait_on_io(struct hb_region *reg,
- struct hb_bio_wait_ctxt *wc)
-{
- struct address_space *mapping = reg->hr_bdev->bd_inode->i_mapping;
-
- blk_run_address_space(mapping);
-
- wait_for_completion(&wc->wc_io_complete);
-}
-
static void hb_do_disk_heartbeat(struct hb_region *reg)
{
u64 cputime;
- int i, ret, wrote = 0;
+ int i, ret;
struct nm_node *node;
struct hb_disk_slot *slot;
struct list_head *pos, *tmp;
@@ -347,33 +469,18 @@
LIST_HEAD(newborn);
LIST_HEAD(deceased);
unsigned long configured_nodes[BITS_TO_LONGS(NM_MAX_NODES)];
- struct bio *read_bio = NULL;
- struct bio *write_bio = NULL;
- struct hb_bio_wait_ctxt read_wc;
+ struct bio *write_bio;
struct hb_bio_wait_ctxt write_wc;
if (nm_configured_node_map(configured_nodes, sizeof(configured_nodes)))
return;
- init_completion(&read_wc.wc_io_complete);
- ret = hb_setup_read_bio(reg, &read_bio, &read_wc);
+ ret = hb_read_slots(reg, NM_MAX_NODES - 1);
if (ret < 0) {
- hbprintk("ret = %d setting up read io\n", ret);
- goto bail;
+ hbprintk("ret = %d reading slots\n", ret);
+ return;
}
- init_completion(&write_wc.wc_io_complete);
- ret = hb_setup_write_bio(reg, &write_bio, &write_wc);
- if (ret < 0) {
- hbprintk("ret = %d setting up read io\n", ret);
- goto bail;
- }
-
- /* We 1st do a sync read of all slots */
- bio_get(read_bio);
- submit_bio(READ, read_bio);
- hb_wait_on_io(reg, &read_wc);
-
/* With an up to date view of the slots, we can check that no
* other node has been improperly configured to heartbeat in
* our slot. */
@@ -386,9 +493,11 @@
/* And fire off the write. Note that we don't wait on this I/O
* until later. */
- bio_get(write_bio);
- submit_bio(WRITE, write_bio);
- wrote = 1;
+ ret = hb_issue_node_write(reg, &write_bio, &write_wc);
+ if (ret < 0) {
+ hbprintk("ret = %d setting up read io\n", ret);
+ return;
+ }
i = -1;
while((i = find_next_bit(configured_nodes, NM_MAX_NODES, i + 1)) < NM_MAX_NODES) {
@@ -471,14 +580,9 @@
nm_node_put(node);
}
-bail:
- if (wrote) /* Make sure the write hits disk before we return. */
- hb_wait_on_io(reg, &write_wc);
-
- if (read_bio)
- bio_put(read_bio);
- if (write_bio)
- bio_put(write_bio);
+ /* Make sure the write hits disk before we return. */
+ hb_wait_on_io(reg, &write_wc);
+ bio_put(write_bio);
}
/*
More information about the Ocfs2-commits mailing list