[Ocfs2-commits] mfasheh commits r2111 - trunk/fs/ocfs2/cluster

svn-commits at oss.oracle.com
Mon Apr 4 19:42:54 CDT 2005


Author: mfasheh
Date: 2005-04-04 19:42:52 -0500 (Mon, 04 Apr 2005)
New Revision: 2111

Modified:
   trunk/fs/ocfs2/cluster/heartbeat.c
Log:
* use bios for heartbeat. this divorces the heartbeat block size from the
  ocfs2 block size and in the process fixes two problems:
  - most obviously, hb can now use a different block size from the fs; it
    defaults to 512 bytes. Other sizes have had minimal testing so be warned.
    (see the offset sketch after this list)
  - we no longer have to worry about set_blocksize messing up our buffer
    heads as we don't use them anymore. ocfs2 blocksize detection was
    freaking heartbeat out on some systems.

* add some trivial checking to see if another node is sharing our heartbeat
  slot. (see the timestamp sketch after this list)

* calculate block size / bits input in a common function

* add a check in hb_region_dev_read so we don't segfault on a read of an
  unset region device.
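
For reference, a minimal standalone sketch of the offset arithmetic the new
code performs (the names below are illustrative only and do not appear in the
patch): the region is described in filesystem blocks, I/O is issued in
heartbeat blocks, and the bio layer wants 512-byte sectors.

  /* sketch only: illustrative names, not from the patch */
  unsigned long long hb_slot_sector(unsigned long long fs_start_block,
                                    unsigned int fs_block_bits, /* e.g. 12 for a 4k fs */
                                    unsigned int hb_block_bits, /* e.g. 9 for 512 byte slots */
                                    unsigned int node_num)
  {
          /* byte offset of the start of the heartbeat area */
          unsigned long long bytes = fs_start_block << fs_block_bits;

          /* first heartbeat block of the area, then this node's slot;
           * assumes hb_block_bits <= fs_block_bits, which the patch enforces */
          unsigned long long hb_block = (bytes >> hb_block_bits) + node_num;

          /* heartbeat blocks to 512-byte sectors for bi_sector; hb block
           * size is at least 512 bytes, so hb_block_bits >= 9 */
          return hb_block << (hb_block_bits - 9);
  }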
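
And a small sketch of the slot-sharing check (again with illustrative names
only): after the synchronous read of all slots, our own slot on disk should
still hold the timestamp we wrote on the previous iteration; if it does not,
another node is writing into our slot.

  /* sketch only: illustrative names, not from the patch */
  struct slot_check {
          unsigned long long last_time;  /* timestamp we recorded last round */
          unsigned long long disk_time;  /* timestamp the read just returned */
  };

  /* returns 1 if our slot looks untouched by any other node */
  static int slot_is_ours(const struct slot_check *s)
  {
          if (!s->last_time)     /* first iteration, nothing to compare yet */
                  return 1;
          return s->disk_time == s->last_time;
  }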

Signed-off-by: zab



Modified: trunk/fs/ocfs2/cluster/heartbeat.c
===================================================================
--- trunk/fs/ocfs2/cluster/heartbeat.c	2005-04-04 22:30:11 UTC (rev 2110)
+++ trunk/fs/ocfs2/cluster/heartbeat.c	2005-04-05 00:42:52 UTC (rev 2111)
@@ -25,6 +25,7 @@
 #include <linux/module.h>
 #include <linux/kernel.h>
 #include <linux/fs.h>
+#include <linux/bio.h>
 #include <linux/mount.h>
 #include <linux/vfs.h>
 #include <linux/types.h>
@@ -97,15 +98,18 @@
 #define hbprintk(x, arg...)    
 #define hbprintk0(x)
 #else
-#define hbprintk(x, arg...)    printk("(hb:%d) " x, current->pid, ##arg)
-#define hbprintk0(x)           printk("(hb:%d) " x, current->pid)
+#define hbprintk(x, arg...)    printk("(hb:(%d) %s:%d) " x, current->pid, \
+						__FUNCTION__, __LINE__, ##arg)
+#define hbprintk0(x)           printk("(hb:(%d) %s:%d) " x, current->pid, \
+						__FUNCTION__, __LINE__)
 #endif
 
 #define HB_THREAD_MS                  2000   // every 2 seconds
 
+#define HB_DEFAULT_BLOCK_BITS         9
+
 struct hb_disk_slot {
-	struct buffer_head	*ds_bh;
-	u64			ds_block;
+	hb_disk_heartbeat_block *ds_raw_block;
 	u8			ds_node_num;
 	unsigned long		ds_last_time;
 	u16			ds_equal_samples;
@@ -121,11 +125,20 @@
 	/* protected by the hr_callback_sem */
 	struct list_head	hr_active_item;
 	struct task_struct 	*hr_task;
-	u64			hr_block_bytes;
-	u64			hr_block_bits;
-	u64			hr_start_block;
-	u8			hr_blocks;
+	unsigned int		hr_fs_block_bytes;
+	unsigned int		hr_fs_block_bits;
+	unsigned long long	hr_fs_start_block;
+	unsigned int		hr_fs_blocks;
+	/* will probably need: hr_start_block, hr_blocks */
+	unsigned int		hr_blocks;
+	unsigned long long	hr_start_block;
 
+	unsigned int		hr_hb_block_bits;
+	unsigned int		hr_hb_block_bytes;
+	unsigned int		hr_hb_slots_per_page;
+
+	unsigned int		hr_num_pages;
+	struct page             **hr_slot_data;
 	struct block_device	*hr_bdev;
 	struct hb_disk_slot	*hr_slots;
 	/* a single hb-thread writer and many fill_node readers are protected */
@@ -138,17 +151,6 @@
 	atomic_t		hr_steady_iterations;
 };
 
-static void hb_end_buffer_io_sync(struct buffer_head *bh, int uptodate)
-{
-	if (uptodate)
-		set_buffer_uptodate(bh);
-	else {
-		hbprintk("eek!  EIO!\n");
-		clear_buffer_uptodate(bh);
-	}
-	unlock_buffer(bh);
-}
-
 static int hb_do_node_down(struct nm_node *node, int idx)
 {
 	hbprintk("hb_do_node_down:  node=%u\n", node->nd_num);
@@ -163,83 +165,179 @@
 	return 0;
 }
 
-static inline void hb_submit_bh(int rw, struct buffer_head *bh)
+struct hb_bio_wait_ctxt {
+	struct completion wc_io_complete;
+};
+
+static int hb_bio_end_io(struct bio *bio,
+			 unsigned int bytes_done,
+			 int error)
 {
-#if 0
-	unsigned long long blocknr = bh->b_blocknr;
+	struct hb_bio_wait_ctxt *wc = bio->bi_private;
 
-	hbprintk("submit_bh: rw=%s, blocknr=%llu, mapped=%s\n",
-	       rw==WRITE?"write":"read", blocknr, 
-	       buffer_mapped(bh) ? "yes" : "no");
-#endif
-	submit_bh(rw, bh);
-	blk_run_address_space(bh->b_bdev->bd_inode->i_mapping);
+	if (error)
+		printk("ocfs2_heartbeat: IO Error %d\n", error);
+
+	if (bio->bi_size)
+		return 1;
+
+	complete(&wc->wc_io_complete);
+	return 0;
 }
 
-/* 
- * hr_region's block_bytes attribute defines the block size that we'll use to
- * calculate block offsets.  The blocksize of the device might change under us,
- * though.  this detects when that happens and tries to lookup the bh again
- * with the newer blocksize.  The exciting wrinkle here is that its fatal to
- * call __getblk() with the wrong block size in some 2.6 kernels.
- *
- * We only ever use a few bytes of the block so it is ok that we return a
- * smaller bh than what is implied by blocksize_bits.  We're just fixing up
- * addressing here.
- */
-static struct buffer_head *hb_getblk(struct block_device *bdev, int orig_blkno,
-				     int bits)
+static int hb_setup_read_bio(struct hb_region *reg,
+			     struct bio **read_bio,
+			     struct hb_bio_wait_ctxt *bio_wait)
 {
-	/* XXX getblk() takes an int block in 2.4 :/ */
-	int blkno;
-	int dev_bits;
-	struct buffer_head *bh = NULL;
+	int status, i, num_pages, len;
+	unsigned int vec_len;
+	unsigned int bits = reg->hr_hb_block_bits;
+	unsigned int spp = reg->hr_hb_slots_per_page;
+	struct bio *bio = NULL;
+	struct page *page;
 
-	blkno = orig_blkno;
+	num_pages = reg->hr_num_pages;
 
-	dev_bits = bdev->bd_inode->i_blkbits;
-	if (dev_bits < 0)
-		goto out;
+	bio = bio_alloc(GFP_KERNEL, num_pages);
+	if (!bio) {
+		hbprintk("Could not alloc slots BIO!\n");
+		status = -ENOMEM;
+		goto bail;
+	}
 
-	if (dev_bits < bits)
-		blkno <<= bits - dev_bits;
-	else if (dev_bits > bits)
-		blkno >>= dev_bits - bits;
+	/* Must put everything in 512 byte sectors for the bio... */
+	bio->bi_sector = reg->hr_start_block << (bits - 9);
+	bio->bi_bdev = reg->hr_bdev;
+	bio->bi_private = bio_wait;
+	bio->bi_end_io = hb_bio_end_io;
 
-	bh = __getblk(bdev, blkno, 1 << dev_bits);
-out:
-	return bh;
+	vec_len = PAGE_CACHE_SIZE;
+	for(i = 0; i < num_pages; i++) {
+		page = reg->hr_slot_data[i];
+
+		/* last page might be short */
+		if (i == (num_pages - 1))
+			vec_len = (reg->hr_blocks % spp) << bits;
+
+		len = bio_add_page(bio, page, vec_len, 0);
+		if (len != vec_len) {
+			bio_put(bio);
+
+			hbprintk("Error adding page to bio i = %d, vec_len = "
+				 "%u, len = %d\n", i, vec_len, len);
+			status = -EIO;
+			goto bail;
+		}
+	}
+
+	*read_bio = bio;
+	status = 0;
+bail:
+	return status;
 }
 
-static struct buffer_head *hb_get_locked_mapped(struct hb_region *reg,
-						struct hb_disk_slot *slot)
+static int hb_setup_write_bio(struct hb_region *reg,
+			      struct bio **write_bio,
+			      struct hb_bio_wait_ctxt *bio_wait)
 {
-	struct buffer_head *bh = NULL;
+	int i, len, status;
+	unsigned int bits = reg->hr_hb_block_bits;
+	u64 write_block;
+	unsigned int start;
+	struct bio *bio = NULL;
+	struct page *page;
 
-	bh = slot->ds_bh;
-	if (bh) {
-		lock_buffer(bh);
-		if (buffer_mapped(bh))
-			goto out;
-		slot->ds_bh = NULL;
-		unlock_buffer(bh);
-		brelse(bh);
+	bio = bio_alloc(GFP_KERNEL, 1);
+	if (!bio) {
+		hbprintk("Could not alloc node BIO!\n");
+		status = -ENOMEM;
+		goto bail;
 	}
 
-	slot->ds_bh = hb_getblk(reg->hr_bdev, slot->ds_block,
-				reg->hr_block_bits);
-	if (slot->ds_bh) {
-		bh = slot->ds_bh;
-		lock_buffer(bh);
+	/* find the page, and then the offset within */
+	i = nm_this_node() / reg->hr_hb_slots_per_page;
+	page = reg->hr_slot_data[i];
+	start = (nm_this_node() % reg->hr_hb_slots_per_page) << bits;
+
+	write_block = reg->hr_start_block + nm_this_node();
+	bio->bi_sector = write_block << (bits - 9);
+	bio->bi_bdev = reg->hr_bdev;
+	bio->bi_private = bio_wait;
+	bio->bi_end_io = hb_bio_end_io;
+
+	len = bio_add_page(bio, page, reg->hr_hb_block_bytes, start);
+	if (len != reg->hr_hb_block_bytes) {
+		bio_put(bio);
+
+		hbprintk("Error adding my node bio len = %d\n", len);
+		status = -EIO;
+		goto bail;
 	}
-out:
-	return bh;
+
+	*write_bio = bio;
+	status = 0;
+bail:
+
+	return status;
 }
 
+/* We want to make sure that nobody is heartbeating on top of us --
+ * this will help detect an invalid configuration. */
+static int hb_check_last_timestamp(struct hb_region *reg)
+{
+	int node_num, ret;
+	struct hb_disk_slot *slot;
+	hb_disk_heartbeat_block *hb_block;
+
+	node_num = nm_this_node();
+
+	ret = 1;
+	slot = &reg->hr_slots[node_num];
+	/* Don't check on our 1st timestamp */
+	if (slot->ds_last_time) {
+		hb_block = slot->ds_raw_block;
+
+		if (le64_to_cpu(hb_block->time) != slot->ds_last_time)
+			ret = 0;
+	}
+
+	return ret;
+}
+
+static inline void hb_set_local_node_timestamp(struct hb_region *reg)
+{
+	int node_num;
+	u64 cputime;
+	struct hb_disk_slot *slot;
+	hb_disk_heartbeat_block *hb_block;
+
+	node_num = nm_this_node();
+	slot = &reg->hr_slots[node_num];
+
+	hb_block = (hb_disk_heartbeat_block *) slot->ds_raw_block;
+	memset(hb_block, 0, reg->hr_hb_block_bytes);
+	/* TODO: time stuff */
+	cputime = OCFS_CURRENT_SECONDS;
+	if (!cputime)
+		cputime = 1;
+	hb_block->time = cpu_to_le64(cputime);
+}
+
+static void hb_wait_on_io(struct hb_region *reg,
+			  struct hb_bio_wait_ctxt *wc)
+{
+	struct address_space *mapping = reg->hr_bdev->bd_inode->i_mapping;
+
+	blk_run_address_space(mapping);
+
+	wait_for_completion(&wc->wc_io_complete);
+}
+
 static void hb_do_disk_heartbeat(struct hb_region *reg)
 {
+	u64 cputime;
+	int i, ret, wrote = 0;
 	struct nm_node *node;
-	struct buffer_head *bh;
 	struct hb_disk_slot *slot;
 	struct list_head *pos, *tmp;
 	hb_disk_heartbeat_block *hb_block;
@@ -248,60 +346,55 @@
 	 * lists and the slot items */
 	LIST_HEAD(newborn);
 	LIST_HEAD(deceased);
-	u64 cputime;
-	int i, rw;
 	unsigned long configured_nodes[BITS_TO_LONGS(NM_MAX_NODES)];
+	struct bio *read_bio = NULL;
+	struct bio *write_bio = NULL;
+	struct hb_bio_wait_ctxt read_wc;
+	struct hb_bio_wait_ctxt write_wc;
 
 	if (nm_configured_node_map(configured_nodes, sizeof(configured_nodes)))
 		return;
 
-	/* first we clear uptodate on other nodes slots and write our slot */
-	i = -1;
-	while((i = find_next_bit(configured_nodes, NM_MAX_NODES, i + 1)) < NM_MAX_NODES) {
-		slot = &reg->hr_slots[i];
+	init_completion(&read_wc.wc_io_complete);
+	ret = hb_setup_read_bio(reg, &read_bio, &read_wc);
+	if (ret < 0) {
+		hbprintk("ret = %d setting up read io\n", ret);
+		goto bail;
+	}
 
-		bh = hb_get_locked_mapped(reg, slot);
-		if (bh == NULL) {
-			hbprintk("getblk failed in slot %d!\n", i);
-			continue;
-		}
+	init_completion(&write_wc.wc_io_complete);
+	ret = hb_setup_write_bio(reg, &write_bio, &write_wc);
+	if (ret < 0) {
+		hbprintk("ret = %d setting up read io\n", ret);
+		goto bail;
+	}
 
-		if (slot->ds_node_num == nm_this_node()) {
-			memset(bh->b_data, 0, bh->b_size);
-			hb_block = (hb_disk_heartbeat_block *)bh->b_data;
-			BUG_ON(bh->b_size < sizeof(*hb_block));
+	/* We 1st do a sync read of all slots */
+	bio_get(read_bio);
+	submit_bio(READ, read_bio);
+	hb_wait_on_io(reg, &read_wc);
 
-			/* TODO: time stuff */
-			cputime = OCFS_CURRENT_SECONDS;
-			if (!cputime)
-				cputime = 1;
-			hb_block->time = cpu_to_le64(cputime);
+	/* With an up to date view of the slots, we can check that no
+	 * other node has been improperly configured to heartbeat in
+	 * our slot. */
+	if (!hb_check_last_timestamp(reg))
+		printk(KERN_CRIT "ocfs2_heartbeat: Another node is "
+		       "heartbeating in our slot!\n");
 
-			set_buffer_uptodate(bh);
-			clear_buffer_dirty(bh);
-			rw = WRITE;
-		} else {
-			clear_buffer_uptodate(bh);
-			rw = READ;
-		}
+	/* Set our raw timestamp */
+	hb_set_local_node_timestamp(reg);
 
-		bh->b_end_io = hb_end_buffer_io_sync;
-		hb_submit_bh(rw, bh);
-	}
+	/* And fire off the write. Note that we don't wait on this I/O
+	 * until later. */
+	bio_get(write_bio);
+	submit_bio(WRITE, write_bio);
+	wrote = 1;
 
-	/* now we read again and see what other nodes have done */
 	i = -1;
 	while((i = find_next_bit(configured_nodes, NM_MAX_NODES, i + 1)) < NM_MAX_NODES) {
 		slot = &reg->hr_slots[i];
+		hb_block = slot->ds_raw_block;
 
-		bh = slot->ds_bh;
-		if (bh == NULL) {
-			hbprintk("no bh in slot %d!\n", i);
-			continue;
-		}
-		wait_on_buffer(bh);
-		hb_block = (hb_disk_heartbeat_block *)bh->b_data;
-
 		/* we don't care if these wrap.. the state transitions below
 		 * clear at the right places */
 		cputime = le64_to_cpu(hb_block->time);
@@ -318,7 +411,7 @@
 			list_add_tail(&slot->ds_live_item, &newborn);
 			slot->ds_equal_samples = 0;
 			continue;
-		} 
+		}
 
 		/* live nodes only go dead after enough consequtive missed
 		 * samples..  reset the missed counter whenever we see 
@@ -377,6 +470,15 @@
 		hb_do_node_down(node, slot->ds_node_num);
 		nm_node_put(node);
 	}
+
+bail:
+	if (wrote) /* Make sure the write hits disk before we return. */
+		hb_wait_on_io(reg, &write_wc);
+
+	if (read_bio)
+		bio_put(read_bio);
+	if (write_bio)
+		bio_put(write_bio);
 }
 
 /*
@@ -389,7 +491,7 @@
 	struct hb_region *reg = data;
 
 	hbprintk("hb thread running\n");
-	
+
 	while (!kthread_should_stop()) {
 		hb_do_disk_heartbeat(reg);
 		set_current_state(TASK_UNINTERRUPTIBLE);
@@ -453,56 +555,112 @@
  * attributes might have built up. */
 static void hb_region_release(struct config_item *item)
 {
+	int i;
+	struct page *page;
 	struct hb_region *reg = to_hb_region(item);
-	struct hb_disk_slot *slot;
-	int i;
 
-
-	/* with the thread down these bhs should be update/idle */
-	for(i = 0; reg->hr_slots && i < reg->hr_blocks; i++) {
-		slot = &reg->hr_slots[i];
-		if (slot->ds_bh)
-			brelse(slot->ds_bh);
+	if (reg->hr_slot_data) {
+		for (i = 0; i < reg->hr_num_pages; i++) {
+			page = reg->hr_slot_data[i];
+			if (page)
+				__free_page(page);
+		}
+		kfree(reg->hr_slot_data);
 	}
 
 	if (reg->hr_bdev)
 		blkdev_put(reg->hr_bdev);
 
-	kfree(reg->hr_slots); /* might be null if never activated */
+	if (reg->hr_slots)
+		kfree(reg->hr_slots);
 	kfree(reg);
 }
 
-static ssize_t hb_region_block_bytes_read(struct hb_region *reg, char *page)
+static void hb_init_region_params(struct hb_region *reg,
+				  unsigned int block_bits)
 {
-	return sprintf(page, "%lld\n", (long long)reg->hr_block_bytes);
+	reg->hr_hb_block_bits = block_bits;
+	reg->hr_hb_block_bytes = 1 << block_bits;
+	reg->hr_hb_slots_per_page = PAGE_CACHE_SIZE >> block_bits;
 }
 
-static ssize_t hb_region_block_bytes_write(struct hb_region *reg,
-					   const char *page,
-					   size_t count)
+static int hb_read_block_input(struct hb_region *reg,
+			       const char *page,
+			       size_t count,
+			       unsigned long *ret_bytes,
+			       unsigned int *ret_bits)
 {
-	unsigned long long tmp;
+	unsigned long bytes;
 	char *p = (char *)page;
 
-	tmp = simple_strtoull(p, &p, 0);
+	bytes = simple_strtoul(p, &p, 0);
 	if (!p || (*p && (*p != '\n')))
 		return -EINVAL;
 
-	/* XXX probably very stupid. */
-	if (tmp >= 65536) /* just so we can use hweight16 */
+	/* Heartbeat and fs min / max block sizes are the same. */
+	if (bytes > 4096 || bytes < 512)
 		return -ERANGE;
-	if (hweight16(tmp) != 1)
+	if (hweight16(bytes) != 1)
 		return -EINVAL;
 
+	if (ret_bytes)
+		*ret_bytes = bytes;
+	if (ret_bits)
+		*ret_bits = ffs(bytes) - 1;
+
+	return 0;
+}
+
+static ssize_t hb_region_slot_bytes_read(struct hb_region *reg, char *page)
+{
+	return sprintf(page, "%u\n", reg->hr_hb_block_bytes);
+}
+
+static ssize_t hb_region_slot_bytes_write(struct hb_region *reg,
+					  const char *page,
+					  size_t count)
+{
+	int status;
+	unsigned int slot_bits;
+
+	status = hb_read_block_input(reg, page, count, NULL, &slot_bits);
+	if (status)
+		return status;
+
+	hb_init_region_params(reg, slot_bits);
+
+	hbprintk("Heartbeat region using %u sized slots\n", 1 << slot_bits);
+
+	return count;
+}
+
+static ssize_t hb_region_block_bytes_read(struct hb_region *reg, char *page)
+{
+	return sprintf(page, "%u\n", reg->hr_fs_block_bytes);
+}
+
+static ssize_t hb_region_block_bytes_write(struct hb_region *reg,
+					   const char *page,
+					   size_t count)
+{
+	int status;
+	unsigned long fs_bytes;
+	unsigned int fs_bits;
+
+	status = hb_read_block_input(reg, page, count, &fs_bytes, &fs_bits);
+	if (status)
+		return status;
+
 	/* XXX compare blocks against dev later on commit? */
-	reg->hr_block_bytes = tmp;
-	reg->hr_block_bits = ffs(reg->hr_block_bytes) - 1;
+	reg->hr_fs_block_bytes = (unsigned int) fs_bytes;
+	reg->hr_fs_block_bits = fs_bits;
 
 	return count;
 }
+
 static ssize_t hb_region_start_block_read(struct hb_region *reg, char *page)
 {
-	return sprintf(page, "%lld\n", (long long)reg->hr_start_block);
+	return sprintf(page, "%llu\n", reg->hr_fs_start_block);
 }
 
 static ssize_t hb_region_start_block_write(struct hb_region *reg,
@@ -517,44 +675,130 @@
 		return -EINVAL;
 
 	/* XXX compare blocks against dev later on commit? */
-	reg->hr_start_block = tmp;
+	reg->hr_fs_start_block = tmp;
 
 	return count;
 }
 static ssize_t hb_region_blocks_read(struct hb_region *reg, char *page)
 {
-	return sprintf(page, "%d\n", reg->hr_blocks);
+	return sprintf(page, "%d\n", reg->hr_fs_blocks);
 }
 
 static ssize_t hb_region_blocks_write(struct hb_region *reg,
 					   const char *page,
 					   size_t count)
 {
-	unsigned long long tmp;
+	unsigned long tmp;
 	char *p = (char *)page;
 
-	tmp = simple_strtoull(p, &p, 0);
+	tmp = simple_strtoul(p, &p, 0);
 	if (!p || (*p && (*p != '\n')))
 		return -EINVAL;
 
 	if (tmp >= NM_MAX_NODES)
 		return -ERANGE;
 
-	reg->hr_slots = kcalloc(tmp, sizeof(struct hb_disk_slot), GFP_KERNEL);
-	if (reg->hr_slots == NULL)
-		return -ENOMEM;
+	reg->hr_fs_blocks = (unsigned int) tmp;
 
-	reg->hr_blocks = tmp;
-
 	return count;
 }
 
 static ssize_t hb_region_dev_read(struct hb_region *reg, char *page)
 {
-	const char *str = bdevname(reg->hr_bdev, page);
-	return sprintf(page, "%s\n", str);
+	unsigned int ret = 0;
+	const char *str;
+
+	if (reg->hr_bdev) {
+		str = bdevname(reg->hr_bdev, page);
+		ret = sprintf(page, "%s\n", str);
+	}
+	return ret;
 }
 
+/* expects all hr_fs_ members to be filled, as well as hr->hr_bdev */
+static void hb_calculate_region_offsets(struct hb_region *reg)
+{
+	unsigned long long tmp_bytes;
+
+	/* for the # of heartbeat blocks, don't use more than the
+	 * number of fs blocks allocated to the file system. We
+	 * already know that hb block size can't be bigger than fs
+	 * block size... */
+	reg->hr_blocks = reg->hr_fs_blocks;
+
+	tmp_bytes = reg->hr_fs_start_block << reg->hr_fs_block_bits;
+	reg->hr_start_block = tmp_bytes >> reg->hr_hb_block_bits;
+
+	hbprintk("fs_block_bytes = %u, fs_block_bits = %u\n",
+		 reg->hr_fs_block_bytes, reg->hr_fs_block_bits);
+	hbprintk("fs_start_block = %llu\n", reg->hr_fs_start_block);
+	hbprintk("hr_start_block = %llu, hr_blocks = %u\n",
+		 reg->hr_start_block, reg->hr_blocks);
+	hbprintk("hb block bytes = %u, hb block bits = %u\n",
+		 reg->hr_hb_block_bytes, reg->hr_hb_block_bits);
+}
+
+static int hb_map_slot_data(struct hb_region *reg)
+{
+	int i, j;
+	unsigned int last_slot;
+	unsigned int spp = reg->hr_hb_slots_per_page;
+	struct page *page;
+	char *raw;
+	struct hb_disk_slot *slot;
+
+	reg->hr_slots = kcalloc(reg->hr_blocks,
+				sizeof(struct hb_disk_slot), GFP_KERNEL);
+	if (reg->hr_slots == NULL) {
+		hbprintk("%d\n", -ENOMEM);
+		return -ENOMEM;
+	}
+
+	for(i = 0; i < reg->hr_blocks; i++) {
+		slot = &reg->hr_slots[i];
+		slot->ds_node_num = i;
+		INIT_LIST_HEAD(&slot->ds_live_item);
+		slot->ds_raw_block = NULL;
+	}
+
+	reg->hr_num_pages = (reg->hr_blocks + spp - 1) / spp;
+	hbprintk("Going to require %u pages to cover %u blocks at %u "
+		 "blocks per page\n", reg->hr_num_pages, reg->hr_blocks,
+		 spp);
+
+	reg->hr_slot_data = kcalloc(reg->hr_num_pages, sizeof(struct page *),
+				    GFP_KERNEL);
+	if (!reg->hr_slot_data) {
+		hbprintk("%d\n", -ENOMEM);
+		return -ENOMEM;
+	}
+
+	for(i = 0; i < reg->hr_num_pages; i++) {
+		page = alloc_page(GFP_KERNEL);
+		if (!page) {
+			hbprintk("%d\n", -ENOMEM);
+			return -ENOMEM;
+		}
+
+		reg->hr_slot_data[i] = page;
+
+		last_slot = i * spp;
+		raw = page_address(page);
+		for (j = 0;
+		     (j < spp) && ((j + last_slot) < reg->hr_blocks);
+		     j++) {
+			BUG_ON((j + last_slot) >= reg->hr_blocks);
+
+			slot = &reg->hr_slots[j + last_slot];
+			slot->ds_raw_block = (hb_disk_heartbeat_block *) raw;
+
+			raw += reg->hr_hb_block_bytes;
+		}
+	}
+
+	return 0;
+}
+
 /* this is acting as commit; we set up all of hr_bdev and hr_task or nothing */
 static ssize_t hb_region_dev_write(struct hb_region *reg, const char *page,
 				   size_t count)
@@ -564,8 +808,6 @@
 	struct file *filp = NULL;
 	struct inode *inode = NULL;
 	ssize_t ret = -EINVAL;
-	struct hb_disk_slot *slot;
-	int i;
 
 	fd = simple_strtol(p, &p, 0);
 	if (!p || (*p && (*p != '\n')))
@@ -578,9 +820,12 @@
 	if (filp == NULL)
 		goto out;
 
-	if (reg->hr_blocks == 0 || reg->hr_slots == NULL)
+	if (reg->hr_fs_blocks == 0 || reg->hr_fs_start_block == 0)
 		goto out;
 
+	if (reg->hr_hb_block_bits > reg->hr_fs_block_bits)
+		goto out;
+
 	inode = igrab(filp->f_mapping->host);
 	if (inode == NULL)
 		goto out;
@@ -593,11 +838,12 @@
 	}
 	inode = NULL;
 
-	for(i = 0; i < reg->hr_blocks; i++) {
-		slot = &reg->hr_slots[i];
-		slot->ds_block = reg->hr_start_block + i;
-		slot->ds_node_num = i;
-		INIT_LIST_HEAD(&slot->ds_live_item);
+	hb_calculate_region_offsets(reg);
+
+	ret = hb_map_slot_data(reg);
+	if (ret) {
+		hbprintk("Error %ld initializing slot data\n", (long) ret);
+		goto out;
 	}
 
 	atomic_set(&reg->hr_steady_iterations, 2);
@@ -637,6 +883,11 @@
 	ssize_t (*store)(struct hb_region *, const char *, size_t);
 };
 
+static struct hb_region_attribute hb_region_attr_slot_size = {
+	.attr	= { .ca_owner = THIS_MODULE, .ca_name = "slot_bytes", .ca_mode = S_IRUGO | S_IWUSR },
+	.show	= hb_region_slot_bytes_read,
+	.store	= hb_region_slot_bytes_write,
+};
 static struct hb_region_attribute hb_region_attr_block_bytes = {
 	.attr	= { .ca_owner = THIS_MODULE, .ca_name = "block_bytes", .ca_mode = S_IRUGO | S_IWUSR },
 	.show	= hb_region_block_bytes_read,
@@ -659,6 +910,7 @@
 };
 
 static struct configfs_attribute *hb_region_attrs[] = {
+	&hb_region_attr_slot_size.attr,
 	&hb_region_attr_block_bytes.attr,
 	&hb_region_attr_start_block.attr,
 	&hb_region_attr_blocks.attr,
@@ -733,6 +985,7 @@
 	INIT_LIST_HEAD(&reg->hr_active_item);
 	rwlock_init(&reg->hr_slot_list_lock);
 	INIT_LIST_HEAD(&reg->hr_live_list);
+	hb_init_region_params(reg, HB_DEFAULT_BLOCK_BITS);
 
 	config_item_init_type_name(&reg->hr_item, name, &hb_region_type);
 


