[Ocfs2-devel] [RFC 1/3] ocfs2: allocation reservations

Mark Fasheh mfasheh at suse.com
Tue Dec 15 14:58:16 PST 2009


This patch improves Ocfs2 allocation policy by allowing an inode to
reserve a portion of the local alloc bitmap for itself. Allocation windows
are advisory in that they won't block use of that portion of the bitmap.
This makes dealing with corner cases much easier - we can always fall back
to previous policy.

Reservation windows are represented internally by a red-black tree. Within
that tree, each node represents the reservation window of one inode. When
new data is written, we try to allocate from the window first. If that
allocation fails, we fall back to our old heuristics and a new window is
computed from the results. Allocation windows will also be extended if
allocation from them succeeds.

Signed-off-by: Mark Fasheh <mfasheh at suse.com>
---
 Documentation/filesystems/ocfs2.txt |    3 +
 fs/ocfs2/Makefile                   |    1 +
 fs/ocfs2/cluster/masklog.c          |    1 +
 fs/ocfs2/cluster/masklog.h          |    1 +
 fs/ocfs2/localalloc.c               |   39 ++-
 fs/ocfs2/ocfs2.h                    |    5 +
 fs/ocfs2/reservations.c             |  624 +++++++++++++++++++++++++++++++++++
 fs/ocfs2/reservations.h             |  149 +++++++++
 fs/ocfs2/suballoc.h                 |    2 +
 fs/ocfs2/super.c                    |   26 ++
 10 files changed, 845 insertions(+), 6 deletions(-)
 create mode 100644 fs/ocfs2/reservations.c
 create mode 100644 fs/ocfs2/reservations.h

diff --git a/Documentation/filesystems/ocfs2.txt b/Documentation/filesystems/ocfs2.txt
index c58b9f5..0e94600 100644
--- a/Documentation/filesystems/ocfs2.txt
+++ b/Documentation/filesystems/ocfs2.txt
@@ -80,3 +80,6 @@ user_xattr	(*)	Enables Extended User Attributes.
 nouser_xattr		Disables Extended User Attributes.
 acl			Enables POSIX Access Control Lists support.
 noacl		(*)	Disables POSIX Access Control Lists support.
+resv_level=3	(*)	Set how agressive allocation reservations will be.
+			Valid values are between 0 (reservations off) to 6
+			(maximum space for reservations).
diff --git a/fs/ocfs2/Makefile b/fs/ocfs2/Makefile
index 31f25ce..c8eee00 100644
--- a/fs/ocfs2/Makefile
+++ b/fs/ocfs2/Makefile
@@ -29,6 +29,7 @@ ocfs2-objs := \
 	mmap.o 			\
 	namei.o 		\
 	refcounttree.o		\
+	reservations.o		\
 	resize.o		\
 	slot_map.o 		\
 	suballoc.o 		\
diff --git a/fs/ocfs2/cluster/masklog.c b/fs/ocfs2/cluster/masklog.c
index 1cd2934..a56147a 100644
--- a/fs/ocfs2/cluster/masklog.c
+++ b/fs/ocfs2/cluster/masklog.c
@@ -115,6 +115,7 @@ static struct mlog_attribute mlog_attrs[MLOG_MAX_BITS] = {
 	define_mask(ERROR),
 	define_mask(NOTICE),
 	define_mask(KTHREAD),
+	define_mask(RESERVATIONS),
 };
 
 static struct attribute *mlog_attr_ptrs[MLOG_MAX_BITS] = {NULL, };
diff --git a/fs/ocfs2/cluster/masklog.h b/fs/ocfs2/cluster/masklog.h
index 9b4d117..af1a610 100644
--- a/fs/ocfs2/cluster/masklog.h
+++ b/fs/ocfs2/cluster/masklog.h
@@ -118,6 +118,7 @@
 #define ML_ERROR	0x0000000100000000ULL /* sent to KERN_ERR */
 #define ML_NOTICE	0x0000000200000000ULL /* setn to KERN_NOTICE */
 #define ML_KTHREAD	0x0000000400000000ULL /* kernel thread activity */
+#define	ML_RESERVATIONS	0x0000000800000000ULL /* ocfs2 alloc reservations */
 
 #define MLOG_INITIAL_AND_MASK (ML_ERROR|ML_NOTICE)
 #define MLOG_INITIAL_NOT_MASK (ML_ENTRY|ML_EXIT)
diff --git a/fs/ocfs2/localalloc.c b/fs/ocfs2/localalloc.c
index ac10f83..2042680 100644
--- a/fs/ocfs2/localalloc.c
+++ b/fs/ocfs2/localalloc.c
@@ -52,7 +52,8 @@ static u32 ocfs2_local_alloc_count_bits(struct ocfs2_dinode *alloc);
 
 static int ocfs2_local_alloc_find_clear_bits(struct ocfs2_super *osb,
 					     struct ocfs2_dinode *alloc,
-					     u32 numbits);
+					     u32 numbits,
+					     struct ocfs2_alloc_reservation *resv);
 
 static void ocfs2_clear_local_alloc(struct ocfs2_dinode *alloc);
 
@@ -262,6 +263,8 @@ void ocfs2_shutdown_local_alloc(struct ocfs2_super *osb)
 
 	osb->local_alloc_state = OCFS2_LA_DISABLED;
 
+	ocfs2_resmap_uninit(&osb->osb_la_resmap);
+
 	main_bm_inode = ocfs2_get_system_file_inode(osb,
 						    GLOBAL_BITMAP_SYSTEM_INODE,
 						    OCFS2_INVALID_SLOT);
@@ -498,7 +501,7 @@ static int ocfs2_local_alloc_in_range(struct inode *inode,
 	alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data;
 	la = OCFS2_LOCAL_ALLOC(alloc);
 
-	start = ocfs2_local_alloc_find_clear_bits(osb, alloc, bits_wanted);
+	start = ocfs2_local_alloc_find_clear_bits(osb, alloc, bits_wanted, NULL);
 	if (start == -1) {
 		mlog_errno(-ENOSPC);
 		return 0;
@@ -664,7 +667,8 @@ int ocfs2_claim_local_alloc_bits(struct ocfs2_super *osb,
 	alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data;
 	la = OCFS2_LOCAL_ALLOC(alloc);
 
-	start = ocfs2_local_alloc_find_clear_bits(osb, alloc, bits_wanted);
+	start = ocfs2_local_alloc_find_clear_bits(osb, alloc, bits_wanted,
+						  ac->ac_resv);
 	if (start == -1) {
 		/* TODO: Shouldn't we just BUG here? */
 		status = -ENOSPC;
@@ -687,6 +691,9 @@ int ocfs2_claim_local_alloc_bits(struct ocfs2_super *osb,
 		goto bail;
 	}
 
+	ocfs2_resmap_claimed_bits(&osb->osb_la_resmap, ac->ac_resv, start,
+				  bits_wanted);
+
 	while(bits_wanted--)
 		ocfs2_set_bit(start++, bitmap);
 
@@ -722,11 +729,13 @@ static u32 ocfs2_local_alloc_count_bits(struct ocfs2_dinode *alloc)
 }
 
 static int ocfs2_local_alloc_find_clear_bits(struct ocfs2_super *osb,
-					     struct ocfs2_dinode *alloc,
-					     u32 numbits)
+				     struct ocfs2_dinode *alloc,
+				     u32 numbits,
+				     struct ocfs2_alloc_reservation *resv)
 {
 	int numfound, bitoff, left, startoff, lastzero;
 	void *bitmap = NULL;
+	struct ocfs2_reservation_map *resmap = &osb->osb_la_resmap;
 
 	mlog_entry("(numbits wanted = %u)\n", numbits);
 
@@ -738,6 +747,20 @@ static int ocfs2_local_alloc_find_clear_bits(struct ocfs2_super *osb,
 
 	bitmap = OCFS2_LOCAL_ALLOC(alloc)->la_bitmap;
 
+	/*
+	 * Ask the reservations code first whether this request can be
+	 * easily fulfilled. No errors here are fatal - if we didn't
+	 * find the number of bits needed, we'll just take the slow
+	 * path.
+	 */
+	if (ocfs2_resmap_resv_bits(resmap, resv, bitmap, &bitoff, &numfound)
+	    == 0) {
+		if (numfound >= numbits) {
+			numfound = numbits;
+			goto bail;
+		}
+	}
+
 	numfound = bitoff = startoff = 0;
 	lastzero = -1;
 	left = le32_to_cpu(alloc->id1.bitmap1.i_total);
@@ -772,8 +795,10 @@ static int ocfs2_local_alloc_find_clear_bits(struct ocfs2_super *osb,
 
 	if (numfound == numbits)
 		bitoff = startoff - numfound;
-	else
+	else {
+		numfound = 0;
 		bitoff = -1;
+	}
 
 bail:
 	mlog_exit(bitoff);
@@ -1096,6 +1121,8 @@ retry_enospc:
 	memset(OCFS2_LOCAL_ALLOC(alloc)->la_bitmap, 0,
 	       le16_to_cpu(la->la_size));
 
+	ocfs2_resmap_restart(&osb->osb_la_resmap, cluster_count);
+
 	mlog(0, "New window allocated:\n");
 	mlog(0, "window la_bm_off = %u\n",
 	     OCFS2_LOCAL_ALLOC(alloc)->la_bm_off);
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
index d963d86..5a6300f 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -46,6 +46,7 @@
 /* For struct ocfs2_blockcheck_stats */
 #include "blockcheck.h"
 
+#include "reservations.h"
 
 /* Caching of metadata buffers */
 
@@ -340,6 +341,10 @@ struct ocfs2_super
 
 	u64 la_last_gd;
 
+	struct ocfs2_reservation_map	osb_la_resmap;
+
+	unsigned int	osb_resv_level;
+
 	/* Next three fields are for local node slot recovery during
 	 * mount. */
 	int dirty;
diff --git a/fs/ocfs2/reservations.c b/fs/ocfs2/reservations.c
new file mode 100644
index 0000000..8879e28
--- /dev/null
+++ b/fs/ocfs2/reservations.c
@@ -0,0 +1,624 @@
+/* -*- mode: c; c-basic-offset: 8; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
+ * reservations.c
+ *
+ * Allocation reservations implementation
+ *
+ * Some code borrowed from fs/ext3/balloc.c
+ *
+ * Copyright (C) 2009 Novell.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+
+#include <linux/fs.h>
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/highmem.h>
+#include <linux/bitops.h>
+
+#define MLOG_MASK_PREFIX ML_RESERVATIONS
+#include <cluster/masklog.h>
+
+#include "ocfs2.h"
+
+#ifdef CONFIG_OCFS2_DEBUG_FS
+#define OCFS2_CHECK_RESERVATIONS
+#endif
+
+#define OCFS2_CHECK_RESERVATIONS
+
+
+DEFINE_SPINLOCK(resv_lock);
+
+static inline unsigned int ocfs2_resv_end(struct ocfs2_alloc_reservation *resv)
+{
+	if (resv->r_len)
+		return resv->r_start + resv->r_len - 1;
+	return resv->r_start;
+}
+
+static inline int ocfs2_resv_empty(struct ocfs2_alloc_reservation *resv)
+{
+	return !!(resv->r_len == 0);
+}
+
+static inline int ocfs2_resmap_disabled(struct ocfs2_reservation_map *resmap)
+{
+	if (resmap->m_osb->osb_resv_level == 0)
+		return 1;
+	return 0;
+}
+
+static void ocfs2_dump_resv(struct ocfs2_reservation_map *resmap)
+{
+	struct ocfs2_super *osb = resmap->m_osb;
+	struct rb_node *node;
+	struct ocfs2_alloc_reservation *resv;
+	int i = 0;
+
+	mlog(ML_NOTICE, "Dumping resmap for device %s. Bitmap length: %u\n",
+	     osb->dev_str, resmap->m_bitmap_len);
+
+	node = rb_first(&resmap->m_reservations);
+	while (node) {
+		resv = rb_entry(node, struct ocfs2_alloc_reservation, r_node);
+
+		mlog(ML_NOTICE, "start: %u\tend: %u\tlen: %u\tlast_start: %u"
+		     "\tlast_len: %u\tallocated: %u\n", resv->r_start,
+		     ocfs2_resv_end(resv), resv->r_len, resv->r_last_start,
+		     resv->r_last_len, resv->r_allocated);
+
+		node = rb_next(node);
+		i++;
+	}
+
+	mlog(ML_NOTICE, "%d reservations found\n", i);
+}
+
+#ifdef OCFS2_CHECK_RESERVATIONS
+static void ocfs2_check_resmap(struct ocfs2_reservation_map *resmap)
+{
+	unsigned int off = 0;
+	int i = 0;
+	struct rb_node *node;
+	struct ocfs2_alloc_reservation *resv;
+
+	node = rb_first(&resmap->m_reservations);
+	while (node) {
+		resv = rb_entry(node, struct ocfs2_alloc_reservation, r_node);
+
+		if (i > 0 && resv->r_start <= off) {
+			mlog(ML_ERROR, "reservation %d has bad start off!\n",
+			     i);
+			goto bad;
+		}
+
+		if (resv->r_len == 0) {
+			mlog(ML_ERROR, "reservation %d has no length!\n",
+			     i);
+			goto bad;
+		}
+
+		if (resv->r_start > ocfs2_resv_end(resv)) {
+			mlog(ML_ERROR, "reservation %d has invalid range!\n",
+			     i);
+			goto bad;
+		}
+
+		if (ocfs2_resv_end(resv) > resmap->m_bitmap_len) {
+			mlog(ML_ERROR, "reservation %d extends past bitmap!\n",
+			     i);
+			goto bad;
+		}
+
+		off = ocfs2_resv_end(resv);
+		node = rb_next(node);
+
+		i++;
+	}
+	return;
+
+bad:
+	ocfs2_dump_resv(resmap);
+	BUG();
+}
+#else
+static inline void ocfs2_check_resmap(struct ocfs2_reservation_map *resmap)
+{
+
+}
+#endif
+
+void ocfs2_resv_init_once(struct ocfs2_alloc_reservation *resv)
+{
+	memset(resv, 0, sizeof(*resv));
+}
+
+int ocfs2_resmap_init(struct ocfs2_super *osb,
+		      struct ocfs2_reservation_map *resmap)
+{
+	memset(resmap, 0, sizeof(*resmap));
+
+	resmap->m_osb = osb;
+	resmap->m_reservations = RB_ROOT;
+	/* m_bitmap_len is initialized to zero by the above memset. */
+
+	return 0;
+}
+
+static void __ocfs2_resv_trunc(struct ocfs2_alloc_reservation *resv)
+{
+	resv->r_len = 0;
+	resv->r_allocated = 0;
+}
+
+static void __ocfs2_resv_discard(struct ocfs2_reservation_map *resmap,
+				 struct ocfs2_alloc_reservation *resv)
+{
+		__ocfs2_resv_trunc(resv);
+		rb_erase(&resv->r_node, &resmap->m_reservations);
+}
+
+/* does nothing if 'resv' is null */
+void ocfs2_resv_discard(struct ocfs2_reservation_map *resmap,
+			struct ocfs2_alloc_reservation *resv)
+{
+	if (resv) {
+		spin_lock(&resv_lock);
+		__ocfs2_resv_discard(resmap, resv);
+		spin_unlock(&resv_lock);
+	}
+}
+
+static void ocfs2_resmap_clear_all_resv(struct ocfs2_reservation_map *resmap)
+{
+	struct rb_node *node;
+	struct ocfs2_alloc_reservation *resv;
+
+	assert_spin_locked(&resv_lock);
+
+	while ((node = rb_last(&resmap->m_reservations)) != NULL) {
+		resv = rb_entry(node, struct ocfs2_alloc_reservation, r_node);
+
+		__ocfs2_resv_trunc(resv);
+		rb_erase(&resv->r_node, &resmap->m_reservations);
+	}
+}
+
+/* If any parameters have changed, this function will call
+ * ocfs2_resv_trunc against all existing reservations. */
+void ocfs2_resmap_restart(struct ocfs2_reservation_map *resmap,
+			  unsigned int clen)
+{
+	if (ocfs2_resmap_disabled(resmap))
+		return;
+
+	spin_lock(&resv_lock);
+
+	ocfs2_resmap_clear_all_resv(resmap);
+	resmap->m_bitmap_len = clen;
+
+	spin_unlock(&resv_lock);
+}
+
+void ocfs2_resmap_uninit(struct ocfs2_reservation_map *resmap)
+{
+	/* Does nothing for now. Keep this around for API symmetry */
+}
+
+unsigned int ocfs2_resv_target_len(unsigned int last_len)
+{
+	if (last_len < 8)
+		return 8;
+
+	last_len += 4;
+
+	if (last_len > 32)
+		return 32;
+
+	return last_len;
+}
+
+static int ocfs2_try_to_extend_resv(struct ocfs2_reservation_map *resmap,
+				    struct ocfs2_alloc_reservation *my_resv)
+{
+	unsigned int available, avail_end;
+	struct rb_node *next, *node = &my_resv->r_node;
+	struct ocfs2_alloc_reservation *next_resv;
+
+	next = rb_next(node);
+
+	if (next) {
+		next_resv = rb_entry(next, struct ocfs2_alloc_reservation,
+				     r_node);
+		avail_end = next_resv->r_start;
+	} else {
+		avail_end = resmap->m_bitmap_len - 1;
+	}
+
+	if (ocfs2_resv_end(my_resv) == avail_end)
+		return -ENOENT;
+
+	available = avail_end - ocfs2_resv_end(my_resv) - 1;
+
+	if (available > 4)
+		available = 4;
+
+	my_resv->r_len += available;
+
+	ocfs2_check_resmap(resmap);
+
+	return 0;
+}
+
+static void ocfs2_resv_insert(struct ocfs2_reservation_map *resmap,
+			      struct ocfs2_alloc_reservation *new)
+{
+	struct rb_root *root = &resmap->m_reservations;
+	struct rb_node *parent = NULL;
+	struct rb_node **p = &root->rb_node;
+	struct ocfs2_alloc_reservation *tmp;
+
+	mlog(0, "Insert reservation start: %u len: %u\n", new->r_start,
+	     new->r_len);
+
+	while(*p) {
+		parent = *p;
+
+		tmp = rb_entry(parent, struct ocfs2_alloc_reservation, r_node);
+
+		if (new->r_start < tmp->r_start)
+			p = &(*p)->rb_left;
+		else if (new->r_start > ocfs2_resv_end(tmp))
+			p = &(*p)->rb_right;
+		else {
+			/* This should never happen! */
+			mlog(ML_ERROR, "Duplicate reservation window!\n");
+			BUG();
+		}
+	}
+
+	rb_link_node(&new->r_node, parent, p);
+	rb_insert_color(&new->r_node, root);
+
+	ocfs2_check_resmap(resmap);
+}
+
+/**
+ * ocfs2_find_resv() - find the window which contains goal
+ * @resmap: reservation map to search
+ * @goal: which bit to search for
+ *
+ * If a window containing that goal is not found, we return the window
+ * which comes before goal. Returns NULL on empty rbtree.
+ */
+static struct ocfs2_alloc_reservation *
+ocfs2_find_resv(struct ocfs2_reservation_map *resmap, unsigned int goal)
+{
+	struct ocfs2_alloc_reservation *resv;
+	struct rb_node *n = resmap->m_reservations.rb_node;
+
+	assert_spin_locked(&resv_lock);
+
+	if (!n)
+		return NULL;
+
+	do {
+		resv = rb_entry(n, struct ocfs2_alloc_reservation, r_node);
+
+		if (goal < resv->r_start)
+			n = n->rb_left;
+		else if (goal > ocfs2_resv_end(resv))
+			n = n->rb_right;
+		else
+			return resv;
+	} while (n);
+
+	/*
+	 * The goal sits on one end of the tree. If it's the leftmost
+	 * end, we return NULL.
+	 */
+	if (resv->r_start > goal)
+		return NULL;
+
+	return resv;
+}
+
+static void ocfs2_resv_find_window(struct ocfs2_reservation_map *resmap,
+				   struct ocfs2_alloc_reservation *resv)
+{
+	struct rb_root *root = &resmap->m_reservations;
+	unsigned int last_start = resv->r_last_start;
+	unsigned int last_end = last_start + resv->r_last_len - 1;
+	unsigned int goal;
+	unsigned int len = ocfs2_resv_target_len(resv->r_last_len);
+	unsigned int gap_start, gap_end, gap_len;
+	struct ocfs2_alloc_reservation *prev_resv, *next_resv;
+	struct rb_node *prev, *next;
+
+	/* Math above doesn't work otherwise... */
+	BUG_ON(resv->r_last_len == 0);
+
+	goal = last_end + 1;
+	if (goal >= resmap->m_bitmap_len)
+		goal = 0;
+
+	/*
+	 * Nasty cases to consider:
+	 *
+	 * - rbtree is empty
+	 * - our window should be first in all reservations
+	 * - our window should be last in all reservations
+	 * - need to make sure we don't go past end of bitmap
+	 */
+
+	assert_spin_locked(&resv_lock);
+
+	if (RB_EMPTY_ROOT(root)) {
+		/*
+		 * Easiest case - empty tree. We can just take
+		 * whatever window we want.
+		 */
+
+		mlog(0, "Empty root\n");
+
+		resv->r_start = goal;
+		resv->r_len = len;
+		if (ocfs2_resv_end(resv) >= resmap->m_bitmap_len)
+			resv->r_len = resmap->m_bitmap_len - resv->r_start;
+
+		ocfs2_resv_insert(resmap, resv);
+		return;
+	}
+
+	prev_resv = ocfs2_find_resv(resmap, goal);
+
+	if (prev_resv == NULL) {
+		mlog(0, "Farthest left window\n");
+
+		/* Ok, we're the farthest left window. */
+		next = rb_first(root);
+		next_resv = rb_entry(next, struct ocfs2_alloc_reservation,
+				     r_node);
+
+		/*
+		 * Try to allocate at far left of tree. If that
+		 * doesn't fit, we just start our linear search from
+		 * next_resv
+		 */
+		if (next_resv->r_start > (goal + len - 1)) {
+			resv->r_start = goal;
+			resv->r_len = len;
+
+			ocfs2_resv_insert(resmap, resv);
+			return;
+		}
+
+		prev_resv = next_resv;
+		next_resv = NULL;
+	}
+
+	prev = &prev_resv->r_node;
+
+	/* Now we do a linear search for a window, starting at 'prev_rsv' */
+	while (1) {
+		next = rb_next(prev);
+		if (next) {
+			mlog(0, "One more resv found in linear search\n");
+			next_resv = rb_entry(next,
+					     struct ocfs2_alloc_reservation,
+					     r_node);
+
+			gap_start = ocfs2_resv_end(prev_resv) + 1;
+			gap_end = next_resv->r_start - 1;
+			gap_len = gap_end - gap_start + 1;
+		} else {
+			mlog(0, "No next node\n");
+			/*
+			 * We're at the rightmost edge of the
+			 * tree. See if a reservation between this
+			 * window and the end of the bitmap will work.
+			 */
+			gap_start = ocfs2_resv_end(prev_resv) + 1;
+			gap_end = resmap->m_bitmap_len - 1;
+			gap_len = gap_end - gap_start + 1;
+		}
+
+		if (gap_start <= gap_end
+		    && gap_start >= goal
+		    && gap_len >= len) {
+			resv->r_start = gap_start;
+			resv->r_len = len;
+
+			ocfs2_resv_insert(resmap, resv);
+			return;
+		}
+
+		if (!next)
+			break;
+
+		prev = next;
+		prev_resv = rb_entry(prev, struct ocfs2_alloc_reservation,
+				     r_node);
+	}
+}
+
+void ocfs2_resmap_claimed_bits(struct ocfs2_reservation_map *resmap,
+			       struct ocfs2_alloc_reservation *resv,
+			       u32 cstart, u32 clen)
+{
+	unsigned int cend = cstart + clen - 1;
+
+	if (resmap == NULL || ocfs2_resmap_disabled(resmap))
+		return;
+
+	if (resv == NULL)
+		return;
+
+	spin_lock(&resv_lock);
+
+	mlog(0, "claim bits: cstart: %u cend: %u clen: %u r_start: %u "
+	     "r_end: %u r_len: %u, r_last_start: %u r_last_len: %u\n",
+	     cstart, cend, clen, resv->r_start, ocfs2_resv_end(resv),
+	     resv->r_len, resv->r_last_start, resv->r_last_len);
+
+	resv->r_last_len = clen;
+	resv->r_last_start = cstart;
+
+	if (ocfs2_resv_empty(resv)) {
+		mlog(0, "Empty reservation, find a new window.\n");
+		/*
+		 * Allocation occured without a window. We find an
+		 * initial reservation for this inode, based on what
+		 * was allocated already.
+		 */
+		ocfs2_resv_find_window(resmap, resv);
+		goto out_unlock;
+	}
+
+	/*
+	 * Did the allocation occur completely outside our
+	 * reservation? Clear it then. Otherwise, try to extend our
+	 * reservation or alloc a new one, if we've used all the bits.
+	 */
+	if (cend < resv->r_start ||
+	    cstart > ocfs2_resv_end(resv)) {
+		mlog(0, "Allocated outside reservation\n");
+
+		/* Truncate and remove reservation */
+		__ocfs2_resv_discard(resmap, resv);
+
+		if (cend < resv->r_start) {
+			/*
+			 * The window wasn't used for some reason. We
+			 * should start our search *past* it to give a
+			 * better chance the next window will be
+			 * used. Best way to do this right now is to
+			 * fool the search code...
+			 */
+			resv->r_last_start = ocfs2_resv_end(resv) + 1;
+			resv->r_last_len = 1;
+		}
+
+		ocfs2_resv_find_window(resmap, resv);
+		goto out_unlock;
+	}
+
+	/*
+	 * We allocated at least partially from our
+	 * reservation. Adjust it and try to extend. Otherwise, we
+	 * search for a new window.
+	 */
+
+	resv->r_allocated += clen;
+
+	if (cend < ocfs2_resv_end(resv)) {
+		u32 old_end;
+
+		mlog(0, "Allocation left at end\n");
+
+		/*
+		 * Partial allocation, leaving some bits free at
+		 * end. We move over the start of the window to take
+		 * this into account and try to extend it.
+		 */
+		old_end = ocfs2_resv_end(resv);
+		resv->r_start = cend + 1; /* Start just past last allocation */
+		resv->r_len = old_end - resv->r_start + 1;
+
+		if (ocfs2_try_to_extend_resv(resmap, resv) == 0)
+			goto out_unlock;
+	}
+
+	mlog(0, "discard reservation\n");
+
+	/*
+	 * No free bits at end or extend failed above. Truncate and
+	 * re-search for a new window.
+	 */
+
+	__ocfs2_resv_discard(resmap, resv);
+
+	ocfs2_resv_find_window(resmap, resv);
+
+out_unlock:
+	mlog(0, "Reservation now looks like: r_start: %u r_end: %u "
+	     "r_len: %u r_last_start: %u r_last_len: %u\n",
+	     resv->r_start, ocfs2_resv_end(resv), resv->r_len,
+	     resv->r_last_start, resv->r_last_len);
+
+	spin_unlock(&resv_lock);
+}
+
+int ocfs2_resmap_resv_bits(struct ocfs2_reservation_map *resmap,
+			   struct ocfs2_alloc_reservation *resv,
+			   char *disk_bitmap, int *cstart, int *clen)
+{
+	int ret = -ENOSPC;
+	unsigned int start, len, best_start = 0, best_len = 0;
+
+	if (resv == NULL || ocfs2_resmap_disabled(resmap))
+		return -ENOSPC;
+
+	spin_lock(&resv_lock);
+
+	if (ocfs2_resv_empty(resv)) {
+		/*
+		 * If resv is empty, we return zero bytes and allow
+		 * ocfs2_resmap_claimed_bits() to start our new reservation.
+		 */
+		*cstart = *clen = 0;
+		ret = 0;
+		goto out;
+	}
+
+	start = resv->r_start;
+	len = 0;
+
+	while (start <= ocfs2_resv_end(resv)) {
+		if (ocfs2_test_bit(start, disk_bitmap)) {
+			mlog(0,
+			     "Reservation was taken at bit %d\n",
+			     start + len);
+			best_len = 0;
+			goto next;
+		}
+
+		/* This is basic, but since the local alloc is
+		 * used very predictably, I think we're ok. */
+		if (!best_len) {
+			best_start = start;
+			best_len = 1;
+		} else {
+			best_len++;
+		}
+
+next:
+		start++;
+	}
+
+	if (best_len) {
+		ret = 0;
+		*cstart = best_start;
+		*clen = best_len;
+	}
+out:
+	spin_unlock(&resv_lock);
+
+	return ret;
+}
diff --git a/fs/ocfs2/reservations.h b/fs/ocfs2/reservations.h
new file mode 100644
index 0000000..1e170ae
--- /dev/null
+++ b/fs/ocfs2/reservations.h
@@ -0,0 +1,149 @@
+/* -*- mode: c; c-basic-offset: 8; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
+ * reservations.h
+ *
+ * Allocation reservations function prototypes and structures.
+ *
+ * Copyright (C) 2009 Novell.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+
+#ifndef	OCFS2_RESERVATIONS_H
+#define	OCFS2_RESERVATIONS_H
+
+#include <linux/rbtree.h>
+
+struct ocfs2_bitmap_resv_ops;
+
+#define OCFS2_DEFAULT_RESV_LEVEL	3
+#define OCFS2_MAX_RESV_LEVEL	7
+#define OCFS2_MIN_RESV_LEVEL	0
+
+struct ocfs2_alloc_reservation {
+	struct rb_node	r_node;
+
+	u32	r_start;
+	u32	r_len;
+
+	u32	r_last_len;
+	u32	r_last_start;
+
+	u32	r_allocated;
+};
+
+struct ocfs2_reservation_map {
+	struct rb_root		m_reservations;
+
+	struct ocfs2_super	*m_osb;
+
+	/* The following are not initialized to meaningful values until a disk
+	 * bitmap is provided. */
+	u32			m_bitmap_len;	/* Number of valid
+						 * bits available */
+};
+
+void ocfs2_resv_init_once(struct ocfs2_alloc_reservation *resv);
+
+/**
+ * ocfs2_resv_discard() - truncate a reservation
+ * @resmap:
+ * @resv: the reservation to truncate.
+ *
+ * After this function is called, the reservation will be empty, and
+ * unlinked from the rbtree.
+ */
+void ocfs2_resv_discard(struct ocfs2_reservation_map *resmap,
+			struct ocfs2_alloc_reservation *resv);
+
+
+/**
+ * ocfs2_resmap_init() - Initialize fields of a reservations bitmap
+ * @resmap: struct ocfs2_reservation_map to initialize
+ * @obj: unused for now
+ * @ops: unused for now
+ * @max_bitmap_bytes: Maximum size of the bitmap (typically blocksize)
+ *
+ * Only possible return value other than '0' is -ENOMEM for failure to
+ * allocation mirror bitmap.
+ */
+int ocfs2_resmap_init(struct ocfs2_super *osb,
+		      struct ocfs2_reservation_map *resmap);
+
+/**
+ * ocfs2_resmap_restart() - "restart" a reservation bitmap
+ * @resmap: reservations bitmap
+ * @clen: Number of valid bits in the bitmap
+ *
+ * Re-initialize the parameters of a reservation bitmap. This is
+ * useful for local alloc window slides.
+ * 
+ * If any bitmap parameters have changed, this function will call
+ * ocfs2_trunc_resv against all existing reservations. A future
+ * version will recalculate existing reservations based on the new
+ * bitmap.
+ */
+void ocfs2_resmap_restart(struct ocfs2_reservation_map *resmap,
+			  unsigned int clen);
+
+/**
+ * ocfs2_resmap_uninit() - uninitialize a reservation bitmap structure
+ * @resmap: the struct ocfs2_reservation_map to uninitialize
+ */
+void ocfs2_resmap_uninit(struct ocfs2_reservation_map *resmap);
+
+/**
+ * ocfs2_resmap_resv_bits() - Return still-valid reservation bits
+ * @resmap: reservations bitmap
+ * @resv: reservation to base search from
+ * @disk_bitmap: up to date (from disk) allocation bitmap
+ * @cstart: start of proposed allocation
+ * @clen: length (in clusters) of proposed allocation
+ *
+ * Using the reservation data from resv, this function will compare
+ * resmap and disk_bitmap to determine what part (if any) of the
+ * reservation window is still clear to use. An empty resv passed here
+ * will just return no allocation.
+ *
+ * On success, zero is returned and the valid allocation area is set in cstart
+ * and clen. If no allocation is found, they are set to zero.
+ *
+ * Returns nonzero on error.
+ */
+int ocfs2_resmap_resv_bits(struct ocfs2_reservation_map *resmap,
+			   struct ocfs2_alloc_reservation *resv,
+			   char *disk_bitmap, int *cstart, int *clen);
+
+/**
+ * ocfs2_resmap_claimed_bits() - Tell the reservation code that bits were used.
+ * @resmap: reservations bitmap
+ * @resv: optional reservation to recalulate based on new bitmap
+ * @cstart: start of allocation in clusters
+ * @clen: end of allocation in clusters.
+ *
+ * Tell the reservation code that bits were used to fulfill allocation in
+ * resmap. The bits don't have to have been part of any existing
+ * reservation. But we must always call this function when bits are claimed.
+ * Internally, the reservations code will use this information to mark the
+ * reservations bitmap. If resv is passed, it's next allocation window will be
+ * calculated.
+ */
+void ocfs2_resmap_claimed_bits(struct ocfs2_reservation_map *resmap,
+			       struct ocfs2_alloc_reservation *resv,
+			       u32 cstart, u32 clen);
+
+#endif	/* OCFS2_RESERVATIONS_H */
diff --git a/fs/ocfs2/suballoc.h b/fs/ocfs2/suballoc.h
index 8c9a78a..5eb7753 100644
--- a/fs/ocfs2/suballoc.h
+++ b/fs/ocfs2/suballoc.h
@@ -54,6 +54,8 @@ struct ocfs2_alloc_context {
 	u64    ac_last_group;
 	u64    ac_max_block;  /* Highest block number to allocate. 0 is
 				 is the same as ~0 - unlimited */
+
+	struct ocfs2_alloc_reservation	*ac_resv;
 };
 
 void ocfs2_free_alloc_context(struct ocfs2_alloc_context *ac);
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 14f47d2..62b46a2 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -94,6 +94,7 @@ struct mount_options
 	unsigned int	atime_quantum;
 	signed short	slot;
 	unsigned int	localalloc_opt;
+	unsigned int	resv_level;
 	char		cluster_stack[OCFS2_STACK_LABEL_LEN + 1];
 };
 
@@ -173,6 +174,7 @@ enum {
 	Opt_noacl,
 	Opt_usrquota,
 	Opt_grpquota,
+	Opt_resv_level,
 	Opt_err,
 };
 
@@ -199,6 +201,7 @@ static const match_table_t tokens = {
 	{Opt_noacl, "noacl"},
 	{Opt_usrquota, "usrquota"},
 	{Opt_grpquota, "grpquota"},
+	{Opt_resv_level, "resv_level=%u"},
 	{Opt_err, NULL}
 };
 
@@ -1036,6 +1039,7 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
 		     "filesystem does not have the feature enabled.\n");
 		goto read_super_error;
 	}
+	osb->osb_resv_level = parsed_options.resv_level;
 
 	status = ocfs2_verify_userspace_stack(osb, &parsed_options);
 	if (status)
@@ -1262,6 +1266,7 @@ static int ocfs2_parse_options(struct super_block *sb,
 	mopt->slot = OCFS2_INVALID_SLOT;
 	mopt->localalloc_opt = OCFS2_DEFAULT_LOCAL_ALLOC_SIZE;
 	mopt->cluster_stack[0] = '\0';
+	mopt->resv_level = OCFS2_DEFAULT_RESV_LEVEL;
 
 	if (!options) {
 		status = 1;
@@ -1426,6 +1431,18 @@ static int ocfs2_parse_options(struct super_block *sb,
 			printk(KERN_INFO "ocfs2 (no)acl options not supported\n");
 			break;
 #endif
+		case Opt_resv_level:
+			if (is_remount)
+				break;
+			if (match_int(&args[0], &option)) {
+				status = 0;
+				goto bail;
+			}
+			if (option >= OCFS2_MIN_RESV_LEVEL &&
+			    option < OCFS2_MAX_RESV_LEVEL)
+				mopt->resv_level = option;
+			break;
+
 		default:
 			mlog(ML_ERROR,
 			     "Unrecognized mount option \"%s\" "
@@ -1509,6 +1526,9 @@ static int ocfs2_show_options(struct seq_file *s, struct vfsmount *mnt)
 		seq_printf(s, ",noacl");
 #endif
 
+	if (osb->osb_resv_level != OCFS2_DEFAULT_RESV_LEVEL)
+		seq_printf(s, ",resv_level=%d", osb->osb_resv_level);
+
 	return 0;
 }
 
@@ -2037,6 +2057,12 @@ static int ocfs2_initialize_super(struct super_block *sb,
 
 	init_waitqueue_head(&osb->osb_mount_event);
 
+	status = ocfs2_resmap_init(osb, &osb->osb_la_resmap);
+	if (status) {
+		mlog_errno(status);
+		goto bail;
+	}
+
 	osb->vol_label = kmalloc(OCFS2_MAX_VOL_LABEL_LEN, GFP_KERNEL);
 	if (!osb->vol_label) {
 		mlog(ML_ERROR, "unable to alloc vol label\n");
-- 
1.5.6




More information about the Ocfs2-devel mailing list