[Ocfs2-tools-devel] [PATCH 2/3] tunefs.ocfs2: Size the cache appropriately for large operations.

Joel Becker joel.becker at oracle.com
Fri Jun 12 20:18:43 PDT 2009


This introduces the flag TUNEFS_FLAG_LARGECACHE.  Operations that do not
use this flag get a small I/O cache (about 8MB).  Operations that specify
the flag get a cache as big as they can allocate and pin in memory, up to
the size of the filesystem.  This should speed up operations that have to
scan the whole disk in more than one pass.

Signed-off-by: Joel Becker <joel.becker at oracle.com>
---
 tunefs.ocfs2/feature_inline_data.c       |    3 +-
 tunefs.ocfs2/feature_metaecc.c           |    3 +-
 tunefs.ocfs2/feature_sparse_files.c      |    3 +-
 tunefs.ocfs2/feature_unwritten_extents.c |    3 +-
 tunefs.ocfs2/feature_xattr.c             |    3 +-
 tunefs.ocfs2/libocfs2ne.c                |   72 +++++++++++++++++++++++++++---
 tunefs.ocfs2/libocfs2ne.h                |    2 +
 7 files changed, 78 insertions(+), 11 deletions(-)

diff --git a/tunefs.ocfs2/feature_inline_data.c b/tunefs.ocfs2/feature_inline_data.c
index ea8d682..77ddb42 100644
--- a/tunefs.ocfs2/feature_inline_data.c
+++ b/tunefs.ocfs2/feature_inline_data.c
@@ -277,7 +277,8 @@ out:
 
 DEFINE_TUNEFS_FEATURE_INCOMPAT(inline_data,
 			       OCFS2_FEATURE_INCOMPAT_INLINE_DATA,
-			       TUNEFS_FLAG_RW | TUNEFS_FLAG_ALLOCATION,
+			       TUNEFS_FLAG_RW | TUNEFS_FLAG_ALLOCATION |
+			       TUNEFS_FLAG_LARGECACHE,
 			       enable_inline_data,
 			       disable_inline_data);
 
diff --git a/tunefs.ocfs2/feature_metaecc.c b/tunefs.ocfs2/feature_metaecc.c
index 0d88de7..c4de8be 100644
--- a/tunefs.ocfs2/feature_metaecc.c
+++ b/tunefs.ocfs2/feature_metaecc.c
@@ -1436,7 +1436,8 @@ out:
 
 DEFINE_TUNEFS_FEATURE_INCOMPAT(metaecc,
 			       OCFS2_FEATURE_INCOMPAT_META_ECC,
-			       TUNEFS_FLAG_RW | TUNEFS_FLAG_ALLOCATION,
+			       TUNEFS_FLAG_RW | TUNEFS_FLAG_ALLOCATION |
+			       TUNEFS_FLAG_LARGECACHE,
 			       enable_metaecc,
 			       disable_metaecc);
 
diff --git a/tunefs.ocfs2/feature_sparse_files.c b/tunefs.ocfs2/feature_sparse_files.c
index 044523f..ddb966f 100644
--- a/tunefs.ocfs2/feature_sparse_files.c
+++ b/tunefs.ocfs2/feature_sparse_files.c
@@ -559,7 +559,8 @@ out:
 
 DEFINE_TUNEFS_FEATURE_INCOMPAT(sparse_files,
 			       OCFS2_FEATURE_INCOMPAT_SPARSE_ALLOC,
-			       TUNEFS_FLAG_RW | TUNEFS_FLAG_ALLOCATION,
+			       TUNEFS_FLAG_RW | TUNEFS_FLAG_ALLOCATION |
+			       TUNEFS_FLAG_LARGECACHE,
 			       enable_sparse_files,
 			       disable_sparse_files);
 
diff --git a/tunefs.ocfs2/feature_unwritten_extents.c b/tunefs.ocfs2/feature_unwritten_extents.c
index a43b308..ea096c5 100644
--- a/tunefs.ocfs2/feature_unwritten_extents.c
+++ b/tunefs.ocfs2/feature_unwritten_extents.c
@@ -196,7 +196,8 @@ out:
 
 DEFINE_TUNEFS_FEATURE_RO_COMPAT(unwritten_extents,
 				OCFS2_FEATURE_RO_COMPAT_UNWRITTEN,
-				TUNEFS_FLAG_RW | TUNEFS_FLAG_ALLOCATION,
+				TUNEFS_FLAG_RW | TUNEFS_FLAG_ALLOCATION |
+				TUNEFS_FLAG_LARGECACHE,
 				enable_unwritten_extents,
 				disable_unwritten_extents);
 
diff --git a/tunefs.ocfs2/feature_xattr.c b/tunefs.ocfs2/feature_xattr.c
index 28989cf..8eef7df 100644
--- a/tunefs.ocfs2/feature_xattr.c
+++ b/tunefs.ocfs2/feature_xattr.c
@@ -446,7 +446,8 @@ out:
 
 DEFINE_TUNEFS_FEATURE_INCOMPAT(xattr,
 			       OCFS2_FEATURE_INCOMPAT_XATTR,
-			       TUNEFS_FLAG_RW | TUNEFS_FLAG_ALLOCATION,
+			       TUNEFS_FLAG_RW | TUNEFS_FLAG_ALLOCATION |
+			       TUNEFS_FLAG_LARGECACHE,
 			       enable_xattr,
 			       disable_xattr);
 
diff --git a/tunefs.ocfs2/libocfs2ne.c b/tunefs.ocfs2/libocfs2ne.c
index fb800cb..f588596 100644
--- a/tunefs.ocfs2/libocfs2ne.c
+++ b/tunefs.ocfs2/libocfs2ne.c
@@ -447,7 +447,8 @@ errcode_t tunefs_empty_clusters(ocfs2_filesys *fs, uint64_t start_blk,
 	memset(buf, 0, io_blocks * fs->fs_blocksize);
 
 	while (total_blocks) {
-		ret = io_write_block(fs->fs_io, start_blk, io_blocks, buf);
+		ret = io_write_block_nocache(fs->fs_io, start_blk,
+					     io_blocks, buf);
 		if (ret)
 			goto bail;
 
@@ -1250,6 +1251,69 @@ static void tunefs_close_online_descriptor(ocfs2_filesys *fs)
 	}
 }
 
+/*
+ * Set up the I/O cache for fs, sized by TUNEFS_FLAG_LARGECACHE.  If no
+ * size works, we go do the work without the io_cache; nothing is returned.
+ */
+static void tunefs_init_cache(ocfs2_filesys *fs)
+{
+	errcode_t err;
+	struct tunefs_private *tp = to_private(fs);
+	uint64_t blocks_wanted;
+	int scale_down;
+
+	/*
+	 * Operations needing a large cache really want enough to
+	 * hold the whole filesystem in memory.  The rest of the
+	 * operations don't need much at all.  A cache big enough to
+	 * hold a chain allocator group should be enough.  Our largest
+	 * chain allocator is 4MB, so let's do 8MB and allow for
+	 * incidental blocks.
+	 */
+	if (tp->tp_open_flags & TUNEFS_FLAG_LARGECACHE)
+		blocks_wanted = fs->fs_blocks;
+	else
+		blocks_wanted = ocfs2_blocks_in_bytes(fs, 8 * 1024 * 1024);
+
+	/*
+	 * We don't want to exhaust memory, so we start by asking for twice
+	 * our actual need and halve the request after each failure.  After
+	 * the first success we halve once more and keep what we get then.
+	 */
+	blocks_wanted <<= 1;
+	scale_down = 1;
+
+	while (blocks_wanted > 0) {
+		io_destroy_cache(fs->fs_io);
+		verbosef(VL_LIB,
+			 "Asking for %"PRIu64" blocks of I/O cache\n",
+			 blocks_wanted);
+		err = io_init_cache(fs->fs_io, blocks_wanted);
+		if (!err) {
+			/*
+			 * We want to pin our cache; there's no point in
+			 * having a large cache if half of it is in swap.
+			 * However, some callers may not be privileged
+			 * enough, so at 512 blocks or fewer we ignore a
+			 * failed pin.  Above that we retry with less.
+			 */
+			err = io_mlock_cache(fs->fs_io);
+			if (err && (blocks_wanted <= 512))
+				err = 0;
+		}
+		if (!err) {
+			verbosef(VL_LIB, "Got %"PRIu64" blocks\n",
+				 blocks_wanted);
+			/* Second success (after the scale-down): keep it. */
+			if (!scale_down)
+				break;
+			scale_down = 0;
+		}
+
+		blocks_wanted >>= 1;
+	}
+}
+
 static errcode_t tunefs_add_fs(ocfs2_filesys *fs, int flags)
 {
 	errcode_t err;
@@ -1375,13 +1439,9 @@ errcode_t tunefs_open(const char *device, int flags,
 	 * If this tunefs run has both special and regular operations,
 	 * ocfs2ne will retry with the regular arguments and will get
 	 * the cache for the regular operations.
-	 *
-	 * If io_init_cache failed, we will go do the work without the
-	 * io_cache, so there is no check for failure here.
 	 */
 	if (!err)
-		io_init_cache(fs->fs_io,
-			      ocfs2_extent_recs_per_eb(fs->fs_blocksize));
+		tunefs_init_cache(fs);
 
 	/*
 	 * SKIPCLUSTER operations don't check the journals - they couldn't
diff --git a/tunefs.ocfs2/libocfs2ne.h b/tunefs.ocfs2/libocfs2ne.h
index 6138724..dfc6d26 100644
--- a/tunefs.ocfs2/libocfs2ne.h
+++ b/tunefs.ocfs2/libocfs2ne.h
@@ -73,6 +73,8 @@
 					   allocator */
 #define TUNEFS_FLAG_SKIPCLUSTER	0x10	/* Operation cannot start the
 					   cluster stack */
+#define TUNEFS_FLAG_LARGECACHE	0x20	/* Operation needs a large I/O
+					   cache */
 
 
 /* What to do with a feature */
-- 
1.6.3.1




More information about the Ocfs2-tools-devel mailing list