[Ocfs2-tools-devel] [PATCH 2/3] tunefs.ocfs2: Size the cache appropriately for large operations.
Sunil Mushran
sunil.mushran at oracle.com
Mon Jul 20 16:31:42 PDT 2009
Signed-off-by: Sunil Mushran <sunil.mushran at oracle.com>
Joel Becker wrote:
> This introduces the flag TUNEFS_FLAG_LARGECACHE. Operations that do not
> use this flag get a small I/O cache. Operations that specify the flag
> get a cache as big as they can allocate, up to the size of the
> filesystem. This should speed up operations that have to scan the whole
> disk in more than one pass.
>
> Signed-off-by: Joel Becker <joel.becker at oracle.com>
> ---
> tunefs.ocfs2/feature_inline_data.c | 3 +-
> tunefs.ocfs2/feature_metaecc.c | 3 +-
> tunefs.ocfs2/feature_sparse_files.c | 3 +-
> tunefs.ocfs2/feature_unwritten_extents.c | 3 +-
> tunefs.ocfs2/feature_xattr.c | 3 +-
> tunefs.ocfs2/libocfs2ne.c | 72 +++++++++++++++++++++++++++---
> tunefs.ocfs2/libocfs2ne.h | 2 +
> 7 files changed, 78 insertions(+), 11 deletions(-)
>
> diff --git a/tunefs.ocfs2/feature_inline_data.c b/tunefs.ocfs2/feature_inline_data.c
> index ea8d682..77ddb42 100644
> --- a/tunefs.ocfs2/feature_inline_data.c
> +++ b/tunefs.ocfs2/feature_inline_data.c
> @@ -277,7 +277,8 @@ out:
>
> DEFINE_TUNEFS_FEATURE_INCOMPAT(inline_data,
> OCFS2_FEATURE_INCOMPAT_INLINE_DATA,
> - TUNEFS_FLAG_RW | TUNEFS_FLAG_ALLOCATION,
> + TUNEFS_FLAG_RW | TUNEFS_FLAG_ALLOCATION |
> + TUNEFS_FLAG_LARGECACHE,
> enable_inline_data,
> disable_inline_data);
>
> diff --git a/tunefs.ocfs2/feature_metaecc.c b/tunefs.ocfs2/feature_metaecc.c
> index 0d88de7..c4de8be 100644
> --- a/tunefs.ocfs2/feature_metaecc.c
> +++ b/tunefs.ocfs2/feature_metaecc.c
> @@ -1436,7 +1436,8 @@ out:
>
> DEFINE_TUNEFS_FEATURE_INCOMPAT(metaecc,
> OCFS2_FEATURE_INCOMPAT_META_ECC,
> - TUNEFS_FLAG_RW | TUNEFS_FLAG_ALLOCATION,
> + TUNEFS_FLAG_RW | TUNEFS_FLAG_ALLOCATION |
> + TUNEFS_FLAG_LARGECACHE,
> enable_metaecc,
> disable_metaecc);
>
> diff --git a/tunefs.ocfs2/feature_sparse_files.c b/tunefs.ocfs2/feature_sparse_files.c
> index 044523f..ddb966f 100644
> --- a/tunefs.ocfs2/feature_sparse_files.c
> +++ b/tunefs.ocfs2/feature_sparse_files.c
> @@ -559,7 +559,8 @@ out:
>
> DEFINE_TUNEFS_FEATURE_INCOMPAT(sparse_files,
> OCFS2_FEATURE_INCOMPAT_SPARSE_ALLOC,
> - TUNEFS_FLAG_RW | TUNEFS_FLAG_ALLOCATION,
> + TUNEFS_FLAG_RW | TUNEFS_FLAG_ALLOCATION |
> + TUNEFS_FLAG_LARGECACHE,
> enable_sparse_files,
> disable_sparse_files);
>
> diff --git a/tunefs.ocfs2/feature_unwritten_extents.c b/tunefs.ocfs2/feature_unwritten_extents.c
> index a43b308..ea096c5 100644
> --- a/tunefs.ocfs2/feature_unwritten_extents.c
> +++ b/tunefs.ocfs2/feature_unwritten_extents.c
> @@ -196,7 +196,8 @@ out:
>
> DEFINE_TUNEFS_FEATURE_RO_COMPAT(unwritten_extents,
> OCFS2_FEATURE_RO_COMPAT_UNWRITTEN,
> - TUNEFS_FLAG_RW | TUNEFS_FLAG_ALLOCATION,
> + TUNEFS_FLAG_RW | TUNEFS_FLAG_ALLOCATION |
> + TUNEFS_FLAG_LARGECACHE,
> enable_unwritten_extents,
> disable_unwritten_extents);
>
> diff --git a/tunefs.ocfs2/feature_xattr.c b/tunefs.ocfs2/feature_xattr.c
> index 28989cf..8eef7df 100644
> --- a/tunefs.ocfs2/feature_xattr.c
> +++ b/tunefs.ocfs2/feature_xattr.c
> @@ -446,7 +446,8 @@ out:
>
> DEFINE_TUNEFS_FEATURE_INCOMPAT(xattr,
> OCFS2_FEATURE_INCOMPAT_XATTR,
> - TUNEFS_FLAG_RW | TUNEFS_FLAG_ALLOCATION,
> + TUNEFS_FLAG_RW | TUNEFS_FLAG_ALLOCATION |
> + TUNEFS_FLAG_LARGECACHE,
> enable_xattr,
> disable_xattr);
>
> diff --git a/tunefs.ocfs2/libocfs2ne.c b/tunefs.ocfs2/libocfs2ne.c
> index fb800cb..f588596 100644
> --- a/tunefs.ocfs2/libocfs2ne.c
> +++ b/tunefs.ocfs2/libocfs2ne.c
> @@ -447,7 +447,8 @@ errcode_t tunefs_empty_clusters(ocfs2_filesys *fs, uint64_t start_blk,
> memset(buf, 0, io_blocks * fs->fs_blocksize);
>
> while (total_blocks) {
> - ret = io_write_block(fs->fs_io, start_blk, io_blocks, buf);
> + ret = io_write_block_nocache(fs->fs_io, start_blk,
> + io_blocks, buf);
> if (ret)
> goto bail;
>
> @@ -1250,6 +1251,69 @@ static void tunefs_close_online_descriptor(ocfs2_filesys *fs)
> }
> }
>
> +/*
> + * If io_init_cache fails, we will go do the work without the
> + * io_cache, so there is no check for failure here.
> + */
> +static void tunefs_init_cache(ocfs2_filesys *fs)
> +{
> + errcode_t err;
> + struct tunefs_private *tp = to_private(fs);
> + uint64_t blocks_wanted;
> + int scale_down;
> +
> + /*
> + * Operations needing a large cache really want enough to
> + * hold the whole filesystem in memory. The rest of the
> + * operations don't need much at all. A cache big enough to
> + * hold a chain allocator group should be enough. Our largest
> + * chain allocator is 4MB, so let's do 8MB and allow for
> + * incidental blocks.
> + */
> + if (tp->tp_open_flags & TUNEFS_FLAG_LARGECACHE)
> + blocks_wanted = fs->fs_blocks;
> + else
> + blocks_wanted = ocfs2_blocks_in_bytes(fs, 8 * 1024 * 1024);
> +
> + /*
> + * We don't want to exhaust memory, so we start with twice our
> + * actual need. When we find out how much we can get, we actually
> + * get half that.
> + */
> + blocks_wanted <<= 1;
> + scale_down = 1;
> +
> + while (blocks_wanted > 0) {
> + io_destroy_cache(fs->fs_io);
> + verbosef(VL_LIB,
> + "Asking for %"PRIu64" blocks of I/O cache\n",
> + blocks_wanted);
> + err = io_init_cache(fs->fs_io, blocks_wanted);
> + if (!err) {
> + /*
> + * We want to pin our cache; there's no point in
> + * having a large cache if half of it is in swap.
> + * However, some callers may not be privileged
> + * enough, so once we get down to a small enough
> + * number (512 blocks), we'll stop caring.
> + */
> + err = io_mlock_cache(fs->fs_io);
> + if (err && (blocks_wanted <= 512))
> + err = 0;
> + }
> + if (!err) {
> + verbosef(VL_LIB, "Got %"PRIu64" blocks\n",
> + blocks_wanted);
> + /* If we've already scaled down, we're done. */
> + if (!scale_down)
> + break;
> + scale_down = 0;
> + }
> +
> + blocks_wanted >>= 1;
> + }
> +}
> +
> static errcode_t tunefs_add_fs(ocfs2_filesys *fs, int flags)
> {
> errcode_t err;
> @@ -1375,13 +1439,9 @@ errcode_t tunefs_open(const char *device, int flags,
> * If this tunefs run has both special and regular operations,
> * ocfs2ne will retry with the regular arguments and will get
> * the cache for the regular operations.
> - *
> - * If io_init_cache failed, we will go do the work without the
> - * io_cache, so there is no check for failure here.
> */
> if (!err)
> - io_init_cache(fs->fs_io,
> - ocfs2_extent_recs_per_eb(fs->fs_blocksize));
> + tunefs_init_cache(fs);
>
> /*
> * SKIPCLUSTER operations don't check the journals - they couldn't
> diff --git a/tunefs.ocfs2/libocfs2ne.h b/tunefs.ocfs2/libocfs2ne.h
> index 6138724..dfc6d26 100644
> --- a/tunefs.ocfs2/libocfs2ne.h
> +++ b/tunefs.ocfs2/libocfs2ne.h
> @@ -73,6 +73,8 @@
> allocator */
> #define TUNEFS_FLAG_SKIPCLUSTER 0x10 /* Operation cannot start the
> cluster stack */
> +#define TUNEFS_FLAG_LARGECACHE 0x20 /* Operation needs a large I/O
> + cache */
>
>
> /* What to do with a feature */
>
More information about the Ocfs2-tools-devel mailing list