[Ocfs2-tools-devel] [PATCH 2/3] tunefs.ocfs2: Size the cache appropriately for large operations.

Sunil Mushran sunil.mushran at oracle.com
Mon Jul 20 16:31:42 PDT 2009


Signed-off-by: Sunil Mushran <sunil.mushran at oracle.com>


Joel Becker wrote:
> This introduces the flag TUNEFS_FLAG_LARGECACHE.  Operations that do not
> use this flag get a small I/O cache.  Operations that specify the flag
> get a cache as big as they can allocate, up to the size of the
> filesystem.  This should speed up operations that have to scan the whole
> disk in more than one pass.
>
> Signed-off-by: Joel Becker <joel.becker at oracle.com>
> ---
>  tunefs.ocfs2/feature_inline_data.c       |    3 +-
>  tunefs.ocfs2/feature_metaecc.c           |    3 +-
>  tunefs.ocfs2/feature_sparse_files.c      |    3 +-
>  tunefs.ocfs2/feature_unwritten_extents.c |    3 +-
>  tunefs.ocfs2/feature_xattr.c             |    3 +-
>  tunefs.ocfs2/libocfs2ne.c                |   72 +++++++++++++++++++++++++++---
>  tunefs.ocfs2/libocfs2ne.h                |    2 +
>  7 files changed, 78 insertions(+), 11 deletions(-)
>
> diff --git a/tunefs.ocfs2/feature_inline_data.c b/tunefs.ocfs2/feature_inline_data.c
> index ea8d682..77ddb42 100644
> --- a/tunefs.ocfs2/feature_inline_data.c
> +++ b/tunefs.ocfs2/feature_inline_data.c
> @@ -277,7 +277,8 @@ out:
>  
>  DEFINE_TUNEFS_FEATURE_INCOMPAT(inline_data,
>  			       OCFS2_FEATURE_INCOMPAT_INLINE_DATA,
> -			       TUNEFS_FLAG_RW | TUNEFS_FLAG_ALLOCATION,
> +			       TUNEFS_FLAG_RW | TUNEFS_FLAG_ALLOCATION |
> +			       TUNEFS_FLAG_LARGECACHE,
>  			       enable_inline_data,
>  			       disable_inline_data);
>  
> diff --git a/tunefs.ocfs2/feature_metaecc.c b/tunefs.ocfs2/feature_metaecc.c
> index 0d88de7..c4de8be 100644
> --- a/tunefs.ocfs2/feature_metaecc.c
> +++ b/tunefs.ocfs2/feature_metaecc.c
> @@ -1436,7 +1436,8 @@ out:
>  
>  DEFINE_TUNEFS_FEATURE_INCOMPAT(metaecc,
>  			       OCFS2_FEATURE_INCOMPAT_META_ECC,
> -			       TUNEFS_FLAG_RW | TUNEFS_FLAG_ALLOCATION,
> +			       TUNEFS_FLAG_RW | TUNEFS_FLAG_ALLOCATION |
> +			       TUNEFS_FLAG_LARGECACHE,
>  			       enable_metaecc,
>  			       disable_metaecc);
>  
> diff --git a/tunefs.ocfs2/feature_sparse_files.c b/tunefs.ocfs2/feature_sparse_files.c
> index 044523f..ddb966f 100644
> --- a/tunefs.ocfs2/feature_sparse_files.c
> +++ b/tunefs.ocfs2/feature_sparse_files.c
> @@ -559,7 +559,8 @@ out:
>  
>  DEFINE_TUNEFS_FEATURE_INCOMPAT(sparse_files,
>  			       OCFS2_FEATURE_INCOMPAT_SPARSE_ALLOC,
> -			       TUNEFS_FLAG_RW | TUNEFS_FLAG_ALLOCATION,
> +			       TUNEFS_FLAG_RW | TUNEFS_FLAG_ALLOCATION |
> +			       TUNEFS_FLAG_LARGECACHE,
>  			       enable_sparse_files,
>  			       disable_sparse_files);
>  
> diff --git a/tunefs.ocfs2/feature_unwritten_extents.c b/tunefs.ocfs2/feature_unwritten_extents.c
> index a43b308..ea096c5 100644
> --- a/tunefs.ocfs2/feature_unwritten_extents.c
> +++ b/tunefs.ocfs2/feature_unwritten_extents.c
> @@ -196,7 +196,8 @@ out:
>  
>  DEFINE_TUNEFS_FEATURE_RO_COMPAT(unwritten_extents,
>  				OCFS2_FEATURE_RO_COMPAT_UNWRITTEN,
> -				TUNEFS_FLAG_RW | TUNEFS_FLAG_ALLOCATION,
> +				TUNEFS_FLAG_RW | TUNEFS_FLAG_ALLOCATION |
> +				TUNEFS_FLAG_LARGECACHE,
>  				enable_unwritten_extents,
>  				disable_unwritten_extents);
>  
> diff --git a/tunefs.ocfs2/feature_xattr.c b/tunefs.ocfs2/feature_xattr.c
> index 28989cf..8eef7df 100644
> --- a/tunefs.ocfs2/feature_xattr.c
> +++ b/tunefs.ocfs2/feature_xattr.c
> @@ -446,7 +446,8 @@ out:
>  
>  DEFINE_TUNEFS_FEATURE_INCOMPAT(xattr,
>  			       OCFS2_FEATURE_INCOMPAT_XATTR,
> -			       TUNEFS_FLAG_RW | TUNEFS_FLAG_ALLOCATION,
> +			       TUNEFS_FLAG_RW | TUNEFS_FLAG_ALLOCATION |
> +			       TUNEFS_FLAG_LARGECACHE,
>  			       enable_xattr,
>  			       disable_xattr);
>  
> diff --git a/tunefs.ocfs2/libocfs2ne.c b/tunefs.ocfs2/libocfs2ne.c
> index fb800cb..f588596 100644
> --- a/tunefs.ocfs2/libocfs2ne.c
> +++ b/tunefs.ocfs2/libocfs2ne.c
> @@ -447,7 +447,8 @@ errcode_t tunefs_empty_clusters(ocfs2_filesys *fs, uint64_t start_blk,
>  	memset(buf, 0, io_blocks * fs->fs_blocksize);
>  
>  	while (total_blocks) {
> -		ret = io_write_block(fs->fs_io, start_blk, io_blocks, buf);
> +		ret = io_write_block_nocache(fs->fs_io, start_blk,
> +					     io_blocks, buf);
>  		if (ret)
>  			goto bail;
>  
> @@ -1250,6 +1251,69 @@ static void tunefs_close_online_descriptor(ocfs2_filesys *fs)
>  	}
>  }
>  
> +/*
> + * If io_init_cache fails, we will go do the work without the
> + * io_cache, so there is no check for failure here.
> + */
> +static void tunefs_init_cache(ocfs2_filesys *fs)
> +{
> +	errcode_t err;
> +	struct tunefs_private *tp = to_private(fs);
> +	uint64_t blocks_wanted;
> +	int scale_down;
> +
> +	/*
> +	 * Operations needing a large cache really want enough to
> +	 * hold the whole filesystem in memory.  The rest of the
> +	 * operations don't need much at all.  A cache big enough to
> +	 * hold a chain allocator group should be enough.  Our largest
> +	 * chain allocator is 4MB, so let's do 8MB and allow for
> +	 * incidental blocks.
> +	 */
> +	if (tp->tp_open_flags & TUNEFS_FLAG_LARGECACHE)
> +		blocks_wanted = fs->fs_blocks;
> +	else
> +		blocks_wanted = ocfs2_blocks_in_bytes(fs, 8 * 1024 * 1024);
> +
> +	/*
> +	 * We don't want to exhaust memory, so we first probe with twice
> +	 * our actual need.  Once an allocation of some size succeeds, we
> +	 * settle for a cache of half that size.
> +	 */
> +	blocks_wanted <<= 1;
> +	scale_down = 1;
> +
> +	while (blocks_wanted > 0) {
> +		io_destroy_cache(fs->fs_io);
> +		verbosef(VL_LIB,
> +			 "Asking for %"PRIu64" blocks of I/O cache\n",
> +			 blocks_wanted);
> +		err = io_init_cache(fs->fs_io, blocks_wanted);
> +		if (!err) {
> +			/*
> +			 * We want to pin our cache; there's no point in
> +			 * having a large cache if half of it is in swap.
> +			 * However, some callers may not be privileged
> +			 * enough, so once we get down to a small enough
> +			 * number (512 blocks), we'll stop caring.
> +			 */
> +			err = io_mlock_cache(fs->fs_io);
> +			if (err && (blocks_wanted <= 512))
> +				err = 0;
> +		}
> +		if (!err) {
> +			verbosef(VL_LIB, "Got %"PRIu64" blocks\n",
> +				 blocks_wanted);
> +			/* If we've already scaled down, we're done. */
> +			if (!scale_down)
> +				break;
> +			scale_down = 0;
> +		}
> +
> +		blocks_wanted >>= 1;
> +	}
> +}
> +
>  static errcode_t tunefs_add_fs(ocfs2_filesys *fs, int flags)
>  {
>  	errcode_t err;
> @@ -1375,13 +1439,9 @@ errcode_t tunefs_open(const char *device, int flags,
>  	 * If this tunefs run has both special and regular operations,
>  	 * ocfs2ne will retry with the regular arguments and will get
>  	 * the cache for the regular operations.
> -	 *
> -	 * If io_init_cache failed, we will go do the work without the
> -	 * io_cache, so there is no check for failure here.
>  	 */
>  	if (!err)
> -		io_init_cache(fs->fs_io,
> -			      ocfs2_extent_recs_per_eb(fs->fs_blocksize));
> +		tunefs_init_cache(fs);
>  
>  	/*
>  	 * SKIPCLUSTER operations don't check the journals - they couldn't
> diff --git a/tunefs.ocfs2/libocfs2ne.h b/tunefs.ocfs2/libocfs2ne.h
> index 6138724..dfc6d26 100644
> --- a/tunefs.ocfs2/libocfs2ne.h
> +++ b/tunefs.ocfs2/libocfs2ne.h
> @@ -73,6 +73,8 @@
>  					   allocator */
>  #define TUNEFS_FLAG_SKIPCLUSTER	0x10	/* Operation cannot start the
>  					   cluster stack */
> +#define TUNEFS_FLAG_LARGECACHE	0x20	/* Operation needs a large I/O
> +					   cache */
>  
>  
>  /* What to do with a feature */
>   




More information about the Ocfs2-tools-devel mailing list