[Ocfs2-tools-devel] [PATCH 13/13] fsck.ocfs2: Pre-cache dirblocks before we go through them.

Sunil Mushran sunil.mushran at oracle.com
Thu Jul 23 14:20:16 PDT 2009


Cute!

Signed-off-by: Sunil Mushran <sunil.mushran at oracle.com>


Joel Becker wrote:
> When we come out of pass 1, o2fsck has a sorted rbtree of dirblock
> addresses.  Pass 2 runs that list and checks each dirblock.  However,
> it currently reads them one block at a time.
>
> The basic operation of pass 2 is a simple loop that iterates the
> dirblocks in block number order.  It passes the dirblock to a callback
> that does the checking.  This callback reads the dirblock and the inode
> it belongs to.
>
> I tried three caching approaches:
>
> 1) Walk the dirblocks, collecting adjacent ones into single I/Os.  Read
>    them to pre-fill the cache.  When o2fsck_worth_caching() returns
>    false, we know we've filled the cache with dirblocks.  Go ahead and
>    process that many of them.  Then go back and read the next hunk of
>    dirblocks.  Keep repeating this until all dirblocks are processed.
>
> 2) The same as (1), except we pre-cache the inode associated with each
>    dirblock as well.
>
> 3) A simpler scheme where we just try to read the current dirblock and
>    any adjacent ones following it.  Then we process those blocks.  So
>    instead of "fill the cache, then process what's in the cache", this
>    is "one read, then process what we read".
>
> Approach (1) was the clear winner.  Depending on the cache size, (3) was
> either identical to or worse than (1).  Approach (2) was just plain worse.
> I think this was due to the seek penalty of going off to get the inode
> while pre-caching.  Without getting the inode, all our reads are in
> ascending order.  Obviously approach (1) has to go get the inode during
> the processing phase, but that doesn't impact the pre-cache reads.
>
> Signed-off-by: Joel Becker <joel.becker at oracle.com>
> ---
>  fsck.ocfs2/dirblocks.c         |   98 ++++++++++++++++++++++++++++++++++++++--
>  fsck.ocfs2/include/dirblocks.h |    5 +-
>  fsck.ocfs2/pass2.c             |    3 +-
>  3 files changed, 98 insertions(+), 8 deletions(-)
>
> diff --git a/fsck.ocfs2/dirblocks.c b/fsck.ocfs2/dirblocks.c
> index afabc11..1fd5560 100644
> --- a/fsck.ocfs2/dirblocks.c
> +++ b/fsck.ocfs2/dirblocks.c
> @@ -27,6 +27,7 @@
>  #include <string.h>
>  #include <stdarg.h>
>  #include <inttypes.h>
> +#include <assert.h>
>  
>  #include "ocfs2/ocfs2.h"
>  
> @@ -70,17 +71,106 @@ out:
>  	return ret;
>  }
>  
> -void o2fsck_dir_block_iterate(o2fsck_dirblocks *db, dirblock_iterator func,
> -				void *priv_data)
> +/*
> + * Go through the dirblocks pre-filling them.  We try to coalesce adjacent
> + * ones.  Don't care to return errors, because it's a cache pre-fill.
> + */
> +static int try_to_cache(ocfs2_filesys *fs, struct rb_node *node,
> +			char *pre_cache_buf, int pre_cache_blocks)
> +{
> +	int cached_blocks = 0;
> +	o2fsck_dirblock_entry *dbe;
> +	uint64_t io_blkno = 0, next_blkno = 0;
> +	int count = 0;
> +	errcode_t err;
> +	uint64_t blocks_seen = 0;
> +
> +	o2fsck_reset_blocks_cached();
> +	for (; node; node = rb_next(node)) {
> +		blocks_seen++;
> +		dbe = rb_entry(node, o2fsck_dirblock_entry, e_node);
> +		if (io_blkno) {
> +			assert(count);
> +			assert(next_blkno > io_blkno);
> +
> +			if ((next_blkno == dbe->e_blkno) &&
> +			    (count < pre_cache_blocks)) {
> +				count++;
> +				next_blkno++;
> +				continue;
> +			}
> +
> +			if (!o2fsck_worth_caching(count)) {
> +				io_blkno = 0;
> +				break;
> +			}
> +
> +			err = ocfs2_read_blocks(fs, io_blkno, count,
> +						pre_cache_buf);
> +			io_blkno = 0;
> +			next_blkno = 0;
> +
> +			if (err)
> +				break;
> +
> +			cached_blocks += count;
> +			count = 0;
> +		}
> +
> +		assert(!io_blkno);
> +		io_blkno = dbe->e_blkno;
> +		next_blkno = io_blkno + 1;
> +		count = 1;
> +	}
> +
> +	/* Catch the last pre-fill buffer */
> +	if (io_blkno && o2fsck_worth_caching(count)) {
> +		assert(count);
> +		err = ocfs2_read_blocks(fs, io_blkno, count, pre_cache_buf);
> +		if (!err)
> +			cached_blocks += count;
> +	}
> +
> +	return cached_blocks;
> +}
> +
> +void o2fsck_dir_block_iterate(o2fsck_state *ost, dirblock_iterator func,
> +			      void *priv_data)
>  {
> +	o2fsck_dirblocks *db = &ost->ost_dirblocks;
> +	ocfs2_filesys *fs = ost->ost_fs;
>  	o2fsck_dirblock_entry *dbe;
>  	struct rb_node *node;
>  	unsigned ret;
> +	errcode_t err;
> +	char *pre_cache_buf = NULL;
> +	int pre_cache_blocks = ocfs2_blocks_in_bytes(fs, 1024 * 1024);
> +	int cached_blocks = 0;
> +
> +	o2fsck_reset_blocks_cached();
> +	if (o2fsck_worth_caching(1)) {
> +		err = ocfs2_malloc_blocks(fs->fs_io, pre_cache_blocks,
> +					  &pre_cache_buf);
> +		if (err)
> +			verbosef("Unable to allocate dirblock pre-cache "
> +				 "buffer, %s\n",
> +				 "ignoring");
> +	}
> +
> +	for (node = rb_first(&db->db_root); node; node = rb_next(node)) {
> +		if (!cached_blocks && pre_cache_buf)
> +			cached_blocks = try_to_cache(fs, node, pre_cache_buf,
> +						     pre_cache_blocks);
>  
> -	for(node = rb_first(&db->db_root); node; node = rb_next(node)) {
>  		dbe = rb_entry(node, o2fsck_dirblock_entry, e_node);
>  		ret = func(dbe, priv_data);
>  		if (ret & OCFS2_DIRENT_ABORT)
> -			return;
> +			break;
> +
> +		if (cached_blocks)
> +			cached_blocks--;
>  	}
> +
> +	if (pre_cache_buf)
> +		ocfs2_free(&pre_cache_buf);
>  }
> diff --git a/fsck.ocfs2/include/dirblocks.h b/fsck.ocfs2/include/dirblocks.h
> index cdd9741..7b3a2e9 100644
> --- a/fsck.ocfs2/include/dirblocks.h
> +++ b/fsck.ocfs2/include/dirblocks.h
> @@ -44,8 +44,9 @@ typedef unsigned (*dirblock_iterator)(o2fsck_dirblock_entry *,
>  errcode_t o2fsck_add_dir_block(o2fsck_dirblocks *db, uint64_t ino,
>  			       uint64_t blkno, uint64_t blkcount);
>  
> -void o2fsck_dir_block_iterate(o2fsck_dirblocks *db, dirblock_iterator func,
> -				void *priv_data);
> +struct _o2fsck_state;
> +void o2fsck_dir_block_iterate(struct _o2fsck_state *ost, dirblock_iterator func,
> +                              void *priv_data);
>  		     
>  
>  #endif /* __O2FSCK_DIRBLOCKS_H__ */
> diff --git a/fsck.ocfs2/pass2.c b/fsck.ocfs2/pass2.c
> index 2a1805e..14b1911 100644
> --- a/fsck.ocfs2/pass2.c
> +++ b/fsck.ocfs2/pass2.c
> @@ -888,8 +888,7 @@ errcode_t o2fsck_pass2(o2fsck_state *ost)
>  	if (dp)
>  		dp->dp_dirent = ost->ost_fs->fs_sysdir_blkno;
>  
> -	o2fsck_dir_block_iterate(&ost->ost_dirblocks, pass2_dir_block_iterate, 
> -			 	 &dd);
> +	o2fsck_dir_block_iterate(ost, pass2_dir_block_iterate, &dd);
>  	o2fsck_strings_free(&dd.strings);
>  out:
>  	if (dd.dirblock_buf)
>   




More information about the Ocfs2-tools-devel mailing list