[Ocfs2-tools-devel] [PATCH 13/13] fsck.ocfs2: Pre-cache dirblocks before we go through them.
Sunil Mushran
sunil.mushran at oracle.com
Thu Jul 23 14:20:16 PDT 2009
Cute!
Signed-off-by: Sunil Mushran <sunil.mushran at oracle.com>
Joel Becker wrote:
> When we come out of pass 1, o2fsck has a sorted rbtree of dirblock
> addresses. Pass 2 runs that list and checks each dirblock. However,
> it currently reads them one block at a time.
>
> The basic operation of pass 2 is a simple loop that iterates the
> dirblocks in block number order. It passes the dirblock to a callback
> that does the checking. This callback reads the dirblock and the inode
> it belongs to.
>
> I tried three caching approaches:
>
> 1) Walk the dirblocks, collecting adjacent ones into single I/Os. Read
> them to pre-fill the cache. When o2fsck_worth_caching() returns
> false, we know we've filled the cache with dirblocks. Go ahead and
> process that many of them. Then go back and read the next hunk of
> dirblocks. Keep repeating this until all dirblocks are processed.
>
> 2) The same as (1), except we pre-cache the inode associated with each
> dirblock as well.
>
> 3) A simpler scheme where we just try to read the current dirblock and
> any adjacent ones following it. Then we process those blocks. So
> instead of "fill the cache, then process what's in the cache", this
> is "one read, then process what we read".
>
> Approach (1) was the clear winner. Depending on the cache size, (3) was
> either identical to or worse than (1). Approach (2) was just plain worse.
> I think this was due to the seek penalty of going off to get the inode
> while pre-caching. Without getting the inode, all our reads are in
> ascending order. Obviously approach (1) has to go get the inode during
> the processing phase, but that doesn't impact the pre-cache reads.
>
> Signed-off-by: Joel Becker <joel.becker at oracle.com>
> ---
> fsck.ocfs2/dirblocks.c | 98 ++++++++++++++++++++++++++++++++++++++--
> fsck.ocfs2/include/dirblocks.h | 5 +-
> fsck.ocfs2/pass2.c | 3 +-
> 3 files changed, 98 insertions(+), 8 deletions(-)
>
> diff --git a/fsck.ocfs2/dirblocks.c b/fsck.ocfs2/dirblocks.c
> index afabc11..1fd5560 100644
> --- a/fsck.ocfs2/dirblocks.c
> +++ b/fsck.ocfs2/dirblocks.c
> @@ -27,6 +27,7 @@
> #include <string.h>
> #include <stdarg.h>
> #include <inttypes.h>
> +#include <assert.h>
>
> #include "ocfs2/ocfs2.h"
>
> @@ -70,17 +71,106 @@ out:
> return ret;
> }
>
> -void o2fsck_dir_block_iterate(o2fsck_dirblocks *db, dirblock_iterator func,
> - void *priv_data)
> +/*
> + * Go through the dirblocks pre-filling them. We try to coalesce adjacent
> + * ones. Don't care to return errors, because it's a cache pre-fill.
> + */
> +static int try_to_cache(ocfs2_filesys *fs, struct rb_node *node,
> + char *pre_cache_buf, int pre_cache_blocks)
> +{
> + int cached_blocks = 0;
> + o2fsck_dirblock_entry *dbe;
> + uint64_t io_blkno = 0, next_blkno = 0;
> + int count = 0;
> + errcode_t err;
> + uint64_t blocks_seen = 0;
> +
> + o2fsck_reset_blocks_cached();
> + for (; node; node = rb_next(node)) {
> + blocks_seen++;
> + dbe = rb_entry(node, o2fsck_dirblock_entry, e_node);
> + if (io_blkno) {
> + assert(count);
> + assert(next_blkno > io_blkno);
> +
> + if ((next_blkno == dbe->e_blkno) &&
> + (count < pre_cache_blocks)) {
> + count++;
> + next_blkno++;
> + continue;
> + }
> +
> + if (!o2fsck_worth_caching(count)) {
> + io_blkno = 0;
> + break;
> + }
> +
> + err = ocfs2_read_blocks(fs, io_blkno, count,
> + pre_cache_buf);
> + io_blkno = 0;
> + next_blkno = 0;
> +
> + if (err)
> + break;
> +
> + cached_blocks += count;
> + count = 0;
> + }
> +
> + assert(!io_blkno);
> + io_blkno = dbe->e_blkno;
> + next_blkno = io_blkno + 1;
> + count = 1;
> + }
> +
> + /* Catch the last pre-fill buffer */
> + if (io_blkno && o2fsck_worth_caching(count)) {
> + assert(count);
> + err = ocfs2_read_blocks(fs, io_blkno, count, pre_cache_buf);
> + if (!err)
> + cached_blocks += count;
> + }
> +
> + return cached_blocks;
> +}
> +
> +void o2fsck_dir_block_iterate(o2fsck_state *ost, dirblock_iterator func,
> + void *priv_data)
> {
> + o2fsck_dirblocks *db = &ost->ost_dirblocks;
> + ocfs2_filesys *fs = ost->ost_fs;
> o2fsck_dirblock_entry *dbe;
> struct rb_node *node;
> unsigned ret;
> + errcode_t err;
> + char *pre_cache_buf = NULL;
> + int pre_cache_blocks = ocfs2_blocks_in_bytes(fs, 1024 * 1024);
> + int cached_blocks = 0;
> +
> + o2fsck_reset_blocks_cached();
> + if (o2fsck_worth_caching(1)) {
> + err = ocfs2_malloc_blocks(fs->fs_io, pre_cache_blocks,
> + &pre_cache_buf);
> + if (err)
> + verbosef("Unable to allocate dirblock pre-cache "
> + "buffer, %s\n",
> + "ignoring");
> + }
> +
> + for (node = rb_first(&db->db_root); node; node = rb_next(node)) {
> + if (!cached_blocks && pre_cache_buf)
> + cached_blocks = try_to_cache(fs, node, pre_cache_buf,
> + pre_cache_blocks);
>
> - for(node = rb_first(&db->db_root); node; node = rb_next(node)) {
> dbe = rb_entry(node, o2fsck_dirblock_entry, e_node);
> ret = func(dbe, priv_data);
> if (ret & OCFS2_DIRENT_ABORT)
> - return;
> + break;
> +
> + if (cached_blocks)
> + cached_blocks--;
> }
> +
> + if (pre_cache_buf)
> + ocfs2_free(&pre_cache_buf);
> }
> diff --git a/fsck.ocfs2/include/dirblocks.h b/fsck.ocfs2/include/dirblocks.h
> index cdd9741..7b3a2e9 100644
> --- a/fsck.ocfs2/include/dirblocks.h
> +++ b/fsck.ocfs2/include/dirblocks.h
> @@ -44,8 +44,9 @@ typedef unsigned (*dirblock_iterator)(o2fsck_dirblock_entry *,
> errcode_t o2fsck_add_dir_block(o2fsck_dirblocks *db, uint64_t ino,
> uint64_t blkno, uint64_t blkcount);
>
> -void o2fsck_dir_block_iterate(o2fsck_dirblocks *db, dirblock_iterator func,
> - void *priv_data);
> +struct _o2fsck_state;
> +void o2fsck_dir_block_iterate(struct _o2fsck_state *ost, dirblock_iterator func,
> + void *priv_data);
>
>
> #endif /* __O2FSCK_DIRBLOCKS_H__ */
> diff --git a/fsck.ocfs2/pass2.c b/fsck.ocfs2/pass2.c
> index 2a1805e..14b1911 100644
> --- a/fsck.ocfs2/pass2.c
> +++ b/fsck.ocfs2/pass2.c
> @@ -888,8 +888,7 @@ errcode_t o2fsck_pass2(o2fsck_state *ost)
> if (dp)
> dp->dp_dirent = ost->ost_fs->fs_sysdir_blkno;
>
> - o2fsck_dir_block_iterate(&ost->ost_dirblocks, pass2_dir_block_iterate,
> - &dd);
> + o2fsck_dir_block_iterate(ost, pass2_dir_block_iterate, &dd);
> o2fsck_strings_free(&dd.strings);
> out:
> if (dd.dirblock_buf)
>
More information about the Ocfs2-tools-devel mailing list