[Ocfs2-tools-devel] [PATCH 11/11] fsck.ocfs2: Pre-cache dirblocks before we go through them.

Joel Becker joel.becker at oracle.com
Fri May 22 18:00:26 PDT 2009


When we come out of pass 1, o2fsck has a sorted rbtree of dirblock
addresses.  Pass 2 walks that tree and checks each dirblock.  However,
pass 2 currently reads the dirblocks one block at a time.

The basic operation of pass 2 is a simple loop that iterates the
dirblocks in block number order.  It passes the dirblock to a callback
that does the checking.  This callback reads the dirblock and the inode
it belongs to.

I tried three caching approaches:

1) Walk the dirblocks, collecting adjacent ones into single I/Os.  Read
   them to pre-fill the cache.  When o2fsck_worth_caching() returns
   false, we know we've filled the cache with dirblocks.  Go ahead and
   process that many of them.  Then go back and read the next hunk of
   dirblocks.  Keep repeating this until all dirblocks are processed.

2) The same as (1), except we pre-cache the inode associated with each
   dirblock as well.

3) A simpler scheme where we just try to read the current dirblock and
   any adjacent ones following it.  Then we process those blocks.  So
   instead of "fill the cache, then process what's in the cache", this
   is "one read, then process what we read".

Approach (1) was the clear winner, and it is what this patch implements.
Depending on the cache size, (3) was either identical to or worse than
(1).  Approach (2) was just plain worse.  I think this was due to the
seek penalty of going off to get the inode while pre-caching.  Without
getting the inode, all our reads are in ascending order.  Obviously
approach (1) has to go get the inode during the processing phase, but
that doesn't impact the pre-cache reads.

Signed-off-by: Joel Becker <joel.becker at oracle.com>
---
 fsck.ocfs2/dirblocks.c         |   98 ++++++++++++++++++++++++++++++++++++++--
 fsck.ocfs2/include/dirblocks.h |    5 +-
 fsck.ocfs2/pass2.c             |    3 +-
 3 files changed, 98 insertions(+), 8 deletions(-)

diff --git a/fsck.ocfs2/dirblocks.c b/fsck.ocfs2/dirblocks.c
index afabc11..1fd5560 100644
--- a/fsck.ocfs2/dirblocks.c
+++ b/fsck.ocfs2/dirblocks.c
@@ -27,6 +27,7 @@
 #include <string.h>
 #include <stdarg.h>
 #include <inttypes.h>
+#include <assert.h>
 
 #include "ocfs2/ocfs2.h"
 
@@ -70,17 +71,106 @@ out:
 	return ret;
 }
 
-void o2fsck_dir_block_iterate(o2fsck_dirblocks *db, dirblock_iterator func,
-				void *priv_data)
+/*
+ * Go through the dirblocks pre-filling them.  We try to coalesce adjacent
+ * ones.  Don't care to return errors, because it's a cache pre-fill.
+ */
+static int try_to_cache(ocfs2_filesys *fs, struct rb_node *node,
+			char *pre_cache_buf, int pre_cache_blocks)
+{
+	int cached_blocks = 0;
+	o2fsck_dirblock_entry *dbe;
+	uint64_t io_blkno = 0, next_blkno = 0;
+	int count = 0;
+	errcode_t err;
+	uint64_t blocks_seen = 0;
+
+	o2fsck_reset_blocks_cached();
+	for (; node; node = rb_next(node)) {
+		blocks_seen++;
+		dbe = rb_entry(node, o2fsck_dirblock_entry, e_node);
+		if (io_blkno) {
+			assert(count);
+			assert(next_blkno > io_blkno);
+
+			if ((next_blkno == dbe->e_blkno) &&
+			    (count < pre_cache_blocks)) {
+				count++;
+				next_blkno++;
+				continue;
+			}
+
+			if (!o2fsck_worth_caching(count)) {
+				io_blkno = 0;
+				break;
+			}
+
+			err = ocfs2_read_blocks(fs, io_blkno, count,
+						pre_cache_buf);
+			io_blkno = 0;
+			next_blkno = 0;
+
+			if (err)
+				break;
+
+			cached_blocks += count;
+			count = 0;
+		}
+
+		assert(!io_blkno);
+		io_blkno = dbe->e_blkno;
+		next_blkno = io_blkno + 1;
+		count = 1;
+	}
+
+	/* Catch the last pre-fill buffer */
+	if (io_blkno && o2fsck_worth_caching(count)) {
+		assert(count);
+		err = ocfs2_read_blocks(fs, io_blkno, count, pre_cache_buf);
+		if (!err)
+			cached_blocks += count;
+	}
+
+	return cached_blocks;
+}
+
+void o2fsck_dir_block_iterate(o2fsck_state *ost, dirblock_iterator func,
+			      void *priv_data)
 {
+	o2fsck_dirblocks *db = &ost->ost_dirblocks;
+	ocfs2_filesys *fs = ost->ost_fs;
 	o2fsck_dirblock_entry *dbe;
 	struct rb_node *node;
 	unsigned ret;
+	errcode_t err;
+	char *pre_cache_buf = NULL;
+	int pre_cache_blocks = ocfs2_blocks_in_bytes(fs, 1024 * 1024);
+	int cached_blocks = 0;
+
+	o2fsck_reset_blocks_cached();
+	if (o2fsck_worth_caching(1)) {
+		err = ocfs2_malloc_blocks(fs->fs_io, pre_cache_blocks,
+					  &pre_cache_buf);
+		if (err)
+			verbosef("Unable to allocate dirblock pre-cache "
+				 "buffer, %s\n",
+				 "ignoring");
+	}
+
+	for (node = rb_first(&db->db_root); node; node = rb_next(node)) {
+		if (!cached_blocks && pre_cache_buf)
+			cached_blocks = try_to_cache(fs, node, pre_cache_buf,
+						     pre_cache_blocks);
 
-	for(node = rb_first(&db->db_root); node; node = rb_next(node)) {
 		dbe = rb_entry(node, o2fsck_dirblock_entry, e_node);
 		ret = func(dbe, priv_data);
 		if (ret & OCFS2_DIRENT_ABORT)
-			return;
+			break;
+
+		if (cached_blocks)
+			cached_blocks--;
 	}
+
+	if (pre_cache_buf)
+		ocfs2_free(&pre_cache_buf);
 }
diff --git a/fsck.ocfs2/include/dirblocks.h b/fsck.ocfs2/include/dirblocks.h
index cdd9741..7b3a2e9 100644
--- a/fsck.ocfs2/include/dirblocks.h
+++ b/fsck.ocfs2/include/dirblocks.h
@@ -44,8 +44,9 @@ typedef unsigned (*dirblock_iterator)(o2fsck_dirblock_entry *,
 errcode_t o2fsck_add_dir_block(o2fsck_dirblocks *db, uint64_t ino,
 			       uint64_t blkno, uint64_t blkcount);
 
-void o2fsck_dir_block_iterate(o2fsck_dirblocks *db, dirblock_iterator func,
-				void *priv_data);
+struct _o2fsck_state;
+void o2fsck_dir_block_iterate(struct _o2fsck_state *ost, dirblock_iterator func,
+                              void *priv_data);
 		     
 
 #endif /* __O2FSCK_DIRBLOCKS_H__ */
diff --git a/fsck.ocfs2/pass2.c b/fsck.ocfs2/pass2.c
index 2a1805e..14b1911 100644
--- a/fsck.ocfs2/pass2.c
+++ b/fsck.ocfs2/pass2.c
@@ -888,8 +888,7 @@ errcode_t o2fsck_pass2(o2fsck_state *ost)
 	if (dp)
 		dp->dp_dirent = ost->ost_fs->fs_sysdir_blkno;
 
-	o2fsck_dir_block_iterate(&ost->ost_dirblocks, pass2_dir_block_iterate, 
-			 	 &dd);
+	o2fsck_dir_block_iterate(ost, pass2_dir_block_iterate, &dd);
 	o2fsck_strings_free(&dd.strings);
 out:
 	if (dd.dirblock_buf)
-- 
1.6.3




More information about the Ocfs2-tools-devel mailing list