[Ocfs2-tools-devel] [PATCH 02/11] fsck.ocfs2: Reverse earlier commits wrt pre-caching
Sunil Mushran
sunil.mushran at oracle.com
Fri Sep 30 12:22:12 PDT 2011
This patch reverses three earlier commits. This has been done because the
information gleaned from the added statistics shows a significant performance
hit when the file system metadata is larger than the cache size.
The test was run on a 2TB volume having 15M files. The cache size was 820MB.
Before:
I/O read disk/cache: 138736MB / 793MB, write: 0MB, rate: 17.46MB/s
Times real: 7989.577s, user: 430.803s, sys: 110.852s
After:
I/O read disk/cache: 68724MB / 261MB, write: 0MB, rate: 12.65MB/s
Times real: 5452.819s, user: 368.226s, sys: 73.978s
Later patches will address the performance issue, still mostly by pre-caching,
but with the pre-caching done slightly differently.
The patches reversed (mostly) are:
commit 7fd354d5bd63370316088267fb9832800f4c9b53
fsck.ocfs2: Pre-fill the I/O cache with metadata.
commit 69223be5af7868605c5d681ad64ccf63838f4858
fsck.ocfs2: Pre-cache inodes in reverse order.
commit 1fa5d9dea32caf99efb4e0811a48655f24938468
fsck.ocfs2: Pre-cache dirblocks before we go through them.
Signed-off-by: Sunil Mushran <sunil.mushran at oracle.com>
---
fsck.ocfs2/dirblocks.c | 88 ------------------------------------------------
fsck.ocfs2/pass0.c | 62 +++++-----------------------------
2 files changed, 9 insertions(+), 141 deletions(-)
diff --git a/fsck.ocfs2/dirblocks.c b/fsck.ocfs2/dirblocks.c
index 085dd1f..1e9fbb1 100644
--- a/fsck.ocfs2/dirblocks.c
+++ b/fsck.ocfs2/dirblocks.c
@@ -70,69 +70,6 @@ out:
return ret;
}
-/*
- * Go through the dirblocks pre-filling them. We try to coalesce adjacent
- * ones. Don't care to return errors, because it's a cache pre-fill.
- */
-static int try_to_cache(ocfs2_filesys *fs, struct rb_node *node,
- char *pre_cache_buf, int pre_cache_blocks)
-{
- int cached_blocks = 0;
- o2fsck_dirblock_entry *dbe;
- uint64_t io_blkno = 0, next_blkno = 0;
- int count = 0;
- errcode_t err;
- uint64_t blocks_seen = 0;
-
- o2fsck_reset_blocks_cached();
- for (; node; node = rb_next(node)) {
- blocks_seen++;
- dbe = rb_entry(node, o2fsck_dirblock_entry, e_node);
- if (io_blkno) {
- assert(count);
- assert(next_blkno > io_blkno);
-
- if ((next_blkno == dbe->e_blkno) &&
- (count < pre_cache_blocks)) {
- count++;
- next_blkno++;
- continue;
- }
-
- if (!o2fsck_worth_caching(count)) {
- io_blkno = 0;
- break;
- }
-
- err = ocfs2_read_blocks(fs, io_blkno, count,
- pre_cache_buf);
- io_blkno = 0;
- next_blkno = 0;
-
- if (err)
- break;
-
- cached_blocks += count;
- count = 0;
- }
-
- assert(!io_blkno);
- io_blkno = dbe->e_blkno;
- next_blkno = io_blkno + 1;
- count = 1;
- }
-
- /* Catch the last pre-fill buffer */
- if (io_blkno && o2fsck_worth_caching(count)) {
- assert(count);
- err = ocfs2_read_blocks(fs, io_blkno, count, pre_cache_buf);
- if (!err)
- cached_blocks += count;
- }
-
- return cached_blocks;
-}
-
uint64_t o2fsck_search_reidx_dir(struct rb_root *root, uint64_t dino)
{
struct rb_node *node = root->rb_node;
@@ -204,41 +141,16 @@ void o2fsck_dir_block_iterate(o2fsck_state *ost, dirblock_iterator func,
void *priv_data)
{
o2fsck_dirblocks *db = &ost->ost_dirblocks;
- ocfs2_filesys *fs = ost->ost_fs;
o2fsck_dirblock_entry *dbe;
struct rb_node *node;
unsigned ret;
- errcode_t err;
- char *pre_cache_buf = NULL;
- int pre_cache_blocks = ocfs2_blocks_in_bytes(fs, 1024 * 1024);
- int cached_blocks = 0;
-
- o2fsck_reset_blocks_cached();
- if (o2fsck_worth_caching(1)) {
- err = ocfs2_malloc_blocks(fs->fs_io, pre_cache_blocks,
- &pre_cache_buf);
- if (err)
- verbosef("Unable to allocate dirblock pre-cache "
- "buffer, %s\n",
- "ignoring");
- }
for (node = rb_first(&db->db_root); node; node = rb_next(node)) {
- if (!cached_blocks && pre_cache_buf)
- cached_blocks = try_to_cache(fs, node, pre_cache_buf,
- pre_cache_blocks);
-
dbe = rb_entry(node, o2fsck_dirblock_entry, e_node);
ret = func(dbe, priv_data);
if (ret & OCFS2_DIRENT_ABORT)
break;
-
- if (cached_blocks)
- cached_blocks--;
}
-
- if (pre_cache_buf)
- ocfs2_free(&pre_cache_buf);
}
static errcode_t ocfs2_rebuild_indexed_dir(ocfs2_filesys *fs, uint64_t ino)
diff --git a/fsck.ocfs2/pass0.c b/fsck.ocfs2/pass0.c
index 3d8957e..6187d77 100644
--- a/fsck.ocfs2/pass0.c
+++ b/fsck.ocfs2/pass0.c
@@ -673,7 +673,6 @@ static errcode_t check_chain(o2fsck_state *ost,
struct ocfs2_chain_rec *chain,
char *buf1,
char *buf2,
- char *pre_cache_buf,
int *chain_changed,
ocfs2_bitmap *allowed,
ocfs2_bitmap *forbidden)
@@ -683,8 +682,6 @@ static errcode_t check_chain(o2fsck_state *ost,
uint64_t blkno;
errcode_t ret = 0;
int depth = 0, clear_ref = 0;
- int blocks_per_group = ocfs2_clusters_to_blocks(ost->ost_fs,
- cs->cs_cpg);
verbosef("free %u total %u blkno %"PRIu64"\n", chain->c_free,
chain->c_total, (uint64_t)chain->c_blkno);
@@ -739,15 +736,6 @@ static errcode_t check_chain(o2fsck_state *ost,
* the read below.. */
}
- /*
- * Pre-cache the entire group. Don't care about failure.
- * If it works, the following ocfs2_read_group_desc() will
- * get the block out of the cache.
- */
- if (pre_cache_buf)
- ocfs2_read_blocks(ost->ost_fs, blkno,
- blocks_per_group, pre_cache_buf);
-
ret = ocfs2_read_group_desc(ost->ost_fs, blkno, (char *)bg2);
if (ret == OCFS2_ET_BAD_GROUP_DESC_MAGIC) {
if (prompt(ost, PY, PR_CHAIN_LINK_MAGIC,
@@ -855,13 +843,12 @@ out:
static errcode_t verify_chain_alloc(o2fsck_state *ost,
struct ocfs2_dinode *di,
char *buf1, char *buf2,
- char *pre_cache_buf,
ocfs2_bitmap *allowed,
ocfs2_bitmap *forbidden)
{
struct chain_state cs = {0, };
struct ocfs2_chain_list *cl;
- int i, max_count;
+ uint16_t i, max_count;
struct ocfs2_chain_rec *cr;
uint32_t free = 0, total = 0;
int changed = 0, trust_next_free = 1;
@@ -952,13 +939,7 @@ static errcode_t verify_chain_alloc(o2fsck_state *ost,
if (trust_next_free)
max_count = cl->cl_next_free_rec;
- /*
- * We walk the chains backwards for caching reasons. Basically,
- * at the end the last blocks we read will be the most recently
- * used in the cache. We want that to be the first chains,
- * especially for the inode scan, which will read forwards.
- */
- for (i = max_count - 1; i >= 0; i--) {
+ for (i = 0; i < max_count; i++) {
cr = &cl->cl_recs[i];
/* reset for each run */
@@ -966,8 +947,8 @@ static errcode_t verify_chain_alloc(o2fsck_state *ost,
.cs_chain_no = i,
.cs_cpg = cl->cl_cpg,
};
- ret = check_chain(ost, di, &cs, cr, buf1, buf2, pre_cache_buf,
- &changed, allowed, forbidden);
+ ret = check_chain(ost, di, &cs, cr, buf1, buf2, &changed,
+ allowed, forbidden);
/* XXX what? not checking ret? */
if (cr->c_blkno != 0) {
@@ -994,13 +975,12 @@ static errcode_t verify_chain_alloc(o2fsck_state *ost,
* we copy the last chain into the missing spot
* instead of shifting everyone over a spot
* to minimize the number of chains we have to
- * update. we then reset i so that we can go
- * over that chain and fix bg_chain */
+ * update */
if (i < (cl->cl_next_free_rec - 1)) {
*cr = cl->cl_recs[cl->cl_next_free_rec - 1];
memset(&cl->cl_recs[cl->cl_next_free_rec - 1],
0, sizeof(struct ocfs2_chain_rec));
- i++;
+ i--;
}
cl->cl_next_free_rec--;
@@ -1120,7 +1100,7 @@ static errcode_t verify_bitmap_descs(o2fsck_state *ost,
o2fsck_bitmap_set(allowed, blkno, NULL);
}
- ret = verify_chain_alloc(ost, di, buf1, buf2, NULL, allowed, forbidden);
+ ret = verify_chain_alloc(ost, di, buf1, buf2, allowed, forbidden);
if (ret) {
com_err(whoami, ret, "while looking up chain allocator inode "
"%"PRIu64, (uint64_t)di->i_blkno);
@@ -1275,7 +1255,6 @@ errcode_t o2fsck_pass0(o2fsck_state *ost)
uint64_t blkno;
uint32_t pre_repair_clusters;
char *blocks = NULL;
- char *pre_cache_buf = NULL;
struct ocfs2_dinode *di = NULL;
ocfs2_filesys *fs = ost->ost_fs;
ocfs2_cached_inode **ci;
@@ -1302,27 +1281,6 @@ errcode_t o2fsck_pass0(o2fsck_state *ost)
}
di = (struct ocfs2_dinode *)blocks;
- /*
- * We also allocate a pre-cache buffer of 4MB for reading entire
- * suballocator groups. Some blocksizes have smaller groups, but
- * none have larger (see
- * libocfs2/alloc.c:ocfs2_clusters_per_group()). This allows
- * us to pre-fill the I/O cache; we're already reading the group
- * descriptor, so slurping the whole thing shouldn't hurt.
- *
- * If this allocation fails, we just ignore it. It's a cache.
- */
- o2fsck_reset_blocks_cached();
- if (o2fsck_worth_caching(1)) {
- ret = ocfs2_malloc_blocks(fs->fs_io,
- ocfs2_blocks_in_bytes(fs, 4 * 1024 * 1024),
- &pre_cache_buf);
- if (ret)
- verbosef("Unable to allocate group pre-cache "
- "buffer, %s\n",
- "ignoring");
- }
-
ret = ocfs2_malloc0(max_slots * sizeof(ocfs2_cached_inode *),
&ost->ost_inode_allocs);
if (ret) {
@@ -1456,7 +1414,7 @@ retry_bitmap:
blocks + ost->ost_fs->fs_blocksize,
blocks +
(ost->ost_fs->fs_blocksize * 2),
- pre_cache_buf, NULL, NULL);
+ NULL, NULL);
/* XXX maybe helped by the alternate super block */
if (ret)
@@ -1515,7 +1473,7 @@ retry_bitmap:
blocks + ost->ost_fs->fs_blocksize,
blocks +
(ost->ost_fs->fs_blocksize * 2),
- pre_cache_buf, NULL, NULL);
+ NULL, NULL);
/* XXX maybe helped by the alternate super block */
if (ret)
@@ -1527,8 +1485,6 @@ retry_bitmap:
o2fsck_add_resource_track(&ost->ost_rt, &rt);
out:
- if (pre_cache_buf)
- ocfs2_free(&pre_cache_buf);
if (blocks)
ocfs2_free(&blocks);
if (ret)
--
1.7.4.1
More information about the Ocfs2-tools-devel
mailing list