[Ocfs2-tools-devel] [PATCH 3/4] Defrag directories
Goldwyn Rodrigues
rgoldwyn at gmail.com
Wed Mar 17 18:28:39 PDT 2010
Directories with holes left be deleted entries are defragged.
Algorithm:
1. Read first extent and coalesce entries.
2. Maintain a sliding window of extents, read the next extent
and move dirents from last read to the first extent.
3. Truncate the directory file to the computed size.
Signed-off-by: Goldwyn Rodrigues <rgoldwyn at suse.de>
---
defrag.ocfs2/Makefile | 2 +-
defrag.ocfs2/defrag.c | 18 ++-
defrag.ocfs2/dir.c | 335 +++++++++++++++++++++++++++++++++++++++++
defrag.ocfs2/include/defrag.h | 3 +
4 files changed, 355 insertions(+), 3 deletions(-)
create mode 100644 defrag.ocfs2/dir.c
diff --git a/defrag.ocfs2/Makefile b/defrag.ocfs2/Makefile
index 8bc48e0..ed0bc37 100644
--- a/defrag.ocfs2/Makefile
+++ b/defrag.ocfs2/Makefile
@@ -13,7 +13,7 @@ LIBOCFS2_DEPS = $(TOPDIR)/libocfs2/libocfs2.a
CFLAGS += -g
-CFILES = defrag.c alloc.c
+CFILES = defrag.c alloc.c dir.c
HFILES = include/defrag.h
diff --git a/defrag.ocfs2/defrag.c b/defrag.ocfs2/defrag.c
index 22b71d7..4a5ad7e 100644
--- a/defrag.ocfs2/defrag.c
+++ b/defrag.ocfs2/defrag.c
@@ -48,6 +48,7 @@ static errcode_t check_superblock(struct defrag_state *dst)
printf("The superblock max_slots field is set to 0.\n");
ret = OCFS2_ET_CORRUPT_SUPERBLOCK;
}
+ dst->fs_generation = di->i_fs_generation;
return ret;
}
@@ -269,14 +270,27 @@ int main(int argc, char **argv)
printf("Pass 1: Clearing unused groups in Allocators\n");
ret = allocators_clear_unused(dst);
if (ret) {
- fprintf(stderr, "Error while clearing unused inode groups\n"
- "Please execute fsck.ocfs2\n");
retval |= DEFRAG_ERROR;
+ goto error;
+ }
+
+ printf("Pass 2: Defrag directories\n");
+ ret = defrag_dirs(dst);
+ if (ret) {
+ retval |= DEFRAG_ERROR;
+ goto error;
}
ocfs2_write_super(dst->dst_fs);
ocfs2_close(dst->dst_fs);
+ printf("All passes successfully completed.\n");
out:
return retval;
+
+error:
+ fprintf(stderr, "Error while defragging filesystem. Please execute"
+ "fsck.ocfs2\n");
+ return retval;
+
}
diff --git a/defrag.ocfs2/dir.c b/defrag.ocfs2/dir.c
new file mode 100644
index 0000000..739d9da
--- /dev/null
+++ b/defrag.ocfs2/dir.c
@@ -0,0 +1,335 @@
+#include "defrag.h"
+#include <inttypes.h>
+
+#define MAX_DIR_EXTENTS 256 /* Maximum window size */
+
+/*Returns the write offset of the buffer */
+static int coalesce_dirents(ocfs2_filesys *fs, char *buf,
+ int offset, int end, struct ocfs2_dir_entry **p)
+{
+ struct ocfs2_dir_entry *dirent = NULL, *prev = NULL;
+ int s_rec_len, write_off = offset, bs, next_blk_lim;
+ next_blk_lim = bs = fs->fs_blocksize;
+
+ while (offset < end) {
+ dirent = (struct ocfs2_dir_entry *)(buf + offset);
+ if (dirent->inode == 0) {
+ offset += dirent->rec_len;
+ continue;
+ }
+ /* Set r_offset to next block start for corrupted blocks*/
+ if (dirent->rec_len <= 0) {
+ offset = ((offset + bs - 1) / bs) * bs;
+ next_blk_lim = offset + bs;
+ continue;
+ }
+
+ if (prev && prev->inode) {
+ prev->rec_len = OCFS2_DIR_REC_LEN(prev->name_len);
+ write_off += prev->rec_len;
+
+ /* If doing multi-block - check if moving dirent
+ will move it to block boundary
+ */
+ if (write_off + OCFS2_DIR_REC_LEN(dirent->name_len)
+ > next_blk_lim) {
+ prev->rec_len += next_blk_lim - write_off;
+ write_off = next_blk_lim;
+ next_blk_lim += bs;
+ verbosef("Reset - off: %u write:%u "
+ "next_blk:%u d:%u/%u p:%u\n",
+ offset, write_off, next_blk_lim,
+ dirent->name_len, dirent->rec_len,
+ prev->rec_len);
+ }
+
+ }
+ s_rec_len = dirent->rec_len;
+ if (write_off < offset) {
+
+ memmove(buf + write_off, buf + offset,
+ OCFS2_DIR_REC_LEN(dirent->name_len));
+ dirent = (struct ocfs2_dir_entry *)(buf + write_off);
+ }
+ verbosef("off: %u w:%u nb:%u d:%u/%u %.*s %llu\n",
+ offset, write_off, next_blk_lim,
+ dirent->name_len, dirent->rec_len,
+ dirent->name_len, dirent->name, dirent->inode);
+ offset += s_rec_len;
+ prev = dirent;
+ }
+ write_off += OCFS2_DIR_REC_LEN(dirent->name_len);
+ memset(buf + write_off, 0, end - write_off);
+ dirent->rec_len = end - write_off +
+ OCFS2_DIR_REC_LEN(dirent->name_len);
+ *p = prev;
+ return write_off;
+}
+
+
+struct defrag_dir_context {
+ struct ocfs2_extent_rec *er[MAX_DIR_EXTENTS];
+ char *buf[MAX_DIR_EXTENTS];
+ int r_offset;
+ int w_offset;
+ int ext_count;
+ struct ocfs2_dir_entry *prev;
+ uint64_t dirsize;
+};
+
+enum move_rets {
+ READER_EXHAUSTED = 0,
+ WRITER_EXHAUSTED
+};
+
+static enum move_rets move_dirents(ocfs2_filesys *fs,
+ struct defrag_dir_context *c)
+{
+ struct ocfs2_dir_entry *dirent = NULL;
+ int next_blk_lim, bs, len, idx, r_end, w_end;
+ char *rbuf, *wbuf;
+ bs = fs->fs_blocksize;
+ next_blk_lim = ((c->w_offset + bs - 1)/bs) * bs;
+
+ idx = c->ext_count - 1;
+ r_end = ocfs2_clusters_to_bytes(fs, c->er[idx]->e_leaf_clusters);
+ w_end = ocfs2_clusters_to_bytes(fs, c->er[0]->e_leaf_clusters);
+ rbuf = c->buf[idx];
+ wbuf = c->buf[0];
+
+ while (c->r_offset < r_end) {
+ dirent = (struct ocfs2_dir_entry *)(rbuf + c->r_offset);
+ if (dirent->rec_len <= 0) {
+ /* Set r_offset to next block start */
+ c->r_offset = ((c->r_offset + bs) / bs) * bs;
+ continue;
+ }
+ if (dirent->inode == 0)
+ goto next;
+
+
+ c->prev->rec_len = OCFS2_DIR_REC_LEN(c->prev->name_len);
+ len = OCFS2_DIR_REC_LEN(dirent->name_len);
+ if (c->w_offset + len > w_end) {
+ c->prev->rec_len += next_blk_lim - c->w_offset;
+ return WRITER_EXHAUSTED;
+ }
+
+ /* Make sure a block limit does not cut across a
+ dirent */
+ if (c->w_offset + len > next_blk_lim) {
+ c->prev->rec_len += next_blk_lim - c->w_offset;
+ c->w_offset = next_blk_lim;
+ next_blk_lim += bs;
+ }
+
+ verbosef("w %d/%d r %d/%d %.*s i %lu s %lu\n",
+ c->w_offset, w_end,
+ c->r_offset, r_end, dirent->name_len,
+ dirent->name, (uint64_t)dirent->inode,
+ c->dirsize);
+ memmove((wbuf + c->w_offset), (rbuf + c->r_offset),
+ dirent->rec_len);
+ c->prev = (struct ocfs2_dir_entry *)(wbuf + c->w_offset);
+ c->w_offset += len;
+ c->dirsize += len;
+next:
+ c->r_offset += dirent->rec_len;
+ }
+ return READER_EXHAUSTED;
+}
+
+#ifdef DEBUG
+static void print_dirents(char *buf, int end)
+{
+ struct ocfs2_dir_entry *dirent;
+ int off = 0;
+
+ while (off < end) {
+ dirent = (struct ocfs2_dir_entry *)(buf+off);
+ verbosef("o %d/%d %.*s i:%llu\n", off, end,
+ dirent->name_len, dirent->name, dirent->inode);
+ if (!dirent->inode)
+ break;
+ off += dirent->rec_len;
+ }
+
+}
+#endif
+
+static int defrag_dir(ocfs2_filesys *fs,
+ struct ocfs2_extent_rec *rec,
+ int tree_depth, uint32_t ccount, uint64_t ref_blkno,
+ int ref_recno, void *priv_data)
+{
+ struct defrag_dir_context *c = (struct defrag_dir_context *)priv_data;
+ errcode_t ret = 0;
+ int i = c->ext_count, nblks, end;
+ enum move_rets m;
+
+ verbosef("Reading extent %llu w:%d r:%d\n", rec->e_blkno, c->w_offset,
+ c->r_offset);
+
+ c->r_offset = 0;
+ /* allocate/copy because underlying buffer can be freed */
+ ret = ocfs2_malloc(sizeof(struct ocfs2_extent_rec), &c->er[i]);
+ if (ret)
+ goto fail;
+ memcpy(c->er[i], rec, sizeof(struct ocfs2_extent_rec));
+ nblks = ocfs2_clusters_to_blocks(fs, rec->e_leaf_clusters);
+ ret = ocfs2_malloc_blocks(fs->fs_io, nblks, &c->buf[i]);
+ if (ret)
+ goto fail;
+ ret = ocfs2_read_blocks(fs, c->er[i]->e_blkno, nblks, c->buf[i]);
+ if (ret)
+ goto fail;
+ c->ext_count++;
+
+ if (!i) {
+ /* First extent, just coalesce and return*/
+ end = nblks * fs->fs_blocksize;
+ c->w_offset = c->dirsize =
+ coalesce_dirents(fs, c->buf[0], 0, end, &c->prev);
+ verbosef("w_offset:%u dirsize %lu\n", c->w_offset, c->dirsize);
+ return 0;
+ }
+
+ do {
+ m = move_dirents(fs, c);
+ if (m == READER_EXHAUSTED)
+ return 0;
+ /* else m==WRITER_EXHAUSTED */
+ nblks = ocfs2_clusters_to_blocks(fs, c->er[0]->e_leaf_clusters);
+#ifdef DEBUG
+ print_dirents(c->buf[0],
+ ocfs2_clusters_to_bytes(fs, c->er[0]->e_leaf_clusters));
+#endif
+ io_write_block(fs->fs_io, c->er[0]->e_blkno, nblks, c->buf[0]);
+ verbosef("Writing extent %llu\n", c->er[0]->e_blkno);
+ ocfs2_free(&c->buf[0]);
+ ocfs2_free(&c->er[0]);
+ /* shift window */
+ for (i = 0; i < c->ext_count-1; i++) {
+ c->er[i] = c->er[i+1];
+ c->buf[i] = c->buf[i+1];
+ }
+ c->w_offset = 0;
+ c->ext_count--;
+ } while (m == WRITER_EXHAUSTED);
+
+ return 0;
+fail:
+ if (c->buf[i])
+ ocfs2_free(&c->buf[i]);
+ if (c->er[i])
+ ocfs2_free(&c->er[i]);
+ return OCFS2_EXTENT_ABORT;
+}
+
+errcode_t defrag_dirs(struct defrag_state *dst)
+{
+ ocfs2_inode_scan *scan;
+ ocfs2_filesys *fs = dst->dst_fs;
+ uint64_t blkno;
+ errcode_t ret = 0;
+ char *buf = NULL;
+ struct ocfs2_dinode *di;
+ int offset, end, blockbits, i, nblks, bs;
+ struct defrag_dir_context dc;
+ blockbits = OCFS2_RAW_SB(dst->dst_fs->fs_super)->s_blocksize_bits;
+ bs = fs->fs_blocksize;
+ ret = ocfs2_malloc_block(fs->fs_io, &buf);
+ ret = ocfs2_open_inode_scan(fs, &scan);
+ if (ret)
+ goto out;
+
+ di = (struct ocfs2_dinode *)buf;
+ memset(&dc, 0, sizeof(struct defrag_dir_context));
+ for (;;) {
+ ret = ocfs2_get_next_inode(scan, &blkno, buf);
+
+ if (blkno == 0)
+ break;
+
+ if (memcmp(di->i_signature, OCFS2_INODE_SIGNATURE,
+ strlen(OCFS2_INODE_SIGNATURE)))
+ continue;
+
+ ocfs2_swap_inode_to_cpu(fs, di);
+
+ if (di->i_fs_generation != dst->fs_generation)
+ continue;
+
+ if (!(di->i_flags & OCFS2_VALID_FL) ||
+ (di->i_flags & OCFS2_SYSTEM_FL))
+ continue;
+ verbosef("Inode %"PRIu64" blkno %"PRIu64" flags:0x%x "
+ "dyn_flags:0x%x\n",
+ (uint64_t)di->i_blkno, blkno,
+ di->i_flags, di->i_dyn_features);
+ dst->num_inodes++;
+ if (S_ISDIR(di->i_mode)) {
+ if (di->i_dyn_features & OCFS2_INLINE_DATA_FL) {
+ struct ocfs2_dir_entry *prev = NULL;
+ offset = offsetof(struct ocfs2_dinode,
+ id2.i_data.id_data);
+ end = fs->fs_blocksize;
+ coalesce_dirents(fs, buf, offset, end, &prev);
+ ocfs2_write_inode(fs, blkno, (char *)di);
+ continue;
+ }
+ memset(&dc, 0, sizeof(struct defrag_dir_context));
+ ret = ocfs2_extent_iterate_inode(dst->dst_fs, di,
+ OCFS2_EXTENT_FLAG_DATA_ONLY, NULL,
+ defrag_dir, &dc);
+ if (ret)
+ goto out;
+
+ end = ocfs2_clusters_to_bytes(fs,
+ dc.er[0]->e_leaf_clusters);
+ memset(dc.buf[0] + dc.w_offset, 0, end - dc.w_offset);
+
+ /*Set the last prev */
+ end = ((dc.w_offset + bs - 1) / bs) * bs;
+ verbosef("w_off %u end %u bb:%d l %d i %llu\n",
+ dc.w_offset, end, blockbits,
+ dc.prev->rec_len, dc.prev->inode);
+ dc.prev->rec_len = end - dc.w_offset +
+ OCFS2_DIR_REC_LEN(dc.prev->name_len);
+ nblks = ocfs2_clusters_to_blocks(fs,
+ dc.er[0]->e_leaf_clusters);
+ io_write_block(fs->fs_io, dc.er[0]->e_blkno,
+ nblks, dc.buf[0]);
+
+ /*Making sure all the rest are set to zeros*/
+ for (i = 1; i < dc.ext_count; i++) {
+ nblks = ocfs2_clusters_to_blocks(fs,
+ dc.er[i]->e_leaf_clusters);
+ end = ocfs2_clusters_to_bytes(fs,
+ dc.er[i]->e_leaf_clusters);
+ verbosef("Clearing i: %d end %u\n", i, end);
+ memset(dc.buf[i], 0, end);
+ io_write_block(fs->fs_io, dc.er[i]->e_blkno,
+ nblks, dc.buf[i]);
+ ocfs2_free(&dc.buf[i]);
+ ocfs2_free(&dc.er[i]);
+ }
+
+ dc.dirsize = ((dc.dirsize + bs - 1) / bs) * bs;
+ verbosef("Truncating %lu from %lu to %lu\n",
+ (uint64_t)di->i_blkno, (uint64_t)di->i_size,
+ dc.dirsize);
+ if (dc.dirsize < di->i_size)
+ /* XXX can we call truncate if the inode is
+ read in memory */
+ ocfs2_truncate(dst->dst_fs, di->i_blkno,
+ dc.dirsize);
+ }
+ }
+out:
+ if (scan)
+ ocfs2_close_inode_scan(scan);
+ ocfs2_free(&buf);
+ return ret;
+}
+
diff --git a/defrag.ocfs2/include/defrag.h b/defrag.ocfs2/include/defrag.h
index e845893..08b12c7 100644
--- a/defrag.ocfs2/include/defrag.h
+++ b/defrag.ocfs2/include/defrag.h
@@ -33,6 +33,8 @@
struct defrag_state {
ocfs2_filesys *dst_fs;
+ uint32_t fs_generation;
+ int num_inodes;
};
extern int verbose;
@@ -43,6 +45,7 @@ extern int verbose;
errcode_t allocators_clear_unused(struct defrag_state *);
+errcode_t defrag_dirs(struct defrag_state *dst);
#endif /* __OCFS2_DEFRAG_H__ */
--
1.6.4.2
--
Goldwyn
More information about the Ocfs2-tools-devel
mailing list