[Ocfs2-tools-devel] [PATCH 4/4] defrag.ocfs2: Pass 3: Defrag filesystem
Goldwyn Rodrigues
rgoldwyn at gmail.com
Tue May 11 21:02:45 PDT 2010
Defrags the filesystem. For each group_desc, scan for holes.
If a hole is found, try and find extent rec which fits the hole best
and is greater than the block number of the hole. Move the data in
the hole.
TODO:
* If a good fit is not found for a group_desc, try to shift
the data to the top of the group_desc and try again.
* Currently, the extents scanned are from inodes with depths=0 only.
This must be changed to scan inodes greater than zero-depth.
Signed-off-by: Goldwyn Rodrigues <rgoldwyn at suse.de>
---
defrag.ocfs2/fs.c | 602 +++++++++++++++++++++++++++++++++++++++++++++++++++++
1 files changed, 602 insertions(+), 0 deletions(-)
create mode 100644 defrag.ocfs2/fs.c
diff --git a/defrag.ocfs2/fs.c b/defrag.ocfs2/fs.c
new file mode 100644
index 0000000..86fccbf
--- /dev/null
+++ b/defrag.ocfs2/fs.c
@@ -0,0 +1,602 @@
+/* -*- mode: c; c-basic-offset: 8; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
+ * fs.c
+ *
+ * Copyright (C) 2010 Oracle. All rights reserved.
+
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ */
+
+#include "defrag.h"
+#include <inttypes.h>
+#include "ocfs2/bitops.h"
+
+extern char *whoami;
+
+#ifdef DEBUG
+static void print_gd(struct ocfs2_group_desc *gd)
+{
+ int i, c;
+ printf("gd blkno %"PRIu64" size %d bits %d free:%d\n",
+ (uint64_t)gd->bg_blkno,
+ gd->bg_size, gd->bg_bits, gd->bg_free_bits_count);
+ for (i=0; i<gd->bg_size; i+=4) {
+ c = ((int) gd->bg_bitmap[i] << 24) +
+ ((int)gd->bg_bitmap[i+1] << 16) +
+ ((int)gd->bg_bitmap[i+2] << 8) +
+ (int)gd->bg_bitmap[i+3];
+ printf ("%9x", c);
+ if (!(i%32))
+ printf(" %"PRIu64"\n", (uint64_t)gd->bg_blkno + i*8);
+ }
+ printf("\n");
+}
+#endif
+
+/* A simple selection sort for now */
+static void sort_gds(struct defrag_state *dst)
+{
+ int i, j, n=dst->num_gds;
+ struct ocfs2_group_desc **gd = dst->gds;
+ struct ocfs2_group_desc *t;
+
+ for (i=0; i<n-1; i++)
+ for (j=i+1; j<n; j++)
+ if (gd[i]->bg_blkno > gd[j]->bg_blkno) {
+ t = gd[i];
+ gd[i]=gd[j];
+ gd[j] = t;
+ }
+}
+
+static errcode_t cr_read_gds(struct defrag_state *dst,
+ struct ocfs2_chain_rec *cr)
+{
+ ocfs2_filesys *fs = dst->dst_fs;
+ uint64_t blkno = 0;
+ errcode_t ret;
+ struct ocfs2_group_desc *gd = NULL;
+ char *buf = NULL;
+
+ verbosef("free %d tot %d\n", cr->c_free, cr->c_total);
+
+ blkno = cr->c_blkno;
+ while (blkno) {
+ ret = ocfs2_malloc_block(fs->fs_io, &buf);
+ if (ret) {
+ com_err(whoami, ret, "while allocating block\n");
+ goto out;
+ }
+ verbosef("Reading gd[%d] %"PRIu64"\n", dst->num_gds,
+ blkno);
+ ret = ocfs2_read_group_desc(fs, blkno, buf);
+ if (ret) {
+ com_err(whoami, ret, "while reading group\n");
+ goto out;
+ }
+ gd = (struct ocfs2_group_desc *)buf;
+ dst->gds[dst->num_gds++] = gd;
+ blkno = gd->bg_next_group;
+ }
+out:
+ return ret;
+}
+
+
+/*Read all gds and store in the dst structure */
+static errcode_t read_gds(struct defrag_state *dst)
+{
+ ocfs2_filesys *fs = dst->dst_fs;
+ uint64_t blkno;
+ struct ocfs2_dinode *di;
+ struct ocfs2_chain_list *cl;
+ char *buf;
+ int i, n;
+ errcode_t ret = 0;
+
+ ret = ocfs2_lookup_system_inode(fs, GLOBAL_BITMAP_SYSTEM_INODE,
+ 0, &blkno);
+ if (ret) {
+ com_err(whoami, ret, "while looking up system inode\n");
+ return ret;
+ }
+ ret = ocfs2_malloc_block(fs->fs_io, &buf);
+ if (ret) {
+ com_err(whoami, ret, "while allocating block\n");
+ return ret;
+ }
+ ret = ocfs2_read_inode(fs, blkno, buf);
+ if (ret) {
+ com_err(whoami, ret, "while reading inode\n");
+ ocfs2_free(&buf);
+ return ret;
+ }
+
+ di = (struct ocfs2_dinode *)buf;
+ cl = &di->id2.i_chain;
+ dst->max_gds = di->i_clusters / cl->cl_cpg;
+ dst->max_gds++;
+ ret = ocfs2_malloc(sizeof(char *) * dst->max_gds, &dst->gds);
+ if (ret) {
+ com_err(whoami, ret, "while allocating block\n");
+ return ret;
+ }
+ memset(dst->gds, 0, sizeof(char *) * dst->max_gds);
+ dst->num_gds = 0;
+
+ n = ocfs2_chain_recs_per_inode(fs->fs_blocksize);
+ n--;
+ while ((cl->cl_recs[n].c_blkno==0) && n>0)
+ n--;
+
+ verbosef("Total gds: %d recs: %d\n", dst->max_gds, n);
+
+ for(i=0; i<=n; i++) {
+ struct ocfs2_chain_rec *cr = &cl->cl_recs[i];
+ ret = cr_read_gds(dst, cr);
+ if (ret) {
+ com_err(whoami, ret, "while reading group descs\n");
+ return ret;
+ }
+ }
+
+ dst->global_bitmap_inode = di;
+ return 0;
+}
+
+static errcode_t get_num_inodes(struct defrag_state *dst)
+{
+ ocfs2_inode_scan *scan;
+ ocfs2_filesys *fs = dst->dst_fs;
+ uint64_t blkno;
+ errcode_t ret = 0;
+ char *buf = NULL;
+ struct ocfs2_dinode *di;
+
+ ret = ocfs2_malloc_block(fs->fs_io, &buf);
+ if (ret) {
+ com_err(whoami, ret, "while allocating block\n");
+ goto out;
+ }
+
+ ret = ocfs2_open_inode_scan(fs, &scan);
+ if (ret) {
+ com_err(whoami, ret, "while scanning for inodes\n");
+ goto out;
+ }
+ ret = ocfs2_malloc(sizeof(struct ocfs2_dinode *) * dst->num_inodes,
+ &dst->inodes);
+ if (ret) {
+ com_err(whoami, ret, "while allocating block\n");
+ goto out_close_scan;
+ }
+ memset(dst->inodes, 0, sizeof(void *) * dst->num_inodes);
+
+ di = (struct ocfs2_dinode *)buf;
+ for (;;) {
+ ret = ocfs2_get_next_inode(scan, &blkno, buf);
+ if (ret) {
+ com_err(whoami, ret, "while getting next inode\n");
+ goto out_close_scan;
+ }
+
+ if (blkno == 0)
+ break;
+
+ if (memcmp(di->i_signature, OCFS2_INODE_SIGNATURE,
+ strlen(OCFS2_INODE_SIGNATURE)))
+ continue;
+
+ ocfs2_swap_inode_to_cpu(fs, di);
+
+ if (di->i_fs_generation != dst->fs_generation)
+ continue;
+
+ if (!(di->i_flags & OCFS2_VALID_FL) ||
+ (di->i_flags & OCFS2_SYSTEM_FL))
+ continue;
+
+ dst->num_inodes++;
+ }
+
+out_close_scan:
+ ocfs2_close_inode_scan(scan);
+out:
+ ocfs2_free(&buf);
+ return ret;
+}
+
+static errcode_t read_inodes(struct defrag_state *dst)
+{
+ ocfs2_inode_scan *scan;
+ ocfs2_filesys *fs = dst->dst_fs;
+ uint64_t blkno;
+ errcode_t ret = 0;
+ char *buf = NULL, *tbuf = NULL;
+ struct ocfs2_dinode *di;
+ int ninodes = 0;
+
+
+ ret = ocfs2_malloc_block(fs->fs_io, &buf);
+ if (ret) {
+ com_err(whoami, ret, "while allocating block\n");
+ return ret;
+ }
+
+ ret = ocfs2_open_inode_scan(fs, &scan);
+ if (ret) {
+ com_err(whoami, ret, "while scanning for inodes\n");
+ goto out;
+ }
+ ret = ocfs2_malloc(sizeof(struct ocfs2_dinode *) * dst->num_inodes,
+ &dst->inodes);
+ if (ret)
+ goto out_close_scan;
+ memset(dst->inodes, 0, sizeof(void *) * dst->num_inodes);
+
+
+ di = (struct ocfs2_dinode *)buf;
+
+ for (;;) {
+ ret = ocfs2_get_next_inode(scan, &blkno, buf);
+ if (ret)
+ goto out_close_scan;
+
+ if (blkno == 0)
+ break;
+
+ if (memcmp(di->i_signature, OCFS2_INODE_SIGNATURE,
+ strlen(OCFS2_INODE_SIGNATURE)))
+ continue;
+
+ ocfs2_swap_inode_to_cpu(fs, di);
+
+ if (di->i_fs_generation != dst->fs_generation)
+ continue;
+
+ if (!(di->i_flags & OCFS2_VALID_FL) ||
+ (di->i_flags & OCFS2_SYSTEM_FL))
+ continue;
+
+ verbosef("Inode %"PRIu64" blkno %"PRIu64" flags:0x%x "
+ "dyn_flags:0x%x\n",
+ (uint64_t)di->i_blkno, (uint64_t)blkno,
+ di->i_flags, di->i_dyn_features);
+
+ ret = ocfs2_malloc_block(fs->fs_io, &tbuf);
+ if (ret)
+ goto out_close_scan;
+ memcpy(tbuf, buf, fs->fs_blocksize);
+ dst->inodes[ninodes++] = (struct ocfs2_dinode *) tbuf;
+ if (ninodes>dst->num_inodes) {
+ fprintf(stderr, "Incorrect number of inodes "
+ "found in previous pass.\n");
+ goto out_close_scan;
+ }
+ }
+
+out_close_scan:
+ ocfs2_close_inode_scan(scan);
+out:
+ if (buf)
+ ocfs2_free(&buf);
+ return ret;
+}
+
+static struct ocfs2_extent_rec *find_di(struct defrag_state *dst,
+ uint64_t blkno, struct ocfs2_dinode **di)
+{
+ struct ocfs2_dinode **inode = dst->inodes;
+ struct ocfs2_extent_rec *rec, *er = NULL;
+ struct ocfs2_extent_list *el;
+ int i, j;
+
+ verbosef("Finding di associated with %"PRIu64"\n", blkno);
+ for (i=dst->num_inodes-1; i>=0; i--) {
+ el = &inode[i]->id2.i_list;
+ if (el->l_tree_depth)
+ continue;
+ for (j=0; j<el->l_count; j++) {
+ rec = &el->l_recs[j];
+ if (rec->e_blkno == blkno) {
+ er = rec;
+ break;
+ }
+ }
+ }
+ return er;
+}
+
+static struct ocfs2_extent_rec *find_best_fit_extent(
+ struct defrag_state *dst, int size,
+ uint64_t blkno, struct ocfs2_dinode **di)
+{
+ struct ocfs2_dinode **inode = dst->inodes;
+ struct ocfs2_extent_rec *rec, *er = NULL;
+ struct ocfs2_extent_list *el;
+ int i, j, nb, cblks = -1;
+ *di = NULL;
+
+ for (i=dst->num_inodes-1; i>=0; i--) {
+ el = &inode[i]->id2.i_list;
+ /* XXX - temporarily work only on inodes of depth 0 */
+ if (el->l_tree_depth)
+ continue;
+ for(j=0; j<el->l_count; j++) {
+ rec = &el->l_recs[j];
+ nb = ocfs2_clusters_to_blocks(dst->dst_fs,
+ rec->e_leaf_clusters);
+ if ((rec->e_blkno > blkno) && (nb <= size) &&
+ ((cblks==-1) || (cblks < nb))) {
+ *di = inode[i];
+ if (nb == size)
+ return rec;
+ er = rec;
+ cblks = nb;
+ }
+ }
+ }
+ return er;
+}
+
+
+static struct ocfs2_group_desc *find_gd(struct defrag_state *dst,
+ uint64_t blkno)
+{
+ struct ocfs2_group_desc **gd = dst->gds;
+ int i;
+ uint64_t end;
+ verbosef("Finding gd corresponding to %"PRIu64"\n", blkno);
+ for (i = dst->num_gds - 1; i>=0; i--) {
+ end = gd[i]->bg_blkno +
+ ocfs2_clusters_to_blocks(dst->dst_fs, gd[i]->bg_bits);
+ if (gd[i] && (blkno > gd[i]->bg_blkno) &&
+ (blkno < end))
+ return gd[i];
+ }
+ return NULL;
+}
+
+
+
+
+static errcode_t gd_bitmap_set_value(struct defrag_state *dst,
+ struct ocfs2_group_desc *gd,
+ int start, int len, int value)
+{
+ int i;
+ struct ocfs2_chain_list *cl = &dst->global_bitmap_inode->id2.i_chain;
+ struct ocfs2_chain_rec *cr;
+
+ for (i=start; i<start+len; i++)
+ if (value)
+ ocfs2_set_bit(i, gd->bg_bitmap);
+ else
+ ocfs2_clear_bit(i, gd->bg_bitmap);
+
+
+ i = gd->bg_chain;
+ cr = &cl->cl_recs[i];
+ if (value) {
+ cr->c_free-=len;
+ gd->bg_free_bits_count-=len;
+ }
+ else {
+ cr->c_free+=len;
+ gd->bg_free_bits_count+=len;
+ }
+ return ocfs2_write_group_desc(dst->dst_fs, gd->bg_blkno, (char *)gd);
+}
+
+
+static errcode_t gd_set_bits(struct defrag_state *dst, struct
ocfs2_group_desc *gd,
+ int start, int len)
+{
+ return gd_bitmap_set_value(dst, gd, start, len, 1);
+}
+
+
+static errcode_t gd_clear_bits(struct defrag_state *dst,
+ struct ocfs2_group_desc *gd,
+ int start, int len)
+{
+ return gd_bitmap_set_value(dst, gd, start, len, 0);
+}
+
+
+static errcode_t move_extent(struct defrag_state *dst,
+ struct ocfs2_group_desc *from_gd,
+ struct ocfs2_group_desc *to_gd, struct
+ ocfs2_extent_rec *rec, int to_offset)
+{
+ char *buf;
+ errcode_t ret;
+ uint64_t from = rec->e_blkno, to;
+ uint64_t first_gd_blkno =
+ (OCFS2_RAW_SB(dst->dst_fs->fs_super))->s_first_cluster_group;
+ ocfs2_filesys *fs = dst->dst_fs;
+ int len = ocfs2_clusters_to_blocks(fs, rec->e_leaf_clusters);
+
+ if (to_gd->bg_blkno == first_gd_blkno)
+ to = to_offset;
+ else
+ to = to_offset + to_gd->bg_blkno;
+ verbosef("Moving %d blocks from %"PRIu64" to %"PRIu64"\n",
+ len, from, to);
+
+ ret = ocfs2_malloc_blocks(fs->fs_io, len, &buf);
+ if (ret)
+ return ret;
+ ret = ocfs2_read_blocks(fs, from, len, buf);
+ if (ret)
+ goto out;
+ ret = io_write_block(fs->fs_io, to, len, buf);
+ if (ret)
+ goto out;
+
+ /* First clear and then set, to accomodate same gds.
+ XXX If any of the following fails and to/from gds are same
+ it will lead to data corruption.
+ */
+ ret = gd_clear_bits(dst, from_gd,
+ ocfs2_blocks_to_clusters(fs, from - from_gd->bg_blkno),
+ rec->e_leaf_clusters);
+ if (ret)
+ goto out;
+ ret = gd_set_bits(dst, to_gd,
+ ocfs2_blocks_to_clusters(fs, to_offset),
+ rec->e_leaf_clusters);
+ rec->e_blkno = to;
+out:
+ ocfs2_free(&buf);
+ return ret;
+}
+
+static errcode_t defrag_gd(struct defrag_state *dst,
+ struct ocfs2_group_desc *gd)
+{
+ int start = 0, end = 0, changed = 0, bstart, bend, last_hole=0;
+ struct ocfs2_extent_rec *er;
+ struct ocfs2_dinode *di;
+ struct ocfs2_group_desc *from_gd;
+ errcode_t ret = 0;
+
+ while (end < gd->bg_bits) {
+ start = ocfs2_find_next_bit_clear (gd->bg_bitmap,
+ gd->bg_bits, end);
+
+ if (start > gd->bg_bits)
+ break;
+
+ end = ocfs2_find_next_bit_set(gd->bg_bitmap, gd->bg_bits, start);
+
+ if (end > gd->bg_bits)
+ break;
+
+ bstart = ocfs2_clusters_to_blocks(dst->dst_fs, start);
+ bend = ocfs2_clusters_to_blocks(dst->dst_fs, end);
+ verbosef("gd %"PRIu64" start %d end %d bstart %d bend %d\n",
+ (uint64_t) gd->bg_blkno, start, end,
+ bstart, bend);
+
+ /* Avoid best fit if last_hole is equal to current one */
+
+ if (last_hole >= bend - bstart)
+ goto no_best_fit;
+
+ er = NULL;
+ di = NULL;
+ er = find_best_fit_extent(dst, bend - bstart,
+ gd->bg_blkno + bstart, &di);
+
+ if (er) {
+ from_gd = find_gd(dst, er->e_blkno);
+ if (from_gd) {
+ ret = move_extent(dst, from_gd, gd, er,
+ bstart);
+ if (ret)
+ break;
+ /* XXX For efficiency, try to move
+ * the rest of the file to
+ * this gd, if space allows */
+ ret = ocfs2_write_inode(dst->dst_fs,
+ di->i_blkno , (char *) di);
+ if (ret)
+ break;
+ changed = 1;
+ last_hole = 0;
+ }
+ continue;
+ }
+
+no_best_fit:
+ if (end >= gd->bg_bits)
+ break;
+
+ /* Best fit not found. Move the following extent to cover the
+ * hole */
+ er = NULL;
+ di = NULL;
+ er = find_di(dst, bend, &di);
+ if (er) {
+ ret = move_extent(dst, gd, gd, er, bstart);
+ if (ret)
+ break;
+ ret = ocfs2_write_inode(dst->dst_fs, di->i_blkno,
+ (char *)di);
+ if (ret)
+ break;
+ changed = 1;
+ last_hole = bend - bstart;
+ } else
+ verbosef("%"PRIu64" not found\n", bend);
+
+ }
+ verbosef("Writing gd %"PRIu64"\n", (uint64_t)gd->bg_blkno);
+ if (changed)
+ ret = ocfs2_write_group_desc(dst->dst_fs,
+ gd->bg_blkno, (char *)gd);
+ return ret;
+}
+
+static errcode_t defrag_gds(struct defrag_state *dst)
+{
+ int i;
+ errcode_t ret = 0;
+ struct ocfs2_group_desc **gd = dst->gds;
+
+ for (i=0; i<dst->num_gds; i++)
+ if (gd[i]->bg_free_bits_count != 0)
+ ret = defrag_gd(dst, gd[i]);
+ return ret;
+}
+
+
+errcode_t defrag_fs(struct defrag_state *dst)
+{
+ errcode_t ret = read_gds(dst);
+ sort_gds(dst);
+#ifdef DEBUG
+ int i;
+ for (i=0; i<dst->num_gds; i++)
+ print_gd(dst->gds[i]);
+#endif
+ /* We did not run the previous passes, so get the num_inodes */
+ if (!dst->num_inodes) {
+ ret = get_num_inodes(dst);
+ if (ret) {
+ com_err(whoami, ret, "while getting number of inodes\n");
+ goto out;
+ }
+ }
+ ret = read_inodes(dst);
+ if (ret) {
+ com_err(whoami, ret, "while reading indoes\n");
+ goto out;
+ }
+
+ ret = defrag_gds(dst);
+ if (ret) {
+ com_err(whoami, ret, "while defragging gds\n");
+ goto out;
+ }
+
+#ifdef DEBUG
+ for (i=0; i<dst->num_gds; i++)
+ print_gd(dst->gds[i]);
+#endif
+ ocfs2_write_inode(dst->dst_fs, dst->global_bitmap_inode->i_blkno,
+ (char *)dst->global_bitmap_inode);
+out:
+ return ret;
+}
+
--
1.6.4.2
More information about the Ocfs2-tools-devel
mailing list