[Ocfs2-tools-devel] [PATCH 4/4] Defrag the filesystem

Goldwyn Rodrigues rgoldwyn at gmail.com
Wed Mar 17 18:29:19 PDT 2010


Algorithm:
1. Read all group descriptors and sort them with respect to block number
2. Read all inodes
3. For all group descriptors:
   a. Find a hole in the bitmap
   b. Find a extent rec which will best fit in the hole and has a
      block number greater than the hole
   c. Find the group descriptor corresponding to the found rec
   d. Move the extent.

Limitations:
 - Only zero depth files are victimized for move.
 - Victim files can be spread across gds, making them inefficient
   to read. can be made efficient by moving the rest of the file
   even if it is not a perfect match

Signed-off-by: Goldwyn Rodrigues <rgoldwyn at suse.de>

---
 defrag.ocfs2/Makefile         |    2 +-
 defrag.ocfs2/defrag.c         |    7 +
 defrag.ocfs2/fs.c             |  429 +++++++++++++++++++++++++++++++++++++++++
 defrag.ocfs2/include/defrag.h |    6 +
 4 files changed, 443 insertions(+), 1 deletions(-)
 create mode 100644 defrag.ocfs2/fs.c

diff --git a/defrag.ocfs2/Makefile b/defrag.ocfs2/Makefile
index ed0bc37..4bd7355 100644
--- a/defrag.ocfs2/Makefile
+++ b/defrag.ocfs2/Makefile
@@ -13,7 +13,7 @@ LIBOCFS2_DEPS = $(TOPDIR)/libocfs2/libocfs2.a
 
 CFLAGS += -g
 
-CFILES =	defrag.c alloc.c dir.c
+CFILES =	defrag.c alloc.c dir.c fs.c
 
 HFILES = 	include/defrag.h
 
diff --git a/defrag.ocfs2/defrag.c b/defrag.ocfs2/defrag.c
index 4a5ad7e..7071ca9 100644
--- a/defrag.ocfs2/defrag.c
+++ b/defrag.ocfs2/defrag.c
@@ -281,6 +281,13 @@ int main(int argc, char **argv)
 		goto error;
 	}
 
+	printf("Pass 3: Defrag filesystem\n");
+	ret = defrag_fs(dst);
+	if (ret) {
+		retval |= DEFRAG_ERROR;
+		goto error;
+	}
+
 	ocfs2_write_super(dst->dst_fs);
 	ocfs2_close(dst->dst_fs);
 	printf("All passes successfully completed.\n");
diff --git a/defrag.ocfs2/fs.c b/defrag.ocfs2/fs.c
new file mode 100644
index 0000000..40d52f9
--- /dev/null
+++ b/defrag.ocfs2/fs.c
@@ -0,0 +1,429 @@
+
+
+#include "defrag.h"
+#include <inttypes.h>
+#include "ocfs2/bitops.h"
+
+#ifdef DEBUG
+static void print_gd(struct ocfs2_group_desc *gd)
+{
+	int i, c;
+	printf("gd blkno %"PRIu64" size %d bits %d free:%d\n",
+			(uint64_t)gd->bg_blkno,
+			gd->bg_size, gd->bg_bits, gd->bg_free_bits_count);
+	for (i = 0; i < gd->bg_size; i+=4) {
+		c = ((int) gd->bg_bitmap[i] << 24) +
+			((int)gd->bg_bitmap[i+1] << 16) +
+			((int)gd->bg_bitmap[i+2] << 8) +
+			(int)gd->bg_bitmap[i+3];
+		printf ("%9x", c);
+		if (!(i%32))
+			printf(" %"PRIu64"\n", (uint64_t)gd->bg_blkno + i*8);
+	}
+	printf("\n");
+}
+#endif
+
+/* A simple selection sort for now */
+static void sort_gds(struct defrag_state *dst)
+{
+	int i, j, n = dst->num_gds;
+	struct ocfs2_group_desc **gd = dst->gds;
+	struct ocfs2_group_desc *t;
+
+	for (i = 0; i < n-1; i++)
+		for (j = i+1; j < n; j++)
+			if (gd[i]->bg_blkno > gd[j]->bg_blkno) {
+				t = gd[i];
+				gd[i] = gd[j];
+				gd[j] = t;
+			}
+}
+
+static errcode_t cr_read_gds(struct defrag_state *dst,
+		struct ocfs2_chain_rec *cr)
+{
+	ocfs2_filesys *fs = dst->dst_fs;
+	uint64_t blkno = 0;
+	errcode_t ret;
+	struct ocfs2_group_desc *gd = NULL;
+	char *buf = NULL;
+
+	verbosef("free %d tot %d\n", cr->c_free, cr->c_total);
+
+	blkno = cr->c_blkno;
+	while (blkno) {
+		ret = ocfs2_malloc_block(fs->fs_io, &buf);
+		if (ret)
+			goto out;
+		verbosef("Reading gd[%d] %"PRIu64"\n", dst->num_gds,
+				blkno);
+		ret = ocfs2_read_group_desc(fs, blkno, buf);
+		if (ret)
+			goto out;
+		gd = (struct ocfs2_group_desc *)buf;
+		dst->gds[dst->num_gds++] = gd;
+		blkno = gd->bg_next_group;
+	}
+out:
+	return ret;
+}
+
+
+/*Read all gds and store in the dst structure */
+static errcode_t read_gds(struct defrag_state *dst)
+{
+	ocfs2_filesys *fs = dst->dst_fs;
+	uint64_t blkno;
+	struct ocfs2_dinode *di;
+	struct ocfs2_chain_list *cl;
+	char *buf;
+	int i, n;
+	errcode_t ret = 0;
+
+	ret = ocfs2_lookup_system_inode(fs, GLOBAL_BITMAP_SYSTEM_INODE,
+			0, &blkno);
+	if (ret)
+		return ret;
+	ret = ocfs2_malloc_block(fs->fs_io, &buf);
+	if (ret)
+		return ret;
+	ret = ocfs2_read_inode(fs, blkno, buf);
+	if (ret) {
+		ocfs2_free(&buf);
+		return ret;
+	}
+
+	di = (struct ocfs2_dinode *)buf;
+	cl = &di->id2.i_chain;
+	dst->max_gds = di->i_clusters / cl->cl_cpg;
+	dst->max_gds++;
+	ret = ocfs2_malloc(sizeof(char *) * dst->max_gds, &dst->gds);
+	if (ret)
+		return ret;
+	memset(dst->gds, 0, sizeof(char *) * dst->max_gds);
+	dst->num_gds = 0;
+
+	n = ocfs2_chain_recs_per_inode(fs->fs_blocksize);
+	n--;
+	while ((cl->cl_recs[n].c_blkno == 0) && (n > 0))
+		n--;
+
+	verbosef("Total gds: %d recs: %d\n", dst->max_gds, n);
+
+	for(i = 0; i <= n; i++) {
+		struct ocfs2_chain_rec *cr = &cl->cl_recs[i];
+		ret = cr_read_gds(dst, cr);
+		if (ret)
+			return ret;
+	}
+
+	dst->global_bitmap_inode = di;
+	return 0;
+}
+
+
+static errcode_t read_inodes(struct defrag_state *dst)
+{
+	ocfs2_inode_scan *scan;
+	ocfs2_filesys *fs = dst->dst_fs;
+	uint64_t blkno;
+	errcode_t ret = 0;
+	char *buf = NULL, *tbuf = NULL;
+	struct ocfs2_dinode *di;
+	int ninodes = 0;
+
+
+	ret = ocfs2_malloc_block(fs->fs_io, &buf);
+	if (ret)
+		goto out;
+
+	ret = ocfs2_open_inode_scan(fs, &scan);
+	if (ret)
+		goto out;
+	ret = ocfs2_malloc(sizeof(struct ocfs2_dinode *) * dst->num_inodes,
+			&dst->inodes);
+	if (ret)
+		goto out_close_scan;
+	memset(dst->inodes, 0, sizeof(void *) * dst->num_inodes);
+
+
+	di = (struct ocfs2_dinode *)buf;
+
+	for (;;) {
+		ret = ocfs2_get_next_inode(scan, &blkno, buf);
+		if (ret)
+			goto out_close_scan;
+
+		if (blkno == 0)
+			break;
+
+		if (memcmp(di->i_signature, OCFS2_INODE_SIGNATURE,
+					strlen(OCFS2_INODE_SIGNATURE)))
+			continue;
+
+		ocfs2_swap_inode_to_cpu(fs, di);
+
+		if (di->i_fs_generation != dst->fs_generation)
+			continue;
+
+		if (!(di->i_flags & OCFS2_VALID_FL) ||
+				(di->i_flags & OCFS2_SYSTEM_FL))
+			continue;
+
+		verbosef("Inode %"PRIu64" blkno %"PRIu64" flags:0x%x "
+				"dyn_flags:0x%x\n",
+				(uint64_t)di->i_blkno, (uint64_t)blkno,
+				di->i_flags, di->i_dyn_features);
+
+		ret = ocfs2_malloc_block(fs->fs_io, &tbuf);
+		if (ret)
+			goto out_close_scan;
+		memcpy(tbuf, buf, fs->fs_blocksize);
+		dst->inodes[ninodes++] = (struct ocfs2_dinode *) tbuf;
+		if (ninodes > dst->num_inodes) {
+			fprintf(stderr, "Incorrect number of inodes "
+					"found in previous pass.\n");
+			goto out_close_scan;
+		}
+
+	}
+
+out_close_scan:
+	ocfs2_close_inode_scan(scan);
+out:
+	ocfs2_free(&buf);
+	return ret;
+}
+
+static struct ocfs2_extent_rec *find_best_fit_extent(
+		struct defrag_state *dst, int size,
+		uint64_t blkno, struct ocfs2_dinode **di)
+{
+	struct ocfs2_dinode **inode = dst->inodes;
+	struct ocfs2_extent_rec *rec, *er = NULL;
+	struct ocfs2_extent_list *el;
+	int i, j, nb, cblks = -1;
+	*di = NULL;
+
+	for (i = dst->num_inodes-1; i >= 0; i--) {
+		el = &inode[i]->id2.i_list;
+		/* XXX - temporarily work only on inodes of depth 0 */
+		if (el->l_tree_depth)
+			continue;
+		for(j = 0; j < el->l_count; j++) {
+			rec = &el->l_recs[j];
+			nb = ocfs2_clusters_to_blocks(dst->dst_fs,
+					rec->e_leaf_clusters);
+			if ((rec->e_blkno > blkno) && (nb <= size) &&
+				((cblks == -1) || (cblks < nb))) {
+				*di = inode[i];
+				if (nb == size)
+					return rec;
+				er = rec;
+				cblks = nb;
+			}
+		}
+	}
+	return er;
+}
+
+static struct ocfs2_group_desc *find_gd(struct defrag_state *dst,
+		uint64_t blkno)
+{
+	struct ocfs2_group_desc **gd = dst->gds;
+	int i;
+	uint64_t end;
+	verbosef("Finding gd corresponding to %"PRIu64"\n", blkno);
+	for (i = dst->num_gds - 1; i >= 0; i--) {
+		end = gd[i]->bg_blkno +
+			ocfs2_clusters_to_blocks(dst->dst_fs, gd[i]->bg_bits);
+		if (gd[i] && (blkno > gd[i]->bg_blkno) &&
+				(blkno < end))
+			return gd[i];
+	}
+	return NULL;
+}
+
+
+
+
+static errcode_t gd_bitmap_set_value(struct defrag_state *dst,
+		struct ocfs2_group_desc *gd,
+		int start, int len, int value)
+{
+	int i;
+	struct ocfs2_chain_list *cl = &dst->global_bitmap_inode->id2.i_chain;
+	struct ocfs2_chain_rec *cr;
+
+	for (i = start; i < start + len; i++)
+		if (value)
+			ocfs2_set_bit(i, gd->bg_bitmap);
+		else
+			ocfs2_clear_bit(i, gd->bg_bitmap);
+
+
+	i = gd->bg_chain;
+	cr = &cl->cl_recs[i];
+	if (value) {
+		cr->c_free -= len;
+		gd->bg_free_bits_count -= len;
+	} else {
+		cr->c_free += len;
+		gd->bg_free_bits_count += len;
+	}
+	return ocfs2_write_group_desc(dst->dst_fs, gd->bg_blkno, (char *)gd);
+}
+
+
+static errcode_t gd_set_bits(struct defrag_state *dst,
+		struct ocfs2_group_desc *gd, int start, int len)
+{
+	return gd_bitmap_set_value(dst, gd, start, len, 1);
+}
+
+
+static errcode_t gd_clear_bits(struct defrag_state *dst,
+		struct ocfs2_group_desc *gd,
+		int start, int len)
+{
+	return gd_bitmap_set_value(dst, gd, start, len, 0);
+}
+
+
+static errcode_t move_extent(struct defrag_state *dst,
+		struct ocfs2_group_desc *from_gd,
+		struct ocfs2_group_desc *to_gd, struct ocfs2_extent_rec *rec,
+		int to_offset)
+{
+	char *buf;
+	errcode_t ret;
+	uint64_t from = rec->e_blkno, to;
+	uint64_t first_gd_blkno =
+		(OCFS2_RAW_SB(dst->dst_fs->fs_super))->s_first_cluster_group;
+	ocfs2_filesys *fs = dst->dst_fs;
+	int len = ocfs2_clusters_to_blocks(fs, rec->e_leaf_clusters);
+
+	if (to_gd->bg_blkno == first_gd_blkno)
+		to = to_offset;
+	else
+		to = to_offset + to_gd->bg_blkno;
+	verbosef("Moving %d blocks from %"PRIu64" to %"PRIu64"\n",
+			len, from, to);
+
+	ret = ocfs2_malloc_blocks(fs->fs_io, len, &buf);
+	if (ret)
+		return ret;
+	ret = ocfs2_read_blocks(fs, from, len, buf);
+	if (ret)
+		goto out;
+	ret = io_write_block(fs->fs_io, to, len, buf);
+	if (ret)
+		goto out;
+	ret = gd_set_bits(dst, to_gd,
+			ocfs2_blocks_to_clusters(fs, to_offset),
+			rec->e_leaf_clusters);
+	if (ret)
+		goto out;
+	ret = gd_clear_bits(dst, from_gd,
+			ocfs2_blocks_to_clusters(fs, from - from_gd->bg_blkno),
+			rec->e_leaf_clusters);
+	rec->e_blkno = to;
+out:
+	ocfs2_free(&buf);
+	return ret;
+}
+
+static errcode_t defrag_gd(struct defrag_state *dst,
+		struct ocfs2_group_desc *gd)
+{
+	int start = 0, end = 0, changed = 0, bstart, bend;
+	struct ocfs2_extent_rec *er;
+	struct ocfs2_dinode *di;
+	struct ocfs2_group_desc *from_gd;
+	errcode_t ret;
+
+	while (end < gd->bg_bits) {
+		start = ocfs2_find_next_bit_clear(gd->bg_bitmap,
+				gd->bg_bits, end);
+
+		if (start > gd->bg_bits)
+			break;
+
+		end = ocfs2_find_next_bit_set(gd->bg_bitmap,
+				gd->bg_bits, start);
+
+		if (end > gd->bg_bits)
+			break;
+
+		bstart = ocfs2_clusters_to_blocks(dst->dst_fs, start);
+		bend = ocfs2_clusters_to_blocks(dst->dst_fs, end);
+		verbosef("gd %"PRIu64" start %d end %d bstart %d bend %d\n",
+				(uint64_t) gd->bg_blkno, start, end,
+				bstart, bend);
+
+		er = NULL;
+		di = NULL;
+		er = find_best_fit_extent(dst, bend - bstart,
+				gd->bg_blkno + bstart, &di);
+
+		if (er) {
+			from_gd = find_gd(dst, er->e_blkno);
+			if (from_gd) {
+				ret = move_extent(dst, from_gd, gd, er,
+						bstart);
+				if (ret)
+					break;
+				/* XXX For efficiency, try to move
+				 * the rest of the file to
+				 * this gd, if space allows */
+				ret = ocfs2_write_inode(dst->dst_fs,
+						di->i_blkno , (char *) di);
+				if (ret)
+					break;
+				changed = 1;
+			}
+		}
+	}
+	if (changed)
+		ret = ocfs2_write_group_desc(dst->dst_fs,
+				gd->bg_blkno, (char *)gd);
+	return ret;
+}
+
+static errcode_t defrag_gds(struct defrag_state *dst)
+{
+	int i;
+	errcode_t ret = 0;
+	struct ocfs2_group_desc **gd = dst->gds;
+
+	for (i = 0; i < dst->num_gds; i++)
+		if (gd[i]->bg_free_bits_count != 0)
+			ret = defrag_gd(dst, gd[i]);
+	return ret;
+}
+
+
+errcode_t defrag_fs(struct defrag_state *dst)
+{
+	errcode_t ret = read_gds(dst);
+	sort_gds(dst);
+#ifdef DEBUG
+	int i;
+	for (i=0; i<dst->num_gds; i++)
+		print_gd(dst->gds[i]);
+#endif
+
+	ret = read_inodes(dst);
+	ret = defrag_gds(dst);
+
+#ifdef DEBUG
+	for (i = 0; i < dst->num_gds; i++)
+		print_gd(dst->gds[i]);
+#endif
+	ocfs2_write_inode(dst->dst_fs, dst->global_bitmap_inode->i_blkno,
+			(char *)dst->global_bitmap_inode);
+	
+	return ret;
+}
+
diff --git a/defrag.ocfs2/include/defrag.h b/defrag.ocfs2/include/defrag.h
index 08b12c7..b499002 100644
--- a/defrag.ocfs2/include/defrag.h
+++ b/defrag.ocfs2/include/defrag.h
@@ -34,6 +34,11 @@
 struct defrag_state {
 	ocfs2_filesys 	*dst_fs;
 	uint32_t        fs_generation;
+	struct ocfs2_dinode *global_bitmap_inode;
+	struct ocfs2_group_desc **gds;
+	int max_gds;
+	int num_gds;
+	struct ocfs2_dinode **inodes;
 	int num_inodes;
 };
 
@@ -46,6 +51,7 @@ extern int verbose;
 
 errcode_t allocators_clear_unused(struct defrag_state *);
 errcode_t defrag_dirs(struct defrag_state *dst);
+errcode_t defrag_fs(struct defrag_state *dst);
 
 #endif /* __OCFS2_DEFRAG_H__ */
 
-- 
1.6.4.2


-- 
Goldwyn



More information about the Ocfs2-tools-devel mailing list