[Ocfs2-tools-devel] [PATCH 3/4] Defrag directories

Goldwyn Rodrigues rgoldwyn at gmail.com
Wed Mar 17 18:28:39 PDT 2010


Directories with holes left by deleted entries are defragged.
Algorithm:
1. Read first extent and coalesce entries.
2. Maintain a sliding window of extents, read the next extent
   and move dirents from the last extent read to the first extent.
3. Truncate the directory file to the computed size.

Signed-off-by: Goldwyn Rodrigues <rgoldwyn at suse.de>

---
 defrag.ocfs2/Makefile         |    2 +-
 defrag.ocfs2/defrag.c         |   18 ++-
 defrag.ocfs2/dir.c            |  335 +++++++++++++++++++++++++++++++++++++++++
 defrag.ocfs2/include/defrag.h |    3 +
 4 files changed, 355 insertions(+), 3 deletions(-)
 create mode 100644 defrag.ocfs2/dir.c

diff --git a/defrag.ocfs2/Makefile b/defrag.ocfs2/Makefile
index 8bc48e0..ed0bc37 100644
--- a/defrag.ocfs2/Makefile
+++ b/defrag.ocfs2/Makefile
@@ -13,7 +13,7 @@ LIBOCFS2_DEPS = $(TOPDIR)/libocfs2/libocfs2.a
 
 CFLAGS += -g
 
-CFILES =	defrag.c alloc.c
+CFILES =	defrag.c alloc.c dir.c
 
 HFILES = 	include/defrag.h
 
diff --git a/defrag.ocfs2/defrag.c b/defrag.ocfs2/defrag.c
index 22b71d7..4a5ad7e 100644
--- a/defrag.ocfs2/defrag.c
+++ b/defrag.ocfs2/defrag.c
@@ -48,6 +48,7 @@ static errcode_t check_superblock(struct defrag_state *dst)
 		printf("The superblock max_slots field is set to 0.\n");
 		ret = OCFS2_ET_CORRUPT_SUPERBLOCK;
 	}
+	dst->fs_generation = di->i_fs_generation;
 
 	return ret;
 }
@@ -269,14 +270,27 @@ int main(int argc, char **argv)
 	printf("Pass 1: Clearing unused groups in Allocators\n");
 	ret = allocators_clear_unused(dst);
 	if (ret) {
-		fprintf(stderr, "Error while clearing unused inode groups\n"
-				"Please execute fsck.ocfs2\n");
 		retval |= DEFRAG_ERROR;
+		goto error;
+	}
+
+	printf("Pass 2: Defrag directories\n");
+	ret = defrag_dirs(dst);
+	if (ret) {
+		retval |= DEFRAG_ERROR;
+		goto error;
 	}
 
 	ocfs2_write_super(dst->dst_fs);
 	ocfs2_close(dst->dst_fs);
+	printf("All passes successfully completed.\n");
 	
 out:
 	return retval;
+
+error:	/* a pass failed; retval already carries DEFRAG_ERROR */
+	fprintf(stderr, "Error while defragging filesystem. Please execute "
+			"fsck.ocfs2\n");
+	return retval;
+
 }
diff --git a/defrag.ocfs2/dir.c b/defrag.ocfs2/dir.c
new file mode 100644
index 0000000..739d9da
--- /dev/null
+++ b/defrag.ocfs2/dir.c
@@ -0,0 +1,335 @@
+#include "defrag.h"
+#include <inttypes.h>
+
+#define MAX_DIR_EXTENTS 256 /* Maximum window size */
+
+/* Pack the live dirents of buf[offset, end) towards offset, in place, with
+ * padding so none is moved across an fs_blocksize boundary. Sets *p to the
+ * last live entry (NULL if none) and returns the offset just past it. */
+static int coalesce_dirents(ocfs2_filesys *fs, char *buf,
+		int offset, int end, struct ocfs2_dir_entry **p)
+{
+	struct ocfs2_dir_entry *dirent = NULL, *prev = NULL;
+	int s_rec_len, write_off = offset, bs, next_blk_lim;
+	next_blk_lim = bs = fs->fs_blocksize;
+
+	while (offset < end) {
+		dirent = (struct ocfs2_dir_entry *)(buf + offset);
+		/* A zero rec_len cannot be stepped over (corrupted block):
+		   resume reading at the start of the next block */
+		if (dirent->rec_len <= 0) {
+			offset = ((offset + bs) / bs) * bs;
+			continue;
+		}
+		if (dirent->inode == 0) {
+			offset += dirent->rec_len;
+			continue;
+		}
+		if (prev) {
+			prev->rec_len = OCFS2_DIR_REC_LEN(prev->name_len);
+			write_off += prev->rec_len;
+			/* If doing multi-block - check if moving dirent
+			   will move it to block boundary
+			 */
+			if (write_off + OCFS2_DIR_REC_LEN(dirent->name_len)
+					> next_blk_lim) {
+				prev->rec_len += next_blk_lim - write_off;
+				write_off = next_blk_lim;
+				next_blk_lim += bs;
+				verbosef("Reset - off: %u write:%u "
+					"next_blk:%u d:%u/%u p:%u\n",
+					offset, write_off, next_blk_lim,
+					dirent->name_len, dirent->rec_len,
+					prev->rec_len);
+			}
+		}
+		/* the move may overwrite the source header: save rec_len */
+		s_rec_len = dirent->rec_len;
+		if (write_off < offset) {
+			memmove(buf + write_off, buf + offset,
+					OCFS2_DIR_REC_LEN(dirent->name_len));
+			dirent = (struct ocfs2_dir_entry *)(buf + write_off);
+		}
+		verbosef("off: %u w:%u nb:%u d:%u/%u %.*s %llu\n",
+				offset, write_off, next_blk_lim,
+				dirent->name_len, dirent->rec_len,
+				dirent->name_len, dirent->name, dirent->inode);
+		offset += s_rec_len;
+		prev = dirent;
+	}
+	*p = prev;
+	if (!prev)
+		return write_off;	/* nothing live: buf is left untouched */
+	write_off += OCFS2_DIR_REC_LEN(prev->name_len);
+	memset(buf + write_off, 0, end - write_off);
+	prev->rec_len = end - write_off + OCFS2_DIR_REC_LEN(prev->name_len);
+	return write_off;
+}
+
+
+struct defrag_dir_context {	/* state of the extent window for one directory */
+	struct ocfs2_extent_rec *er[MAX_DIR_EXTENTS];	/* copied extent records */
+	char *buf[MAX_DIR_EXTENTS];	/* blocks read for the matching er[] slot */
+	int r_offset;	/* read position in the newest buffer */
+	int w_offset;	/* write position in buf[0] */
+	int ext_count;	/* er[]/buf[] slots in use */
+	struct ocfs2_dir_entry *prev;	/* dirent most recently packed */
+	uint64_t dirsize;	/* packed size so far; rounded up for the truncate */
+};
+
+enum move_rets {	/* why move_dirents() stopped */
+	READER_EXHAUSTED = 0,	/* r_offset reached the end of the source extent */
+	WRITER_EXHAUSTED	/* the next dirent does not fit in buf[0] */
+};
+
+/* Move the live dirents left in the newest window extent (from c->r_offset)
+ * to c->w_offset in the oldest one, c->buf[0]. Returns WRITER_EXHAUSTED
+ * when the next dirent does not fit there, else READER_EXHAUSTED. */
+static enum move_rets move_dirents(ocfs2_filesys *fs,
+		struct defrag_dir_context *c)
+{
+	struct ocfs2_dir_entry *dirent = NULL;
+	int next_blk_lim, bs, len, idx, r_end, w_end, s_rec_len;
+	char *rbuf, *wbuf;
+	bs = fs->fs_blocksize;
+	next_blk_lim = ((c->w_offset + bs - 1)/bs) * bs;
+	idx = c->ext_count - 1;
+	r_end = ocfs2_clusters_to_bytes(fs, c->er[idx]->e_leaf_clusters);
+	w_end = ocfs2_clusters_to_bytes(fs, c->er[0]->e_leaf_clusters);
+	rbuf = c->buf[idx];
+	wbuf = c->buf[0];
+	while (c->r_offset < r_end) {
+		dirent = (struct ocfs2_dir_entry *)(rbuf + c->r_offset);
+		if (dirent->rec_len <= 0) {
+			/* Set r_offset to next block start */
+			c->r_offset = ((c->r_offset + bs) / bs) * bs;
+			continue;
+		}
+		/* rbuf may be wbuf: the move can clobber rec_len, save it */
+		s_rec_len = dirent->rec_len;
+		if (dirent->inode == 0)
+			goto next;
+		c->prev->rec_len = OCFS2_DIR_REC_LEN(c->prev->name_len);
+		len = OCFS2_DIR_REC_LEN(dirent->name_len);
+		if (c->w_offset + len > w_end) {
+			c->prev->rec_len += next_blk_lim - c->w_offset;
+			c->dirsize += next_blk_lim - c->w_offset;
+			return WRITER_EXHAUSTED;
+		}
+		/* No dirent may straddle a block; the pad counts in dirsize */
+		if (c->w_offset + len > next_blk_lim) {
+			c->prev->rec_len += next_blk_lim - c->w_offset;
+			c->dirsize += next_blk_lim - c->w_offset;
+			c->w_offset = next_blk_lim;
+			next_blk_lim += bs;
+		}
+		verbosef("w %d/%d r %d/%d %.*s i %lu s %lu\n",
+				c->w_offset, w_end,
+				c->r_offset, r_end, dirent->name_len,
+				dirent->name, (uint64_t)dirent->inode,
+				c->dirsize);
+		/* copy the used bytes only, rec_len may include slack */
+		memmove(wbuf + c->w_offset, rbuf + c->r_offset, len);
+		c->prev = (struct ocfs2_dir_entry *)(wbuf + c->w_offset);
+		c->w_offset += len;
+		c->dirsize += len;
+next:
+		c->r_offset += s_rec_len;
+	}
+	return READER_EXHAUSTED;
+}
+
+#ifdef DEBUG
+/* Debug dump of the dirents in buf[0, end), up to the first unused one */
+static void print_dirents(char *buf, int end)
+{
+	struct ocfs2_dir_entry *dirent;
+	int off = 0;
+	while (off < end) {
+		dirent = (struct ocfs2_dir_entry *)(buf+off);
+		verbosef("o %d/%d %.*s i:%llu\n", off, end,
+				dirent->name_len, dirent->name, dirent->inode);
+		/* a zero rec_len would never advance off */
+		if (!dirent->inode || !dirent->rec_len)
+			break;
+		off += dirent->rec_len;
+	}
+}
+#endif
+
+/* Extent iteration callback (priv_data: struct defrag_dir_context). Reads
+ * the extent into the window and packs its dirents into the oldest window
+ * extent, writing that one out and dropping it each time it fills up. */
+static int defrag_dir(ocfs2_filesys *fs,
+		struct ocfs2_extent_rec *rec,
+		int tree_depth, uint32_t ccount, uint64_t ref_blkno,
+		int ref_recno, void *priv_data)
+{
+	static struct ocfs2_dir_entry flushed; /* c->prev after a flush */
+	struct defrag_dir_context *c = (struct defrag_dir_context *)priv_data;
+	errcode_t ret = 0;
+	int i = c->ext_count, nblks;
+	verbosef("Reading extent %llu w:%d r:%d\n", rec->e_blkno, c->w_offset,
+			c->r_offset);
+	if (i >= MAX_DIR_EXTENTS)	/* er[] and buf[] are full */
+		return OCFS2_EXTENT_ABORT;
+	c->r_offset = 0;
+	/* allocate/copy because underlying buffer can be freed */
+	ret = ocfs2_malloc(sizeof(struct ocfs2_extent_rec), &c->er[i]);
+	if (ret)
+		goto fail;
+	memcpy(c->er[i], rec, sizeof(struct ocfs2_extent_rec));
+	nblks = ocfs2_clusters_to_blocks(fs, rec->e_leaf_clusters);
+	ret = ocfs2_malloc_blocks(fs->fs_io, nblks, &c->buf[i]);
+	if (ret)
+		goto fail;
+	ret = ocfs2_read_blocks(fs, c->er[i]->e_blkno, nblks, c->buf[i]);
+	if (ret)
+		goto fail;
+	c->ext_count++;
+
+	if (!i) {
+		/* First extent, just coalesce and return*/
+		c->w_offset = c->dirsize = coalesce_dirents(fs, c->buf[0], 0,
+				nblks * fs->fs_blocksize, &c->prev);
+		verbosef("w_offset:%u dirsize %lu\n", c->w_offset, c->dirsize);
+		return 0;
+	}
+	while (move_dirents(fs, c) == WRITER_EXHAUSTED) {
+		nblks = ocfs2_clusters_to_blocks(fs, c->er[0]->e_leaf_clusters);
+#ifdef DEBUG
+		print_dirents(c->buf[0],
+			ocfs2_clusters_to_bytes(fs, c->er[0]->e_leaf_clusters));
+#endif
+		io_write_block(fs->fs_io, c->er[0]->e_blkno, nblks, c->buf[0]);
+		verbosef("Writing extent %llu\n", c->er[0]->e_blkno);
+		ocfs2_free(&c->buf[0]);
+		ocfs2_free(&c->er[0]);
+		/* prev lived in the buffer just freed: park it on a dummy */
+		c->prev = &flushed;
+		/* shift window, leaving no stale copy in the vacated slot */
+		for (i = 0; i < c->ext_count-1; i++) {
+			c->er[i] = c->er[i+1];
+			c->buf[i] = c->buf[i+1];
+		}
+		c->er[i] = NULL;
+		c->buf[i] = NULL;
+		c->w_offset = 0;
+		c->ext_count--;
+	}
+	return 0;
+fail:
+	/* NOTE(review): confirm the iterator surfaces this abort as an error */
+	if (c->buf[i])
+		ocfs2_free(&c->buf[i]);
+	if (c->er[i])
+		ocfs2_free(&c->er[i]);
+	return OCFS2_EXTENT_ABORT;
+}
+
+/* Pass 2: scan all inodes and, for every valid non-system directory of this
+ * filesystem generation, pack its dirents (in the inode block when inline,
+ * else extent by extent) and truncate it to the packed size. */
+errcode_t defrag_dirs(struct defrag_state *dst)
+{
+	ocfs2_inode_scan *scan = NULL;
+	ocfs2_filesys *fs = dst->dst_fs;
+	uint64_t blkno;
+	errcode_t ret = 0;
+	char *buf = NULL;
+	struct ocfs2_dinode *di;
+	int offset, end, blockbits, i, nblks, bs;
+	struct defrag_dir_context dc;
+	blockbits = OCFS2_RAW_SB(dst->dst_fs->fs_super)->s_blocksize_bits;
+	bs = fs->fs_blocksize;
+	ret = ocfs2_malloc_block(fs->fs_io, &buf);
+	if (ret)
+		goto out;
+	ret = ocfs2_open_inode_scan(fs, &scan);
+	if (ret)
+		goto out;
+	di = (struct ocfs2_dinode *)buf;
+	for (;;) {
+		ret = ocfs2_get_next_inode(scan, &blkno, buf);
+		if (ret)
+			goto out;
+		if (blkno == 0)
+			break;
+		if (memcmp(di->i_signature, OCFS2_INODE_SIGNATURE,
+					strlen(OCFS2_INODE_SIGNATURE)))
+			continue;
+		ocfs2_swap_inode_to_cpu(fs, di);
+		if (di->i_fs_generation != dst->fs_generation)
+			continue;
+		if (!(di->i_flags & OCFS2_VALID_FL) ||
+				(di->i_flags & OCFS2_SYSTEM_FL))
+			continue;
+		verbosef("Inode %"PRIu64" blkno %"PRIu64" flags:0x%x "
+				"dyn_flags:0x%x\n",
+				(uint64_t)di->i_blkno, blkno,
+				di->i_flags, di->i_dyn_features);
+		dst->num_inodes++;
+		if (S_ISDIR(di->i_mode)) {
+			if (di->i_dyn_features & OCFS2_INLINE_DATA_FL) {
+				struct ocfs2_dir_entry *prev = NULL;
+				offset = offsetof(struct ocfs2_dinode,
+						id2.i_data.id_data);
+				coalesce_dirents(fs, buf, offset, bs, &prev);
+				ocfs2_write_inode(fs, blkno, (char *)di);
+				continue;
+			}
+			memset(&dc, 0, sizeof(struct defrag_dir_context));
+			ret = ocfs2_extent_iterate_inode(dst->dst_fs, di,
+					OCFS2_EXTENT_FLAG_DATA_ONLY, NULL,
+					defrag_dir, &dc);
+			if (ret)
+				goto out;
+			if (!dc.ext_count)	/* nothing read: er[0] is NULL */
+				continue;
+			end = ocfs2_clusters_to_bytes(fs,
+					dc.er[0]->e_leaf_clusters);
+			memset(dc.buf[0] + dc.w_offset, 0, end - dc.w_offset);
+			/*Set the last prev */
+			end = ((dc.w_offset + bs - 1) / bs) * bs;
+			verbosef("w_off %u end %u bb:%d l %d i %llu\n",
+					dc.w_offset, end, blockbits,
+					dc.prev->rec_len, dc.prev->inode);
+			dc.prev->rec_len = end - dc.w_offset +
+				OCFS2_DIR_REC_LEN(dc.prev->name_len);
+			nblks = ocfs2_clusters_to_blocks(fs,
+					dc.er[0]->e_leaf_clusters);
+			io_write_block(fs->fs_io, dc.er[0]->e_blkno,
+					nblks, dc.buf[0]);
+			/*Making sure all the rest are set to zeros*/
+			for (i = 1; i < dc.ext_count; i++) {
+				nblks = ocfs2_clusters_to_blocks(fs,
+						dc.er[i]->e_leaf_clusters);
+				end = ocfs2_clusters_to_bytes(fs,
+						dc.er[i]->e_leaf_clusters);
+				verbosef("Clearing i: %d end %u\n", i, end);
+				memset(dc.buf[i], 0, end);
+				io_write_block(fs->fs_io, dc.er[i]->e_blkno,
+						nblks, dc.buf[i]);
+				ocfs2_free(&dc.buf[i]);
+				ocfs2_free(&dc.er[i]);
+			}
+			ocfs2_free(&dc.buf[0]);	/* slot 0 is not freed above */
+			ocfs2_free(&dc.er[0]);
+
+			dc.dirsize = ((dc.dirsize + bs - 1) / bs) * bs;
+			verbosef("Truncating %lu from %lu to %lu\n",
+				(uint64_t)di->i_blkno, (uint64_t)di->i_size,
+				dc.dirsize);
+			if (dc.dirsize < di->i_size)
+				/* XXX can we call truncate if the inode is
+				   read in memory */
+				ocfs2_truncate(dst->dst_fs, di->i_blkno,
+						dc.dirsize);
+		}
+	}
+out:
+	if (scan)
+		ocfs2_close_inode_scan(scan);
+	ocfs2_free(&buf);
+	return ret;
+}
+
diff --git a/defrag.ocfs2/include/defrag.h b/defrag.ocfs2/include/defrag.h
index e845893..08b12c7 100644
--- a/defrag.ocfs2/include/defrag.h
+++ b/defrag.ocfs2/include/defrag.h
@@ -33,6 +33,8 @@
 
 struct defrag_state {
 	ocfs2_filesys 	*dst_fs;
+	uint32_t        fs_generation;	/* saved by check_superblock(); inodes with another i_fs_generation are skipped */
+	int num_inodes;	/* valid non-system inodes counted by defrag_dirs() */
 };
 
 extern int verbose;
@@ -43,6 +45,7 @@ extern int verbose;
 
 
 errcode_t allocators_clear_unused(struct defrag_state *);
+errcode_t defrag_dirs(struct defrag_state *dst);
 
 #endif /* __OCFS2_DEFRAG_H__ */
 
-- 
1.6.4.2


-- 
Goldwyn



More information about the Ocfs2-tools-devel mailing list