[Ocfs2-tools-devel] [PATCH 3/4] defrag.ocfs2: Defrag directories

Goldwyn Rodrigues rgoldwyn at gmail.com
Tue Sep 7 20:49:25 PDT 2010


Algorithm:
Read all extents of the directory and calculate directory size.
Create a new directory with estimated directory size extent.
Link files to the new directory.
Swap extents of the old and new directory.
Delete the temporary directory inode, which after the swap holds the old extents.

Unlinking files is not required because link does not increase link count.

Signed-off-by: Goldwyn Rodrigues <rgoldwyn at suse.de>
---
 defrag.ocfs2/Makefile         |    2 +-
 defrag.ocfs2/dir.c            |  388 +++++++++++++++++++++++++++++++++++++++++
 defrag.ocfs2/file.c           |    2 +
 defrag.ocfs2/include/defrag.h |    1 +
 4 files changed, 392 insertions(+), 1 deletions(-)
 create mode 100644 defrag.ocfs2/dir.c

diff --git a/defrag.ocfs2/Makefile b/defrag.ocfs2/Makefile
index e4314b8..685624e 100644
--- a/defrag.ocfs2/Makefile
+++ b/defrag.ocfs2/Makefile
@@ -17,7 +17,7 @@ LIBO2CB_DEPS = $(TOPDIR)/libo2cb/libo2cb.a

 CFLAGS += -g

-CFILES =	defrag.c file.c
+CFILES =	defrag.c file.c dir.c

 HFILES = 	include/defrag.h

diff --git a/defrag.ocfs2/dir.c b/defrag.ocfs2/dir.c
new file mode 100644
index 0000000..eb4572c
--- /dev/null
+++ b/defrag.ocfs2/dir.c
@@ -0,0 +1,388 @@
+/* -*- mode: c; c-basic-offset: 8; -*-
+   * vim: noexpandtab sw=8 ts=8 sts=0:
+   *
+   * dir.c
+   *
+   * Copyright (C) 2010 Novell. All rights reserved.
+   *
+   * This program is free software; you can redistribute it and/or
+   * modify it under the terms of the GNU General Public
+   * License version 2 as published by the Free Software Foundation.
+   *
+   * This program is distributed in the hope that it will be useful,
+   * but WITHOUT ANY WARRANTY; without even the implied warranty of
+   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+   * General Public License for more details.
+   */
+#include "defrag.h"
+#include <inttypes.h>
+
+extern char *whoami;
+
+#define NUM_EXTENTS	1024
+
+struct defrag_dir_context {	/* state shared by the steps of one dir defrag */
+	struct defrag_state *dst;	/* defrag run state; supplies dst_fs */
+	ocfs2_cached_inode *new_inode;	/* replacement inode set by new_dir() */
+	struct ocfs2_dinode *dir;	/* directory inode being defragmented */
+	uint64_t parent_blkno;		/* inode of "..", noted by read_extents() */
+	uint64_t r_size;		/* bytes of dirent space walked so far */
+	char *buf[NUM_EXTENTS];		/* raw blocks, one buffer per extent read */
+	int bufsize[NUM_EXTENTS];	/* byte size of the matching buf[] entry */
+	int nbufs;			/* number of buf[] slots filled */
+	int dot_reset;			/* nonzero once reset_trailer() did a block */
+	uint64_t dirsize;		/* OCFS2_DIR_REC_LEN total of in-use entries */
+	int status;			/* set to DEFRAG_ERROR when a step fails */
+};
+
+
+/* Link each in-use buffered entry, except "." and "..", into c->new_inode
+ * and free the buffers.  Returns 0 or the libocfs2 error that stopped it. */
+static errcode_t link_dirents(struct defrag_dir_context *c)
+{
+	int i, offset, inc, nlen;
+	errcode_t ret = 0;
+	ocfs2_filesys *fs = c->dst->dst_fs;
+	struct ocfs2_dir_entry *dirent = NULL;
+	char name[OCFS2_MAX_FILENAME_LEN + 1];	/* + 1 for the final NUL */
+
+	c->r_size = 0;
+	for (i = 0; i < c->nbufs; i++) {
+		offset = 0;
+		verbosef("buffer size: %d\n", c->bufsize[i]);
+		while (offset < c->bufsize[i]) {
+			dirent = (struct ocfs2_dir_entry *)(c->buf[i]
+					+ offset);
+			nlen = dirent->name_len;
+			verbosef("Linking %.*s ino %"PRIu64" off %d len %d\n",
+				nlen, dirent->name, (uint64_t)dirent->inode,
+				offset, dirent->rec_len);
+			if (dirent->rec_len == 0) {
+				/* Reset to the next dirblock */
+				inc = fs->fs_blocksize -
+					(offset % fs->fs_blocksize);
+				offset += inc;
+				c->r_size += inc;
+				verbosef("Incrementing offset by: %d\n", inc);
+				if (c->r_size >= c->dir->i_size)
+					goto out;
+				continue;
+			}
+			/* Skip . and .. (other dot-files are linked normally) */
+			if (dirent->name[0] == '.' && (nlen == 1 ||
+			    (nlen == 2 && dirent->name[1] == '.'))) {
+				verbosef("skipping %.*s\n", nlen, dirent->name);
+				goto next;
+			}
+			if (dirent->inode == 0) {
+				verbosef("skipping zero dirent %d\n",
+						dirent->rec_len);
+				goto next;
+			}
+			/* Copy bounded by sizeof(name), always NUL-terminated */
+			snprintf(name, sizeof(name), "%.*s", nlen,
+					dirent->name);
+			ret = ocfs2_link(fs, c->new_inode->ci_blkno,
+					name, dirent->inode,
+					dirent->file_type);
+			if (ret)
+				goto error;
+next:
+			offset += dirent->rec_len;
+			c->r_size += dirent->rec_len;
+		}
+		ocfs2_free(&c->buf[i]);
+	}
+out:
+	ret = ocfs2_refresh_cached_inode(fs, c->new_inode);
+	goto free_bufs;
+error:
+	com_err(whoami, ret, "while linking\n");
+	c->status = DEFRAG_ERROR;
+free_bufs:
+	/* An early exit leaves buffers i..nbufs-1 allocated; release them */
+	for (; i < c->nbufs; i++)
+		ocfs2_free(&c->buf[i]);
+	return ret;
+}
+
+/*
+ * Extent-iterator callback: read one extent of c->dir into the next
+ * c->buf[] slot, note the ".." inode and total the space needed by in-use
+ * entries.  Returns 0, or OCFS2_EXTENT_ABORT with c->status set on error.
+ */
+static int read_extents(ocfs2_filesys *fs,
+		struct ocfs2_extent_rec *rec,
+		int tree_dept, uint32_t ccount, uint64_t ref_blkno,
+		int rec_recno, void *private)
+{
+	struct defrag_dir_context *c = (struct defrag_dir_context *)private;
+	struct ocfs2_dir_entry *dirent = NULL;
+	int n = ocfs2_clusters_to_blocks(fs, rec->e_leaf_clusters);
+	int offset = 0, inc;
+	char *buf = NULL;
+	errcode_t ret = 0;
+
+	/* Make sure a slot is free before buf[]/bufsize[] are written */
+	if (c->nbufs >= NUM_EXTENTS)
+		goto error;
+	ret = ocfs2_malloc_blocks(fs->fs_io, n, &buf);
+	if (ret)
+		goto error;
+	c->buf[c->nbufs] = buf;
+	c->bufsize[c->nbufs] = n * fs->fs_blocksize;
+	c->nbufs++;
+
+	verbosef("Reading %d blocks blkno: %"PRIu64"\n", rec->e_leaf_clusters,
+			(uint64_t) rec->e_blkno);
+	ret = ocfs2_read_blocks(fs, rec->e_blkno, n, buf);
+	if (ret)
+		goto error;
+
+	/* Loop to read all dirents */
+	while (offset < n * fs->fs_blocksize) {
+		dirent = (struct ocfs2_dir_entry *)(buf + offset);
+		verbosef("dirent %.*s ino: %"PRIu64" len:%d off: %d\n",
+			dirent->name_len, dirent->name,
+			(uint64_t)dirent->inode, dirent->rec_len, offset);
+		if (dirent->rec_len == 0) {
+			/* Reset to the next dirblock */
+			inc = fs->fs_blocksize - (offset % fs->fs_blocksize);
+			offset += inc;
+			c->r_size += inc;
+			if (c->r_size >= c->dir->i_size)
+				break;
+			continue;
+		}
+		if (dirent->inode) {
+			/* Test name_len first: name[1] means nothing before */
+			if (dirent->name_len == 2 && dirent->name[0] == '.' &&
+					dirent->name[1] == '.')
+				c->parent_blkno = dirent->inode;
+			c->dirsize += OCFS2_DIR_REC_LEN(dirent->name_len);
+		}
+
+		offset += dirent->rec_len;
+		c->r_size += dirent->rec_len;
+		if (c->r_size >= c->dir->i_size)
+			break;
+	}
+	return 0;
+
+	/* The iterator expects OCFS2_EXTENT_* flag bits, not an errcode_t */
+error:
+	verbosef("Error while interating %"PRIu64" of %"PRIu64"\n",
+		(uint64_t) rec->e_blkno, (uint64_t)c->dir->i_blkno);
+	c->status = DEFRAG_ERROR;
+	return OCFS2_EXTENT_ABORT;
+}
+
+
+/* Create the replacement directory inode c->new_inode: inline when the
+ * entries fit, else with one extent whose first block holds "." and "..".
+ * Returns 0 or a libocfs2 error code. */
+static errcode_t new_dir(struct defrag_dir_context *c)
+{
+	uint64_t blkno, extent_blkno;
+	ocfs2_filesys *fs = c->dst->dst_fs;
+	char *buf = NULL;
+	errcode_t ret = 0;
+	uint32_t i, n, nblks, len;
+	struct ocfs2_dir_entry *de;
+
+	ret = ocfs2_new_inode(fs, &blkno, c->dir->i_mode);
+	if (ret)
+		goto error;
+	ret = ocfs2_read_cached_inode(fs, blkno, &c->new_inode);
+	if (ret)
+		goto error;
+
+	/* Return an inlined directory if too small */
+	if (c->dirsize < fs->fs_blocksize - offsetof(struct ocfs2_dinode, id2))
+		return ocfs2_init_dir(fs, blkno, c->parent_blkno);
+
+	c->dirsize = ((c->dirsize + fs->fs_blocksize) / fs->fs_blocksize) *
+				fs->fs_blocksize;
+	c->new_inode->ci_inode->i_dyn_features &= ~OCFS2_INLINE_DATA_FL;
+	ocfs2_dinode_new_extent_list(fs, c->new_inode->ci_inode);
+
+	n = ocfs2_bytes_to_clusters(fs, c->dirsize);
+	ret = ocfs2_new_clusters(fs, 1, n, &extent_blkno, &len);
+	if (ret)
+		goto error;
+	/* len is what was actually allocated; with min 1 it may be below n */
+	nblks = ocfs2_clusters_to_blocks(fs, len);
+
+	verbosef("Initializing new directory %"PRIu64" n %d ext %"PRIu64"\n",
+			blkno, n, extent_blkno);
+	ret = ocfs2_cached_inode_insert_extent(c->new_inode, 0, extent_blkno,
+			len, 0);
+	if (ret)
+		goto error;
+	c->new_inode->ci_inode->i_size = fs->fs_blocksize;
+	c->new_inode->ci_inode->i_clusters = len;
+
+	ret = ocfs2_malloc_block(fs->fs_io, &buf);
+	if (ret)
+		goto error;
+	memset(buf, 0, fs->fs_blocksize);
+	de = (struct ocfs2_dir_entry *) buf;
+	de->rec_len = fs->fs_blocksize;
+	/* Stamp a trailer on each block after the first, when supported */
+	for (i = 1; i < nblks && ocfs2_supports_dir_trailer(fs); i++) {
+		de->rec_len = ocfs2_dir_trailer_blk_off(fs);
+		ocfs2_init_dir_trailer(fs, c->new_inode->ci_inode,
+				extent_blkno + i, buf);
+		ret = ocfs2_write_dir_block(fs, c->new_inode->ci_inode,
+				extent_blkno + i, buf);
+		if (ret)
+			goto error;
+	}
+	de->inode = blkno;
+	de->name_len = 1;
+	de->rec_len = OCFS2_DIR_REC_LEN(1);
+	de->name[0] = '.';
+	de->file_type = OCFS2_FT_DIR;
+
+	de = (struct ocfs2_dir_entry *) (buf + de->rec_len);
+	de->inode = c->parent_blkno;
+	de->name_len =	2;
+	strcpy(de->name, "..");
+	de->file_type = OCFS2_FT_DIR;
+	de->rec_len = fs->fs_blocksize - OCFS2_DIR_REC_LEN(1);
+	if (ocfs2_supports_dir_trailer(fs)) {
+		de->rec_len = ocfs2_dir_trailer_blk_off(fs) -
+			OCFS2_DIR_REC_LEN(1);
+		ocfs2_init_dir_trailer(fs, c->new_inode->ci_inode,
+				extent_blkno, buf);
+	}
+	verbosef("dotty dirent len %d\n", de->rec_len);
+	ret = ocfs2_write_dir_block(fs, c->new_inode->ci_inode,
+			extent_blkno, buf);
+	if (ret)
+		com_err(whoami, ret, "while writing dir block\n");
+	if (!ret)
+		ret = ocfs2_write_cached_inode(fs, c->new_inode);
+	if (!ret)
+		ret = ocfs2_dx_dir_build(fs, blkno);
+error:
+	if (buf)
+		ocfs2_free(&buf);
+	if (ret)
+		com_err(whoami, ret, "while creating new dir\n");
+	return ret;
+}
+
+
+/* Block-iterator callback: repoint "." and each dir trailer at the swapped
+ * inode.  Returns OCFS2_BLOCK_ABORT when done or (c->status set) on error. */
+static int reset_trailer(ocfs2_filesys *fs, uint64_t blkno, uint64_t bcount,
+		uint16_t ext_flats, void *priv_data)
+{
+	char *buf = NULL;
+	errcode_t err;
+	struct defrag_dir_context *c = (struct defrag_dir_context *)priv_data;
+	struct ocfs2_dir_entry *de;
+
+	if (c->dot_reset && !ocfs2_supports_dir_trailer(fs))
+		return OCFS2_BLOCK_ABORT;
+	err = ocfs2_malloc_block(fs->fs_io, &buf);
+	if (!err)
+		err = ocfs2_read_blocks(fs, blkno, 1, buf);
+	if (err)	/* never write back a buffer the read did not fill */
+		goto out;
+	de = (struct ocfs2_dir_entry *)buf;
+	if (!c->dot_reset && (de->name[0] == '.') && (de->name_len == 1))
+		de->inode = c->new_inode->ci_blkno;
+	c->dot_reset = 1;
+	if (ocfs2_supports_dir_trailer(fs))
+		ocfs2_dir_trailer_from_block(fs, buf)->db_parent_dinode =
+			c->new_inode->ci_blkno;
+	err = ocfs2_write_dir_block(fs, c->new_inode->ci_inode, blkno, buf);
+out:
+	if (buf)
+		ocfs2_free(&buf);
+	if (err)
+		c->status = DEFRAG_ERROR;
+	return err ? OCFS2_BLOCK_ABORT : 0;
+}
+
+
+/* Defragment directory di via a compact copy built in a temporary inode;
+ * returns 0 on success, otherwise the first error encountered. */
+errcode_t defrag_dir(struct defrag_state *dst, struct ocfs2_dinode *di)
+{
+	struct defrag_dir_context dc;
+	ocfs2_filesys *fs = dst->dst_fs;
+	errcode_t ret = 0, err;
+
+	/* Ignore refcounted dir for now */
+	if (di->i_dyn_features & (OCFS2_INLINE_DATA_FL|OCFS2_HAS_REFCOUNT_FL))
+		return 0;
+	verbosef("Defragging dir %"PRIu64" size %"PRIu64"\n",
+		(uint64_t)di->i_blkno, (uint64_t)di->i_size);
+	memset(&dc, 0, sizeof(struct defrag_dir_context));
+	dc.dst = dst;
+	dc.dir = di;
+	verbosef("Iterating %"PRIu64" size %"PRIu64"\n",
+		(uint64_t)di->i_blkno, (uint64_t)di->i_size);
+	/* Read all directory extents */
+	ret = ocfs2_extent_iterate_inode(fs, di, OCFS2_EXTENT_FLAG_DATA_ONLY,
+			NULL, read_extents, &dc);
+	if (ret || dc.status == DEFRAG_ERROR) {
+		fprintf(stderr, "Error while reading dir %"PRIu64
+				"\n", (uint64_t) di->i_blkno);
+		return ret ? ret : DEFRAG_ERROR;
+	}
+
+	/* Initialize the new directory, then link the entries into it */
+	ret = new_dir(&dc);
+	if (!ret)
+		ret = link_dirents(&dc);
+	if (ret)
+		goto out;
+	/* Change blkno of new_inode and write it */
+	dc.new_inode->ci_blkno = dc.dir->i_blkno;
+	dc.dir->i_blkno = dc.new_inode->ci_inode->i_blkno;
+	dc.new_inode->ci_inode->i_blkno = dc.new_inode->ci_blkno;
+	ret = ocfs2_write_cached_inode(fs, dc.new_inode);
+	if (ret) {
+		dc.new_inode->ci_blkno = dc.dir->i_blkno;
+		com_err(whoami, ret, "while writing inode\n");
+		goto out;
+	}
+	/* Reset trailer parent_blkno of all dirblocks */
+	ret = ocfs2_block_iterate_inode(fs, dc.new_inode->ci_inode, 0,
+			reset_trailer, &dc);
+	if (!ret && dc.status == DEFRAG_ERROR)
+		ret = DEFRAG_ERROR;
+	err = ocfs2_write_inode(fs, dc.dir->i_blkno, (char *)dc.dir);
+	if (err) {	/* temp inode still maps the live extents: keep it */
+		com_err(whoami, err, "while writing inode\n");
+		ocfs2_free_cached_inode(fs, dc.new_inode);
+		return err;
+	}
+	/* refresh dir's blkno in new_inode, and clear it */
+	dc.new_inode->ci_blkno = dc.dir->i_blkno;
+	ocfs2_refresh_cached_inode(fs, dc.new_inode);
+out:
+	if (!dc.new_inode)	/* new_dir() failed before creating it */
+		return ret;
+	err = ocfs2_dx_dir_truncate(fs, dc.new_inode->ci_blkno);
+	if (err)
+		com_err(whoami, err, "while clearing dx_root."
+				" Please run fsck.\n");
+	ret = ret ? ret : err;	/* keep the first error, not the last */
+	err = ocfs2_truncate(fs, dc.new_inode->ci_blkno, 0);
+	if (err)
+		com_err(whoami, err, "while truncating file."
+				" Please run fsck.\n");
+	ret = ret ? ret : err;
+	err = ocfs2_delete_inode(fs, dc.new_inode->ci_blkno);
+	if (err)
+		com_err(whoami, err, "while deleting inode."
+				" Please run fsck.\n");
+	ret = ret ? ret : err;
+	ocfs2_free_cached_inode(fs, dc.new_inode);
+	return ret;
+}
diff --git a/defrag.ocfs2/file.c b/defrag.ocfs2/file.c
index 01bf993..ec91a0a 100644
--- a/defrag.ocfs2/file.c
+++ b/defrag.ocfs2/file.c
@@ -498,6 +498,8 @@ errcode_t defrag_files_and_dirs(struct defrag_state *dst)

 		if (S_ISREG(di->i_mode))
 			ret = defrag_file(dst, di);
+		else if (S_ISDIR(di->i_mode))
+			ret = defrag_dir(dst, di);

 		if (ret)
 			break;
diff --git a/defrag.ocfs2/include/defrag.h b/defrag.ocfs2/include/defrag.h
index 264efa0..e0f1331 100644
--- a/defrag.ocfs2/include/defrag.h
+++ b/defrag.ocfs2/include/defrag.h
@@ -43,6 +43,7 @@ extern int verbose;

 errcode_t defrag_files_and_dirs(struct defrag_state *dst);
 errcode_t defrag_file(struct defrag_state *dst, struct ocfs2_dinode *);
+errcode_t defrag_dir(struct defrag_state *dst, struct ocfs2_dinode *);

 #endif /* __OCFS2_DEFRAG_H__ */

-- 
1.7.1


-- 
Goldwyn



More information about the Ocfs2-tools-devel mailing list