[Ocfs2-tools-devel] [PATCH 2/2] defrag.ocfs2: Clear unused inode groups

Goldwyn Rodrigues rgoldwyn at gmail.com
Sat Feb 20 18:48:06 PST 2010


Clears all inode groups which have no allocated inodes in all
chains of all inode allocators.

Currently, it gets more inode groups only after a mass delete. Any
inode allocations after that fill atleast one inode in each inode
group, in a round robin fashion, starting from the inode group which
has the most inodes free first.
Hopefully, this will have more inode groups to clear once the
new inode allocation sequence is in place.

Signed-off-by: Goldwyn Rodrigues <rgoldwyn at suse.de>
---
 Makefile                      |    2 +-
 defrag.ocfs2/Makefile         |    2 +-
 defrag.ocfs2/alloc.c          |  229 +++++++++++++++++++++++++++++++++++++++++
 defrag.ocfs2/defrag.c         |   13 ++-
 defrag.ocfs2/include/defrag.h |    4 +-
 5 files changed, 245 insertions(+), 5 deletions(-)
 create mode 100644 defrag.ocfs2/alloc.c

diff --git a/Makefile b/Makefile
index 88106fb..ecb56dc 100644
--- a/Makefile
+++ b/Makefile
@@ -20,7 +20,7 @@ CHKCONFIG_DEP = chkconfig
 COMPILE_PY = 1
 endif

-SUBDIRS = include libtools-internal libo2dlm libo2cb libocfs2
fsck.ocfs2 mkfs.ocfs2 mounted.ocfs2 tunefs.ocfs2 debugfs.ocfs2
o2cb_ctl ocfs2_hb_ctl mount.ocfs2 ocfs2_controld o2image listuuid
sizetest extras fswreck patches
+SUBDIRS = include libtools-internal libo2dlm libo2cb libocfs2
fsck.ocfs2 mkfs.ocfs2 mounted.ocfs2 tunefs.ocfs2 debugfs.ocfs2
o2cb_ctl ocfs2_hb_ctl mount.ocfs2 ocfs2_controld o2image listuuid
sizetest extras fswreck patches defrag.ocfs2

 ifdef BUILD_OCFS2CONSOLE
 SUBDIRS += ocfs2console
diff --git a/defrag.ocfs2/Makefile b/defrag.ocfs2/Makefile
index b59c6a4..8bc48e0 100644
--- a/defrag.ocfs2/Makefile
+++ b/defrag.ocfs2/Makefile
@@ -13,7 +13,7 @@ LIBOCFS2_DEPS = $(TOPDIR)/libocfs2/libocfs2.a

 CFLAGS += -g

-CFILES =	defrag.c
+CFILES =	defrag.c alloc.c

 HFILES = 	include/defrag.h

diff --git a/defrag.ocfs2/alloc.c b/defrag.ocfs2/alloc.c
new file mode 100644
index 0000000..e3b153d
--- /dev/null
+++ b/defrag.ocfs2/alloc.c
@@ -0,0 +1,229 @@
+/* -*- mode: c; c-basic-offset: 8; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
+ * Copyright (C) 1993-2004 Theodore Ts'o.
+ * Copyright (C) 2004 Oracle.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License, version 2,  as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ *
+ */
+#include <getopt.h>
+#include <limits.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <string.h>
+#include <inttypes.h>
+#include <signal.h>
+#include <libgen.h>
+
+#include "defrag.h"
+
+
+extern int verbose;
+extern char *whoami;
+
+static errcode_t cr_clear_unused(ocfs2_filesys *fs, struct ocfs2_chain_rec *cr,
+		int chain_num, int *total, int *used)
+{
+	char *buf = NULL, *pbuf = NULL, *tbuf;
+	struct ocfs2_group_desc *gd = NULL, *pgd = NULL;
+	errcode_t ret = 0;
+	uint64_t blkno = cr->c_blkno;
+	int check_free, check_total;
+
+	if (!blkno)
+		return 0;
+	*total = *used = 0;
+
+	ret = ocfs2_malloc_block(fs->fs_io, &buf);
+	if (ret)
+		return ret;
+	memset(buf, 0, fs->fs_blocksize);
+
+	ret = ocfs2_malloc_block(fs->fs_io, &pbuf);
+	if (ret)
+		return ret;
+	memset(pbuf, 0, fs->fs_blocksize);
+
+	check_total = check_free = 0;
+
+	while (blkno) {
+		ret = ocfs2_read_group_desc(fs, blkno, buf);
+		if (ret)
+			goto out;
+		gd = (struct ocfs2_group_desc *)buf;
+
+
+		if (gd->bg_blkno!=blkno) {
+			ret = OCFS2_CHAIN_ERROR;
+			goto out;
+		}
+		verbosef("gd blkno:%"PRIu64" free:%d tot:%d\n",
+				(uint64_t) gd->bg_blkno, gd->bg_free_bits_count,
+				gd->bg_bits);
+
+		if ((gd->bg_free_bits_count+1 == gd->bg_bits) &&
+				(gd->bg_bitmap[0]==1)) {
+			/* Clear corresponding in global bitmap */
+			ret = ocfs2_free_clusters(fs, gd->bg_bits, gd->bg_blkno);
+			if (ret)
+				return ret;
+
+			cr->c_total -= gd->bg_bits;
+			cr->c_free -= gd->bg_free_bits_count;
+			*used += 1;
+			*total += gd->bg_bits;
+			if (pgd) {
+				pgd->bg_next_group = gd->bg_next_group;
+				/* XXX - Resulting in dual write. Improvise */
+				ret = ocfs2_write_group_desc(fs,
+						pgd->bg_blkno, pbuf);
+				if (ret)
+					goto out;
+
+			} else
+				cr->c_blkno = gd->bg_next_group;
+		} else {
+			check_total += gd->bg_bits;
+			check_free += gd->bg_free_bits_count;
+		}
+
+		if (gd->bg_chain != chain_num) {
+			verbosef("Changing chain value of group desc %"PRIu64
+				" from %d to %d\n", (uint64_t)gd->bg_blkno,
+				gd->bg_chain, chain_num);
+			gd->bg_chain = chain_num;
+			ret = ocfs2_write_group_desc(fs, blkno, buf);
+			if (ret)
+				goto out;
+		}
+		/* Swap buf and previous buf */
+		tbuf = pbuf;
+		pbuf = buf;
+		buf = tbuf;
+
+		/* Get the next group desc block number in the chain */
+		pgd = (struct ocfs2_group_desc *)pbuf;
+		blkno = pgd->bg_next_group;
+	}
+
+	if ((check_free != cr->c_free) || (check_total != cr->c_total)) {
+		fprintf(stderr, "Chain rec[%d] count (%d/%d) does not match with"
+				"calculated (%d/%d)", chain_num, cr->c_free,
+				cr->c_total, check_free, check_total);
+		ret = OCFS2_CHAIN_ERROR;
+	}
+out:
+	if (buf)
+		ocfs2_free(&buf);
+	if (pbuf)
+		ocfs2_free(&pbuf);
+	return ret;
+}
+
+
+static errcode_t clear_unused_regions(ocfs2_filesys *fs, uint64_t blkno)
+{
+	char *buf = NULL;
+	struct ocfs2_dinode *inode;
+	int n, i;
+	errcode_t ret = 0;
+	struct ocfs2_chain_list *cl = &inode->id2.i_chain;
+
+	ret = ocfs2_malloc_block(fs->fs_io, &buf);
+	if (ret)
+		return ret;
+	verbosef("Reading inode: %"PRIu64"\n", blkno);
+	ret = ocfs2_read_inode(fs, blkno, buf);
+	if (ret)
+		goto out_buf;
+
+	inode = (struct ocfs2_dinode *)buf;
+
+	ret = OCFS2_ET_INODE_NOT_VALID;
+	if (!(inode->i_flags & OCFS2_VALID_FL)) {
+		printf("Allocator inode %"PRIu64" not active\n", blkno);
+		goto out_buf;
+	}
+
+	if (!(inode->i_flags & OCFS2_CHAIN_FL)) {
+		printf("Allocator inode %"PRIu64" not a valid chain\n", blkno);
+		goto out_buf;
+	}
+	
+	cl = &inode->id2.i_chain;
+	n = ocfs2_chain_recs_per_inode(fs->fs_blocksize);
+	n--;
+	while ((cl->cl_recs[n].c_blkno==0) && n>0)
+		n--;
+	verbosef("Total chain recs to process: %d next_free:%d\n",
+			n, cl->cl_next_free_rec);
+
+	i = 0;
+	while (i<=n) {
+		struct ocfs2_chain_rec *cr = &cl->cl_recs[i];
+		int total=0, used=0;
+		ret = cr_clear_unused(fs, cr, i, &total, &used);
+		if (ret)
+			goto out;
+		/* Update inode parameters with the cleared regions */
+		inode->id1.bitmap1.i_used -= used;
+		inode->id1.bitmap1.i_total -= total;
+		verbosef("cr i:%d blkno:%"PRIu64" tot:%d free:%d\n", i,
+				(uint64_t)cr->c_blkno, cr->c_total, cr->c_free);
+
+		if (cr->c_blkno==0) {
+			/* Move the last rec to current position
+			   and process it next*/
+			cl->cl_recs[i] = cl->cl_recs[n];
+			memset(&cl->cl_recs[n], 0, sizeof(struct ocfs2_chain_rec));
+			n--;
+		}
+		else
+			i++;
+	}
+	cl->cl_next_free_rec = n+1;
+
+out:
+	/* Correct the rest of inode values */
+	inode->i_clusters = inode->id1.bitmap1.i_total;
+	inode->i_size = inode->i_clusters * fs->fs_clustersize;
+	ret = ocfs2_write_inode(fs, blkno, buf);
+out_buf:
+	if (buf)
+		ocfs2_free(&buf);
+	return ret;
+
+}
+
+
+
+/* Clears all unused inode groups and block groups */
+errcode_t allocators_clear_unused(struct defrag_state *dst)
+{
+	int slot, max_slots = OCFS2_RAW_SB(dst->dst_fs->fs_super)->s_max_slots;
+	errcode_t ret = 0;
+	uint64_t blkno;
+
+	for (slot = 0; slot < max_slots; slot++) {
+		ret = ocfs2_lookup_system_inode(dst->dst_fs,
INODE_ALLOC_SYSTEM_INODE,slot, &blkno);
+		if (ret)
+			return ret;
+		verbosef("Inode allocator[%d] - \n", slot);
+		clear_unused_regions(dst->dst_fs, blkno);
+	}
+	return ret;
+}
+
diff --git a/defrag.ocfs2/defrag.c b/defrag.ocfs2/defrag.c
index ad97d81..f6e86b0 100644
--- a/defrag.ocfs2/defrag.c
+++ b/defrag.ocfs2/defrag.c
@@ -132,6 +132,7 @@ static void print_uuid(struct defrag_state *dst)
 static void print_version(void)
 {
 	fprintf(stderr, "%s %s\n", whoami, VERSION);
+	fprintf(stdout, "WARNING: Unstable version.\n");
 }

 static void print_help(void)
@@ -228,11 +229,11 @@ int main(int argc, char **argv)

 	if (mount_flags & (OCFS2_MF_MOUNTED | OCFS2_MF_BUSY)) {
 		if (!(open_flags & OCFS2_FLAG_RW))
-			fprintf(stdout, "\nWARNING!!! Running fsck.ocfs2 (read-"
+			fprintf(stdout, "\nWARNING!!! Running defrag.ocfs2 (read-"
 					"only) on a mounted filesystem may detect "
 					"invalid errors.\n\n");
 		else
-			fprintf(stdout, "\nWARNING!!! Running fsck.ocfs2 on a "
+			fprintf(stdout, "\nWARNING!!! Running defrag.ocfs2 on a "
 					"mounted filesystem may cause SEVERE "
 					"filesystem damage.\n\n");
 	}
@@ -263,6 +264,14 @@ int main(int argc, char **argv)
 		retval = DEFRAG_ERROR;
 		goto out;
 	}
+        printf("Pass 1: Clearing unused groups in Allocators\n");
+        ret = allocators_clear_unused(dst);
+	if (ret) {
+		fprintf(stderr, "Error while clearing unused inode groups\n"
+				"Please execute fsck.ocfs2\n");
+		retval |= DEFRAG_ERROR;
+	}
+
 	ocfs2_write_super(dst->dst_fs);
 	ocfs2_close(dst->dst_fs);
 	
diff --git a/defrag.ocfs2/include/defrag.h b/defrag.ocfs2/include/defrag.h
index 7c25e89..e845893 100644
--- a/defrag.ocfs2/include/defrag.h
+++ b/defrag.ocfs2/include/defrag.h
@@ -1,7 +1,7 @@
 /* -*- mode: c; c-basic-offset: 8; -*-
  * vim: noexpandtab sw=8 ts=8 sts=0:
  *
- * defrag.h
+ * fsck.h
  *
  * Copyright (C) 2002 Oracle Corporation.  All rights reserved.
  *
@@ -42,5 +42,7 @@ extern int verbose;
 } while (0)


+errcode_t allocators_clear_unused(struct defrag_state *);
+
 #endif /* __OCFS2_DEFRAG_H__ */

-- 
1.6.4.2


-- 
Goldwyn



More information about the Ocfs2-tools-devel mailing list