[Ocfs2-tools-devel] [PATCH 2/4] Clear unallocated inode groups

tristan tristan.ye at oracle.com
Fri Mar 19 00:04:33 PDT 2010


Hi Goldwyn,

Comments inlined;)


Goldwyn Rodrigues wrote:
> Clears all inode groups which have no allocated inodes, freeing
> space in the global_bitmap
>
> Signed-off-by: Goldwyn Rodrigues <rgoldwyn at suse.de>
> ---
>  Makefile                      |    2 +-
>  defrag.ocfs2/Makefile         |    2 +-
>  defrag.ocfs2/alloc.c          |  230 +++++++++++++++++++++++++++++++++++++++++
>  defrag.ocfs2/defrag.c         |   21 +++-
>  defrag.ocfs2/include/defrag.h |    4 +-
>  5 files changed, 250 insertions(+), 9 deletions(-)
>  create mode 100644 defrag.ocfs2/alloc.c
>
> diff --git a/Makefile b/Makefile
> index 88106fb..ecb56dc 100644
> --- a/Makefile
> +++ b/Makefile
> @@ -20,7 +20,7 @@ CHKCONFIG_DEP = chkconfig
>  COMPILE_PY = 1
>  endif
>  
> -SUBDIRS = include libtools-internal libo2dlm libo2cb libocfs2 fsck.ocfs2 mkfs.ocfs2 mounted.ocfs2 tunefs.ocfs2 debugfs.ocfs2 o2cb_ctl ocfs2_hb_ctl mount.ocfs2 ocfs2_controld o2image listuuid sizetest extras fswreck patches
> +SUBDIRS = include libtools-internal libo2dlm libo2cb libocfs2 fsck.ocfs2 mkfs.ocfs2 mounted.ocfs2 tunefs.ocfs2 debugfs.ocfs2 o2cb_ctl ocfs2_hb_ctl mount.ocfs2 ocfs2_controld o2image listuuid sizetest extras fswreck patches defrag.ocfs2
>  
>  ifdef BUILD_OCFS2CONSOLE
>  SUBDIRS += ocfs2console
> diff --git a/defrag.ocfs2/Makefile b/defrag.ocfs2/Makefile
> index b59c6a4..8bc48e0 100644
> --- a/defrag.ocfs2/Makefile
> +++ b/defrag.ocfs2/Makefile
> @@ -13,7 +13,7 @@ LIBOCFS2_DEPS = $(TOPDIR)/libocfs2/libocfs2.a
>  
>  CFLAGS += -g
>  
> -CFILES =	defrag.c
> +CFILES =	defrag.c alloc.c
>  
>  HFILES = 	include/defrag.h
>  
> diff --git a/defrag.ocfs2/alloc.c b/defrag.ocfs2/alloc.c
> new file mode 100644
> index 0000000..7eb7079
> --- /dev/null
> +++ b/defrag.ocfs2/alloc.c
> @@ -0,0 +1,230 @@
> +/* -*- mode: c; c-basic-offset: 8; -*-
> + * vim: noexpandtab sw=8 ts=8 sts=0:
> + *
> + * Copyright (C) 1993-2004 Theodore Ts'o.
> + * Copyright (C) 2004 Oracle.  All rights reserved.
> + *
> + * This program is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU General Public
> + * License, version 2,  as published by the Free Software Foundation.
> + * 
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * General Public License for more details.
> + * 
> + * You should have received a copy of the GNU General Public
> + * License along with this program; if not, write to the
> + * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
> + * Boston, MA 021110-1307, USA.
> + *
> + */

Same issue of license.

> +#include <getopt.h>
> +#include <limits.h>
> +#include <stdlib.h>
> +#include <stdint.h>
> +#include <string.h>
> +#include <inttypes.h>
> +#include <signal.h>
> +#include <libgen.h>

Could you please try to remove redundant headers?

> +
> +#include "defrag.h"
> +
> +
> +extern int verbose;
> +extern char *whoami;
> +
> +static errcode_t cr_clear_unused(ocfs2_filesys *fs, struct ocfs2_chain_rec *cr,
> +		int chain_num, int *total, int *used)
> +{
> +	char *buf = NULL, *pbuf = NULL, *tbuf;
> +	struct ocfs2_group_desc *gd = NULL, *pgd = NULL;
> +	errcode_t ret = 0;
> +	uint64_t blkno = cr->c_blkno;
> +	int check_free, check_total;
> +
> +	if (!blkno)
> +		return 0;
> +	*total = *used = 0;
> +
> +	ret = ocfs2_malloc_block(fs->fs_io, &buf);
> +	if (ret)
> +		return ret;
> +	memset(buf, 0, fs->fs_blocksize);
> +
> +	ret = ocfs2_malloc_block(fs->fs_io, &pbuf);
> +	if (ret)
> +		return ret;

please goto 'out' for releasing the dynamically allocated memory.

> +	memset(pbuf, 0, fs->fs_blocksize);
> +
> +	check_total = check_free = 0;
> +
> +	while (blkno) {
> +		ret = ocfs2_read_group_desc(fs, blkno, buf);
> +		if (ret)
> +			goto out;
> +		gd = (struct ocfs2_group_desc *)buf;
> +
> +
> +		if (gd->bg_blkno != blkno) {
> +			ret = OCFS2_CHAIN_ERROR;
> +			goto out;
> +		}
> +		verbosef("gd blkno:%"PRIu64" free:%d tot:%d\n",
> +				(uint64_t) gd->bg_blkno, gd->bg_free_bits_count,
> +				gd->bg_bits);
> +
> +		if ((gd->bg_free_bits_count+1 == gd->bg_bits) &&
> +				(gd->bg_bitmap[0] == 1)) {
should be more straightfoward using ocfs2_test_bit()

> +			/* Clear corresponding in global bitmap */
> +			ret = ocfs2_free_clusters(fs, gd->bg_bits, gd->bg_blkno);
> +			if (ret)
> +				return ret;
Oh, you need to goto 'out' here as well.

Btw, it will be much better if you place a tcom_err() after each 
operation from libocfs2 being called.
it helps when error occurs.
> +
> +			cr->c_total -= gd->bg_bits;
> +			cr->c_free -= gd->bg_free_bits_count;
> +			*used += 1;
> +			*total += gd->bg_bits;
> +			if (pgd) {
> +				pgd->bg_next_group = gd->bg_next_group;
> +				/* XXX - Resulting in dual write. Improvise */
> +				ret = ocfs2_write_group_desc(fs,
> +						pgd->bg_blkno, pbuf);
> +				if (ret)
> +					goto out;
> +
> +			} else
> +				cr->c_blkno = gd->bg_next_group;
> +		} else {
> +			check_total += gd->bg_bits;
> +			check_free += gd->bg_free_bits_count;
> +		}
> +
> +		if (gd->bg_chain != chain_num) {
> +			verbosef("Changing chain value of group desc %"PRIu64
> +				" from %d to %d\n", (uint64_t)gd->bg_blkno,
> +				gd->bg_chain, chain_num);
> +			gd->bg_chain = chain_num;

if (gd->bg_chain != chain_num), a corruption of the chain probably has 
already been there, shall we just fail it out? and then turn to 
fsck.ocfs2 for help?

> +			ret = ocfs2_write_group_desc(fs, blkno, buf);
> +			if (ret)
> +				goto out;
> +		}
> +		/* Swap buf and previous buf */
> +		tbuf = pbuf;
> +		pbuf = buf;
> +		buf = tbuf;

Oh, it is a bug, pgd and pbuf here should always represent a legal 
previous group, think the case when we remove a block group, the 
previous group which pbuf and pgd are going to present is no long the 
currently handled group(since it's a invalid/removed group now) as 
normal case, in that case, we're not required to swap the pbuf and buf, 
just leave as it was.

otherwise, we definitely break the chain in the case of removing 2 
contiguous block groups.

Correct way should be:

if (pgd->bg_next_group != gd->bg_next_group) {
	tbuf = pbuf;
	pbuf = buf;
	buf = tbuf;
}



> +
> +		/* Get the next group desc block number in the chain */
> +		pgd = (struct ocfs2_group_desc *)pbuf;
> +		blkno = pgd->bg_next_group;
> +	}
> +
> +	if ((check_free != cr->c_free) || (check_total != cr->c_total)) {
> +		fprintf(stderr, "Chain rec[%d] count (%d/%d) does not match "
> +			"with calculated (%d/%d)", chain_num, cr->c_free,
> +			cr->c_total, check_free, check_total);
> +		ret = OCFS2_CHAIN_ERROR;
> +	}
> +out:
> +	if (buf)
> +		ocfs2_free(&buf);
> +	if (pbuf)
> +		ocfs2_free(&pbuf);
> +	return ret;
> +}
> +
> +
> +static errcode_t clear_unused_regions(ocfs2_filesys *fs, uint64_t blkno)
> +{
> +	char *buf = NULL;
> +	struct ocfs2_dinode *inode;
> +	int n, i;
> +	errcode_t ret = 0;
> +	struct ocfs2_chain_list *cl = &inode->id2.i_chain;
> +
> +	ret = ocfs2_malloc_block(fs->fs_io, &buf);
> +	if (ret)
> +		return ret;
> +	verbosef("Reading inode: %"PRIu64"\n", blkno);
> +	ret = ocfs2_read_inode(fs, blkno, buf);
> +	if (ret)
> +		goto out_buf;
> +
> +	inode = (struct ocfs2_dinode *)buf;
> +
> +	ret = OCFS2_ET_INODE_NOT_VALID;
> +	if (!(inode->i_flags & OCFS2_VALID_FL)) {
> +		printf("Allocator inode %"PRIu64" not active\n", blkno);
> +		goto out_buf;
> +	}
> +
> +	if (!(inode->i_flags & OCFS2_CHAIN_FL)) {
> +		printf("Allocator inode %"PRIu64" not a valid chain\n", blkno);
> +		goto out_buf;
> +	}
> +	
> +	cl = &inode->id2.i_chain;
> +	n = ocfs2_chain_recs_per_inode(fs->fs_blocksize);
> +	n--;
> +	while ((cl->cl_recs[n].c_blkno == 0) && (n > 0))
> +		n--;
> +	verbosef("Total chain recs to process: %d next_free:%d\n",
> +			n, cl->cl_next_free_rec);
why bother to do this? directly doing like 'n = cl->cl_next_free_rec' 
does not work?
> +
> +	i = 0;
> +	while (i <= n) {
> +		struct ocfs2_chain_rec *cr = &cl->cl_recs[i];
> +		int total = 0, used = 0;
> +		ret = cr_clear_unused(fs, cr, i, &total, &used);
> +		if (ret)
> +			goto out;
> +		/* Update inode parameters with the cleared regions */
> +		inode->id1.bitmap1.i_used -= used;
> +		inode->id1.bitmap1.i_total -= total;
> +		verbosef("cr i:%d blkno:%"PRIu64" tot:%d free:%d\n", i,
> +				(uint64_t)cr->c_blkno, cr->c_total, cr->c_free);
> +
> +		if (cr->c_blkno == 0) {
> +			/* Move the last rec to current position
> +			   and process it next*/
> +			cl->cl_recs[i] = cl->cl_recs[n];
> +			memset(&cl->cl_recs[n], 0,
> +					sizeof(struct ocfs2_chain_rec));

Hmm, not sure if such a change to the sequence of records would bring 
some performance impact...

> +			n--;
> +		} else
> +			i++;
> +	}
> +	cl->cl_next_free_rec = n+1;
> +
> +out:
> +	/* Correct the rest of inode values */
> +	inode->i_clusters = inode->id1.bitmap1.i_total;
It's incorrect here, should be:

inode->i_clusters = inode->id1.bitmap1.i_total / cl->cl_bpc;



> +	inode->i_size = inode->i_clusters * fs->fs_clustersize;
> +	ret = ocfs2_write_inode(fs, blkno, buf);
> +out_buf:
> +	if (buf)
> +		ocfs2_free(&buf);
> +	return ret;
> +
> +}
> +
> +
> +
> +/* Clears all unused inode groups and block groups */
> +errcode_t allocators_clear_unused(struct defrag_state *dst)
> +{
> +	int slot, max_slots = OCFS2_RAW_SB(dst->dst_fs->fs_super)->s_max_slots;
> +	errcode_t ret = 0;
> +	uint64_t blkno;
> +
> +	for (slot = 0; slot < max_slots; slot++) {
> +		ret = ocfs2_lookup_system_inode(dst->dst_fs,
> +				INODE_ALLOC_SYSTEM_INODE,slot, &blkno);
> +		if (ret)
> +			return ret;
> +		verbosef("Inode allocator[%d] -\n", slot);
> +		clear_unused_regions(dst->dst_fs, blkno);
> +	}
> +	return ret;
> +}
> +
> diff --git a/defrag.ocfs2/defrag.c b/defrag.ocfs2/defrag.c
> index cf81aa6..22b71d7 100644
> --- a/defrag.ocfs2/defrag.c
> +++ b/defrag.ocfs2/defrag.c
> @@ -132,6 +132,7 @@ static void print_uuid(struct defrag_state *dst)
>  static void print_version(void)
>  {
>  	fprintf(stderr, "%s %s\n", whoami, VERSION);
> +	fprintf(stdout, "WARNING: Unstable version.\n");
>  }
>  
>  static void print_help(void)
> @@ -229,13 +230,13 @@ int main(int argc, char **argv)
>  
>  	if (mount_flags & (OCFS2_MF_MOUNTED | OCFS2_MF_BUSY)) {
>  		if (!(open_flags & OCFS2_FLAG_RW))
> -			fprintf(stdout, "\nWARNING!!! Running fsck.ocfs2 (read-"
> -					"only) on a mounted filesystem may detect "
> -					"invalid errors.\n\n");
> +			fprintf(stdout, "\nWARNING!!! Running defrag.ocfs2 "
> +					"(read-only) on a mounted filesystem "
> +					"may corrupt the filesystem.\n\n");
>  		else
> -			fprintf(stdout, "\nWARNING!!! Running fsck.ocfs2 on a "
> -					"mounted filesystem may cause SEVERE "
> -					"filesystem damage.\n\n");
> +			fprintf(stdout, "\nWARNING!!! Running defrag.ocfs2 on"
> +					" a mounted filesystem may cause "
> +					"SEVERE filesystem damage.\n\n");
>  	}
>  
>  	ret = open_and_check(dst, filename, open_flags, blkno, blksize);
> @@ -265,6 +266,14 @@ int main(int argc, char **argv)
>  		retval = DEFRAG_ERROR;
>  		goto out;
>  	}
> +	printf("Pass 1: Clearing unused groups in Allocators\n");
> +	ret = allocators_clear_unused(dst);
> +	if (ret) {
> +		fprintf(stderr, "Error while clearing unused inode groups\n"
> +				"Please execute fsck.ocfs2\n");
> +		retval |= DEFRAG_ERROR;
> +	}
> +
>  	ocfs2_write_super(dst->dst_fs);
>  	ocfs2_close(dst->dst_fs);
>  	
> diff --git a/defrag.ocfs2/include/defrag.h b/defrag.ocfs2/include/defrag.h
> index 7c25e89..e845893 100644
> --- a/defrag.ocfs2/include/defrag.h
> +++ b/defrag.ocfs2/include/defrag.h
> @@ -1,7 +1,7 @@
>  /* -*- mode: c; c-basic-offset: 8; -*-
>   * vim: noexpandtab sw=8 ts=8 sts=0:
>   *
> - * defrag.h
> + * fsck.h
A typo here?

>   *
>   * Copyright (C) 2002 Oracle Corporation.  All rights reserved.
>   *
> @@ -42,5 +42,7 @@ extern int verbose;
>  } while (0)
>  
>  
> +errcode_t allocators_clear_unused(struct defrag_state *);
> +
>  #endif /* __OCFS2_DEFRAG_H__ */
>  




More information about the Ocfs2-tools-devel mailing list