[Ocfs2-tools-devel] [PATCH 2/4] Clear unallocated inode groups

Goldwyn Rodrigues rgoldwyn at gmail.com
Mon Mar 22 11:08:46 PDT 2010


Hi Tristan,

Thanks for your comments.

On Fri, Mar 19, 2010 at 2:04 AM, tristan <tristan.ye at oracle.com> wrote:
>> Clears all inode groups which have no allocated inodes, freeing
>> space in the global_bitmap
>>
>> Signed-off-by: Goldwyn Rodrigues <rgoldwyn at suse.de>
>> ---
>>  Makefile                      |    2 +-
>>  defrag.ocfs2/Makefile         |    2 +-
>>  defrag.ocfs2/alloc.c          |  230
>> +++++++++++++++++++++++++++++++++++++++++
>>  defrag.ocfs2/defrag.c         |   21 +++-
>>  defrag.ocfs2/include/defrag.h |    4 +-
>>  5 files changed, 250 insertions(+), 9 deletions(-)
>>  create mode 100644 defrag.ocfs2/alloc.c
>>
>> diff --git a/Makefile b/Makefile
>> index 88106fb..ecb56dc 100644
>> --- a/Makefile
>> +++ b/Makefile
>> @@ -20,7 +20,7 @@ CHKCONFIG_DEP = chkconfig
>>  COMPILE_PY = 1
>>  endif
>>  -SUBDIRS = include libtools-internal libo2dlm libo2cb libocfs2 fsck.ocfs2
>> mkfs.ocfs2 mounted.ocfs2 tunefs.ocfs2 debugfs.ocfs2 o2cb_ctl ocfs2_hb_ctl
>> mount.ocfs2 ocfs2_controld o2image listuuid sizetest extras fswreck patches
>> +SUBDIRS = include libtools-internal libo2dlm libo2cb libocfs2 fsck.ocfs2
>> mkfs.ocfs2 mounted.ocfs2 tunefs.ocfs2 debugfs.ocfs2 o2cb_ctl ocfs2_hb_ctl
>> mount.ocfs2 ocfs2_controld o2image listuuid sizetest extras fswreck patches
>> defrag.ocfs2
>>  ifdef BUILD_OCFS2CONSOLE
>>  SUBDIRS += ocfs2console
>> diff --git a/defrag.ocfs2/Makefile b/defrag.ocfs2/Makefile
>> index b59c6a4..8bc48e0 100644
>> --- a/defrag.ocfs2/Makefile
>> +++ b/defrag.ocfs2/Makefile
>> @@ -13,7 +13,7 @@ LIBOCFS2_DEPS = $(TOPDIR)/libocfs2/libocfs2.a
>>  CFLAGS += -g
>>  -CFILES =      defrag.c
>> +CFILES =       defrag.c alloc.c
>>  HFILES =      include/defrag.h
>>  diff --git a/defrag.ocfs2/alloc.c b/defrag.ocfs2/alloc.c
>> new file mode 100644
>> index 0000000..7eb7079
>> --- /dev/null
>> +++ b/defrag.ocfs2/alloc.c
>> @@ -0,0 +1,230 @@
>> +/* -*- mode: c; c-basic-offset: 8; -*-
>> + * vim: noexpandtab sw=8 ts=8 sts=0:
>> + *
>> + * Copyright (C) 1993-2004 Theodore Ts'o.
>> + * Copyright (C) 2004 Oracle.  All rights reserved.
>> + *
>> + * This program is free software; you can redistribute it and/or
>> + * modify it under the terms of the GNU General Public
>> + * License, version 2,  as published by the Free Software Foundation.
>> + * + * This program is distributed in the hope that it will be useful,
>> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
>> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
>> + * General Public License for more details.
>> + * + * You should have received a copy of the GNU General Public
>> + * License along with this program; if not, write to the
>> + * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
>> + * Boston, MA 021110-1307, USA.
>> + *
>> + */
>
> Same issue of license.
>
>> +#include <getopt.h>
>> +#include <limits.h>
>> +#include <stdlib.h>
>> +#include <stdint.h>
>> +#include <string.h>
>> +#include <inttypes.h>
>> +#include <signal.h>
>> +#include <libgen.h>
>
> Could you please try to remove redundant headers?
>
>> +
>> +#include "defrag.h"
>> +
>> +
>> +extern int verbose;
>> +extern char *whoami;
>> +
>> +static errcode_t cr_clear_unused(ocfs2_filesys *fs, struct
>> ocfs2_chain_rec *cr,
>> +               int chain_num, int *total, int *used)
>> +{
>> +       char *buf = NULL, *pbuf = NULL, *tbuf;
>> +       struct ocfs2_group_desc *gd = NULL, *pgd = NULL;
>> +       errcode_t ret = 0;
>> +       uint64_t blkno = cr->c_blkno;
>> +       int check_free, check_total;
>> +
>> +       if (!blkno)
>> +               return 0;
>> +       *total = *used = 0;
>> +
>> +       ret = ocfs2_malloc_block(fs->fs_io, &buf);
>> +       if (ret)
>> +               return ret;
>> +       memset(buf, 0, fs->fs_blocksize);
>> +
>> +       ret = ocfs2_malloc_block(fs->fs_io, &pbuf);
>> +       if (ret)
>> +               return ret;
>
> please goto 'out' for releasing the dynamically allocated memory.
>
>> +       memset(pbuf, 0, fs->fs_blocksize);
>> +
>> +       check_total = check_free = 0;
>> +
>> +       while (blkno) {
>> +               ret = ocfs2_read_group_desc(fs, blkno, buf);
>> +               if (ret)
>> +                       goto out;
>> +               gd = (struct ocfs2_group_desc *)buf;
>> +
>> +
>> +               if (gd->bg_blkno != blkno) {
>> +                       ret = OCFS2_CHAIN_ERROR;
>> +                       goto out;
>> +               }
>> +               verbosef("gd blkno:%"PRIu64" free:%d tot:%d\n",
>> +                               (uint64_t) gd->bg_blkno,
>> gd->bg_free_bits_count,
>> +                               gd->bg_bits);
>> +
>> +               if ((gd->bg_free_bits_count+1 == gd->bg_bits) &&
>> +                               (gd->bg_bitmap[0] == 1)) {
>
> should be more straightfoward using ocfs2_test_bit()
>
>> +                       /* Clear corresponding in global bitmap */
>> +                       ret = ocfs2_free_clusters(fs, gd->bg_bits,
>> gd->bg_blkno);
>> +                       if (ret)
>> +                               return ret;
>
> Oh, you need to goto 'out' here as well.
>
> Btw, it will be much better if you place a tcom_err() after each operation
> from libocfs2 being called.
> it helps when error occurs.
>>
>> +
>> +                       cr->c_total -= gd->bg_bits;
>> +                       cr->c_free -= gd->bg_free_bits_count;
>> +                       *used += 1;
>> +                       *total += gd->bg_bits;
>> +                       if (pgd) {
>> +                               pgd->bg_next_group = gd->bg_next_group;
>> +                               /* XXX - Resulting in dual write.
>> Improvise */
>> +                               ret = ocfs2_write_group_desc(fs,
>> +                                               pgd->bg_blkno, pbuf);
>> +                               if (ret)
>> +                                       goto out;
>> +
>> +                       } else
>> +                               cr->c_blkno = gd->bg_next_group;
>> +               } else {
>> +                       check_total += gd->bg_bits;
>> +                       check_free += gd->bg_free_bits_count;
>> +               }
>> +
>> +               if (gd->bg_chain != chain_num) {
>> +                       verbosef("Changing chain value of group desc
>> %"PRIu64
>> +                               " from %d to %d\n",
>> (uint64_t)gd->bg_blkno,
>> +                               gd->bg_chain, chain_num);
>> +                       gd->bg_chain = chain_num;
>
> if (gd->bg_chain != chain_num), a corruption of the chain probably has
> already been there, shall we just fail it out? and then turn to fsck.ocfs2
> for help?

This happens because all groups in this chain had exhausted, and I
moved the last chain to this slot.


>
>> +                       ret = ocfs2_write_group_desc(fs, blkno, buf);
>> +                       if (ret)
>> +                               goto out;
>> +               }
>> +               /* Swap buf and previous buf */
>> +               tbuf = pbuf;
>> +               pbuf = buf;
>> +               buf = tbuf;
>
> Oh, it is a bug, pgd and pbuf here should always represent a legal previous
> group, think the case when we remove a block group, the previous group which
> pbuf and pgd are going to present is no long the currently handled
> group(since it's a invalid/removed group now) as normal case, in that case,
> we're not required to swap the pbuf and buf, just leave as it was.
>
> otherwise, we definitely break the chain in the case of removing 2
> contiguous block groups.
>
> Correct way should be:
>
> if (pgd->bg_next_group != gd->bg_next_group) {
>        tbuf = pbuf;
>        pbuf = buf;
>        buf = tbuf;
> }
>

Yes I agree, however, just continuing the loop (using "continue;")  on
deletion of a group and setting blkno should resolve this, instead of
using a if-then statement. It is a simpler approach.


>
>
>> +
>> +               /* Get the next group desc block number in the chain */
>> +               pgd = (struct ocfs2_group_desc *)pbuf;
>> +               blkno = pgd->bg_next_group;
>> +       }
>> +
>> +       if ((check_free != cr->c_free) || (check_total != cr->c_total)) {
>> +               fprintf(stderr, "Chain rec[%d] count (%d/%d) does not
>> match "
>> +                       "with calculated (%d/%d)", chain_num, cr->c_free,
>> +                       cr->c_total, check_free, check_total);
>> +               ret = OCFS2_CHAIN_ERROR;
>> +       }
>> +out:
>> +       if (buf)
>> +               ocfs2_free(&buf);
>> +       if (pbuf)
>> +               ocfs2_free(&pbuf);
>> +       return ret;
>> +}
>> +
>> +
>> +static errcode_t clear_unused_regions(ocfs2_filesys *fs, uint64_t blkno)
>> +{
>> +       char *buf = NULL;
>> +       struct ocfs2_dinode *inode;
>> +       int n, i;
>> +       errcode_t ret = 0;
>> +       struct ocfs2_chain_list *cl = &inode->id2.i_chain;
>> +
>> +       ret = ocfs2_malloc_block(fs->fs_io, &buf);
>> +       if (ret)
>> +               return ret;
>> +       verbosef("Reading inode: %"PRIu64"\n", blkno);
>> +       ret = ocfs2_read_inode(fs, blkno, buf);
>> +       if (ret)
>> +               goto out_buf;
>> +
>> +       inode = (struct ocfs2_dinode *)buf;
>> +
>> +       ret = OCFS2_ET_INODE_NOT_VALID;
>> +       if (!(inode->i_flags & OCFS2_VALID_FL)) {
>> +               printf("Allocator inode %"PRIu64" not active\n", blkno);
>> +               goto out_buf;
>> +       }
>> +
>> +       if (!(inode->i_flags & OCFS2_CHAIN_FL)) {
>> +               printf("Allocator inode %"PRIu64" not a valid chain\n",
>> blkno);
>> +               goto out_buf;
>> +       }
>> +
>> +       cl = &inode->id2.i_chain;
>> +       n = ocfs2_chain_recs_per_inode(fs->fs_blocksize);
>> +       n--;
>> +       while ((cl->cl_recs[n].c_blkno == 0) && (n > 0))
>> +               n--;
>> +       verbosef("Total chain recs to process: %d next_free:%d\n",
>> +                       n, cl->cl_next_free_rec);
>
> why bother to do this? directly doing like 'n = cl->cl_next_free_rec' does
> not work?
>>
>> +
>> +       i = 0;
>> +       while (i <= n) {
>> +               struct ocfs2_chain_rec *cr = &cl->cl_recs[i];
>> +               int total = 0, used = 0;
>> +               ret = cr_clear_unused(fs, cr, i, &total, &used);
>> +               if (ret)
>> +                       goto out;
>> +               /* Update inode parameters with the cleared regions */
>> +               inode->id1.bitmap1.i_used -= used;
>> +               inode->id1.bitmap1.i_total -= total;
>> +               verbosef("cr i:%d blkno:%"PRIu64" tot:%d free:%d\n", i,
>> +                               (uint64_t)cr->c_blkno, cr->c_total,
>> cr->c_free);
>> +
>> +               if (cr->c_blkno == 0) {
>> +                       /* Move the last rec to current position
>> +                          and process it next*/
>> +                       cl->cl_recs[i] = cl->cl_recs[n];
>> +                       memset(&cl->cl_recs[n], 0,
>> +                                       sizeof(struct ocfs2_chain_rec));
>
> Hmm, not sure if such a change to the sequence of records would bring some
> performance impact...

I think fsck also does that. However, the other option will be to move
all the recs left, which will require changing the chain numbers of
all the following groups (as opposed to changing for the groups of
only the chain just moved)

>
>> +                       n--;
>> +               } else
>> +                       i++;
>> +       }
>> +       cl->cl_next_free_rec = n+1;
>> +
>> +out:
>> +       /* Correct the rest of inode values */
>> +       inode->i_clusters = inode->id1.bitmap1.i_total;
>
> It's incorrect here, should be:
>
> inode->i_clusters = inode->id1.bitmap1.i_total / cl->cl_bpc;
>
>
>
>> +       inode->i_size = inode->i_clusters * fs->fs_clustersize;
>> +       ret = ocfs2_write_inode(fs, blkno, buf);
>> +out_buf:
>> +       if (buf)
>> +               ocfs2_free(&buf);
>> +       return ret;
>> +
>> +}
>> +
>> +
>> +
>> +/* Clears all unused inode groups and block groups */
>> +errcode_t allocators_clear_unused(struct defrag_state *dst)
>> +{
>> +       int slot, max_slots =
>> OCFS2_RAW_SB(dst->dst_fs->fs_super)->s_max_slots;
>> +       errcode_t ret = 0;
>> +       uint64_t blkno;
>> +
>> +       for (slot = 0; slot < max_slots; slot++) {
>> +               ret = ocfs2_lookup_system_inode(dst->dst_fs,
>> +                               INODE_ALLOC_SYSTEM_INODE,slot, &blkno);
>> +               if (ret)
>> +                       return ret;
>> +               verbosef("Inode allocator[%d] -\n", slot);
>> +               clear_unused_regions(dst->dst_fs, blkno);
>> +       }
>> +       return ret;
>> +}
>> +
>> diff --git a/defrag.ocfs2/defrag.c b/defrag.ocfs2/defrag.c
>> index cf81aa6..22b71d7 100644
>> --- a/defrag.ocfs2/defrag.c
>> +++ b/defrag.ocfs2/defrag.c
>> @@ -132,6 +132,7 @@ static void print_uuid(struct defrag_state *dst)
>>  static void print_version(void)
>>  {
>>        fprintf(stderr, "%s %s\n", whoami, VERSION);
>> +       fprintf(stdout, "WARNING: Unstable version.\n");
>>  }
>>  static void print_help(void)
>> @@ -229,13 +230,13 @@ int main(int argc, char **argv)
>>        if (mount_flags & (OCFS2_MF_MOUNTED | OCFS2_MF_BUSY)) {
>>                if (!(open_flags & OCFS2_FLAG_RW))
>> -                       fprintf(stdout, "\nWARNING!!! Running fsck.ocfs2
>> (read-"
>> -                                       "only) on a mounted filesystem may
>> detect "
>> -                                       "invalid errors.\n\n");
>> +                       fprintf(stdout, "\nWARNING!!! Running defrag.ocfs2
>> "
>> +                                       "(read-only) on a mounted
>> filesystem "
>> +                                       "may corrupt the
>> filesystem.\n\n");
>>                else
>> -                       fprintf(stdout, "\nWARNING!!! Running fsck.ocfs2
>> on a "
>> -                                       "mounted filesystem may cause
>> SEVERE "
>> -                                       "filesystem damage.\n\n");
>> +                       fprintf(stdout, "\nWARNING!!! Running defrag.ocfs2
>> on"
>> +                                       " a mounted filesystem may cause "
>> +                                       "SEVERE filesystem damage.\n\n");
>>        }
>>        ret = open_and_check(dst, filename, open_flags, blkno, blksize);
>> @@ -265,6 +266,14 @@ int main(int argc, char **argv)
>>                retval = DEFRAG_ERROR;
>>                goto out;
>>        }
>> +       printf("Pass 1: Clearing unused groups in Allocators\n");
>> +       ret = allocators_clear_unused(dst);
>> +       if (ret) {
>> +               fprintf(stderr, "Error while clearing unused inode
>> groups\n"
>> +                               "Please execute fsck.ocfs2\n");
>> +               retval |= DEFRAG_ERROR;
>> +       }
>> +
>>        ocfs2_write_super(dst->dst_fs);
>>        ocfs2_close(dst->dst_fs);
>>
>> diff --git a/defrag.ocfs2/include/defrag.h b/defrag.ocfs2/include/defrag.h
>> index 7c25e89..e845893 100644
>> --- a/defrag.ocfs2/include/defrag.h
>> +++ b/defrag.ocfs2/include/defrag.h
>> @@ -1,7 +1,7 @@
>>  /* -*- mode: c; c-basic-offset: 8; -*-
>>  * vim: noexpandtab sw=8 ts=8 sts=0:
>>  *
>> - * defrag.h
>> + * fsck.h
>
> A typo here?
>
>>  *
>>  * Copyright (C) 2002 Oracle Corporation.  All rights reserved.
>>  *
>> @@ -42,5 +42,7 @@ extern int verbose;
>>  } while (0)
>>  +errcode_t allocators_clear_unused(struct defrag_state *);
>> +
>>  #endif /* __OCFS2_DEFRAG_H__ */
>>
>
>

Thanks for your review. I agree with the rest of the changes and will
incorporate them in my next posting.

-- 
Goldwyn



More information about the Ocfs2-tools-devel mailing list