[Ocfs2-tools-devel] [PATCH 13/23] tunefs rework: Add op_set_slot_count.c
Joel Becker
Joel.Becker at oracle.com
Mon Aug 11 18:15:46 PDT 2008
On Mon, Aug 11, 2008 at 06:10:07PM -0700, Joel Becker wrote:
> On Mon, Aug 11, 2008 at 05:32:05PM -0700, Joel Becker wrote:
> > The set_slot_count operation changes the number of node slots on an
> > ocfs2 filesystem. Each slot has a journal and other associated files.
> > The number of slots is the number of nodes that can share the
> > filesystem. All filesystems must have at least one slot.
>
> This is what I get for rebasing late at night. The patch needed
> an update for Tao's ocfs2_init_dir()/ocfs2_link() change. This is the
> delta.
And here is the full patch.
Joel
---
diff --git a/tunefs.ocfs2/op_set_slot_count.c b/tunefs.ocfs2/op_set_slot_count.c
new file mode 100644
index 0000000..207d9b0
--- /dev/null
+++ b/tunefs.ocfs2/op_set_slot_count.c
@@ -0,0 +1,1083 @@
+/* -*- mode: c; c-basic-offset: 8; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
+ * op_set_slot_count.c
+ *
+ * ocfs2 tune utility for setting the number of slots available on the
+ * filesystem.
+ *
+ * Copyright (C) 2004, 2008 Oracle. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+#include <errno.h>
+#include <inttypes.h>
+#include <limits.h>
+
+#include "ocfs2/ocfs2.h"
+#include "ocfs2/bitops.h"
+
+#include "libocfs2ne.h"
+
+/*
+ * One allocation group that move_chain_rec() is relocating.  Nodes are
+ * pushed onto the head of a singly linked list while a chain is walked,
+ * so the list ends up in the reverse of the on-disk chain order.
+ */
+struct moved_group {
+ uint64_t blkno; /* block number the group descriptor was read from */
+ char *gd_buf; /* block-sized buffer holding the group descriptor */
+ struct moved_group *next; /* next node in the list, NULL at the tail */
+};
+
+/*
+ * State shared by relink_system_alloc(), move_chain_rec(), move_group()
+ * and change_sub_alloc_slot() while the chains of one allocator are being
+ * handed over to the allocators of the remaining slots.
+ */
+struct relink_ctxt {
+ int inode_type; /* system inode type of the allocator being relinked */
+ struct ocfs2_chain_rec *cr; /* source chain record currently being moved */
+ uint16_t new_slot; /* slot whose allocator receives the chain */
+ uint64_t dst_blkno; /* block number of the receiving allocator inode */
+ char *src_inode; /* buffer holding the allocator inode being emptied */
+ char *dst_inode; /* buffer holding the receiving allocator inode */
+ char *ex_buf; /* scratch block used to rewrite extent blocks/inodes */
+};
+
+/*
+ * Private data handed to remove_slot_iterate() through
+ * ocfs2_dir_iterate().
+ */
+struct remove_slot_ctxt {
+ ocfs2_filesys *fs; /* filesystem the system directory belongs to */
+ uint16_t removed_slot; /* slot whose ":NNNN" system files are deleted */
+ errcode_t errcode; /* error recorded when ocfs2_delete_inode() fails */
+};
+
+
+/*
+ * Create the per-slot system files for the slots being added.
+ *
+ * For every system inode type after OCFS2_LAST_GLOBAL_SYSTEM_INODE, one
+ * file is created for each slot from the current s_max_slots up to
+ * num_slots - 1.  Names that already resolve in the system directory are
+ * skipped.
+ *
+ * Returns 0 on success, TUNEFS_ET_TOO_MANY_SLOTS_EXTENDED or
+ * TUNEFS_ET_TOO_MANY_SLOTS_OLD when num_slots exceeds the limit of the
+ * filesystem's slot map format, or the error of the libocfs2 call that
+ * failed.
+ */
+static errcode_t add_slots(ocfs2_filesys *fs, int num_slots)
+{
+	errcode_t err;
+	uint16_t first_new = OCFS2_RAW_SB(fs->fs_super)->s_max_slots;
+	char name[OCFS2_MAX_FILENAME_LEN];
+	uint64_t ino_blkno;
+	int type, slot, limit;
+	int dirent_type;
+
+	/* The slot limit depends on which slot map format is in use. */
+	if (!ocfs2_uses_extended_slot_map(OCFS2_RAW_SB(fs->fs_super))) {
+		limit = OCFS2_MAX_SLOTS;
+		err = TUNEFS_ET_TOO_MANY_SLOTS_OLD;
+	} else {
+		limit = INT16_MAX;
+		err = TUNEFS_ET_TOO_MANY_SLOTS_EXTENDED;
+	}
+	if (num_slots > limit)
+		return err;
+
+	err = 0;
+	type = OCFS2_LAST_GLOBAL_SYSTEM_INODE + 1;
+	for (; type < NUM_SYSTEM_INODES; type++) {
+		for (slot = first_new; slot < num_slots; slot++) {
+			ocfs2_sprintf_system_inode_name(name,
+							OCFS2_MAX_FILENAME_LEN,
+							type, slot);
+			verbosef(VL_APP, "Creating system file \"%s\"\n",
+				 name);
+
+			/* Nothing to do when the name is already linked. */
+			err = ocfs2_lookup(fs, fs->fs_sysdir_blkno, name,
+					   strlen(name), NULL, &ino_blkno);
+			if (err == 0) {
+				verbosef(VL_APP,
+					 "System file \"%s\" already exists\n",
+					 name);
+				continue;
+			}
+
+			/* Allocate the inode backing the system file. */
+			err = ocfs2_new_system_inode(fs, &ino_blkno,
+					ocfs2_system_inodes[type].si_mode,
+					ocfs2_system_inodes[type].si_iflags);
+			if (err) {
+				verbosef(VL_APP,
+					 "%s while creating inode for "
+					 "system file \"%s\"\n",
+					 error_message(err), name);
+				return err;
+			}
+
+			if (S_ISDIR(ocfs2_system_inodes[type].si_mode))
+				dirent_type = OCFS2_FT_DIR;
+			else
+				dirent_type = OCFS2_FT_REG_FILE;
+
+			/* Directories are initialized before being linked. */
+			if (dirent_type == OCFS2_FT_DIR) {
+				err = ocfs2_init_dir(fs, ino_blkno,
+						     fs->fs_sysdir_blkno);
+				if (err) {
+					verbosef(VL_APP,
+						 "%s while initializing "
+						 "directory \"%s\"\n",
+						 error_message(err),
+						 name);
+					return err;
+				}
+			}
+
+			/* Make the new inode visible in the system dir. */
+			err = ocfs2_link(fs, fs->fs_sysdir_blkno, name,
+					 ino_blkno, dirent_type);
+			if (err) {
+				verbosef(VL_APP,
+					 "%s while linking inode %"PRIu64" "
+					 "as \"%s\" in the system "
+					 "directory\n",
+					 error_message(err), ino_blkno, name);
+				return err;
+			}
+			verbosef(VL_APP, "System file \"%s\" created\n",
+				 name);
+		}
+	}
+
+	return err;
+}
+
+/*
+ * Rewrite the suballocator slot recorded in the metadata block at blkno
+ * so that it names ctxt->new_slot.
+ *
+ * When the allocator being relinked is the extent allocator the block is
+ * handled as an extent block (h_suballoc_slot); otherwise it is handled
+ * as an inode (i_suballoc_slot).  ctxt->ex_buf is used as the scratch
+ * block for the read-modify-write.
+ *
+ * Returns 0 on success or the error of the failing read/write.
+ */
+static errcode_t change_sub_alloc_slot(ocfs2_filesys *fs,
+				       uint64_t blkno,
+				       struct relink_ctxt *ctxt)
+{
+	errcode_t err;
+
+	if (ctxt->inode_type != EXTENT_ALLOC_SYSTEM_INODE) {
+		struct ocfs2_dinode *inode;
+
+		err = ocfs2_read_inode(fs, blkno, ctxt->ex_buf);
+		if (err)
+			return err;
+
+		inode = (struct ocfs2_dinode *)ctxt->ex_buf;
+		inode->i_suballoc_slot = ctxt->new_slot;
+
+		return ocfs2_write_inode(fs, blkno, ctxt->ex_buf);
+	} else {
+		struct ocfs2_extent_block *extent;
+
+		err = ocfs2_read_extent_block(fs, blkno, ctxt->ex_buf);
+		if (err)
+			return err;
+
+		extent = (struct ocfs2_extent_block *)ctxt->ex_buf;
+		extent->h_suballoc_slot = ctxt->new_slot;
+
+		return ocfs2_write_extent_block(fs, blkno, ctxt->ex_buf);
+	}
+}
+
+/*
+ * Link one group descriptor into the destination allocator held in
+ * ctxt->dst_inode and write both the descriptor and the inode out.
+ *
+ * The group is inserted at the head of a chain of the destination: the
+ * next unused chain record while one is available, otherwise the record
+ * selected by (groups already held) modulo cl_count.  A NULL group, a
+ * group without a block number or without a buffer is silently ignored.
+ *
+ * Returns 0 on success or the error of the failing write.
+ */
+static errcode_t move_group(ocfs2_filesys *fs,
+			    struct relink_ctxt *ctxt,
+			    struct moved_group *group)
+{
+	errcode_t err;
+	uint16_t slot_idx;
+	struct ocfs2_dinode *target;
+	struct ocfs2_chain_list *chains;
+	struct ocfs2_chain_rec *rec;
+	struct ocfs2_group_desc *desc;
+
+	if (!group || !group->blkno || !group->gd_buf)
+		return 0;
+
+	target = (struct ocfs2_dinode *)ctxt->dst_inode;
+	chains = &target->id2.i_chain;
+
+	/* Pick the chain record the group is inserted into. */
+	if (chains->cl_next_free_rec >= chains->cl_count) {
+		/*
+		 * Every record already heads a chain, so derive the
+		 * position from the number of groups the allocator holds.
+		 */
+		slot_idx = target->id1.bitmap1.i_total /
+			(chains->cl_cpg * chains->cl_bpc);
+		slot_idx %= chains->cl_count;
+	} else
+		slot_idx = chains->cl_next_free_rec;
+
+	rec = &chains->cl_recs[slot_idx];
+
+	desc = (struct ocfs2_group_desc *)group->gd_buf;
+	desc->bg_parent_dinode = ctxt->dst_blkno;
+	desc->bg_chain = slot_idx;
+
+	/*
+	 * Overwriting bg_next_group is safe: move_chain_rec() hands the
+	 * groups over tail first, so everything that used to follow this
+	 * group in its old chain has already been moved.
+	 */
+	desc->bg_next_group = rec->c_blkno;
+
+	err = ocfs2_write_group_desc(fs, group->blkno, group->gd_buf);
+	if (err)
+		return err;
+
+	/* The group becomes the new head of the chosen chain. */
+	rec->c_free += desc->bg_free_bits_count;
+	rec->c_total += desc->bg_bits;
+	rec->c_blkno = desc->bg_blkno;
+
+	/* A previously unused record is now taken. */
+	if (chains->cl_next_free_rec != chains->cl_count)
+		chains->cl_next_free_rec++;
+
+	target->id1.bitmap1.i_total += desc->bg_bits;
+	target->id1.bitmap1.i_used +=
+		desc->bg_bits - desc->bg_free_bits_count;
+	target->i_clusters += chains->cl_cpg;
+	target->i_size += chains->cl_cpg * fs->fs_clustersize;
+
+	return ocfs2_write_inode(fs, ctxt->dst_blkno, ctxt->dst_inode);
+}
+
+/*
+ * Hand every group of the chain ctxt->cr over to the destination
+ * allocator described by ctxt->dst_inode / ctxt->dst_blkno.
+ *
+ * Pass 1 walks the chain through bg_next_group and reads each group
+ * descriptor into an in-memory list.  Nodes are pushed at the head, so
+ * the list holds the chain in reverse order.
+ *
+ * Pass 2 visits the list (i.e. the chain tail first) and, for each group:
+ *   1) rewrites the suballocator slot of every allocated block in the
+ *      group (bit 0 is skipped),
+ *   2) links the group into the destination allocator via move_group().
+ *
+ * The list is always released before returning.  Returns 0 on success or
+ * the first error encountered.
+ */
+static errcode_t move_chain_rec(ocfs2_filesys *fs, struct relink_ctxt *ctxt)
+{
+	errcode_t err = 0;
+	int bit, run_start, run_end;
+	uint64_t next_gd = ctxt->cr->c_blkno;
+	struct ocfs2_group_desc *desc = NULL;
+	struct moved_group *node = NULL, *list = NULL, *following;
+
+	/* Pass 1: snapshot the chain, newest node first. */
+	for (; next_gd; next_gd = desc->bg_next_group) {
+		err = ocfs2_malloc0(sizeof(struct moved_group), &node);
+		if (err)
+			goto cleanup;
+		memset(node, 0, sizeof(struct moved_group));
+
+		/*
+		 * Link the node in right away so that the cleanup code
+		 * finds it even if one of the steps below fails.
+		 */
+		node->next = list;
+		list = node;
+
+		err = ocfs2_malloc_block(fs->fs_io, &node->gd_buf);
+		if (err)
+			goto cleanup;
+
+		err = ocfs2_read_group_desc(fs, next_gd, node->gd_buf);
+		if (err)
+			goto cleanup;
+
+		node->blkno = next_gd;
+		desc = (struct ocfs2_group_desc *)node->gd_buf;
+	}
+
+	/* Pass 2: re-home each group, starting from the chain's tail. */
+	for (node = list; node; node = node->next) {
+		desc = (struct ocfs2_group_desc *)node->gd_buf;
+
+		/* Walk the runs of set bits, starting after bit 0. */
+		run_end = 1;
+		while (run_end < desc->bg_bits) {
+			run_start = ocfs2_find_next_bit_set(desc->bg_bitmap,
+							    desc->bg_bits,
+							    run_end);
+			if (run_start >= desc->bg_bits)
+				break;
+
+			run_end = ocfs2_find_next_bit_clear(desc->bg_bitmap,
+							    desc->bg_bits,
+							    run_start);
+
+			for (bit = run_start; bit < run_end; bit++) {
+				err = change_sub_alloc_slot(fs,
+							    node->blkno + bit,
+							    ctxt);
+				if (err)
+					goto cleanup;
+			}
+		}
+
+		err = move_group(fs, ctxt, node);
+		if (err)
+			goto cleanup;
+	}
+
+cleanup:
+	while (list) {
+		following = list->next;
+		if (list->gd_buf)
+			ocfs2_free(&list->gd_buf);
+		ocfs2_free(&list);
+		list = following;
+	}
+	return err;
+}
+
+/*
+ * Move every allocation group owned by the inode_type allocator of
+ * removed_slot to the same kind of allocator in the remaining slots, then
+ * write the source allocator back empty.
+ *
+ * Chain record i of the source is handed to slot (i % new_slots).  An
+ * allocator whose bitmap total is zero has nothing to move and is
+ * reported as success.
+ *
+ * Returns 0 on success, OCFS2_ET_INODE_NOT_VALID when the source inode is
+ * not a valid chain-bitmap allocator, or the error of the failing
+ * libocfs2 call.
+ */
+static errcode_t relink_system_alloc(ocfs2_filesys *fs,
+				     uint16_t removed_slot,
+				     uint16_t new_slots,
+				     int inode_type)
+{
+	errcode_t ret = 0;
+	int16_t i;
+	uint64_t blkno;
+	struct ocfs2_dinode *di = NULL;
+	struct ocfs2_chain_list *cl = NULL;
+	struct relink_ctxt ctxt;
+	char fname[OCFS2_MAX_FILENAME_LEN];
+
+	memset(&ctxt, 0, sizeof(ctxt));
+
+	ocfs2_sprintf_system_inode_name(fname, OCFS2_MAX_FILENAME_LEN,
+					inode_type, removed_slot);
+	verbosef(VL_APP, "Relinking system allocator \"%s\"\n", fname);
+
+	ret = ocfs2_lookup_system_inode(fs, inode_type,
+					removed_slot, &blkno);
+	if (ret) {
+		verbosef(VL_APP, "%s while looking up the allocator\n",
+			 error_message(ret));
+		goto bail;
+	}
+
+	ret = ocfs2_malloc_block(fs->fs_io, &ctxt.src_inode);
+	if (ret) {
+		verbosef(VL_APP,
+			 "%s while allocating the inode buffer\n",
+			 error_message(ret));
+		goto bail;
+	}
+
+	ret = ocfs2_read_inode(fs, blkno, ctxt.src_inode);
+	if (ret) {
+		verbosef(VL_APP,
+			 "%s while reading allocator inode %"PRIu64"\n",
+			 error_message(ret), blkno);
+		goto bail;
+	}
+
+	di = (struct ocfs2_dinode *)ctxt.src_inode;
+
+	if (!(di->i_flags & OCFS2_VALID_FL) ||
+	    !(di->i_flags & OCFS2_BITMAP_FL) ||
+	    !(di->i_flags & OCFS2_CHAIN_FL)) {
+		verbosef(VL_APP, "Allocator inode %"PRIu64" is corrupt.\n",
+			 blkno);
+		/*
+		 * Fail explicitly: ret is still 0 from the successful
+		 * read, and returning success here would let the caller
+		 * delete an allocator whose groups were never relinked.
+		 */
+		ret = OCFS2_ET_INODE_NOT_VALID;
+		goto bail;
+	}
+
+	/* An allocator without any bits has no groups to move. */
+	if (di->id1.bitmap1.i_total == 0)
+		goto bail;
+
+	/* Scratch block used while rewriting extent blocks and inodes. */
+	ret = ocfs2_malloc_block(fs->fs_io, &ctxt.ex_buf);
+	if (ret) {
+		verbosef(VL_APP,
+			 "%s while allocating an extent block buffer\n",
+			 error_message(ret));
+		goto bail;
+	}
+
+	ret = ocfs2_malloc_block(fs->fs_io, &ctxt.dst_inode);
+	if (ret) {
+		verbosef(VL_APP,
+			 "%s while allocating the destination inode buffer\n",
+			 error_message(ret));
+		goto bail;
+	}
+
+	cl = &di->id2.i_chain;
+	ctxt.inode_type = inode_type;
+
+	/* Walk the chain records backwards, spreading them over the slots. */
+	for (i = cl->cl_next_free_rec - 1; i >= 0; i--) {
+		ctxt.new_slot = i % new_slots;
+		ret = ocfs2_lookup_system_inode(fs, inode_type,
+						ctxt.new_slot,
+						&ctxt.dst_blkno);
+		if (ret) {
+			verbosef(VL_APP,
+				 "%s while finding the target allocator "
+				 "for slot %d\n",
+				 error_message(ret), ctxt.new_slot);
+			goto bail;
+		}
+
+		/* Re-read the target each time; move_group() rewrites it. */
+		ret = ocfs2_read_inode(fs, ctxt.dst_blkno, ctxt.dst_inode);
+		if (ret) {
+			verbosef(VL_APP,
+				 "%s while reading target allocator inode "
+				 "%"PRIu64"\n",
+				 error_message(ret), ctxt.dst_blkno);
+			goto bail;
+		}
+
+		ctxt.cr = &cl->cl_recs[i];
+
+		ret = move_chain_rec(fs, &ctxt);
+		if (ret) {
+			verbosef(VL_APP,
+				 "%s while trying to move a chain record "
+				 "to the allocator in slot %d\n",
+				 error_message(ret), ctxt.new_slot);
+			goto bail;
+		}
+	}
+
+	/* Empty the original allocator now that it owns no groups. */
+	di->id1.bitmap1.i_used = 0;
+	di->id1.bitmap1.i_total = 0;
+	di->i_clusters = 0;
+	di->i_size = 0;
+
+	cl = &di->id2.i_chain;
+	cl->cl_next_free_rec = 0;
+	memset(cl->cl_recs, 0, sizeof(struct ocfs2_chain_rec) * cl->cl_count);
+
+	ret = ocfs2_write_inode(fs, blkno, ctxt.src_inode);
+	if (ret)
+		verbosef(VL_APP,
+			 "%s while writing out the empty allocator inode\n",
+			 error_message(ret));
+
+bail:
+	if (ctxt.ex_buf)
+		ocfs2_free(&ctxt.ex_buf);
+	if (ctxt.dst_inode)
+		ocfs2_free(&ctxt.dst_inode);
+	if (ctxt.src_inode)
+		ocfs2_free(&ctxt.src_inode);
+
+	if (!ret)
+		verbosef(VL_APP, "Successfully relinked allocator \"%s\"\n",
+			 fname);
+	return ret;
+}
+
+/*
+ * Overwrite the whole content of the journal file behind ci with zeros.
+ *
+ * The file is written in chunks of at most BUFLEN bytes from a zeroed
+ * buffer.  The remaining byte count is tracked in 64 bits so that a
+ * journal of 4 GiB or more is zeroed completely, and the buffer is
+ * released on every path.
+ *
+ * Returns 0 on success or the error of the failing allocation/write.
+ */
+static errcode_t empty_journal(ocfs2_filesys *fs,
+			       ocfs2_cached_inode *ci)
+{
+	errcode_t ret = 0;
+	char *buf = NULL;
+	int bs_bits = OCFS2_RAW_SB(fs->fs_super)->s_blocksize_bits;
+	uint64_t offset = 0;
+	uint64_t count;
+	uint32_t wrote, chunk;
+
+#define BUFLEN 1048576
+	ret = ocfs2_malloc_blocks(fs->fs_io, (BUFLEN >> bs_bits), &buf);
+	if (ret)
+		goto out;
+	memset(buf, 0, BUFLEN);
+
+	count = ci->ci_inode->i_size;
+	while (count) {
+		/* Never ask for more than the buffer holds. */
+		chunk = (count < BUFLEN) ? (uint32_t)count : (uint32_t)BUFLEN;
+
+		ret = ocfs2_file_write(ci, buf, chunk, offset, &wrote);
+		if (ret)
+			goto out;
+		offset += wrote;
+		count -= wrote;
+	}
+
+out:
+	if (buf)
+		ocfs2_free(&buf);
+	return ret;
+}
+
+/*
+ * Zero the journal of removed_slot and then truncate it to length 0.
+ *
+ * The journal is zeroed before its clusters are released because it may
+ * contain blocks that look like valid inodes (apart from i_blkno); if the
+ * same block range were later reused by an inode allocator, fsck.ocfs2
+ * could complain about them.
+ *
+ * Returns 0 on success or the error of the failing step.
+ */
+static errcode_t empty_and_truncate_journal(ocfs2_filesys *fs,
+					    uint16_t removed_slot)
+{
+	errcode_t err;
+	uint64_t journal_blkno;
+	ocfs2_cached_inode *journal = NULL;
+	char name[OCFS2_MAX_FILENAME_LEN];
+
+	ocfs2_sprintf_system_inode_name(name, OCFS2_MAX_FILENAME_LEN,
+					JOURNAL_SYSTEM_INODE,
+					removed_slot);
+	verbosef(VL_APP, "Truncating journal \"%s\"\n", name);
+
+	err = ocfs2_lookup_system_inode(fs, JOURNAL_SYSTEM_INODE,
+					removed_slot, &journal_blkno);
+	if (err) {
+		verbosef(VL_APP, "%s while looking up journal \"%s\"\n",
+			 error_message(err), name);
+		goto done;
+	}
+
+	err = ocfs2_read_cached_inode(fs, journal_blkno, &journal);
+	if (err) {
+		verbosef(VL_APP,
+			 "%s while reading journal inode %"PRIu64"\n",
+			 error_message(err), journal_blkno);
+		goto done;
+	}
+
+	err = empty_journal(fs, journal);
+	if (err) {
+		verbosef(VL_APP, "%s while emptying journal \"%s\"\n",
+			 error_message(err), name);
+		goto done;
+	}
+
+	err = ocfs2_truncate(fs, journal_blkno, 0);
+	if (err)
+		verbosef(VL_APP, "%s while truncating journal \"%s\"\n",
+			 error_message(err), name);
+	else
+		verbosef(VL_APP, "Journal \"%s\" truncated\n", name);
+
+done:
+	if (journal)
+		ocfs2_free_cached_inode(fs, journal);
+	return err;
+}
+
+/*
+ * Truncate the orphan directory of removed_slot to length 0.
+ *
+ * Returns 0 on success, or the error from looking the directory up or
+ * from truncating it.
+ */
+static errcode_t truncate_orphan_dir(ocfs2_filesys *fs,
+				     uint16_t removed_slot)
+{
+	errcode_t err;
+	uint64_t dir_blkno;
+	char name[OCFS2_MAX_FILENAME_LEN];
+
+	ocfs2_sprintf_system_inode_name(name, OCFS2_MAX_FILENAME_LEN,
+					ORPHAN_DIR_SYSTEM_INODE,
+					removed_slot);
+	verbosef(VL_APP, "Truncating orphan dir \"%s\"\n", name);
+
+	err = ocfs2_lookup_system_inode(fs, ORPHAN_DIR_SYSTEM_INODE,
+					removed_slot, &dir_blkno);
+	if (err) {
+		verbosef(VL_APP,
+			 "%s while looking up orphan dir \"%s\"\n",
+			 error_message(err), name);
+		return err;
+	}
+
+	err = ocfs2_truncate(fs, dir_blkno, 0);
+	if (err) {
+		verbosef(VL_APP,
+			 "%s while truncating orphan dir \"%s\"\n",
+			 error_message(err), name);
+		return err;
+	}
+
+	verbosef(VL_APP, "Orphan dir \"%s\" truncated\n", name);
+	return 0;
+}
+
+/*
+ * ocfs2_dir_iterate() callback: delete the inode of every directory
+ * entry whose name ends in ":NNNN", where NNNN is ctxt->removed_slot
+ * printed with "%04d", and clear the entry.
+ *
+ * priv_data points to a struct remove_slot_ctxt.  offset, blocksize and
+ * buf are not used.
+ *
+ * Returns 0 for entries that do not match, OCFS2_DIRENT_CHANGED after a
+ * successful delete, or OCFS2_DIRENT_ERROR (with the error stored in
+ * ctxt->errcode) when ocfs2_delete_inode() fails.
+ */
+static int remove_slot_iterate(struct ocfs2_dir_entry *dirent, int offset,
+			       int blocksize, char *buf, void *priv_data)
+{
+	struct remove_slot_ctxt *ctxt =
+		(struct remove_slot_ctxt *)priv_data;
+	int taillen, namelen, ret_flags = 0;
+	errcode_t ret;
+	/* One extra byte so a maximum-length name can be terminated. */
+	char dname[OCFS2_MAX_FILENAME_LEN + 1];
+	char tail[OCFS2_MAX_FILENAME_LEN];
+
+	snprintf(tail, sizeof(tail), ":%04d", ctxt->removed_slot);
+	taillen = strlen(tail);
+
+	namelen = dirent->name_len;
+	if (namelen > OCFS2_MAX_FILENAME_LEN)
+		namelen = OCFS2_MAX_FILENAME_LEN;
+
+	/* A name shorter than the suffix cannot match; never index before
+	 * the start of the name. */
+	if (namelen < taillen)
+		return 0;
+
+	memcpy(dname, dirent->name, namelen);
+	dname[namelen] = '\0';
+
+	if (!strcmp(dname + (namelen - taillen), tail)) {
+		verbosef(VL_APP, "Unlinking system file \"%s\"\n",
+			 dname);
+		ret = ocfs2_delete_inode(ctxt->fs, dirent->inode);
+		if (ret) {
+			verbosef(VL_APP,
+				 "%s while unlinking system file \"%s\"\n",
+				 error_message(ret), dname);
+			ret_flags |= OCFS2_DIRENT_ERROR;
+			ctxt->errcode = ret;
+		} else {
+			verbosef(VL_APP,
+				 "Successfully unlinked system file "
+				 "\"%s\"\n",
+				 dname);
+			/* Clearing the inode number empties the entry. */
+			dirent->inode = 0;
+			ret_flags |= OCFS2_DIRENT_CHANGED;
+		}
+	}
+
+	return ret_flags;
+}
+
+/*
+ * Delete every system file that belongs to removed_slot by iterating the
+ * system directory with remove_slot_iterate().
+ *
+ * Returns the error recorded by the callback if there is one, otherwise
+ * the result of ocfs2_dir_iterate() itself, so that a failure to read
+ * the directory is not reported as success.
+ */
+static errcode_t remove_slot_entry(ocfs2_filesys *fs, uint16_t removed_slot)
+{
+	errcode_t ret;
+	struct remove_slot_ctxt ctxt = {
+		.fs = fs,
+		.removed_slot = removed_slot,
+		.errcode = 0
+	};
+
+	ret = ocfs2_dir_iterate(fs, fs->fs_sysdir_blkno,
+				OCFS2_DIRENT_FLAG_EXCLUDE_DOTS, NULL,
+				remove_slot_iterate, &ctxt);
+
+	/* The callback's error is the more specific one. */
+	if (ctxt.errcode)
+		return ctxt.errcode;
+
+	return ret;
+}
+
+/*
+ * Decrement i_links_count of the inode at blkno and write it back.
+ *
+ * blkno is a full 64-bit block number; the caller passes
+ * fs->fs_sysdir_blkno, which must not be truncated.
+ *
+ * Returns 0 on success, OCFS2_ET_INODE_NOT_VALID when the link count is
+ * already zero, or the error of the failing allocation/read/write.
+ */
+static errcode_t decrease_link_count(ocfs2_filesys *fs, uint64_t blkno)
+{
+	errcode_t ret;
+	char *buf = NULL;
+	struct ocfs2_dinode *di = NULL;
+
+	ret = ocfs2_malloc_block(fs->fs_io, &buf);
+	if (ret)
+		goto bail;
+
+	ret = ocfs2_read_inode(fs, blkno, buf);
+	if (ret)
+		goto bail;
+
+	di = (struct ocfs2_dinode *)buf;
+
+	if (di->i_links_count > 0)
+		di->i_links_count--;
+	else {
+		ret = OCFS2_ET_INODE_NOT_VALID;
+		goto bail;
+	}
+
+	ret = ocfs2_write_inode(fs, blkno, buf);
+bail:
+	if (buf)
+		ocfs2_free(&buf);
+	return ret;
+}
+
+/*
+ * ocfs2_dir_iterate() callback used by orphan_dir_check(): being called
+ * at all means the directory has an entry, so set the int that priv_data
+ * points to and stop the iteration.  The other arguments are not used.
+ */
+static int orphan_iterate(struct ocfs2_dir_entry *dirent, int offset,
+			  int blocksize, char *buf, void *priv_data)
+{
+	int *found = (int *)priv_data;
+
+	/* One entry is enough; there is no point in visiting the rest. */
+	found[0] = 1;
+	return OCFS2_DIRENT_ABORT;
+}
+
+/*
+ * Verify that the orphan directories of all slots about to be removed
+ * (new_slots up to s_max_slots - 1) contain no entries.
+ *
+ * Returns 0 when all of them are empty, TUNEFS_ET_ORPHAN_DIR_NOT_EMPTY
+ * when an entry is found, or the error from looking a directory up or
+ * from iterating it.
+ */
+static errcode_t orphan_dir_check(ocfs2_filesys *fs,
+				  uint16_t new_slots)
+{
+	errcode_t ret = 0;
+	uint64_t blkno;
+	unsigned int slot;
+	int has_orphan = 0;
+	uint16_t max_slots = OCFS2_RAW_SB(fs->fs_super)->s_max_slots;
+
+	for (slot = new_slots; slot < max_slots; ++slot) {
+		ret = ocfs2_lookup_system_inode(fs, ORPHAN_DIR_SYSTEM_INODE,
+						slot, &blkno);
+		if (ret) {
+			verbosef(VL_APP,
+				 "%s while looking up orphan dir for "
+				 "slot %u during orphan dir check\n",
+				 error_message(ret), slot);
+			break;
+		}
+
+		ret = ocfs2_dir_iterate(fs, blkno,
+					OCFS2_DIRENT_FLAG_EXCLUDE_DOTS, NULL,
+					orphan_iterate, &has_orphan);
+
+		if (has_orphan) {
+			ret = TUNEFS_ET_ORPHAN_DIR_NOT_EMPTY;
+			verbosef(VL_APP,
+				 "Entries found in orphan dir for slot %u\n",
+				 slot);
+			break;
+		}
+
+		/*
+		 * A directory that could not be read cannot be declared
+		 * empty; stop instead of letting the next iteration
+		 * overwrite the error.
+		 */
+		if (ret) {
+			verbosef(VL_APP,
+				 "%s while iterating orphan dir for "
+				 "slot %u during orphan dir check\n",
+				 error_message(ret), slot);
+			break;
+		}
+	}
+
+	return ret;
+}
+
+/*
+ * Verify that the local allocators of all slots about to be removed
+ * (new_slots up to s_max_slots - 1) hold no bits.
+ *
+ * Returns 0 when every one has a bitmap total of zero,
+ * TUNEFS_ET_LOCAL_ALLOC_NOT_EMPTY otherwise, or the error of the failing
+ * allocation/lookup/read.
+ */
+static errcode_t local_alloc_check(ocfs2_filesys *fs,
+				   uint16_t new_slots)
+{
+	errcode_t err = 0;
+	uint16_t slot = new_slots;
+	uint16_t slot_count = OCFS2_RAW_SB(fs->fs_super)->s_max_slots;
+	uint64_t la_blkno;
+	char *block = NULL;
+	struct ocfs2_dinode *la;
+
+	err = ocfs2_malloc_block(fs->fs_io, &block);
+	if (err) {
+		verbosef(VL_APP,
+			 "%s while allocating inode buffer for local "
+			 "alloc check\n",
+			 error_message(err));
+		goto out;
+	}
+
+	while (slot < slot_count) {
+		err = ocfs2_lookup_system_inode(fs, LOCAL_ALLOC_SYSTEM_INODE,
+						slot, &la_blkno);
+		if (err) {
+			verbosef(VL_APP,
+				 "%s while looking up local alloc for "
+				 "slot %u during local alloc check\n",
+				 error_message(err), slot);
+			goto out;
+		}
+
+		err = ocfs2_read_inode(fs, la_blkno, block);
+		if (err) {
+			verbosef(VL_APP,
+				 "%s while reading inode %"PRIu64" "
+				 "during local alloc check\n",
+				 error_message(err), la_blkno);
+			goto out;
+		}
+
+		la = (struct ocfs2_dinode *)block;
+		if (la->id1.bitmap1.i_total > 0) {
+			err = TUNEFS_ET_LOCAL_ALLOC_NOT_EMPTY;
+			verbosef(VL_APP,
+				 "Local alloc for slot %u is not empty\n",
+				 slot);
+			goto out;
+		}
+
+		++slot;
+	}
+
+out:
+	if (block)
+		ocfs2_free(&block);
+	return err;
+}
+
+/*
+ * Verify that the truncate logs of all slots about to be removed
+ * (new_slots up to s_max_slots - 1) have no records in use.
+ *
+ * Returns 0 when every log has tl_used == 0,
+ * TUNEFS_ET_TRUNCATE_LOG_NOT_EMPTY otherwise, or the error of the failing
+ * allocation/lookup/read.
+ */
+static errcode_t truncate_log_check(ocfs2_filesys *fs,
+				    uint16_t new_slots)
+{
+	errcode_t err = 0;
+	uint16_t slot;
+	uint16_t slot_count = OCFS2_RAW_SB(fs->fs_super)->s_max_slots;
+	uint64_t tl_blkno;
+	char *block = NULL;
+	struct ocfs2_dinode *tl;
+
+	err = ocfs2_malloc_block(fs->fs_io, &block);
+	if (err) {
+		verbosef(VL_APP,
+			 "%s while allocating inode buffer for "
+			 "truncate log check\n",
+			 error_message(err));
+		goto out;
+	}
+
+	for (slot = new_slots; slot < slot_count; ++slot) {
+		err = ocfs2_lookup_system_inode(fs, TRUNCATE_LOG_SYSTEM_INODE,
+						slot, &tl_blkno);
+		if (err) {
+			verbosef(VL_APP,
+				 "%s while looking up truncate log for "
+				 "slot %u during truncate log check\n",
+				 error_message(err), slot);
+			break;
+		}
+
+		err = ocfs2_read_inode(fs, tl_blkno, block);
+		if (err) {
+			verbosef(VL_APP,
+				 "%s while reading inode %"PRIu64" "
+				 "during truncate log check\n",
+				 error_message(err), tl_blkno);
+			break;
+		}
+
+		tl = (struct ocfs2_dinode *)block;
+		if (tl->id2.i_dealloc.tl_used > 0) {
+			err = TUNEFS_ET_TRUNCATE_LOG_NOT_EMPTY;
+			verbosef(VL_APP,
+				 "Truncate log for slot %u is not empty\n",
+				 slot);
+			break;
+		}
+	}
+
+out:
+	if (block)
+		ocfs2_free(&block);
+	return err;
+}
+
+/*
+ * Run the pre-flight checks for shrinking to num_slots slots: orphan
+ * directories, local allocators and truncate logs of the slots to be
+ * removed, in that order.  The first failing check ends the sequence.
+ *
+ * Returns 0 when all checks pass, otherwise the failing check's error.
+ */
+static errcode_t remove_slot_check(ocfs2_filesys *fs, int num_slots)
+{
+	errcode_t err;
+
+	err = orphan_dir_check(fs, num_slots);
+	if (err)
+		return err;
+
+	err = local_alloc_check(fs, num_slots);
+	if (err)
+		return err;
+
+	return truncate_log_check(fs, num_slots);
+}
+
+/*
+ * Shrink the filesystem to num_slots slots.
+ *
+ * After the pre-flight checks pass, the REMOVE_SLOT in-progress flag is
+ * set (update_slot_count() clears it once everything has worked) and the
+ * slots are removed one at a time, highest first, so that fsck.ocfs2 can
+ * cope with, and a later run can continue from, an interruption.
+ *
+ * Returns 0 on success or the error of the first failing step.
+ */
+static errcode_t remove_slots(ocfs2_filesys *fs, int num_slots)
+{
+	errcode_t err;
+	uint16_t current_slots = OCFS2_RAW_SB(fs->fs_super)->s_max_slots;
+	uint16_t victim;
+
+	err = remove_slot_check(fs, num_slots);
+	if (err)
+		return err;
+
+	err = tunefs_set_in_progress(fs, OCFS2_TUNEFS_INPROG_REMOVE_SLOT);
+	if (err)
+		return err;
+
+	for (victim = current_slots - 1; victim >= num_slots; victim--) {
+		/* Hand the slot's extent allocator groups to the others. */
+		err = relink_system_alloc(fs, victim, num_slots,
+					  EXTENT_ALLOC_SYSTEM_INODE);
+		if (err)
+			break;
+
+		/* Same for the slot's inode allocator. */
+		err = relink_system_alloc(fs, victim, num_slots,
+					  INODE_ALLOC_SYSTEM_INODE);
+		if (err)
+			break;
+
+		/* Give the orphan dir's clusters back to the global bitmap. */
+		err = truncate_orphan_dir(fs, victim);
+		if (err)
+			break;
+
+		/* Zero the journal, then release its clusters. */
+		err = empty_and_truncate_journal(fs, victim);
+		if (err)
+			break;
+
+		/*
+		 * The slot count in the super block is lowered BEFORE the
+		 * slot's directory entries are deleted:
+		 *
+		 * 1. ocfs2_lock_down_clusters needs to lock every journal
+		 *    file.  Deleting the journal entry first and then
+		 *    failing to lower s_max_slots would leave the cluster
+		 *    impossible to lock because a journal is missing.
+		 *
+		 * 2. By now everything but the inodes themselves has been
+		 *    freed, so lowering the count is safe.  Inodes left
+		 *    behind by a crash after this point can be ignored,
+		 *    and can be reused if slots are added again later.
+		 */
+		OCFS2_RAW_SB(fs->fs_super)->s_max_slots--;
+		err = ocfs2_write_primary_super(fs);
+		if (err)
+			break;
+
+		/* Drop the slot's entries from the system directory. */
+		err = remove_slot_entry(fs, victim);
+		if (err)
+			break;
+
+		/*
+		 * The orphan dir is gone, so the system directory loses
+		 * one link.
+		 */
+		err = decrease_link_count(fs, fs->fs_sysdir_blkno);
+		if (err)
+			break;
+	}
+
+	return err;
+}
+
+/*
+ * Change the number of node slots of fs to num_slots.
+ *
+ * Nothing is done when the count already matches or when the user
+ * declines the confirmation prompt.  Otherwise, with signals blocked,
+ * slots are added or removed, new journals are sized when growing, the
+ * slot map is reformatted, the REMOVE_SLOT in-progress flag is cleared
+ * when shrinking, and the super block is written.
+ *
+ * Returns 0 on success (including the two "nothing done" cases) or the
+ * error of the first failing step.
+ */
+static errcode_t update_slot_count(ocfs2_filesys *fs, int num_slots)
+{
+	errcode_t err = 0;
+	int before = OCFS2_RAW_SB(fs->fs_super)->s_max_slots;
+	int growing = num_slots > before;
+
+	if (num_slots == before) {
+		verbosef(VL_APP,
+			 "Device \"%s\" already has %d node slots; "
+			 "nothing to do\n",
+			 fs->fs_devname, num_slots);
+		return err;
+	}
+
+	if (!tunefs_interact("Change the number of node slots on device "
+			     "\"%s\" from %d to %d? ",
+			     fs->fs_devname, before, num_slots))
+		return err;
+
+	tunefs_block_signals();
+
+	err = growing ? add_slots(fs, num_slots) :
+			remove_slots(fs, num_slots);
+	if (err)
+		goto unblock;
+
+	OCFS2_RAW_SB(fs->fs_super)->s_max_slots = num_slots;
+
+	if (growing) {
+		/* Size the new journals to match the first slot's. */
+		verbosef(VL_APP,
+			 "Allocating space for the new journals\n");
+		err = tunefs_set_journal_size(fs, 0);
+		if (err) {
+			verbosef(VL_APP,
+				 "%s while trying to size the new journals\n",
+				 error_message(err));
+			goto unblock;
+		}
+		verbosef(VL_APP, "Journal space allocated\n");
+	}
+
+	err = ocfs2_format_slot_map(fs);
+	if (err)
+		goto unblock;
+
+	if (!growing) {
+		/* remove_slots() set this flag; the removal is complete. */
+		err = tunefs_clear_in_progress(fs,
+					       OCFS2_TUNEFS_INPROG_REMOVE_SLOT);
+		if (err)
+			goto unblock;
+	}
+
+	err = ocfs2_write_super(fs);
+
+unblock:
+	tunefs_unblock_signals();
+	return err;
+}
+
+/*
+ * Parse the requested slot count from arg and stash it in
+ * op->to_private (as a long cast to a pointer).
+ *
+ * The value must be a complete base-10 number of at least 1 that fits in
+ * an int.  The upper limit that depends on the slot map format is
+ * checked later, once the filesystem has been opened.
+ *
+ * Returns 0 on success and 1 (after reporting through errorf()) when the
+ * argument is missing or invalid.
+ */
+static int set_slot_count_parse_option(struct tunefs_operation *op,
+				       char *arg)
+{
+	int rc = 1;
+	char *ptr = NULL;
+	long num_slots;
+
+	if (!arg) {
+		errorf("Number of slots not specified\n");
+		goto out;
+	}
+
+	/* strtol() reports overflow only through errno. */
+	errno = 0;
+	num_slots = strtol(arg, &ptr, 10);
+	if (errno == ERANGE) {
+		errorf("Number of slots is out of range: %s\n", arg);
+		goto out;
+	}
+	/* Reject input without digits as well as trailing garbage. */
+	if ((ptr == arg) || (*ptr != '\0')) {
+		errorf("Invalid number: \"%s\"\n", arg);
+		goto out;
+	}
+	if (num_slots < 1) {
+		errorf("At least one slot required\n");
+		goto out;
+	}
+	if (num_slots > INT_MAX) {
+		errorf("Number of slots is out of range: %s\n", arg);
+		goto out;
+	}
+	/*
+	 * It's now safe to treat num_slots as an int.
+	 *
+	 * We'll re-check the maximum number of slots after we've opened
+	 * the filesystem and determined the slot map format.
+	 */
+
+	op->to_private = (void *)num_slots;
+	rc = 0;
+
+out:
+	return rc;
+}
+
+/*
+ * Run the operation: apply the slot count stored in op->to_private by
+ * set_slot_count_parse_option() to fs.  flags is not used.
+ *
+ * Returns 0 on success and 1 (after reporting through tcom_err()) when
+ * update_slot_count() fails.
+ */
+static int set_slot_count_run(struct tunefs_operation *op,
+			      ocfs2_filesys *fs, int flags)
+{
+	errcode_t err;
+	int rc = 0;
+	/*
+	 * The value was stored as a long cast to a pointer, so convert it
+	 * back through long rather than casting a pointer straight to a
+	 * narrower int.  The parser guarantees it fits in an int.
+	 */
+	int num_slots = (int)(long)op->to_private;
+
+	err = update_slot_count(fs, num_slots);
+	if (err) {
+		tcom_err(err,
+			 "- unable to update the number of slots on device "
+			 "\"%s\"",
+			 fs->fs_devname);
+		rc = 1;
+	}
+
+	return rc;
+}
+
+
+/*
+ * Register the operation: name, usage text, open flags (read-write, with
+ * allocation), the option parser and the run function.
+ * NOTE(review): presumably this macro defines the set_slot_count_op
+ * object referenced by the DEBUG_EXE main() below - confirm against
+ * libocfs2ne.h.
+ */
+DEFINE_TUNEFS_OP(set_slot_count,
+ "Usage: op_set_slot_count [opts] <device> "
+ "<number_of_slots>\n",
+ TUNEFS_FLAG_RW | TUNEFS_FLAG_ALLOCATION,
+ set_slot_count_parse_option,
+ set_slot_count_run);
+
+#ifdef DEBUG_EXE
+/*
+ * Stand-alone entry point, built only when DEBUG_EXE is defined: hands
+ * the command line to tunefs_op_main() together with this operation.
+ */
+int main(int argc, char **argv)
+{
+	int status = tunefs_op_main(argc, argv, &set_slot_count_op);
+
+	return status;
+}
+#endif
--
Life's Little Instruction Book #207
"Swing for the fence."
Joel Becker
Principal Software Developer
Oracle
E-mail: joel.becker at oracle.com
Phone: (650) 506-8127
More information about the Ocfs2-tools-devel
mailing list