[Ocfs2-tools-devel] [PATCH 12/22] tunefs rework: Add op_set_slot_count.c

Joel Becker joel.becker at oracle.com
Fri Jul 25 19:07:50 PDT 2008


The set_slot_count operation changes the number of node slots on an
ocfs2 filesystem.  Each slot has a journal and other associated files.
The number of slots is the number of nodes that can share the
filesystem.  All filesystems must have at least one slot.

This is the tunefs option '-N'.

Signed-off-by: Joel Becker <joel.becker at oracle.com>
---
 tunefs.ocfs2/op_set_slot_count.c | 1092 ++++++++++++++++++++++++++++++++++++++
 1 files changed, 1092 insertions(+), 0 deletions(-)
 create mode 100644 tunefs.ocfs2/op_set_slot_count.c

diff --git a/tunefs.ocfs2/op_set_slot_count.c b/tunefs.ocfs2/op_set_slot_count.c
new file mode 100644
index 0000000..932615d
--- /dev/null
+++ b/tunefs.ocfs2/op_set_slot_count.c
@@ -0,0 +1,1092 @@
+/* -*- mode: c; c-basic-offset: 8; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
+ * op_set_slot_count.c
+ *
+ * ocfs2 tune utility for setting the number of slots available on the
+ * filesystem.
+ *
+ * Copyright (C) 2004, 2008 Oracle.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+#include <errno.h>
+#include <inttypes.h>
+#include <limits.h>
+
+#include "ocfs2/ocfs2.h"
+#include "ocfs2/bitops.h"
+
+#include "libocfs2ne.h"
+
+/*
+ * One group descriptor queued for relocation.  move_chain_rec() builds a
+ * singly linked list of these while walking a chain and hands each entry
+ * to move_group().
+ */
+struct moved_group {
+	uint64_t blkno;	/* block the group descriptor was read from */
+	char *gd_buf;	/* block-sized buffer holding the descriptor */
+	struct moved_group *next;	/* next list entry, NULL at the end */
+};
+
+/*
+ * State shared by relink_system_alloc(), move_chain_rec(), move_group()
+ * and change_sub_alloc_slot() while the groups of one allocator are being
+ * handed over to the allocators of the remaining slots.
+ */
+struct relink_ctxt {
+	int inode_type;	/* system inode type of the allocator being relinked */
+	struct ocfs2_chain_rec *cr;	/* source chain record currently being moved */
+	uint16_t new_slot;	/* slot that receives the current chain */
+	uint64_t dst_blkno;	/* block number of the destination allocator inode */
+	char *src_inode;	/* buffer holding the allocator inode being emptied */
+	char *dst_inode;	/* buffer holding the destination allocator inode */
+	char *ex_buf;	/* scratch block for the extent block / inode being restamped */
+};
+
+/* Private data handed to remove_slot_iterate() by remove_slot_entry(). */
+struct remove_slot_ctxt {
+	ocfs2_filesys *fs;	/* filesystem whose system directory is iterated */
+	uint16_t removed_slot;	/* slot whose system files are being deleted */
+	errcode_t errcode;	/* set by the callback when deleting an inode fails */
+};
+
+
+/*
+ * Create the per-slot system files for the slots being added, i.e. for
+ * every system inode type after OCFS2_LAST_GLOBAL_SYSTEM_INODE and every
+ * slot from the current s_max_slots up to num_slots - 1.
+ *
+ * Fails up front with TUNEFS_ET_TOO_MANY_SLOTS_EXTENDED or
+ * TUNEFS_ET_TOO_MANY_SLOTS_OLD when num_slots exceeds the limit for the
+ * filesystem's slot map format.  Files found by name in the system
+ * directory are left alone.  s_max_slots itself is not changed here;
+ * update_slot_count() does that after this function succeeds.
+ *
+ * Returns 0 on success or the error of the step that failed.
+ */
+static errcode_t add_slots(ocfs2_filesys *fs, int num_slots)
+{
+	errcode_t ret;
+	uint16_t old_num = OCFS2_RAW_SB(fs->fs_super)->s_max_slots;
+	char fname[OCFS2_MAX_FILENAME_LEN];
+	uint64_t blkno;
+	int i, j, max_slots;
+	int ftype;
+
+	/* ret is preloaded with the error to report if the limit is exceeded */
+	if (ocfs2_uses_extended_slot_map(OCFS2_RAW_SB(fs->fs_super))) {
+		ret = TUNEFS_ET_TOO_MANY_SLOTS_EXTENDED;
+		max_slots = INT16_MAX;
+	} else {
+		ret = TUNEFS_ET_TOO_MANY_SLOTS_OLD;
+		max_slots = OCFS2_MAX_SLOTS;
+	}
+	if (num_slots > max_slots)
+		goto bail;
+
+	ret = 0;
+	for (i = OCFS2_LAST_GLOBAL_SYSTEM_INODE + 1; i < NUM_SYSTEM_INODES; ++i) {
+		for (j = old_num; j < num_slots; ++j) {
+			ocfs2_sprintf_system_inode_name(fname,
+							OCFS2_MAX_FILENAME_LEN,
+							i, j);
+			verbosef(VL_APP, "Creating system file \"%s\"\n",
+				 fname);
+
+			/*
+			 * Skip to the next slot if the file already exists.
+			 * NOTE(review): any lookup failure, not only "not
+			 * found", falls through to creation below.
+			 */
+			ret = ocfs2_lookup(fs, fs->fs_sysdir_blkno, fname,
+					   strlen(fname), NULL, &blkno);
+			if (!ret) {
+				verbosef(VL_APP,
+					 "System file \"%s\" already exists\n",
+					 fname);
+				continue;
+			}
+
+			/* create inode for system file */
+			ret = ocfs2_new_system_inode(fs, &blkno,
+						     ocfs2_system_inodes[i].si_mode,
+						     ocfs2_system_inodes[i].si_iflags);
+			if (ret) {
+				verbosef(VL_APP,
+					 "%s while creating inode for "
+					 "system file \"%s\"\n",
+					 error_message(ret), fname);
+				goto bail;
+			}
+
+			ftype = (S_ISDIR(ocfs2_system_inodes[i].si_mode) ?
+				 OCFS2_FT_DIR : OCFS2_FT_REG_FILE);
+
+			/* if dir, alloc space to it */
+			if (ftype == OCFS2_FT_DIR) {
+				ret = ocfs2_expand_dir(fs, blkno,
+						       fs->fs_sysdir_blkno);
+				if (ret) {
+					verbosef(VL_APP,
+						 "%s while creating space "
+						 "for directory \"%s\"\n",
+						 error_message(ret),
+						 fname);
+					goto bail;
+				}
+			}
+
+			/* Add the inode to the system dir */
+			ret = ocfs2_link(fs, fs->fs_sysdir_blkno, fname,
+					 blkno, ftype);
+			if (!ret)
+				goto file_done;
+
+			/* The system dir is full: grow it once and retry the link */
+			if (ret == OCFS2_ET_DIR_NO_SPACE) {
+				ret = ocfs2_expand_dir(fs, fs->fs_sysdir_blkno,
+						       fs->fs_sysdir_blkno);
+				if (!ret)
+					ret = ocfs2_link(fs, fs->fs_sysdir_blkno,
+							 fname, blkno, ftype);
+			}
+			if (ret) {
+				verbosef(VL_APP,
+					"%s while linking inode %"PRIu64" "
+					"as \"%s\" in the system "
+					"directory\n",
+					error_message(ret), blkno, fname);
+				goto bail;
+			}
+file_done:
+			verbosef(VL_APP, "System file \"%s\" created\n",
+				 fname);
+		}
+	}
+
+bail:
+	return ret;
+}
+
+/*
+ * Stamp ctxt->new_slot as the suballocator slot of the metadata block at
+ * blkno.  The block is handled as an extent block when the allocator being
+ * relinked is EXTENT_ALLOC_SYSTEM_INODE and as an inode otherwise.  In both
+ * cases it is read into ctxt->ex_buf, patched and written back.
+ *
+ * Returns 0 on success or the read/write error.
+ */
+static errcode_t change_sub_alloc_slot(ocfs2_filesys *fs,
+				       uint64_t blkno,
+				       struct relink_ctxt *ctxt)
+{
+	errcode_t err;
+	char *scratch = ctxt->ex_buf;
+
+	if (ctxt->inode_type != EXTENT_ALLOC_SYSTEM_INODE) {
+		struct ocfs2_dinode *inode;
+
+		/* The block is an inode: patch i_suballoc_slot. */
+		err = ocfs2_read_inode(fs, blkno, scratch);
+		if (err)
+			return err;
+
+		inode = (struct ocfs2_dinode *)scratch;
+		inode->i_suballoc_slot = ctxt->new_slot;
+
+		return ocfs2_write_inode(fs, blkno, scratch);
+	}
+
+	/* The block is an extent block: patch h_suballoc_slot. */
+	err = ocfs2_read_extent_block(fs, blkno, scratch);
+	if (err)
+		return err;
+
+	((struct ocfs2_extent_block *)scratch)->h_suballoc_slot =
+		ctxt->new_slot;
+
+	return ocfs2_write_extent_block(fs, blkno, scratch);
+}
+
+/*
+ * Link one group descriptor onto a chain of the destination allocator held
+ * in ctxt->dst_inode and write both the descriptor and the destination
+ * inode back to disk.
+ *
+ * The group becomes the new head of the chosen chain: its bg_next_group is
+ * set to the chain's current head and the chain record is pointed at it.
+ * A NULL group, or one without a block number or buffer, is silently
+ * ignored and 0 is returned.
+ *
+ * Returns 0 on success or the error from writing the descriptor or inode.
+ */
+static errcode_t move_group(ocfs2_filesys *fs,
+			    struct relink_ctxt *ctxt,
+			    struct moved_group *group)
+{
+	errcode_t ret = 0;
+	uint16_t cr_pos;
+	struct ocfs2_group_desc *gd = NULL;
+	struct ocfs2_dinode *di = NULL;
+	struct ocfs2_chain_list *cl = NULL;
+	struct ocfs2_chain_rec *cr = NULL;
+
+	if (!group || !group->blkno || !group->gd_buf)
+		goto bail;
+
+	di = (struct ocfs2_dinode *)ctxt->dst_inode;
+	cl = &di->id2.i_chain;
+
+	/* calculate the insert position. */
+	if (cl->cl_next_free_rec < cl->cl_count)
+		cr_pos = cl->cl_next_free_rec;
+	else {
+		/* Every chain record is already in use, so derive the
+		 * position from i_total / (cl_cpg * cl_bpc), taken modulo
+		 * the number of chain records.
+		 * NOTE(review): that quotient looks like the number of
+		 * groups the allocator already holds - confirm.
+		 */
+		cr_pos = di->id1.bitmap1.i_total / (cl->cl_cpg * cl->cl_bpc);
+		cr_pos %= cl->cl_count;
+	}
+
+	cr = &cl->cl_recs[cr_pos];
+
+	gd = (struct ocfs2_group_desc *)group->gd_buf;
+	gd->bg_chain = cr_pos;
+	gd->bg_parent_dinode = ctxt->dst_blkno;
+
+	/* we can safely set the bg_next_group here since all the group
+	 * below it in the moving chain is already moved to the new
+	 * position and we don't need to worry about any "lost" groups.
+	 *
+	 * Please see how we build up the group list in move_chain_rec.
+	 */
+	gd->bg_next_group = cr->c_blkno;
+
+	ret = ocfs2_write_group_desc(fs, group->blkno, group->gd_buf);
+	if (ret)
+		goto bail;
+
+	/* Update the chain record and the allocator totals in the in-memory
+	 * destination inode; a single inode write below commits them.
+	 */
+	cr->c_blkno = gd->bg_blkno;
+	cr->c_total += gd->bg_bits;
+	cr->c_free += gd->bg_free_bits_count;
+
+	/* If the chain isn't full, increase the free_rec. */
+	if (cl->cl_next_free_rec != cl->cl_count)
+		cl->cl_next_free_rec++;
+
+	di->id1.bitmap1.i_total += gd->bg_bits;
+	di->id1.bitmap1.i_used += gd->bg_bits;
+	di->id1.bitmap1.i_used -= gd->bg_free_bits_count;
+	di->i_clusters += cl->cl_cpg;
+	di->i_size += cl->cl_cpg * fs->fs_clustersize;
+
+	ret = ocfs2_write_inode(fs, ctxt->dst_blkno, ctxt->dst_inode);
+
+bail:
+	return ret;
+}
+
+/*
+ * Move every group on the chain ctxt->cr of the allocator being removed to
+ * the destination allocator loaded in ctxt->dst_inode:
+ * 1. read all the group descriptors of the chain into a list.
+ * 2. for every group, do:
+ *    1) set the Sub Alloc Slot of each block whose bit is set in the
+ *       group bitmap (extent block or inode, see change_sub_alloc_slot).
+ *    2) change the GROUP_PARENT according to its future owner.
+ *    3) link the group to the new slot files.
+ *
+ * Returns 0 on success (an empty chain is a success) or the first error.
+ * The list and its buffers are always freed before returning.
+ */
+static errcode_t move_chain_rec(ocfs2_filesys *fs, struct relink_ctxt *ctxt)
+{
+	errcode_t ret = 0;
+	int i, start, end = 1;
+	uint64_t blkno, gd_blkno = ctxt->cr->c_blkno;
+	struct ocfs2_group_desc *gd = NULL;
+	struct moved_group *group = NULL, *group_head = NULL;
+
+	if (gd_blkno == 0)
+		goto bail;
+
+	/* Record the groups in a local list.
+	 *
+	 * We record the group in a reverse order, so the first group
+	 * will be at the end of the group list. This is useful for
+	 * fsck.ocfs2 when any error happens during the move of groups
+	 * and we can safely move the group also.
+	 */
+	while (gd_blkno) {
+		ret = ocfs2_malloc0(sizeof(struct moved_group), &group);
+		if (ret)
+			goto bail;
+		memset(group, 0, sizeof(struct moved_group));
+
+		/* Insert the group into the list before anything else can
+		 * fail, so the cleanup at bail always finds and frees it.
+		 */
+		group->next = group_head;
+		group_head = group;
+
+		ret = ocfs2_malloc_block(fs->fs_io, &group->gd_buf);
+		if (ret)
+			goto bail;
+
+		ret = ocfs2_read_group_desc(fs, gd_blkno, group->gd_buf);
+		if (ret)
+			goto bail;
+
+		group->blkno = gd_blkno;
+		gd = (struct ocfs2_group_desc *)group->gd_buf;
+		gd_blkno = gd->bg_next_group;
+	}
+
+	group = group_head;
+	while (group) {
+		gd = (struct ocfs2_group_desc *)group->gd_buf;
+
+		/* The scan starts at bit 1, so bit 0 is never restamped.
+		 * NOTE(review): presumably bit 0 covers the group
+		 * descriptor block itself - confirm.
+		 */
+		end = 1;
+		/* Modify the "Sub Alloc Slot" in the extent block/inodes. */
+		while (end < gd->bg_bits) {
+			start = ocfs2_find_next_bit_set(gd->bg_bitmap,
+							gd->bg_bits, end);
+			if (start >= gd->bg_bits)
+				break;
+
+			end = ocfs2_find_next_bit_clear(gd->bg_bitmap,
+							gd->bg_bits, start);
+
+			/* [start, end) is a run of set bits; bit i maps to
+			 * block group->blkno + i.
+			 */
+			for (i = start; i < end; i++) {
+				blkno = group->blkno + i;
+
+				ret = change_sub_alloc_slot(fs, blkno, ctxt);
+				if (ret)
+					goto bail;
+
+			}
+		}
+
+		/* move the group to the new slots. */
+		ret = move_group(fs, ctxt, group);
+		if (ret)
+			goto bail;
+
+		group = group->next;
+	}
+
+bail:
+	/* Free the whole list, on success and on failure alike. */
+	group = group_head;
+	while (group) {
+		group_head = group->next;
+		if (group->gd_buf)
+			ocfs2_free(&group->gd_buf);
+		ocfs2_free(&group);
+		group = group_head;
+	}
+	return ret;
+}
+
+/*
+ * Hand every group of the allocator of type inode_type that belongs to
+ * removed_slot over to the allocators of the remaining slots, then write
+ * the source allocator back empty.
+ *
+ * Chain i of the source allocator goes to slot (i % new_slots).  An
+ * allocator whose i_total is 0 has nothing to move and is reported as a
+ * success without being rewritten.
+ *
+ * Returns 0 on success, OCFS2_ET_INODE_NOT_VALID when the source inode
+ * lacks the valid/bitmap/chain flags, or the error of the step that
+ * failed.
+ */
+static errcode_t relink_system_alloc(ocfs2_filesys *fs,
+				     uint16_t removed_slot,
+				     uint16_t new_slots,
+				     int inode_type)
+{
+	errcode_t ret;
+	int16_t i;
+	uint64_t blkno;
+	struct ocfs2_dinode *di = NULL;
+	struct ocfs2_chain_list *cl = NULL;
+	struct relink_ctxt ctxt;
+	char fname[OCFS2_MAX_FILENAME_LEN];
+
+	memset(&ctxt, 0, sizeof(ctxt));
+
+	ocfs2_sprintf_system_inode_name(fname, OCFS2_MAX_FILENAME_LEN,
+					inode_type, removed_slot);
+	verbosef(VL_APP, "Relinking system allocator \"%s\"\n", fname);
+
+	ret = ocfs2_lookup_system_inode(fs, inode_type,
+					removed_slot, &blkno);
+	if (ret) {
+		verbosef(VL_APP, "%s while looking up the allocator\n",
+			 error_message(ret));
+		goto bail;
+	}
+
+	ret = ocfs2_malloc_block(fs->fs_io, &ctxt.src_inode);
+	if (ret) {
+		verbosef(VL_APP,
+			 "%s while allocating the inode buffer\n",
+			 error_message(ret));
+		goto bail;
+	}
+
+	ret = ocfs2_read_inode(fs, blkno, ctxt.src_inode);
+	if (ret) {
+		verbosef(VL_APP,
+			 "%s while reading allocator inode %"PRIu64"\n",
+			 error_message(ret), blkno);
+		goto bail;
+	}
+
+	di = (struct ocfs2_dinode *)ctxt.src_inode;
+
+	if (!(di->i_flags & OCFS2_VALID_FL) ||
+	    !(di->i_flags & OCFS2_BITMAP_FL) ||
+	    !(di->i_flags & OCFS2_CHAIN_FL)) {
+		verbosef(VL_APP, "Allocator inode %"PRIu64" is corrupt.\n",
+			 blkno);
+		/* Report the corruption: ret is still 0 from the read
+		 * above, and returning that would let the caller go on to
+		 * delete an allocator whose groups were never moved.
+		 */
+		ret = OCFS2_ET_INODE_NOT_VALID;
+		goto bail;
+	}
+
+	/* Nothing to move; ret is 0 here so this counts as success. */
+	if (di->id1.bitmap1.i_total == 0)
+		goto bail;
+
+	/* Iterate all the groups and modify the group descriptors accordingly. */
+	ret = ocfs2_malloc_block(fs->fs_io, &ctxt.ex_buf);
+	if (ret) {
+		verbosef(VL_APP,
+			 "%s while allocating an extent block buffer\n",
+			 error_message(ret));
+		goto bail;
+	}
+
+	ret = ocfs2_malloc_block(fs->fs_io, &ctxt.dst_inode);
+	if (ret) {
+		verbosef(VL_APP,
+			 "%s while allocating the destination inode buffer\n",
+			 error_message(ret));
+		goto bail;
+	}
+
+	cl = &di->id2.i_chain;
+	ctxt.inode_type = inode_type;
+
+	/* Iterate all the chain records, last one first, and move them
+	 * to the new slots.
+	 */
+	for (i = cl->cl_next_free_rec - 1; i >= 0; i--) {
+		ctxt.new_slot = i % new_slots;
+		ret = ocfs2_lookup_system_inode(fs, inode_type,
+						ctxt.new_slot,
+						&ctxt.dst_blkno);
+		if (ret) {
+			verbosef(VL_APP,
+				 "%s while finding the target allocator "
+				 "for slot %d\n",
+				 error_message(ret), ctxt.new_slot);
+			goto bail;
+		}
+
+		/* Re-read the destination inode for every chain, since
+		 * move_group() updates it on disk.
+		 */
+		ret = ocfs2_read_inode(fs, ctxt.dst_blkno, ctxt.dst_inode);
+		if (ret) {
+			verbosef(VL_APP,
+				 "%s while reading target allocator inode "
+				 "%"PRIu64"\n",
+				 error_message(ret), ctxt.dst_blkno);
+			goto bail;
+		}
+
+		ctxt.cr = &cl->cl_recs[i];
+
+		ret = move_chain_rec(fs, &ctxt);
+		if (ret) {
+			verbosef(VL_APP,
+				"%s while trying to move a chain record "
+				"to the allocator in slot %d\n",
+				error_message(ret), ctxt.new_slot);
+			goto bail;
+		}
+	}
+
+	/* empty the original alloc files. */
+	di->id1.bitmap1.i_used = 0;
+	di->id1.bitmap1.i_total = 0;
+	di->i_clusters = 0;
+	di->i_size = 0;
+
+	cl = &di->id2.i_chain;
+	cl->cl_next_free_rec = 0;
+	memset(cl->cl_recs, 0, sizeof(struct ocfs2_chain_rec) * cl->cl_count);
+
+	ret = ocfs2_write_inode(fs, blkno, ctxt.src_inode);
+	if (ret)
+		verbosef(VL_APP,
+			 "%s while writing out the empty allocator inode\n",
+			 error_message(ret));
+
+bail:
+	if (ctxt.ex_buf)
+		ocfs2_free(&ctxt.ex_buf);
+	if (ctxt.dst_inode)
+		ocfs2_free(&ctxt.dst_inode);
+	if (ctxt.src_inode)
+		ocfs2_free(&ctxt.src_inode);
+
+	if (!ret)
+		verbosef(VL_APP, "Successfully relinked allocator \"%s\"\n",
+			 fname);
+	return ret;
+}
+
+/* Empty the content of the specified journal file.
+ * Most of the code is copied from ocfs2_format_journal.
+ *
+ * Zeros are written over the whole file (i_size bytes of ci) in chunks of
+ * at most BUFLEN bytes.  The size is tracked in 64 bits so that a journal
+ * of 4GB or more is zeroed completely, and the zero buffer is released on
+ * every exit path.
+ *
+ * Returns 0 on success or the allocation/write error.
+ */
+static errcode_t empty_journal(ocfs2_filesys *fs,
+			       ocfs2_cached_inode *ci)
+{
+	errcode_t ret = 0;
+	char *buf = NULL;
+	int bs_bits = OCFS2_RAW_SB(fs->fs_super)->s_blocksize_bits;
+	uint64_t offset = 0;
+	uint64_t count;
+	uint32_t wrote, len;
+
+#define BUFLEN	1048576
+	ret = ocfs2_malloc_blocks(fs->fs_io, (BUFLEN >> bs_bits), &buf);
+	if (ret)
+		goto out;
+	memset(buf, 0, BUFLEN);
+
+	count = ci->ci_inode->i_size;
+	while (count) {
+		/* Never ask for more than the buffer holds. */
+		len = (count < BUFLEN) ? (uint32_t)count : (uint32_t)BUFLEN;
+
+		ret = ocfs2_file_write(ci, buf, len, offset, &wrote);
+		if (ret)
+			goto out;
+		offset += wrote;
+		count -= wrote;
+	}
+
+out:
+	if (buf)
+		ocfs2_free(&buf);
+	return ret;
+}
+
+/*
+ * Zero the journal of removed_slot and then truncate it to length 0.
+ *
+ * Returns 0 on success or the error from looking up, reading, emptying or
+ * truncating the journal.
+ */
+static errcode_t empty_and_truncate_journal(ocfs2_filesys *fs,
+					    uint16_t removed_slot)
+{
+	errcode_t err;
+	uint64_t journal_blkno;
+	ocfs2_cached_inode *journal = NULL;
+	char name[OCFS2_MAX_FILENAME_LEN];
+
+	ocfs2_sprintf_system_inode_name(name, OCFS2_MAX_FILENAME_LEN,
+					JOURNAL_SYSTEM_INODE,
+					removed_slot);
+	verbosef(VL_APP, "Truncating journal \"%s\"\n", name);
+
+	err = ocfs2_lookup_system_inode(fs, JOURNAL_SYSTEM_INODE,
+					removed_slot, &journal_blkno);
+	if (err) {
+		verbosef(VL_APP, "%s while looking up journal \"%s\"\n",
+			 error_message(err), name);
+		return err;
+	}
+
+	err = ocfs2_read_cached_inode(fs, journal_blkno, &journal);
+	if (err) {
+		verbosef(VL_APP,
+			 "%s while reading journal inode %"PRIu64"\n",
+			 error_message(err), journal_blkno);
+		goto done;
+	}
+
+	/* The journal must be wiped before its clusters are released: it
+	 * may hold blocks that look like valid inodes (apart from
+	 * i_blkno), and fsck.ocfs2 may complain if that range is later
+	 * reused by an inode allocator.
+	 */
+	err = empty_journal(fs, journal);
+	if (err) {
+		verbosef(VL_APP, "%s while emptying journal \"%s\"\n",
+			 error_message(err), name);
+		goto done;
+	}
+
+	err = ocfs2_truncate(fs, journal_blkno, 0);
+	if (err)
+		verbosef(VL_APP, "%s while truncating journal \"%s\"\n",
+			 error_message(err), name);
+	else
+		verbosef(VL_APP, "Journal \"%s\" truncated\n", name);
+
+done:
+	if (journal)
+		ocfs2_free_cached_inode(fs, journal);
+	return err;
+}
+
+/*
+ * Truncate the orphan directory of removed_slot to length 0.
+ *
+ * Returns 0 on success or the error from the lookup or the truncate.
+ */
+static errcode_t truncate_orphan_dir(ocfs2_filesys *fs,
+				     uint16_t removed_slot)
+{
+	errcode_t err;
+	uint64_t orphan_blkno;
+	char name[OCFS2_MAX_FILENAME_LEN];
+
+	ocfs2_sprintf_system_inode_name(name, OCFS2_MAX_FILENAME_LEN,
+					ORPHAN_DIR_SYSTEM_INODE,
+					removed_slot);
+	verbosef(VL_APP, "Truncating orphan dir \"%s\"\n", name);
+
+	err = ocfs2_lookup_system_inode(fs, ORPHAN_DIR_SYSTEM_INODE,
+					removed_slot, &orphan_blkno);
+	if (err) {
+		verbosef(VL_APP,
+			 "%s while looking up orphan dir \"%s\"\n",
+			 error_message(err), name);
+		return err;
+	}
+
+	err = ocfs2_truncate(fs, orphan_blkno, 0);
+	if (err) {
+		verbosef(VL_APP,
+			 "%s while truncating orphan dir \"%s\"\n",
+			 error_message(err), name);
+		return err;
+	}
+
+	verbosef(VL_APP, "Orphan dir \"%s\" truncated\n", name);
+	return 0;
+}
+
+/*
+ * ocfs2_dir_iterate() callback used by remove_slot_entry().
+ *
+ * priv_data is a struct remove_slot_ctxt.  Every entry whose name ends in
+ * ":NNNN" for the slot being removed has its inode deleted and its
+ * directory entry cleared.
+ *
+ * Names shorter than the suffix, or too long for the local buffer, are
+ * skipped.  This avoids comparing from an address before the start of the
+ * buffer and writing the terminating NUL past its end.
+ *
+ * Returns OCFS2_DIRENT_CHANGED when an entry was removed,
+ * OCFS2_DIRENT_ERROR (with ctxt->errcode set) when the delete failed, and
+ * 0 for entries that do not belong to the slot.
+ */
+static int remove_slot_iterate(struct ocfs2_dir_entry *dirent, int offset,
+			       int blocksize, char *buf, void *priv_data)
+{
+	struct remove_slot_ctxt *ctxt =
+		(struct remove_slot_ctxt *)priv_data;
+	int ret_flags = 0;
+	size_t namelen = dirent->name_len;
+	size_t taillen;
+	errcode_t ret;
+	char dname[OCFS2_MAX_FILENAME_LEN + 1];
+	char tail[OCFS2_MAX_FILENAME_LEN];
+
+	snprintf(tail, sizeof(tail), ":%04d", ctxt->removed_slot);
+	taillen = strlen(tail);
+
+	/* Too short to carry the suffix, or too long to copy safely. */
+	if (namelen < taillen || namelen >= sizeof(dname))
+		return ret_flags;
+
+	/* dirent->name is not NUL-terminated; make a terminated copy. */
+	memcpy(dname, dirent->name, namelen);
+	dname[namelen] = '\0';
+
+	if (!strcmp(dname + (namelen - taillen), tail)) {
+		verbosef(VL_APP, "Unlinking system file \"%s\"\n",
+			 dname);
+		ret = ocfs2_delete_inode(ctxt->fs, dirent->inode);
+		if (ret) {
+			verbosef(VL_APP,
+				 "%s while unlinking system file \"%s\"\n",
+				 error_message(ret), dname);
+			ret_flags |= OCFS2_DIRENT_ERROR;
+			ctxt->errcode = ret;
+		} else {
+			verbosef(VL_APP,
+				 "Successfully unlinked system file "
+				 "\"%s\"\n",
+				 dname);
+			/* Clearing the inode number frees the entry. */
+			dirent->inode = 0;
+			ret_flags |= OCFS2_DIRENT_CHANGED;
+		}
+	}
+
+	return ret_flags;
+}
+
+/*
+ * Delete every system file that belongs to removed_slot and clear its
+ * entry in the system directory (see remove_slot_iterate()).
+ *
+ * Returns 0 on success.  An error recorded by the callback takes
+ * precedence; otherwise an error from the directory iteration itself is
+ * returned rather than being dropped.
+ */
+static errcode_t remove_slot_entry(ocfs2_filesys *fs, uint16_t removed_slot)
+{
+	errcode_t ret;
+	struct remove_slot_ctxt ctxt = {
+		.fs = fs,
+		.removed_slot = removed_slot,
+		.errcode = 0
+	};
+
+	ret = ocfs2_dir_iterate(fs, fs->fs_sysdir_blkno,
+				OCFS2_DIRENT_FLAG_EXCLUDE_DOTS, NULL,
+				remove_slot_iterate, &ctxt);
+
+	if (ctxt.errcode)
+		return ctxt.errcode;
+	return ret;
+}
+
+/*
+ * Decrement i_links_count of the inode at blkno and write it back.
+ *
+ * blkno is a full 64-bit block number; the caller passes
+ * fs->fs_sysdir_blkno, which a 16-bit parameter would truncate.
+ *
+ * Returns 0 on success, OCFS2_ET_INODE_NOT_VALID when the link count is
+ * already 0, or the allocation/read/write error.
+ */
+static errcode_t decrease_link_count(ocfs2_filesys *fs, uint64_t blkno)
+{
+	errcode_t ret;
+	char *buf = NULL;
+	struct ocfs2_dinode *di  = NULL;
+
+	ret = ocfs2_malloc_block(fs->fs_io, &buf);
+	if (ret)
+		goto bail;
+
+	ret = ocfs2_read_inode(fs, blkno, buf);
+	if (ret)
+		goto bail;
+
+	di = (struct ocfs2_dinode *)buf;
+
+	if (di->i_links_count > 0)
+		di->i_links_count--;
+	else {
+		ret = OCFS2_ET_INODE_NOT_VALID;
+		goto bail;
+	}
+
+	ret = ocfs2_write_inode(fs, blkno, buf);
+bail:
+	if (buf)
+		ocfs2_free(&buf);
+	return ret;
+}
+
+/*
+ * ocfs2_dir_iterate() callback used by orphan_dir_check().  priv_data
+ * points to an int flag; being called at all means the directory has an
+ * entry, so the flag is raised and the iteration is stopped.
+ */
+static int orphan_iterate(struct ocfs2_dir_entry *dirent, int offset,
+			  int blocksize, char *buf, void *priv_data)
+{
+	int *found = priv_data;
+
+	/* One entry is enough; there is no point in visiting the rest. */
+	*found = 1;
+	return OCFS2_DIRENT_ABORT;
+}
+
+/*
+ * Verify that the orphan directory of every slot about to be removed
+ * (new_slots .. s_max_slots - 1) is empty.
+ *
+ * Returns 0 when all are empty, TUNEFS_ET_ORPHAN_DIR_NOT_EMPTY when an
+ * entry is found, or the error from looking up or iterating a directory.
+ * A failed iteration stops the check, so that an unreadable orphan dir is
+ * never mistaken for an empty one.
+ */
+static errcode_t orphan_dir_check(ocfs2_filesys *fs,
+				  uint16_t new_slots)
+{
+	errcode_t ret = 0;
+	uint64_t blkno;
+	uint16_t i;
+	int has_orphan = 0;
+	uint16_t max_slots = OCFS2_RAW_SB(fs->fs_super)->s_max_slots;
+
+	for (i = new_slots ; i < max_slots; ++i) {
+		ret = ocfs2_lookup_system_inode(fs, ORPHAN_DIR_SYSTEM_INODE,
+						i, &blkno);
+		if (ret) {
+			verbosef(VL_APP,
+				 "%s while looking up orphan dir for "
+				 "slot %u during orphan dir check\n",
+				 error_message(ret), i);
+			break;
+		}
+
+		ret = ocfs2_dir_iterate(fs, blkno,
+					OCFS2_DIRENT_FLAG_EXCLUDE_DOTS, NULL,
+					orphan_iterate, &has_orphan);
+
+		/* A found entry wins over whatever the iteration returned. */
+		if (has_orphan) {
+			ret = TUNEFS_ET_ORPHAN_DIR_NOT_EMPTY;
+			verbosef(VL_APP,
+				 "Entries found in orphan dir for slot %u\n",
+				 i);
+			break;
+		}
+
+		if (ret) {
+			verbosef(VL_APP,
+				 "%s while iterating orphan dir for "
+				 "slot %u during orphan dir check\n",
+				 error_message(ret), i);
+			break;
+		}
+	}
+
+	return ret;
+}
+
+/*
+ * Verify that the local alloc file of every slot about to be removed
+ * (new_slots .. s_max_slots - 1) has an i_total of 0.
+ *
+ * Returns 0 when all are empty, TUNEFS_ET_LOCAL_ALLOC_NOT_EMPTY for the
+ * first one that is not, or the error from allocating the buffer, looking
+ * up the file or reading its inode.
+ */
+static errcode_t local_alloc_check(ocfs2_filesys *fs,
+				   uint16_t new_slots)
+{
+	errcode_t err = 0;
+	uint16_t slot = new_slots;
+	uint16_t slot_count = OCFS2_RAW_SB(fs->fs_super)->s_max_slots;
+	uint64_t la_blkno;
+	char *inode_buf = NULL;
+	struct ocfs2_dinode *la_inode = NULL;
+
+	err = ocfs2_malloc_block(fs->fs_io, &inode_buf);
+	if (err) {
+		verbosef(VL_APP,
+			 "%s while allocating inode buffer for local "
+			 "alloc check\n",
+			 error_message(err));
+		goto out;
+	}
+
+	while (slot < slot_count) {
+		err = ocfs2_lookup_system_inode(fs, LOCAL_ALLOC_SYSTEM_INODE,
+						slot, &la_blkno);
+		if (err) {
+			verbosef(VL_APP,
+				 "%s while looking up local alloc for "
+				 "slot %u during local alloc check\n",
+				 error_message(err), slot);
+			goto out;
+		}
+
+		err = ocfs2_read_inode(fs, la_blkno, inode_buf);
+		if (err) {
+			verbosef(VL_APP,
+				 "%s while reading inode %"PRIu64" "
+				 "during local alloc check\n",
+				 error_message(err), la_blkno);
+			goto out;
+		}
+
+		la_inode = (struct ocfs2_dinode *)inode_buf;
+		if (la_inode->id1.bitmap1.i_total > 0) {
+			err = TUNEFS_ET_LOCAL_ALLOC_NOT_EMPTY;
+			verbosef(VL_APP,
+				 "Local alloc for slot %u is not empty\n",
+				 slot);
+			goto out;
+		}
+
+		++slot;
+	}
+
+out:
+	if (inode_buf)
+		ocfs2_free(&inode_buf);
+	return err;
+}
+
+/*
+ * Verify that the truncate log of every slot about to be removed
+ * (new_slots .. s_max_slots - 1) has a tl_used of 0.
+ *
+ * Returns 0 when all are empty, TUNEFS_ET_TRUNCATE_LOG_NOT_EMPTY for the
+ * first one that is not, or the error from allocating the buffer, looking
+ * up the file or reading its inode.
+ */
+static errcode_t truncate_log_check(ocfs2_filesys *fs,
+				    uint16_t new_slots)
+{
+	errcode_t err = 0;
+	uint16_t slot;
+	uint16_t slot_count = OCFS2_RAW_SB(fs->fs_super)->s_max_slots;
+	uint64_t tl_blkno;
+	char *inode_buf = NULL;
+	struct ocfs2_dinode *tl_inode = NULL;
+
+	err = ocfs2_malloc_block(fs->fs_io, &inode_buf);
+	if (err) {
+		verbosef(VL_APP,
+			 "%s while allocating inode buffer for "
+			 "truncate log check\n",
+			 error_message(err));
+		goto out;
+	}
+
+	for (slot = new_slots; slot < slot_count; ++slot) {
+		err = ocfs2_lookup_system_inode(fs, TRUNCATE_LOG_SYSTEM_INODE,
+						slot, &tl_blkno);
+		if (err) {
+			verbosef(VL_APP,
+				 "%s while looking up truncate log for "
+				 "slot %u during truncate log check\n",
+				 error_message(err), slot);
+			break;
+		}
+
+		err = ocfs2_read_inode(fs, tl_blkno, inode_buf);
+		if (err) {
+			verbosef(VL_APP,
+				 "%s while reading inode %"PRIu64" "
+				 "during truncate log check\n",
+				 error_message(err), tl_blkno);
+			break;
+		}
+
+		tl_inode = (struct ocfs2_dinode *)inode_buf;
+		if (tl_inode->id2.i_dealloc.tl_used > 0) {
+			err = TUNEFS_ET_TRUNCATE_LOG_NOT_EMPTY;
+			verbosef(VL_APP,
+				 "Truncate log for slot %u is not empty\n",
+				 slot);
+			break;
+		}
+	}
+
+out:
+	if (inode_buf)
+		ocfs2_free(&inode_buf);
+	return err;
+}
+
+/*
+ * Run the three pre-removal checks, in order, for the slots at and above
+ * num_slots: orphan directories, local allocs, truncate logs.  The first
+ * failing check ends the sequence and its error is returned; 0 means all
+ * three passed.
+ */
+static errcode_t remove_slot_check(ocfs2_filesys *fs, int num_slots)
+{
+	errcode_t err;
+
+	err = orphan_dir_check(fs, num_slots);
+	if (err)
+		return err;
+
+	err = local_alloc_check(fs, num_slots);
+	if (err)
+		return err;
+
+	return truncate_log_check(fs, num_slots);
+}
+
+/*
+ * Shrink the filesystem to num_slots slots by removing the highest slot
+ * repeatedly until s_max_slots - 1 is below num_slots.
+ *
+ * The slots to be removed are first checked by remove_slot_check(), then
+ * the OCFS2_TUNEFS_INPROG_REMOVE_SLOT flag is set.  For each removed slot
+ * the allocators are relinked, the orphan dir and journal are truncated,
+ * s_max_slots is decremented and written, and finally the slot's system
+ * files are deleted.
+ *
+ * Returns 0 on success or the error of the step that failed; on failure
+ * the in-progress flag is left set.
+ */
+static errcode_t remove_slots(ocfs2_filesys *fs, int num_slots)
+{
+	errcode_t ret;
+	uint16_t old_num = OCFS2_RAW_SB(fs->fs_super)->s_max_slots;
+	uint16_t removed_slot = old_num - 1;
+
+	ret = remove_slot_check(fs, num_slots);
+	if (ret)
+		goto bail;
+
+	/* This is cleared up in update_slot_count() if everything works */
+	ret = tunefs_set_in_progress(fs, OCFS2_TUNEFS_INPROG_REMOVE_SLOT);
+	if (ret)
+		goto bail;
+
+	/* We remove the slots one at a time so that fsck.ocfs2 can work
+	 * well and we can continue our work easily in case of any panic.
+	 */
+	while (removed_slot >= num_slots) {
+		/* Link the specified extent alloc file to others. */
+		ret = relink_system_alloc(fs, removed_slot, num_slots,
+					  EXTENT_ALLOC_SYSTEM_INODE);
+		if (ret)
+			goto bail;
+
+		/* Link the specified inode alloc file to others. */
+		ret = relink_system_alloc(fs, removed_slot, num_slots,
+					  INODE_ALLOC_SYSTEM_INODE);
+		if (ret)
+			goto bail;
+
+		/* Truncate the orphan dir to release its clusters
+		 * to the global bitmap.
+		 */
+		ret = truncate_orphan_dir(fs, removed_slot);
+		if (ret)
+			goto bail;
+
+		/* empty the content of journal and truncate its clusters. */
+		ret = empty_and_truncate_journal(fs, removed_slot);
+		if (ret)
+			goto bail;
+
+		/* Now, we decrease the max_slots first and then remove the
+		 * slots for the reason that:
+		 *
+		 * 1. ocfs2_lock_down_clusters needs to lock all the journal
+		 * files. so if we delete the journal entry first and fail
+		 * to decrease the max_slots, the whole cluster can't be
+		 * locked any more due to the loss of journals.
+		 *
+		 * 2. Now all the resources except the inodes are freed
+		 * so it is safe to decrease the slots first, and if any
+		 * panic happens after we decrease the slots, we can ignore
+		 * them, and actually if we want to increase the slot in the
+		 * future, we can reuse these inodes.
+		 */
+
+		/* The slot number is updated in the super block.*/
+		OCFS2_RAW_SB(fs->fs_super)->s_max_slots--;
+		ret = ocfs2_write_primary_super(fs);
+		if (ret)
+			goto bail;
+
+		/* The extra system dir entries should be removed. */
+		ret = remove_slot_entry(fs, removed_slot);
+		if (ret)
+			goto bail;
+
+		/* Decrease the i_links_count in system file directory
+		 * since the orphan_dir is removed.
+		 */
+		ret = decrease_link_count(fs, fs->fs_sysdir_blkno);
+		if (ret)
+			goto bail;
+
+		removed_slot--;
+	}
+
+bail:
+	return ret;
+}
+
+/*
+ * Change the number of node slots on fs to num_slots.
+ *
+ * Does nothing (and returns 0) when the count is already num_slots or
+ * when the user declines the tunefs_interact() prompt.  Otherwise slots
+ * are added or removed with signals blocked, new journals are sized when
+ * growing, the slot map is reformatted, the remove-slot in-progress flag
+ * is cleared when shrinking, and the superblock is written.
+ *
+ * Returns 0 on success or the error of the step that failed.
+ */
+static errcode_t update_slot_count(ocfs2_filesys *fs, int num_slots)
+{
+	errcode_t ret = 0;
+	int orig_slots = OCFS2_RAW_SB(fs->fs_super)->s_max_slots;
+
+	if (num_slots == orig_slots) {
+		verbosef(VL_APP,
+			 "Device \"%s\" already has %d node slots; "
+			 "nothing to do\n",
+			 fs->fs_devname, num_slots);
+		goto out;
+	}
+
+	if (!tunefs_interact("Change the number of node slots on device "
+			     "\"%s\" from %d to %d? ",
+			     fs->fs_devname, orig_slots, num_slots))
+		goto out;
+
+	/* Signals stay blocked until out_unblock */
+	tunefs_block_signals();
+	if (num_slots > orig_slots)
+		ret = add_slots(fs, num_slots);
+	else
+		ret = remove_slots(fs, num_slots);
+	if (ret)
+		goto out_unblock;
+
+	/* add_slots() does not touch s_max_slots; remove_slots() has
+	 * already counted it down to num_slots.
+	 */
+	OCFS2_RAW_SB(fs->fs_super)->s_max_slots = num_slots;
+
+	if (num_slots > orig_slots) {
+		/* Grow the new journals to match the first slot */
+		verbosef(VL_APP,
+			 "Allocating space for the new journals\n");
+		ret = tunefs_set_journal_size(fs, 0);
+		if (!ret)
+			verbosef(VL_APP, "Journal space allocated\n");
+		else {
+			verbosef(VL_APP,
+				 "%s while trying to size the new journals\n",
+				 error_message(ret));
+			goto out_unblock;
+		}
+	}
+
+	ret = ocfs2_format_slot_map(fs);
+	if (ret)
+		goto out_unblock;
+
+	/* Clears the flag that remove_slots() set before it started */
+	if (num_slots < orig_slots) {
+		ret = tunefs_clear_in_progress(fs,
+					       OCFS2_TUNEFS_INPROG_REMOVE_SLOT);
+		if (ret)
+			goto out_unblock;
+	}
+
+	ret = ocfs2_write_super(fs);
+
+out_unblock:
+	tunefs_unblock_signals();
+
+out:
+	return ret;
+}
+
+/*
+ * Parse the slot count argument and stash it in op->to_private for
+ * set_slot_count_run().
+ *
+ * The argument must be a complete base-10 number between 1 and INT_MAX.
+ * Overflow is detected through errno (ERANGE), and an argument without
+ * any digits is rejected as an invalid number.  The upper limit imposed
+ * by the slot map format is checked later, in add_slots().
+ *
+ * Returns 0 on success, 1 after reporting an error with errorf().
+ */
+static int set_slot_count_parse_option(struct tunefs_operation *op,
+				       char *arg)
+{
+	int rc = 1;
+	char *ptr = NULL;
+	long num_slots;
+
+	if (!arg) {
+		errorf("Number of slots not specified\n");
+		goto out;
+	}
+
+	/* strtol() only reports overflow through errno, so reset it first */
+	errno = 0;
+	num_slots = strtol(arg, &ptr, 10);
+	if (errno == ERANGE) {
+		errorf("Number of slots is out of range: %s\n", arg);
+		goto out;
+	}
+	/* ptr == arg means no digits were consumed at all */
+	if ((ptr == arg) || (*ptr != '\0')) {
+		errorf("Invalid number: \"%s\"\n", arg);
+		goto out;
+	}
+	if (num_slots < 1) {
+		errorf("At least one slot required\n");
+		goto out;
+	}
+	if (num_slots > INT_MAX) {
+		errorf("Number of slots is out of range: %s\n", arg);
+		goto out;
+	}
+	/*
+	 * We'll check the maximum number of slots after we've opened
+	 * the filesystem and determined the slot map format.
+	 */
+
+	op->to_private = (void *)num_slots;
+	rc = 0;
+
+out:
+	return rc;
+}
+
+/*
+ * Run the operation: apply the slot count saved by
+ * set_slot_count_parse_option() to fs.
+ *
+ * Returns 0 on success, 1 after reporting the failure with tcom_err().
+ */
+static int set_slot_count_run(struct tunefs_operation *op,
+			      ocfs2_filesys *fs, int flags)
+{
+	errcode_t err;
+	int rc = 0;
+	/* The value was stored as a long; go back through long rather than
+	 * casting the pointer straight to a narrower int.
+	 */
+	int num_slots = (int)(long)op->to_private;
+
+	err = update_slot_count(fs, num_slots);
+	if (err) {
+		tcom_err(err,
+			 "- unable to update the number of slots on device "
+			 "\"%s\"",
+			 fs->fs_devname);
+		rc = 1;
+	}
+
+	return rc;
+}
+
+
+/*
+ * Declare the set_slot_count operation: usage text, the flags it needs
+ * (read-write access and allocation), and its parse and run callbacks.
+ * NOTE(review): presumably this defines set_slot_count_op, the object the
+ * DEBUG_EXE main() passes to tunefs_op_main() - confirm in libocfs2ne.h.
+ */
+DEFINE_TUNEFS_OP(set_slot_count,
+		 "Usage: op_set_slot_count [opts] <device> "
+		 "<number_of_slots>\n",
+		 TUNEFS_FLAG_RW | TUNEFS_FLAG_ALLOCATION,
+		 set_slot_count_parse_option,
+		 set_slot_count_run);
+
+#ifdef DEBUG_EXE
+/* Stand-alone entry point, built only when DEBUG_EXE is defined. */
+int main(int argc, char *argv[])
+{
+	int status = tunefs_op_main(argc, argv, &set_slot_count_op);
+
+	return status;
+}
+#endif
-- 
1.5.6.3




More information about the Ocfs2-tools-devel mailing list