[Ocfs2-tools-commits] taoma commits r1384 - in trunk: debugfs.ocfs2 debugfs.ocfs2/include fsck.ocfs2 fsck.ocfs2/include libocfs2 libocfs2/include sizetest tunefs.ocfs2

svn-commits at oss.oracle.com svn-commits at oss.oracle.com
Wed Aug 15 09:05:57 PDT 2007


Author: taoma
Date: 2007-08-15 09:05:43 -0700 (Wed, 15 Aug 2007)
New Revision: 1384

Added:
   trunk/tunefs.ocfs2/remove_slot.c
Modified:
   trunk/debugfs.ocfs2/dump.c
   trunk/debugfs.ocfs2/include/utils.h
   trunk/debugfs.ocfs2/utils.c
   trunk/fsck.ocfs2/fsck.c
   trunk/fsck.ocfs2/fsck.ocfs2.checks.8.in
   trunk/fsck.ocfs2/include/journal.h
   trunk/fsck.ocfs2/journal.c
   trunk/fsck.ocfs2/pass0.c
   trunk/fsck.ocfs2/pass4.c
   trunk/libocfs2/include/ocfs2.h
   trunk/libocfs2/include/ocfs2_fs.h
   trunk/libocfs2/openfs.c
   trunk/sizetest/sizes.txt
   trunk/sizetest/sizetest.c
   trunk/tunefs.ocfs2/Makefile
   trunk/tunefs.ocfs2/tunefs.c
   trunk/tunefs.ocfs2/tunefs.h
Log:
Add remove-slot features in tunefs.ocfs2.

tunefs.ocfs2 has been able to increase the slot for a long time.
Now the support for removing slots is also added. There are also some changes
in debugfs.ocfs2 and fsck.ocfs2 to be fit for this new feature.

Signed-off-by: mfasheh
Signed-off-by: smushran

Modified: trunk/debugfs.ocfs2/dump.c
===================================================================
--- trunk/debugfs.ocfs2/dump.c	2007-08-15 01:10:31 UTC (rev 1383)
+++ trunk/debugfs.ocfs2/dump.c	2007-08-15 16:05:43 UTC (rev 1384)
@@ -39,10 +39,12 @@
 	GString *compat = NULL;
 	GString *incompat = NULL;
 	GString *rocompat = NULL;
+	GString *tunefs_flag = NULL;
 
 	compat = g_string_new(NULL);
 	incompat = g_string_new(NULL);
 	rocompat = g_string_new(NULL);
+	tunefs_flag = g_string_new(NULL);
 
 	fprintf(out, "\tRevision: %u.%u\n", sb->s_major_rev_level, sb->s_minor_rev_level);
 	fprintf(out, "\tMount Count: %u   Max Mount Count: %u\n", sb->s_mnt_count,
@@ -57,12 +59,16 @@
 
 	get_compat_flag(sb->s_feature_compat, compat);
 	get_incompat_flag(sb->s_feature_incompat, incompat);
+	get_tunefs_flag(sb->s_feature_incompat,
+			sb->s_tunefs_flag, tunefs_flag);
 	get_rocompat_flag(sb->s_feature_ro_compat, rocompat);
 
 	fprintf(out, "\tFeature Compat: %u %s\n", sb->s_feature_compat,
 		compat->str);
 	fprintf(out, "\tFeature Incompat: %u %s\n", sb->s_feature_incompat,
 		incompat->str);
+	fprintf(out, "\tTunefs Incomplete: %u %s\n", sb->s_tunefs_flag,
+		tunefs_flag->str);
 	fprintf(out, "\tFeature RO compat: %u %s\n", sb->s_feature_ro_compat,
 		rocompat->str);
 

Modified: trunk/debugfs.ocfs2/include/utils.h
===================================================================
--- trunk/debugfs.ocfs2/include/utils.h	2007-08-15 01:10:31 UTC (rev 1383)
+++ trunk/debugfs.ocfs2/include/utils.h	2007-08-15 16:05:43 UTC (rev 1384)
@@ -34,6 +34,7 @@
 } rdump_opts;
 
 void get_incompat_flag(uint32_t flag, GString *str);
+void get_tunefs_flag(uint32_t incompat_flag, uint16_t flag, GString *str);
 void get_compat_flag(uint32_t flag, GString *str);
 void get_rocompat_flag(uint32_t flag, GString *str);
 void get_vote_flag (uint32_t flag, GString *str);

Modified: trunk/debugfs.ocfs2/utils.c
===================================================================
--- trunk/debugfs.ocfs2/utils.c	2007-08-15 01:10:31 UTC (rev 1383)
+++ trunk/debugfs.ocfs2/utils.c	2007-08-15 16:05:43 UTC (rev 1384)
@@ -42,10 +42,15 @@
 	if (flag & OCFS2_FEATURE_INCOMPAT_SPARSE_ALLOC)
 		g_string_append(str, "Sparse ");
 
+	if (flag & OCFS2_FEATURE_INCOMPAT_TUNEFS_INPROG) {
+		g_string_append(str, "AbortedTunefs ");
+	}
+
 	if (flag & ~(OCFS2_FEATURE_INCOMPAT_HEARTBEAT_DEV |
 		     OCFS2_FEATURE_INCOMPAT_RESIZE_INPROG |
 		     OCFS2_FEATURE_INCOMPAT_LOCAL_MOUNT |
-		     OCFS2_FEATURE_INCOMPAT_SPARSE_ALLOC))
+		     OCFS2_FEATURE_INCOMPAT_SPARSE_ALLOC |
+		     OCFS2_FEATURE_INCOMPAT_TUNEFS_INPROG))
 		g_string_append(str, "Unknown ");
 
 	if (!str->len)
@@ -54,6 +59,22 @@
 	return;
 }
 
+void get_tunefs_flag(uint32_t incompat_flag, uint16_t flag, GString *str)
+{
+	if (!(incompat_flag & OCFS2_FEATURE_INCOMPAT_TUNEFS_INPROG)) {
+		g_string_append(str, "None");
+		return;
+	}
+
+	if (flag & OCFS2_TUNEFS_INPROG_REMOVE_SLOT)
+		g_string_append(str, "RemoveSlot ");
+
+	if (flag & ~OCFS2_TUNEFS_INPROG_REMOVE_SLOT)
+		g_string_append(str, "Unknown ");
+
+	return;
+}
+
 void get_compat_flag(uint32_t flag, GString *str)
 {
 	if (flag & OCFS2_FEATURE_COMPAT_BACKUP_SB)

Modified: trunk/fsck.ocfs2/fsck.c
===================================================================
--- trunk/fsck.ocfs2/fsck.c	2007-08-15 01:10:31 UTC (rev 1383)
+++ trunk/fsck.ocfs2/fsck.c	2007-08-15 16:05:43 UTC (rev 1384)
@@ -182,6 +182,12 @@
 	if (sb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_RESIZE_INPROG)
 		sb->s_feature_incompat &= ~OCFS2_FEATURE_INCOMPAT_RESIZE_INPROG;
 
+	if (sb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_TUNEFS_INPROG) {
+		sb->s_feature_incompat &=
+				 ~OCFS2_FEATURE_INCOMPAT_TUNEFS_INPROG;
+		sb->s_tunefs_flag = 0;
+	}
+
 	if (ost->ost_num_clusters)
 		di->i_clusters = ost->ost_num_clusters;
 
@@ -262,6 +268,9 @@
 	else if ((OCFS2_RAW_SB(ost->ost_fs->fs_super)->s_feature_incompat &
 		  OCFS2_FEATURE_INCOMPAT_RESIZE_INPROG))
 		strcpy(reason, "incomplete volume resize detected");
+	else if ((OCFS2_RAW_SB(ost->ost_fs->fs_super)->s_feature_incompat &
+		  OCFS2_FEATURE_INCOMPAT_TUNEFS_INPROG))
+		strcpy(reason, "incomplete tunefs operation detected");
 	else if (sb->s_state & OCFS2_ERROR_FS)
 		strcpy(reason, "contains a file system with errors");
 	else if (sb->s_max_mnt_count > 0 &&
@@ -655,6 +664,15 @@
 	printf("  max slots:          %u\n\n", 
 	       OCFS2_RAW_SB(ost->ost_fs->fs_super)->s_max_slots);
 
+	if (open_flags & OCFS2_FLAG_RW) {
+		ret = o2fsck_check_journals(ost);
+		if (ret) {
+			printf("fsck saw unrecoverable errors in the journal "
+				"files and will not continue.\n");
+			goto unlock;
+		}
+	}
+
 	ret = maybe_replay_journals(ost, filename, open_flags, blkno, blksize);
 	if (ret) {
 		printf("fsck encountered unrecoverable errors while "

Modified: trunk/fsck.ocfs2/fsck.ocfs2.checks.8.in
===================================================================
--- trunk/fsck.ocfs2/fsck.ocfs2.checks.8.in	2007-08-15 01:10:31 UTC (rev 1383)
+++ trunk/fsck.ocfs2/fsck.ocfs2.checks.8.in	2007-08-15 16:05:43 UTC (rev 1384)
@@ -150,6 +150,13 @@
 Answering yes updates the group descriptor's parent pointer to match the inode
 it resides in.
 
+.SS "GROUP_DUPLICATE"
+Group descriptors contain a pointer to the allocator inode which contains
+the chain they belong to.  A group descriptor was found in two allocator
+inodes so it may be duplicated.
+
+Answering yes removes the group descriptor from current allocator inode.
+
 .SS "GROUP_BLKNO"
 Group descriptors have a field which records their block location on disk.  A
 group descriptor was found at a given location but is recorded as being
@@ -657,6 +664,21 @@
 only disable the copying of the backup superblock and will not effect the
 remaining \fIfsck.ocfs2\fR processing.
 
+.SS "ORPHAN_DIR_MISSING"
+While files are being deleted they are placed in an internal directory, named
+orphan directory. If an orphan directory does't exist, an OCFS2 volume can't
+be mounted successfully. Fsck has found the orphan directory is missing and
+would like to create it for future use.
+
+Answering yes creates the orphan directory in the system directory.
+
+.SS "JOURNAL_FILE_INVALID"
+OCFS2 uses JDB for journalling and some journal files exist in the
+system directory. Fsck has found some journal file is whther empty or has
+bad journal superblock and would like to fix it for future use.
+
+Answering yes reformats the journal file in the system directory.
+
 .SH "SEE ALSO"
 .BR fsck.ocfs2(8)
 

Modified: trunk/fsck.ocfs2/include/journal.h
===================================================================
--- trunk/fsck.ocfs2/include/journal.h	2007-08-15 01:10:31 UTC (rev 1383)
+++ trunk/fsck.ocfs2/include/journal.h	2007-08-15 16:05:43 UTC (rev 1384)
@@ -28,6 +28,7 @@
 
 errcode_t o2fsck_replay_journals(ocfs2_filesys *fs, int *replayed);
 errcode_t o2fsck_should_replay_journals(ocfs2_filesys *fs, int *should);
+errcode_t o2fsck_check_journals(o2fsck_state *ost);
 
 #endif /* __O2FSCK_JOURNAL_H__ */
 

Modified: trunk/fsck.ocfs2/journal.c
===================================================================
--- trunk/fsck.ocfs2/journal.c	2007-08-15 01:10:31 UTC (rev 1383)
+++ trunk/fsck.ocfs2/journal.c	2007-08-15 16:05:43 UTC (rev 1384)
@@ -735,3 +735,87 @@
 
 	return ret;
 }
+
+static errcode_t check_journal_super(ocfs2_filesys *fs,
+				     ocfs2_cached_inode *ci)
+{
+	errcode_t ret;
+	int contig;
+	uint64_t blkno;
+	char *buf = NULL;
+
+	ret = ocfs2_malloc_blocks(fs->fs_io, 1, &buf);
+	if (ret)
+		goto out;
+
+	ret = ocfs2_extent_map_init(fs, ci);
+	if (ret)
+		goto out;
+
+	ret = ocfs2_extent_map_get_blocks(ci, 0, 1, &blkno, &contig);
+	if (ret)
+		goto out;
+
+	ret = ocfs2_read_journal_superblock(fs, blkno, buf);
+out:
+	return ret;
+}
+
+/* When we remove slot in tunefs.ocfs2, there may be some panic and
+ * we may corrupt some journal files, so we have to check whether the
+ * journal file is corrupted and recreate it.
+ */
+errcode_t o2fsck_check_journals(o2fsck_state *ost)
+{
+	errcode_t ret = 0;
+	uint64_t blkno;
+	uint32_t num_clusters = 0;
+	ocfs2_filesys *fs = ost->ost_fs;
+	char fname[OCFS2_MAX_FILENAME_LEN];
+	uint16_t i, max_slots = OCFS2_RAW_SB(fs->fs_super)->s_max_slots;
+	ocfs2_cached_inode *ci = NULL;
+
+	for (i = 0; i < max_slots; i++) {
+		ret = ocfs2_lookup_system_inode(fs, JOURNAL_SYSTEM_INODE, i,
+						&blkno);
+		if (ret)
+			goto out;
+
+		ret = ocfs2_read_cached_inode(fs, blkno, &ci);
+		if (ret)
+			goto out;
+
+		if (ci->ci_inode->i_clusters > 0) {
+			/* check whether the file contains valid super block. */
+			ret = check_journal_super(fs, ci);
+			if (!ret) {
+				/* record the valid cluster size. */
+				num_clusters = ci->ci_inode->i_clusters;
+				continue;
+			}
+		}
+
+		if (num_clusters == 0) {
+			/* none of the journal is valid, servere errors. */
+			ret = OCFS2_ET_JOURNAL_TOO_SMALL;
+			goto out;
+		}
+
+		sprintf(fname,
+			ocfs2_system_inodes[JOURNAL_SYSTEM_INODE].si_name, i);
+		if (!prompt(ost, PY, PR_JOURNAL_FILE_INVALID,
+			    "journal file %s is invalid, regenerate it?",
+			    fname))
+			continue;
+
+		ret = ocfs2_make_journal(fs, blkno, num_clusters);
+		if (ret)
+			goto out;
+	}
+
+out:
+	if (ci)
+		ocfs2_free_cached_inode(fs, ci);
+	return ret;
+}
+

Modified: trunk/fsck.ocfs2/pass0.c
===================================================================
--- trunk/fsck.ocfs2/pass0.c	2007-08-15 01:10:31 UTC (rev 1383)
+++ trunk/fsck.ocfs2/pass0.c	2007-08-15 16:05:43 UTC (rev 1384)
@@ -91,11 +91,76 @@
 	}
 }
 
+/* check whether the group really exists in the specified chain of
+ * the specified allocator file.
+ */
+static errcode_t check_group_parent(ocfs2_filesys *fs, uint64_t group,
+				    uint64_t ino, uint16_t chain,int *exist)
+{
+	errcode_t ret;
+	uint64_t gd_blkno;
+	char *buf = NULL, *gd_buf = NULL;
+	struct ocfs2_dinode *di = NULL;
+	struct ocfs2_group_desc *gd = NULL;
+	struct ocfs2_chain_rec *cr = NULL;
+
+	ret = ocfs2_malloc_block(fs->fs_io, &buf);
+	if (ret)
+		goto out;
+
+	ret = ocfs2_read_inode(fs, ino, buf);
+	if (ret) {
+		goto out;
+	}
+
+	di = (struct ocfs2_dinode *)buf;
+
+	if (!(di->i_flags & OCFS2_VALID_FL) ||
+	    !(di->i_flags & OCFS2_BITMAP_FL) ||
+	    !(di->i_flags & OCFS2_CHAIN_FL))
+		goto out;
+
+	if (di->id1.bitmap1.i_total == 0)
+		goto out;
+
+	if (di->id2.i_chain.cl_next_free_rec <= chain)
+		goto out;
+
+	cr = &di->id2.i_chain.cl_recs[chain];
+
+	ret = ocfs2_malloc_block(fs->fs_io, &gd_buf);
+	if (ret)
+		goto out;
+
+	gd_blkno = cr->c_blkno;
+	while (gd_blkno) {
+		if (gd_blkno ==  group) {
+			*exist = 1;
+			break;
+		}
+
+		ret = ocfs2_read_group_desc(fs, gd_blkno, gd_buf);
+		if (ret)
+			goto out;
+		gd = (struct ocfs2_group_desc *)gd_buf;
+
+		gd_blkno = gd->bg_next_group;
+	}
+
+out:
+	if (gd_buf)
+		ocfs2_free(&gd_buf);
+	if (buf)
+		ocfs2_free(&buf);
+	return ret;
+}
+
 static errcode_t repair_group_desc(o2fsck_state *ost,
 				   struct ocfs2_dinode *di,
 				   struct chain_state *cs,
 				   struct ocfs2_group_desc *bg,
-				   uint64_t blkno)
+				   uint64_t blkno,
+				   int *clear_ref)
 {
 	errcode_t ret = 0;
 	int changed = 0;
@@ -121,14 +186,36 @@
 	/* XXX maybe for advanced pain we could check to see if these 
 	 * kinds of descs have valid generations for the inodes they
 	 * reference */
-	if ((bg->bg_parent_dinode != di->i_blkno) &&
-	    prompt(ost, PY, PR_GROUP_PARENT,
+	if ((bg->bg_parent_dinode != di->i_blkno)) {
+		int exist = 0;
+		ret = check_group_parent(ost->ost_fs, bg->bg_blkno,
+					 bg->bg_parent_dinode,
+					 bg->bg_chain, &exist);
+
+		/* If we finds that the group really exists in the specified
+		 * chain of the specified alloc inode, then this may be a
+		 * duplicated group and we may need to remove it from current
+		 * inode.
+		 */
+		if (!ret && exist && prompt(ost, PY, PR_GROUP_DUPLICATE,
 		   "Group descriptor at block %"PRIu64" is "
 		   "referenced by inode %"PRIu64" but thinks its parent inode "
+		   "is %"PRIu64" and we can also see it in that inode."
+		    " So it may be duplicated.  Remove it from this inode?",
+		    blkno, di->i_blkno, bg->bg_parent_dinode)) {
+			*clear_ref = 1;
+			goto out;
+		}
+
+		if (prompt(ost, PY, PR_GROUP_PARENT,
+		   "Group descriptor at block %"PRIu64" is "
+		   "referenced by inode %"PRIu64" but thinks its parent inode "
 		   "is %"PRIu64".  Fix the descriptor's parent inode?", blkno,
 		   di->i_blkno, bg->bg_parent_dinode)) {
-		bg->bg_parent_dinode = di->i_blkno;
-		changed = 1;
+			bg->bg_parent_dinode = di->i_blkno;
+			changed = 1;
+		}
+
 	}
 
 	if ((bg->bg_blkno != blkno) &&
@@ -179,7 +266,7 @@
 
 	cs->cs_total_bits += bg->bg_bits;
 	cs->cs_free_bits += bg->bg_free_bits_count;
-
+out:
 	return ret;
 }
 
@@ -474,10 +561,19 @@
 			break;
 		}
 
-		ret = repair_group_desc(ost, di, cs, bg2, blkno);
+		ret = repair_group_desc(ost, di, cs, bg2, blkno, &clear_ref);
 		if (ret)
 			goto out;
 
+		/* we found a duplicate chain, so we need to clear them from
+		 * current chain.
+		 *
+		 * Please note that all the groups below this group will also
+		 * be removed from this chain because this is the mechanism
+		 * of removing slots in tunefs.ocfs2.
+		 */
+		if (clear_ref)
+			break;
 
 		/* the loop will now start by reading bg1->next_group */
 		memcpy(buf1, buf2, ost->ost_fs->fs_blocksize);

Modified: trunk/fsck.ocfs2/pass4.c
===================================================================
--- trunk/fsck.ocfs2/pass4.c	2007-08-15 01:10:31 UTC (rev 1383)
+++ trunk/fsck.ocfs2/pass4.c	2007-08-15 16:05:43 UTC (rev 1384)
@@ -152,6 +152,46 @@
 	return ret_flags;
 }
 
+static errcode_t create_orphan_dir(o2fsck_state *ost, char *fname)
+{
+	errcode_t ret;
+	uint64_t blkno;
+	ocfs2_filesys *fs = ost->ost_fs;
+
+	/* create inode for system file */
+	ret = ocfs2_new_system_inode(fs, &blkno,
+			ocfs2_system_inodes[ORPHAN_DIR_SYSTEM_INODE].si_mode,
+			ocfs2_system_inodes[ORPHAN_DIR_SYSTEM_INODE].si_iflags);
+	if (ret)
+		goto bail;
+
+	ret = ocfs2_expand_dir(fs, blkno, fs->fs_sysdir_blkno);
+	if (ret)
+		goto bail;
+
+	/* Add the inode to the system dir */
+	ret = ocfs2_link(fs, fs->fs_sysdir_blkno, fname, blkno,
+			 OCFS2_FT_DIR);
+	if (ret == OCFS2_ET_DIR_NO_SPACE) {
+		ret = ocfs2_expand_dir(fs, fs->fs_sysdir_blkno,
+				       fs->fs_sysdir_blkno);
+		if (!ret)
+			ret = ocfs2_link(fs, fs->fs_sysdir_blkno,
+					 fname, blkno, OCFS2_FT_DIR);
+	}
+
+	if (ret)
+		goto bail;
+
+	/* we have created an orphan dir under system dir and updated the disk,
+	 * so we have to update the refs in ost accordingly.
+	 */
+	o2fsck_icount_delta(ost->ost_icount_refs, fs->fs_sysdir_blkno, 1);
+	o2fsck_icount_delta(ost->ost_icount_in_inodes, fs->fs_sysdir_blkno, 1);
+bail:
+	return ret;
+}
+
 static errcode_t replay_orphan_dir(o2fsck_state *ost)
 {
 	errcode_t ret = OCFS2_ET_CORRUPT_SUPERBLOCK;
@@ -171,9 +211,26 @@
 
 		ret = ocfs2_lookup(ost->ost_fs, ost->ost_fs->fs_sysdir_blkno,
 				   name, bytes, NULL, &ino);
-		if (ret)
-			goto out;
+		if (ret) {
+			if (ret != OCFS2_ET_FILE_NOT_FOUND)
+				goto out;
 
+			/* orphan dir is missing, it may be caused by an
+			 * unsuccessful removing slots in tunefs.ocfs2.
+			 * so create it.
+			 */
+	   		if (prompt(ost, PY, PR_ORPHAN_DIR_MISSING,
+				   "%s is missing in system directory. "
+				   "Create it?", name)) {
+				ret = create_orphan_dir(ost, name);
+				if (ret) {
+					com_err(whoami, ret, "while creating"
+						"orphan directory %s", name);
+					continue;
+				}
+			}
+		}
+
 		ret = ocfs2_dir_iterate(ost->ost_fs, ino,
 					OCFS2_DIRENT_FLAG_EXCLUDE_DOTS, NULL,
 					replay_orphan_iterate, ost);

Modified: trunk/libocfs2/include/ocfs2.h
===================================================================
--- trunk/libocfs2/include/ocfs2.h	2007-08-15 01:10:31 UTC (rev 1383)
+++ trunk/libocfs2/include/ocfs2.h	2007-08-15 16:05:43 UTC (rev 1384)
@@ -76,12 +76,15 @@
 #define OCFS2_LIB_FEATURE_INCOMPAT_SUPP		(OCFS2_FEATURE_INCOMPAT_SUPP | \
 						 OCFS2_FEATURE_INCOMPAT_HEARTBEAT_DEV | \
 						 OCFS2_FEATURE_INCOMPAT_RESIZE_INPROG | \
-						 OCFS2_FEATURE_INCOMPAT_LOCAL_MOUNT)
+						 OCFS2_FEATURE_INCOMPAT_LOCAL_MOUNT   | \
+						 OCFS2_FEATURE_INCOMPAT_TUNEFS_INPROG)
 
 #define OCFS2_LIB_FEATURE_RO_COMPAT_SUPP	OCFS2_FEATURE_RO_COMPAT_SUPP
 
 #define OCFS2_LIB_FEATURE_COMPAT_SUPP		OCFS2_FEATURE_COMPAT_SUPP
 
+#define OCFS2_LIB_ABORTED_TUNEFS_SUPP		OCFS2_TUNEFS_INPROG_REMOVE_SLOT
+
 /* define OCFS2_SB for ocfs2-tools */
 #define OCFS2_SB(sb)	(sb)
 

Modified: trunk/libocfs2/include/ocfs2_fs.h
===================================================================
--- trunk/libocfs2/include/ocfs2_fs.h	2007-08-15 01:10:31 UTC (rev 1383)
+++ trunk/libocfs2/include/ocfs2_fs.h	2007-08-15 16:05:43 UTC (rev 1384)
@@ -109,6 +109,15 @@
 /* Support for sparse allocation in b-trees */
 #define OCFS2_FEATURE_INCOMPAT_SPARSE_ALLOC	0x0010
 
+/* tunefs sets this incompat flag before starting the operation and clears it
+ * at the end. This flag protects users from inadvertently mounting the fs
+ * after an aborted run without fsck-ing.
+ */
+#define OCFS2_FEATURE_INCOMPAT_TUNEFS_INPROG	0x0020
+
+/* tunefs set this in s_tunefs_flag when removing slots. */
+#define OCFS2_TUNEFS_INPROG_REMOVE_SLOT		0x0001
+
 /*
  * backup superblock flag is used to indicate that this volume
  * has backup superblocks.
@@ -393,8 +402,8 @@
 	__le32 s_clustersize_bits;	/* Clustersize for this fs */
 /*40*/	__le16 s_max_slots;		/* Max number of simultaneous mounts
 					   before tunefs required */
-	__le16 s_reserved1;
-	__le32 s_reserved2;
+	__le16 s_tunefs_flag;
+	__le32 s_reserved1;
 	__le64 s_first_cluster_group;	/* Block offset of 1st cluster
 					 * group header */
 /*50*/	__u8  s_label[OCFS2_MAX_VOL_LABEL_LEN];	/* Label for mounting, etc. */

Modified: trunk/libocfs2/openfs.c
===================================================================
--- trunk/libocfs2/openfs.c	2007-08-15 01:10:31 UTC (rev 1383)
+++ trunk/libocfs2/openfs.c	2007-08-15 16:05:43 UTC (rev 1384)
@@ -264,6 +264,16 @@
 		if (OCFS2_RAW_SB(fs->fs_super)->s_feature_compat &
 		    ~OCFS2_LIB_FEATURE_COMPAT_SUPP)
 			    goto out;
+
+		/* We need to check s_tunefs_flag also to make sure
+		 * fsck.ocfs2 won't try to clean up an aborted tunefs
+		 * that it doesn't know.
+		 */
+		if (OCFS2_HAS_INCOMPAT_FEATURE(OCFS2_RAW_SB(fs->fs_super),
+					OCFS2_FEATURE_INCOMPAT_TUNEFS_INPROG) &&
+		    (OCFS2_RAW_SB(fs->fs_super)->s_tunefs_flag &
+		     ~OCFS2_LIB_ABORTED_TUNEFS_SUPP))
+			goto out;
 	}
 
 	ret = OCFS2_ET_UNSUPP_FEATURE;

Modified: trunk/sizetest/sizes.txt
===================================================================
--- trunk/sizetest/sizes.txt	2007-08-15 01:10:31 UTC (rev 1383)
+++ trunk/sizetest/sizes.txt	2007-08-15 16:05:43 UTC (rev 1384)
@@ -58,8 +58,8 @@
 0x038	s_blocksize_bits    	+0x04
 0x03C	s_clustersize_bits  	+0x04
 0x040	s_max_slots         	+0x02
-0x042	s_reserved1         	+0x02
-0x044	s_reserved2         	+0x04
+0x042	s_tunefs_flag         	+0x02
+0x044	s_reserved1         	+0x04
 0x048	s_first_cluster_group	+0x08
 0x050	s_label             	+0x40
 0x090	s_uuid              	+0x10

Modified: trunk/sizetest/sizetest.c
===================================================================
--- trunk/sizetest/sizetest.c	2007-08-15 01:10:31 UTC (rev 1383)
+++ trunk/sizetest/sizetest.c	2007-08-15 16:05:43 UTC (rev 1384)
@@ -137,8 +137,8 @@
 	SHOW_OFFSET(struct ocfs2_super_block, s_blocksize_bits);
 	SHOW_OFFSET(struct ocfs2_super_block, s_clustersize_bits);
 	SHOW_OFFSET(struct ocfs2_super_block, s_max_slots);
+	SHOW_OFFSET(struct ocfs2_super_block, s_tunefs_flag);
 	SHOW_OFFSET(struct ocfs2_super_block, s_reserved1);
-	SHOW_OFFSET(struct ocfs2_super_block, s_reserved2);
 	SHOW_OFFSET(struct ocfs2_super_block, s_first_cluster_group);
 	SHOW_OFFSET(struct ocfs2_super_block, s_label);
 	SHOW_OFFSET(struct ocfs2_super_block, s_uuid);

Modified: trunk/tunefs.ocfs2/Makefile
===================================================================
--- trunk/tunefs.ocfs2/Makefile	2007-08-15 01:10:31 UTC (rev 1383)
+++ trunk/tunefs.ocfs2/Makefile	2007-08-15 16:05:43 UTC (rev 1384)
@@ -30,7 +30,7 @@
 
 MANS = tunefs.ocfs2.8
 
-CFILES = tunefs.c query.c
+CFILES = tunefs.c query.c remove_slot.c
 HFILES = tunefs.h
 
 OBJS = $(subst .c,.o,$(CFILES))

Added: trunk/tunefs.ocfs2/remove_slot.c
===================================================================
--- trunk/tunefs.ocfs2/remove_slot.c	2007-08-15 01:10:31 UTC (rev 1383)
+++ trunk/tunefs.ocfs2/remove_slot.c	2007-08-15 16:05:43 UTC (rev 1384)
@@ -0,0 +1,788 @@
+/*
+ * remove_slot.c
+ *
+ * The function for removing slots from ocfs2 volume.
+ *
+ * Copyright (C) 2007 Oracle.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ *
+ */
+
+#include <inttypes.h>
+#include <bitops.h>
+#include <ocfs2.h>
+
+#include <assert.h>
+
+#include "tunefs.h"
+
+extern ocfs2_tune_opts opts;
+
+struct moved_group {
+	uint64_t blkno;
+	char *gd_buf;
+	struct moved_group *next;
+};
+
+struct relink_ctxt {
+	int inode_type;
+	struct ocfs2_chain_rec *cr;
+	uint16_t new_slot;
+	uint64_t dst_blkno;
+	char *src_inode;
+	char *dst_inode;
+	char *ex_buf;
+};
+
+struct remove_slot_ctxt {
+	ocfs2_filesys *fs;
+	uint16_t removed_slot;
+	errcode_t errcode;
+};
+
+static errcode_t change_sub_alloc_slot(ocfs2_filesys *fs,
+				       uint64_t blkno,
+				       struct relink_ctxt *ctxt)
+{
+	errcode_t ret;
+	struct ocfs2_dinode *di = NULL;
+	struct ocfs2_extent_block *eb = NULL;
+
+	if (ctxt->inode_type == EXTENT_ALLOC_SYSTEM_INODE) {
+		/* change sub alloc bit in the extent block. */
+		ret = ocfs2_read_extent_block(fs, blkno, ctxt->ex_buf);
+		if (ret)
+			goto bail;
+
+		eb = (struct ocfs2_extent_block *)ctxt->ex_buf;
+		eb->h_suballoc_slot = ctxt->new_slot;
+
+		ret = ocfs2_write_extent_block(fs, blkno, ctxt->ex_buf);
+		if (ret)
+			goto bail;
+	} else {
+		/* change sub alloc bit in the inode. */
+		ret = ocfs2_read_inode(fs, blkno, ctxt->ex_buf);
+		if (ret)
+			goto bail;
+
+		di = (struct ocfs2_dinode *)ctxt->ex_buf;
+		di->i_suballoc_slot = ctxt->new_slot;
+
+		ret = ocfs2_write_inode(fs, blkno, ctxt->ex_buf);
+		if (ret)
+			goto bail;
+	}
+bail:
+	return ret;
+}
+
+static errcode_t move_group(ocfs2_filesys *fs,
+			    struct relink_ctxt *ctxt,
+			    struct moved_group *group)
+{
+	errcode_t ret = 0;
+	uint16_t cr_pos;
+	struct ocfs2_group_desc *gd = NULL;
+	struct ocfs2_dinode *di = NULL;
+	struct ocfs2_chain_list *cl = NULL;
+	struct ocfs2_chain_rec *cr = NULL;
+
+	if (!group || !group->blkno || !group->gd_buf)
+		goto bail;
+
+	di = (struct ocfs2_dinode *)ctxt->dst_inode;
+	cl = &di->id2.i_chain;
+
+	/* calculate the insert position. */
+	if (cl->cl_next_free_rec < cl->cl_count)
+		cr_pos = cl->cl_next_free_rec;
+	else {
+		/* Now we have all the chain record filled with some groups.
+		 * so we figure out all the groups we have and then calculate
+		 * the proper place for our insert.
+		 */
+		cr_pos = di->id1.bitmap1.i_total / (cl->cl_cpg * cl->cl_bpc);
+		cr_pos %= cl->cl_count;
+	}
+
+	cr = &cl->cl_recs[cr_pos];
+
+	gd = (struct ocfs2_group_desc *)group->gd_buf;
+	gd->bg_chain = cr_pos;
+	gd->bg_parent_dinode = ctxt->dst_blkno;
+
+	/* we can safely set the bg_next_group here since all the group
+	 * below it in the moving chain is already moved to the new
+	 * position and we don't need to worry about any "lost" groups.
+	 *
+	 * Please see how we build up the group list in move_chain_rec.
+	 */
+	gd->bg_next_group = cr->c_blkno;
+
+	ret = ocfs2_write_group_desc(fs, group->blkno, group->gd_buf);
+	if (ret)
+		goto bail;
+
+	/* modify the chain record and the new files simultaneously. */
+	cr->c_blkno = gd->bg_blkno;
+	cr->c_total += gd->bg_bits;
+	cr->c_free += gd->bg_free_bits_count;
+
+	/* If the chain isn't full, increase the free_rec. */
+	if (cl->cl_next_free_rec != cl->cl_count)
+		cl->cl_next_free_rec++;
+
+	di->id1.bitmap1.i_total += gd->bg_bits;
+	di->id1.bitmap1.i_used += gd->bg_bits;
+	di->id1.bitmap1.i_used -= gd->bg_free_bits_count;
+	di->i_clusters += cl->cl_cpg;
+	di->i_size += cl->cl_cpg * fs->fs_clustersize;
+
+	ret = ocfs2_write_inode(fs, ctxt->dst_blkno, ctxt->dst_inode);
+
+bail:
+	return ret;
+}
+
+/*
+ * This function will iterate the chain_rec and do the following modifications:
+ * 1. record all the groups in the chains.
+ * 2. for every group, do:
+ *    1) modify  Sub Alloc Slot in extent block/inodes accordingly.
+ *    2) change the GROUP_PARENT according to its future owner.
+ *    3) link the group to the new slot files.
+ */
+static errcode_t move_chain_rec(ocfs2_filesys *fs, struct relink_ctxt *ctxt)
+{
+	errcode_t ret = 0;
+	int i, start, end = 1;
+	uint64_t blkno, gd_blkno = ctxt->cr->c_blkno;
+	struct ocfs2_group_desc *gd = NULL;
+	struct moved_group *group = NULL, *group_head = NULL;
+
+	if (gd_blkno == 0)
+		goto bail;
+
+	/* Record the group in the relink_ctxt.
+	 *
+	 * We record the group in a reverse order, so the first group
+	 * will be at the end of the group list. This is useful for
+	 * fsck.ocfs2 when any error happens during the move of groups
+	 * and we can safely move the group also.
+	 */
+	while (gd_blkno) {
+		ret = ocfs2_malloc0(sizeof(struct moved_group), &group);
+		if (ret)
+			goto bail;
+		memset(group, 0, sizeof(struct moved_group));
+
+		/* We insert the group first in case of any further error
+		 * will not cause memory leak.
+		 */
+		group->next = group_head;
+		group_head = group;
+
+		ret = ocfs2_malloc_block(fs->fs_io, &group->gd_buf);
+		if (ret)
+			goto bail;
+
+		ret = ocfs2_read_group_desc(fs, gd_blkno, group->gd_buf);
+		if (ret)
+			goto bail;
+
+		group->blkno = gd_blkno;
+		gd = (struct ocfs2_group_desc *)group->gd_buf;
+		gd_blkno = gd->bg_next_group;
+	}
+
+	group = group_head;
+	while (group) {
+		gd = (struct ocfs2_group_desc *)group->gd_buf;
+
+		end = 1;
+		/* Modify the "Sub Alloc Slot" in the extent block/inodes. */
+		while (end < gd->bg_bits) {
+			start = ocfs2_find_next_bit_set(gd->bg_bitmap,
+							gd->bg_bits, end);
+			if (start >= gd->bg_bits)
+				break;
+
+			end = ocfs2_find_next_bit_clear(gd->bg_bitmap,
+							gd->bg_bits, start);
+
+			for (i = start; i < end; i++) {
+				blkno = group->blkno + i;
+
+				ret = change_sub_alloc_slot(fs, blkno, ctxt);
+				if (ret)
+					goto bail;
+
+			}
+		}
+
+		/* move the group to the new slots. */
+		ret = move_group(fs, ctxt, group);
+		if (ret)
+			goto bail;
+
+		group = group->next;
+	}
+
+bail:
+	group = group_head;
+	while (group) {
+		group_head = group->next;
+		if (group->gd_buf)
+			ocfs2_free(&group->gd_buf);
+		ocfs2_free(&group);
+		group = group_head;
+	}
+	return ret;
+}
+
+static errcode_t relink_system_alloc(ocfs2_filesys *fs,
+				     uint16_t removed_slot,
+				     uint16_t new_slots,
+				     int inode_type)
+{
+	errcode_t ret;
+	int16_t i;
+	uint64_t blkno;
+	struct ocfs2_dinode *di = NULL;
+	struct ocfs2_chain_list *cl = NULL;
+	struct relink_ctxt ctxt;
+
+	memset(&ctxt, 0, sizeof(ctxt));
+
+	ret = ocfs2_lookup_system_inode(fs, inode_type,
+					removed_slot, &blkno);
+	if (ret)
+		goto bail;
+
+	ret = ocfs2_malloc_block(fs->fs_io, &ctxt.src_inode);
+	if (ret) {
+		com_err(opts.progname, ret, "while allocating a block "
+			"during relinking system alloc");
+		goto bail;
+	}
+
+	ret = ocfs2_read_inode(fs, blkno, ctxt.src_inode);
+	if (ret) {
+		com_err(opts.progname, ret, "while reading inode "
+			"%"PRIu64" during relinking system alloc", blkno);
+		goto bail;
+	}
+
+	di = (struct ocfs2_dinode *)ctxt.src_inode;
+
+	if (!(di->i_flags & OCFS2_VALID_FL) ||
+	    !(di->i_flags & OCFS2_BITMAP_FL) ||
+	    !(di->i_flags & OCFS2_CHAIN_FL)) {
+		com_err(opts.progname, 0, "system  alloc %"PRIu64" corrupts."
+			"during relinking system alloc", blkno);
+		goto bail;
+	}
+
+	if (di->id1.bitmap1.i_total == 0)
+		goto bail;
+
+	/* Iterate all the groups and modify the group descriptors accordingly. */
+	ret = ocfs2_malloc_block(fs->fs_io, &ctxt.ex_buf);
+	if (ret) {
+		com_err(opts.progname, ret, "while allocating a block "
+			"during relinking system alloc");
+		goto bail;
+	}
+
+	ret = ocfs2_malloc_block(fs->fs_io, &ctxt.dst_inode);
+	if (ret) {
+		com_err(opts.progname, ret, "while allocating a block "
+			"during relinking system alloc");
+		goto bail;
+	}
+
+	cl = &di->id2.i_chain;
+	ctxt.inode_type = inode_type;
+
+	/*iterate all the chain record and move them to the new slots. */
+	for (i = cl->cl_next_free_rec - 1; i >= 0; i--) {
+		ctxt.new_slot = i % new_slots;
+		ret = ocfs2_lookup_system_inode(fs, inode_type,
+						ctxt.new_slot,
+						&ctxt.dst_blkno);
+		if (ret)
+			goto bail;
+
+		ret = ocfs2_read_inode(fs, ctxt.dst_blkno, ctxt.dst_inode);
+		if (ret)
+			goto bail;
+
+		ctxt.cr = &cl->cl_recs[i];
+
+		ret = move_chain_rec(fs, &ctxt);
+		if (ret) {
+			com_err(opts.progname, ret,
+				"while iterating system alloc file");
+			goto bail;
+		}
+	}
+
+
+	/* emtpy the original alloc files. */
+	di->id1.bitmap1.i_used = 0;
+	di->id1.bitmap1.i_total = 0;
+	di->i_clusters = 0;
+	di->i_size = 0;
+
+	cl = &di->id2.i_chain;
+	cl->cl_next_free_rec = 0;
+	memset(cl->cl_recs, 0, sizeof(struct ocfs2_chain_rec) * cl->cl_count);
+
+	ret = ocfs2_write_inode(fs, blkno, ctxt.src_inode);
+
+bail:
+	if (ctxt.ex_buf)
+		ocfs2_free(&ctxt.ex_buf);
+	if (ctxt.dst_inode)
+		ocfs2_free(&ctxt.dst_inode);
+	if (ctxt.src_inode)
+		ocfs2_free(&ctxt.src_inode);
+
+	return ret;
+}
+
+/* Empty the content of the specified journal file.
+ * Most of the code is copied from ocfs2_format_journal.
+ */
+static errcode_t empty_journal(ocfs2_filesys *fs,
+			       ocfs2_cached_inode *ci)
+{
+	errcode_t ret = 0;
+	char *buf = NULL;
+	int bs_bits = OCFS2_RAW_SB(fs->fs_super)->s_blocksize_bits;
+	uint64_t offset = 0;
+	uint32_t wrote, count;
+
+	ret = ocfs2_extent_map_init(fs, ci);
+	if (ret)
+		goto out;
+
+#define BUFLEN	1048576
+	ret = ocfs2_malloc_blocks(fs->fs_io, (BUFLEN >> bs_bits), &buf);
+	if (ret)
+		goto out;
+	memset(buf, 0, BUFLEN);
+
+	count = (uint32_t) ci->ci_inode->i_size;
+	while (count) {
+		ret = ocfs2_file_write(ci, buf, ocfs2_min((uint32_t) BUFLEN, count),
+				       offset, &wrote);
+		if (ret)
+			goto out;
+		offset += wrote;
+		count -= wrote;
+	}
+
+out:
+	return ret;
+}
+
+static errcode_t empty_and_truncate_journal(ocfs2_filesys *fs,
+					    uint16_t removed_slot)
+{
+	errcode_t ret;
+	uint64_t blkno;
+	ocfs2_cached_inode *ci = NULL;
+
+	ret = ocfs2_lookup_system_inode(fs, JOURNAL_SYSTEM_INODE,
+					removed_slot, &blkno);
+	if (ret)
+		goto bail;
+
+	ret = ocfs2_read_cached_inode(fs, blkno, &ci);
+	if (ret)
+		goto bail;
+
+	/* we have to empty the journal since it may contains some
+	 * inode blocks which look like valid(except the i_blkno).
+	 * So if this block range is used for future inode alloc
+	 * files, fsck.ocfs2 may raise some error.
+	 */
+	ret = empty_journal(fs, ci);
+	if (ret)
+		goto bail;
+
+	ret = ocfs2_truncate(fs, blkno, 0);
+	if (ret)
+		goto bail;
+bail:
+	if (ci)
+		ocfs2_free_cached_inode(fs, ci);
+	return ret;
+}
+
+static errcode_t truncate_orphan_dir(ocfs2_filesys *fs,
+				     uint16_t removed_slot)
+{
+	errcode_t ret;
+	uint64_t blkno;
+
+	ret = ocfs2_lookup_system_inode(fs, ORPHAN_DIR_SYSTEM_INODE,
+					removed_slot, &blkno);
+	if (ret)
+		goto bail;
+
+	ret = ocfs2_truncate(fs, blkno, 0);
+bail:
+	return ret;
+}
+
+static int remove_slot_iterate(struct ocfs2_dir_entry *dirent, int offset,
+			       int blocksize, char *buf, void *priv_data)
+{
+	struct remove_slot_ctxt *ctxt = (struct remove_slot_ctxt *)priv_data;
+	char tmp = dirent->name[dirent->name_len];
+	int ret_flags = 0;
+	errcode_t ret;
+	char fname[SYSTEM_FILE_NAME_MAX];
+
+	sprintf(fname, "%04d", ctxt->removed_slot);
+
+	dirent->name[dirent->name_len] = '\0';
+	if (strstr(dirent->name, fname)) {
+		ret = ocfs2_delete_inode(ctxt->fs, dirent->inode);
+		if (ret) {
+			ret_flags |= OCFS2_DIRENT_ERROR;
+			ctxt->errcode = ret;
+			goto out;
+		}
+
+		dirent->inode = 0;
+		ret_flags |= OCFS2_DIRENT_CHANGED;
+	}
+
+out:
+	dirent->name[dirent->name_len] = tmp;
+	return ret_flags;
+}
+
+static errcode_t remove_slot_entry(ocfs2_filesys *fs, uint16_t removed_slot)
+{
+	struct remove_slot_ctxt ctxt = {
+		.fs = fs,
+		.removed_slot = removed_slot,
+		.errcode = 0
+	};
+
+	ocfs2_dir_iterate(fs, fs->fs_sysdir_blkno,
+			  OCFS2_DIRENT_FLAG_EXCLUDE_DOTS, NULL,
+			  remove_slot_iterate, &ctxt);
+
+	return ctxt.errcode;
+}
+
+static errcode_t decrease_link_count(ocfs2_filesys *fs, uint16_t blkno)
+{
+	errcode_t ret;
+	char *buf = NULL;
+	struct ocfs2_dinode *di  = NULL;
+
+	ret = ocfs2_malloc_block(fs->fs_io, &buf);
+	if (ret)
+		goto bail;
+
+	ret = ocfs2_read_inode(fs, blkno, buf);
+	if (ret)
+		goto bail;
+
+	di = (struct ocfs2_dinode *)buf;
+
+	if (di->i_links_count > 0)
+		di->i_links_count--;
+	else {
+		ret = OCFS2_ET_INODE_NOT_VALID;
+		goto bail;
+	}
+
+	ret = ocfs2_write_inode(fs, blkno, buf);
+bail:
+	if (buf)
+		ocfs2_free(&buf);
+	return ret;
+}
+
+errcode_t remove_slots(ocfs2_filesys *fs)
+{
+	errcode_t ret = 0;
+	uint16_t old_num = OCFS2_RAW_SB(fs->fs_super)->s_max_slots;
+	uint16_t removed_slot = old_num - 1;
+
+	/* we will remove the slots once at a time so that fsck.ocfs2 can work
+	 * well and we can continue our work easily in case of any panic.
+	 */
+	while (removed_slot >= opts.num_slots) {
+		/* Link the specified extent alloc file to others. */
+		ret = relink_system_alloc(fs, removed_slot, opts.num_slots,
+					  EXTENT_ALLOC_SYSTEM_INODE);
+		if (ret)
+			goto bail;
+
+		/* Link the specified inode alloc file to others. */
+		ret = relink_system_alloc(fs, removed_slot, opts.num_slots,
+					  INODE_ALLOC_SYSTEM_INODE);
+		if (ret)
+			goto bail;
+
+		/* Truncate the orphan dir to release its clusters
+		 * to the global bitmap.
+		 */
+		ret = truncate_orphan_dir(fs, removed_slot);
+		if (ret)
+			goto bail;
+
+		/* empty the content of journal and truncate its clusters. */
+		ret = empty_and_truncate_journal(fs, removed_slot);
+		if (ret)
+			goto bail;
+
+		/* Now, we decrease the max_slots first and then remove the
+		 * slots for the reason that:
+		 *
+		 * 1. ocfs2_lock_down_clusters needs to lock all the journal
+		 * files. so if we delete the journal entry first and fail
+		 * to decrease the max_slots, the whole cluster can't be
+		 * locked any more due to the loss of journals.
+		 *
+		 * 2. Now all the resources except the inodes are freed
+		 * so it is safe to decrease the slots first, and if any
+		 * panic happens after we decrease the slots, we can ignore
+		 * them, and actually if we want to increase the slot in the
+		 * future, we can reuse these inodes.
+		 */
+
+		/* The slot number is updated in the super block.*/
+		OCFS2_RAW_SB(fs->fs_super)->s_max_slots--;
+		ret = ocfs2_write_super(fs);
+		if (ret)
+			goto bail;
+
+		/* The extra system dir entries should be removed. */
+		ret = remove_slot_entry(fs, removed_slot);
+		if (ret)
+			goto bail;
+
+		/* Decrease the i_links_count in system file directory
+		 * since the orphan_dir is removed.
+		 */
+		ret = decrease_link_count(fs, fs->fs_sysdir_blkno);
+		if (ret)
+			goto bail;
+
+		removed_slot--;
+	}
+
+bail:
+	return ret;
+}
+
+static int orphan_iterate(struct ocfs2_dir_entry *dirent, int offset,
+			  int blocksize, char *buf, void *priv_data)
+{
+	int *has_orphan = (int *)priv_data;
+
+	*has_orphan = 1;
+
+	/* we have found some file/dir in the orphan_dir,
+	 * so there is no need to go on the iteration.
+	 */
+	return OCFS2_DIRENT_ABORT;
+}
+
+static errcode_t orphan_dir_check(ocfs2_filesys *fs,
+				  uint16_t new_slots,
+				  int *has_orphan)
+{
+	errcode_t ret = 0;
+	uint64_t blkno;
+	int i;
+	uint16_t max_slots = OCFS2_RAW_SB(fs->fs_super)->s_max_slots;
+
+	for (i = new_slots ; i < max_slots; ++i) {
+		ret = ocfs2_lookup_system_inode(fs, ORPHAN_DIR_SYSTEM_INODE,
+						i, &blkno);
+		if (ret) {
+			com_err(opts.progname, ret, "while looking up "
+				"orphan dir for slot %u during orphan dir "
+				"check", i);
+			goto bail;
+		}
+
+		ret = ocfs2_dir_iterate(fs, blkno,
+					OCFS2_DIRENT_FLAG_EXCLUDE_DOTS, NULL,
+					orphan_iterate, has_orphan);
+
+		if (*has_orphan) {
+			com_err(opts.progname, 0, "orphan dir for slot %u "
+				"has entries", i);
+			goto bail;
+		}
+	}
+
+bail:
+	return ret;
+}
+
+static errcode_t local_alloc_check(ocfs2_filesys *fs,
+				   uint16_t new_slots,
+				  int *has_local_alloc)
+{
+	errcode_t ret = 0;
+	uint16_t i;
+	uint64_t blkno;
+	char *buf = NULL;
+	struct ocfs2_dinode *di = NULL;
+
+	uint16_t max_slots = OCFS2_RAW_SB(fs->fs_super)->s_max_slots;
+
+	ret = ocfs2_malloc_block(fs->fs_io, &buf);
+	if (ret) {
+		com_err(opts.progname, ret, "while allocating a block "
+			"during local alloc check");
+		goto bail;
+	}
+
+	for (i = new_slots ; i < max_slots; ++i) {
+		ret = ocfs2_lookup_system_inode(fs, LOCAL_ALLOC_SYSTEM_INODE,
+						i, &blkno);
+		if (ret) {
+			com_err(opts.progname, ret, "while looking up "
+				"local alloc for slot %u during local alloc "
+				"check", i);
+			goto bail;
+		}
+
+		ret = ocfs2_read_inode(fs, blkno, buf);
+		if (ret) {
+			com_err(opts.progname, ret, "while reading inode "
+				"%"PRIu64" during local alloc check", blkno);
+			goto bail;
+		}
+
+		di = (struct ocfs2_dinode *)buf;
+
+		if (di->id1.bitmap1.i_total > 0) {
+			*has_local_alloc = 1;
+			com_err(opts.progname, 0, "local alloc for slot %u "
+				"isn't empty", i);
+			goto bail;
+		}
+	}
+
+bail:
+	if (buf)
+		ocfs2_free(&buf);
+	return ret;
+}
+
+static errcode_t truncate_log_check(ocfs2_filesys *fs,
+				    uint16_t new_slots,
+				    int *has_truncate_log)
+{
+	errcode_t ret = 0;
+	uint16_t i;
+	uint64_t blkno;
+	char *buf = NULL;
+	struct ocfs2_dinode *di = NULL;
+
+	uint16_t max_slots = OCFS2_RAW_SB(fs->fs_super)->s_max_slots;
+
+	ret = ocfs2_malloc_block(fs->fs_io, &buf);
+	if (ret) {
+		com_err(opts.progname, ret, "while allocating a block "
+			"during truncate log check");
+		goto bail;
+	}
+
+	for (i = new_slots; i < max_slots; ++i) {
+		ret = ocfs2_lookup_system_inode(fs, TRUNCATE_LOG_SYSTEM_INODE,
+						i, &blkno);
+		if (ret) {
+			com_err(opts.progname, ret, "while looking up "
+				"truncate log for slot %u during truncate log "
+				"check", i);
+			goto bail;
+		}
+
+		ret = ocfs2_read_inode(fs, blkno, buf);
+		if (ret) {
+			com_err(opts.progname, ret, "while reading inode "
+				"%"PRIu64" during truncate log check", blkno);
+			goto bail;
+		}
+
+		di = (struct ocfs2_dinode *)buf;
+
+		if (di->id2.i_dealloc.tl_used > 0) {
+			*has_truncate_log = 1;
+			com_err(opts.progname, 0, "truncate log for slot %u "
+				"isn't empty", i);
+			goto bail;
+		}
+	}
+
+bail:
+	if (buf)
+		ocfs2_free(&buf);
+	return ret;
+}
+
+errcode_t remove_slot_check(ocfs2_filesys *fs)
+{
+	errcode_t ret;
+	int has_orphan = 0, has_truncate_log = 0, has_local_alloc = 0;
+
+	/* we don't allow remove_slot to coexist with other tunefs
+	 * options to keep things simple.
+	 */
+	if (opts.backup_super ||opts.vol_label ||
+	     opts.mount || opts.jrnl_size || opts.num_blocks) {
+		com_err(opts.progname, 0, "Cannot remove slot"
+			" along with other tasks");
+		exit(1);
+	}
+
+	ret = orphan_dir_check(fs, opts.num_slots, &has_orphan);
+	if (ret || has_orphan) {
+		ret = 1;
+		goto bail;
+	}
+
+	ret = local_alloc_check(fs, opts.num_slots, &has_local_alloc);
+	if (ret || has_local_alloc) {
+		ret = 1;
+		goto bail;
+	}
+
+	ret = truncate_log_check(fs, opts.num_slots, &has_truncate_log);
+	if (ret || has_truncate_log) {
+		ret = 1;
+		goto bail;
+	}
+bail:
+	return ret;
+}

Modified: trunk/tunefs.ocfs2/tunefs.c
===================================================================
--- trunk/tunefs.ocfs2/tunefs.c	2007-08-15 01:10:31 UTC (rev 1383)
+++ trunk/tunefs.ocfs2/tunefs.c	2007-08-15 16:05:43 UTC (rev 1384)
@@ -249,10 +249,10 @@
 					"than %d",
 					OCFS2_MAX_SLOTS);
 				exit(1);
-			} else if (opts.num_slots < 2) {
+			} else if (opts.num_slots < 1) {
 				com_err(opts.progname, 0,
 					"Number of node slots must be at "
-					"least 2");
+					"least 1");
 				exit(1);
 			}
 			break;
@@ -863,7 +863,10 @@
 	errcode_t ret = 0;
 
 	block_signals(SIG_BLOCK);
-	ret = add_slots(fs);
+	if (opts.num_slots > OCFS2_RAW_SB(fs->fs_super)->s_max_slots)
+		ret = add_slots(fs);
+	else
+		ret = remove_slots(fs);
 	block_signals(SIG_UNBLOCK);
 	if (ret)
 		return ret;
@@ -1253,7 +1256,7 @@
 	int upd_incompat = 0;
 	int upd_backup_super = 0;
 	char *tmpstr;
-	uint16_t tmp;
+	uint16_t max_slots;
 	uint64_t def_jrnl_size = 0;
 	uint64_t num_clusters;
 	int dirty = 0;
@@ -1301,6 +1304,13 @@
 		goto close;
 	}
 
+	if (OCFS2_RAW_SB(fs->fs_super)->s_feature_incompat &
+	    OCFS2_FEATURE_INCOMPAT_TUNEFS_INPROG) {
+		fprintf(stderr, "Aborted tunefs operation detected. "
+			"Run fsck.ocfs2 -f <device>.\n");
+		goto close;
+	}
+
 	if (resize)
 		get_vol_size(fs);
 
@@ -1377,20 +1387,29 @@
 	}
 
 	/* validate num slots */
+	max_slots = OCFS2_RAW_SB(fs->fs_super)->s_max_slots;
 	if (opts.num_slots) {
-		tmp = OCFS2_RAW_SB(fs->fs_super)->s_max_slots;
-		if (opts.num_slots > tmp) {
-			printf("Changing number of node slots from %d to %d\n",
-			       tmp, opts.num_slots);
+		if (opts.num_slots < max_slots) {
+			ret = remove_slot_check(fs);
+			if (ret) {
+				com_err(opts.progname, 0,
+					"remove slot check failed. ");
+				goto unlock;
+			}
+		}
+		else if (opts.num_slots > max_slots) {
+			if (!opts.jrnl_size)
+				opts.jrnl_size = def_jrnl_size;
+
 		} else {
-			com_err(opts.progname, 0, "Node slots (%d) has to be "
-				"more than the configured node slots (%d)",
-			       opts.num_slots, tmp);
-			goto unlock;
+			printf("Giving the same number of nodes. "
+				"Ignore the change of slots.");
+			opts.num_slots = 0;
 		}
 
-		if (!opts.jrnl_size)
-			opts.jrnl_size = def_jrnl_size;
+		if (opts.num_slots)
+			printf("Changing number of node slots from %d to %d\n",
+			       max_slots, opts.num_slots);
 	}
 
 	/* validate journal size */
@@ -1433,9 +1452,17 @@
 	}
 
 	/* Set resize incompat flag on superblock */
-	if (opts.num_blocks) {
-		OCFS2_RAW_SB(fs->fs_super)->s_feature_incompat |=
-			OCFS2_FEATURE_INCOMPAT_RESIZE_INPROG;
+	if (opts.num_blocks || opts.num_slots < max_slots) {
+		if (opts.num_blocks)
+			OCFS2_RAW_SB(fs->fs_super)->s_feature_incompat |=
+				OCFS2_FEATURE_INCOMPAT_RESIZE_INPROG;
+		else {
+			OCFS2_RAW_SB(fs->fs_super)->s_feature_incompat |=
+				OCFS2_FEATURE_INCOMPAT_TUNEFS_INPROG;
+			OCFS2_RAW_SB(fs->fs_super)->s_tunefs_flag |=
+				OCFS2_TUNEFS_INPROG_REMOVE_SLOT;
+		}
+
 		ret = ocfs2_write_super(fs);
 		if (ret) {
 			com_err(opts.progname, ret,
@@ -1467,8 +1494,15 @@
 				"while updating node slots");
 			goto unlock;
 		}
+		/* Clear remove slot incompat flag on superblock */
+		if (opts.num_slots < max_slots) {
+			OCFS2_RAW_SB(fs->fs_super)->s_feature_incompat &=
+				~OCFS2_FEATURE_INCOMPAT_TUNEFS_INPROG;
+			OCFS2_RAW_SB(fs->fs_super)->s_tunefs_flag &=
+				~OCFS2_TUNEFS_INPROG_REMOVE_SLOT;
+		}
 		if (upd_slots)
-			printf("Added node slots\n");
+			printf("Changed node slots\n");
 	}
 
 	/* change mount type */

Modified: trunk/tunefs.ocfs2/tunefs.h
===================================================================
--- trunk/tunefs.ocfs2/tunefs.h	2007-08-15 01:10:31 UTC (rev 1383)
+++ trunk/tunefs.ocfs2/tunefs.h	2007-08-15 16:05:43 UTC (rev 1384)
@@ -92,3 +92,5 @@
 
 void print_query(char *queryfmt);
 
+errcode_t remove_slots(ocfs2_filesys *fs);
+errcode_t remove_slot_check(ocfs2_filesys *fs);




More information about the Ocfs2-tools-commits mailing list