[Ocfs2-tools-devel] [PATCH 11/12] tunefs.ocfs2: Enable and disable the metaecc feature.

Joel Becker Joel.Becker at oracle.com
Tue Dec 30 03:40:49 PST 2008


On Mon, Dec 29, 2008 at 07:23:47PM -0800, Joel Becker wrote:
> The metaecc feature requires a bit of work.
> 
> To disable it is easy; just clear the feature bit.  All structures left
> behind are fully compatible with filesystems that don't know about
> metaecc.
> 
> Enabling it is hard work.  We must run though every block in the
> filesystem and compute its checksum and error correction values.  But
> even that's not all.  Directory blocks need trailers to store the ECC
> values, and the directories might not have them yet.  More on the
> trailers below.

	Of course, the minute you send out a nice patchset, you find a
big bug.  The bug I found is in this patch.

> The preparation step runs through every inode in the filesystem.  If the
> inode does not need directory trailers - that is, it is not a directory,
> is an inline directory, or already has trailers - we add all of its
> metadata blocks to an rbtree.  This caches the blocks for later
> re-write.  We don't want to have to re-read them in the writeout step.
> If the inode is a directory requiring trailers, we call
> tunefs_prepare_dir_trailer() to determine how much space it needs.  At
> the end of this step, we have a list of all directories requiring
> trailers, and we know whether we have enough space to do the work.

	If you look at this step, you'll see we cache the blocks of all
inodes except directories that need trailers.  But when we install the
trailers, we might need to grow the target directories.  This will
modify the allocators.  When we write out the cached allocator blocks,
they'll be wrong.
	The included patch stores off the block numbers of each chain
allocator during the first pass, and only scans them after we're done
with allocation.  This ensures we have the up-to-date version of the
allocators.
	Two smaller bugs are also fixed.  First, we should explicitly
set ctxt.ad_blocks to RB_ROOT.  It actually works with the original
zeroing of ocfs2_malloc0(), but we should do it right.  Second, we were
passing the original inode buffer into our metadata iteration functions
instead of the copy we have in our block tree.  That's fixed too.  
	The 'metaecc' branch of my tree has been rebased so that these
changes are incorporated into the tunefs patch.

Joel

diff --git a/tunefs.ocfs2/feature_metaecc.c b/tunefs.ocfs2/feature_metaecc.c
index da9620e..2e16998 100644
--- a/tunefs.ocfs2/feature_metaecc.c
+++ b/tunefs.ocfs2/feature_metaecc.c
@@ -628,10 +628,21 @@ struct block_to_ecc {
 	errcode_t (*e_write)(ocfs2_filesys *fs, struct block_to_ecc *block);
 };
 
+/*
+ * We have to do chain allocators at the end, because we may use them
+ * as we add dirblock trailers.  Really, we only need the inode block
+ * number.
+ */
+struct chain_to_ecc {
+	struct list_head ce_list;
+	uint64_t ce_blkno;
+};
+
 struct add_ecc_context {
 	errcode_t ae_ret;
 	uint32_t ae_clusters;
 	struct list_head ae_dirs;
+	struct list_head ae_chains;
 	struct rb_root ae_blocks;
 };
 
@@ -855,11 +866,33 @@ static void empty_ecc_blocks(struct add_ecc_context *ctxt)
 	}
 }
 
+static errcode_t add_ecc_chain(struct add_ecc_context *ctxt,
+			       uint64_t blkno)
+{
+	errcode_t ret;
+	struct chain_to_ecc *cte;
+
+	ret = ocfs2_malloc0(sizeof(struct chain_to_ecc), &cte);
+	if (!ret) {
+		cte->ce_blkno = blkno;
+		list_add_tail(&cte->ce_list, &ctxt->ae_chains);
+	}
+
+	return ret;
+}
+
 static void empty_add_ecc_context(struct add_ecc_context *ctxt)
 {
 	struct tunefs_trailer_context *tc;
+	struct chain_to_ecc *cte;
 	struct list_head *n, *pos;
 
+	list_for_each_safe(pos, n, &ctxt->ae_chains) {
+		cte = list_entry(pos, struct chain_to_ecc, ce_list);
+		list_del(&cte->ce_list);
+		ocfs2_free(&cte);
+	}
+
 	list_for_each_safe(pos, n, &ctxt->ae_dirs) {
 		tc = list_entry(pos, struct tunefs_trailer_context, d_list);
 		tunefs_trailer_context_free(tc);
@@ -868,7 +901,6 @@ static void empty_add_ecc_context(struct add_ecc_context *ctxt)
 	empty_ecc_blocks(ctxt);
 }
 
-
 struct add_ecc_iterate {
 	struct add_ecc_context *ic_ctxt;
 	struct ocfs2_dinode *ic_di;
@@ -996,6 +1028,63 @@ out:
 	return iret;
 }
 
+/*
+ * This walks all the chain allocators we've stored off and adds their
+ * blocks to the list.
+ */
+static errcode_t find_chain_blocks(ocfs2_filesys *fs,
+				   struct add_ecc_context *ctxt)
+{
+	errcode_t ret;
+	struct list_head *pos;
+	struct chain_to_ecc *cte;
+	struct ocfs2_dinode *di;
+	struct block_to_ecc *block;
+	char *buf;
+	struct add_ecc_iterate iter = {
+		.ic_ctxt = ctxt,
+	};
+
+	ret = ocfs2_malloc_block(fs->fs_io, &buf);
+	if (ret)
+		goto out;
+
+	list_for_each(pos, &ctxt->ae_chains) {
+		cte = list_entry(pos, struct chain_to_ecc, ce_list);
+		ret = ocfs2_read_inode(fs, cte->ce_blkno, buf);
+		if (ret)
+			break;
+
+		di = (struct ocfs2_dinode *)buf;
+		ret = block_insert_dinode(fs, ctxt, di);
+		if (ret)
+			break;
+
+		/* We need the inode, look it back up */
+		block = block_lookup(ctxt, di->i_blkno);
+		if (!block) {
+			ret = TUNEFS_ET_INTERNAL_FAILURE;
+			break;
+		}
+
+		/* Now using our copy of the inode */
+		di = (struct ocfs2_dinode *)block->e_buf;
+		assert(di->i_blkno == cte->ce_blkno);
+
+		iter.ic_di = di;
+		ret = ocfs2_chain_iterate(fs, di->i_blkno, chain_iterate,
+					  &iter);
+		if (ret)
+			break;
+	}
+
+	ocfs2_free(&buf);
+
+out:
+	return ret;
+}
+
+
 static errcode_t inode_iterate(ocfs2_filesys *fs, struct ocfs2_dinode *di,
 			       void *user_data)
 {
@@ -1005,9 +1094,17 @@ static errcode_t inode_iterate(ocfs2_filesys *fs, struct ocfs2_dinode *di,
 	struct add_ecc_context *ctxt = user_data;
 	struct add_ecc_iterate iter = {
 		.ic_ctxt = ctxt,
-		.ic_di = di,
 	};
 
+	/*
+	 * We have to handle chain allocators later, after the dir
+	 * trailer code has done any allocation it needs.
+	 */
+	if (di->i_flags & OCFS2_CHAIN_FL) {
+		ret = add_ecc_chain(ctxt, di->i_blkno);
+		goto out;
+	}
+
 	ret = block_insert_dinode(fs, ctxt, di);
 	if (ret)
 		goto out;
@@ -1028,13 +1125,7 @@ static errcode_t inode_iterate(ocfs2_filesys *fs, struct ocfs2_dinode *di,
 
 	/* Now using our copy of the inode */
 	di = (struct ocfs2_dinode *)block->e_buf;
-
-	if (di->i_flags & OCFS2_CHAIN_FL) {
-		ret = ocfs2_chain_iterate(fs, di->i_blkno,
-					  chain_iterate,
-					  &iter);
-		goto out;
-	}
+	iter.ic_di = di;
 
 	/*
 	 * Ok, it's a regular file or directory.
@@ -1178,6 +1269,8 @@ static int enable_metaecc(ocfs2_filesys *fs, int flags)
 
 	memset(&ctxt, 0, sizeof(ctxt));
 	INIT_LIST_HEAD(&ctxt.ae_dirs);
+	INIT_LIST_HEAD(&ctxt.ae_chains);
+	ctxt.ae_blocks = RB_ROOT;
 	ret = find_blocks(fs, &ctxt);
 	if (ret) {
 		if (ret == OCFS2_ET_NO_SPACE)
@@ -1208,9 +1301,13 @@ static int enable_metaecc(ocfs2_filesys *fs, int flags)
 	if (ret)
 		goto out_cleanup;
 
+	/* We're done with allocation, scan the chain allocators */
+	ret = find_chain_blocks(fs, &ctxt);
+	if (ret)
+		goto out_cleanup;
+
 	/* Set the feature bit in-memory and rewrite all our blocks */
 	OCFS2_SET_INCOMPAT_FEATURE(super, OCFS2_FEATURE_INCOMPAT_META_ECC);
-
 	ret = write_ecc_blocks(fs, &ctxt);
 	if (ret)
 		goto out_cleanup;
-- 

"You don't make the poor richer by making the rich poorer."
	- Sir Winston Churchill

Joel Becker
Principal Software Developer
Oracle
E-mail: joel.becker at oracle.com
Phone: (650) 506-8127



More information about the Ocfs2-tools-devel mailing list