[Ocfs2-tools-devel] [PATCH 42/44] fsck.ocfs2: User refcount to handle dup_clusters.

Tao Ma tao.ma at oracle.com
Mon Dec 28 01:01:27 PST 2009


In the old times, when we meet with a duplicate
cluster, we clone the 2 files. It is OK but it
is also time-consuming and space-consuming.

Now With reflink, we will try to create refcount
tree first for these files and add refcount rec
so that we avoid duplicating inodes.

Signed-off-by: Tao Ma <tao.ma at oracle.com>
---
 fsck.ocfs2/fsck.ocfs2.checks.8.in |    7 ++
 fsck.ocfs2/pass1b.c               |  164 ++++++++++++++++++++++++++++++++++---
 2 files changed, 158 insertions(+), 13 deletions(-)

diff --git a/fsck.ocfs2/fsck.ocfs2.checks.8.in b/fsck.ocfs2/fsck.ocfs2.checks.8.in
index a1a1689..6c47d2a 100644
--- a/fsck.ocfs2/fsck.ocfs2.checks.8.in
+++ b/fsck.ocfs2/fsck.ocfs2.checks.8.in
@@ -677,6 +677,13 @@ does not allow this.
 Answering yes will remove this inode, thus breaking its claim on the
 overcommitted clusters.
 
+.SS "DUP_CLUSTERS_ADD_REFCOUNT"
+An inode claims clusters that are also claimed by another inode.  ocfs2
+does not allow this.
+
+Answering yes will try to add a refcount record for all these inodes, so
+that they will share the cluster.
+
 \" pass2.c
 
 .SS "DIRENT_DOTTY_DUP"
diff --git a/fsck.ocfs2/pass1b.c b/fsck.ocfs2/pass1b.c
index 357ce25..dc56bff 100644
--- a/fsck.ocfs2/pass1b.c
+++ b/fsck.ocfs2/pass1b.c
@@ -108,6 +108,9 @@ struct dup_inode {
 
 	/* What we've done to it. */
 	unsigned int	di_state;
+
+	/* the refcount tree it has. */
+	uint64_t	di_refcount_loc;
 };
 
 /*
@@ -121,6 +124,11 @@ struct dup_cluster_owner {
 	 * for the dup inode rbtree.
 	 */
 	uint64_t		dco_ino;
+	/*
+	 * virtual offset in the extent tree.
+	 * Only valid for an extent tree, 0 for a chain file.
+	 */
+	uint32_t		dco_cpos;
 };
 
 struct dup_cluster {
@@ -242,7 +250,7 @@ static void dup_inode_insert(struct dup_context *dct,
  * into the context.
  */
 static errcode_t dup_insert(struct dup_context *dct, uint32_t cluster,
-			    uint64_t ino, uint32_t i_flags)
+			    struct ocfs2_dinode *dinode, uint32_t v_cpos)
 {
 	errcode_t ret;
 	struct list_head *p;
@@ -267,8 +275,9 @@ static errcode_t dup_insert(struct dup_context *dct, uint32_t cluster,
 			"structures");
 		goto out;
 	}
-	new_di->di_ino = ino;
-	new_di->di_flags = i_flags;
+	new_di->di_ino = dinode->i_blkno;
+	new_di->di_flags = dinode->i_flags;
+	new_di->di_refcount_loc = dinode->i_refcount_loc;
 
 	ret = ocfs2_malloc0(sizeof(struct dup_cluster_owner), &new_dco);
 	if (ret) {
@@ -277,7 +286,8 @@ static errcode_t dup_insert(struct dup_context *dct, uint32_t cluster,
 			"structures");
 		goto out;
 	}
-	new_dco->dco_ino = ino;
+	new_dco->dco_ino = dinode->i_blkno;
+	new_dco->dco_cpos = v_cpos;
 
 	dc = dup_cluster_lookup(dct, cluster);
 	if (!dc) {
@@ -286,7 +296,7 @@ static errcode_t dup_insert(struct dup_context *dct, uint32_t cluster,
 		new_dc = NULL;
 	}
 
-	di = dup_inode_lookup(dct, ino);
+	di = dup_inode_lookup(dct, dinode->i_blkno);
 	if (!di) {
 		dup_inode_insert(dct, new_di);
 		di = new_di;
@@ -296,7 +306,7 @@ static errcode_t dup_insert(struct dup_context *dct, uint32_t cluster,
 	dco = NULL;
 	list_for_each(p, &dc->dc_owners) {
 		dco = list_entry(p, struct dup_cluster_owner, dco_list);
-		if (dco->dco_ino == ino)
+		if (dco->dco_ino == dinode->i_blkno)
 			break;
 		dco = NULL;
 	}
@@ -360,7 +370,8 @@ struct process_extents_context {
 };
 
 static errcode_t process_dup_clusters(struct process_extents_context *pc,
-				      uint32_t p_cpos, uint32_t clusters)
+				      uint32_t p_cpos, uint32_t clusters,
+				      uint32_t v_cpos)
 {
 	int was_set;
 	errcode_t ret = 0;
@@ -380,8 +391,7 @@ static errcode_t process_dup_clusters(struct process_extents_context *pc,
 			verbosef("Marking multiply-claimed cluster %"PRIu32
 				 " as claimed by inode %"PRIu64"\n",
 				 p_cpos, pc->di->i_blkno);
-			ret = dup_insert(pc->dct, p_cpos, pc->di->i_blkno,
-					 pc->di->i_flags);
+			ret = dup_insert(pc->dct, p_cpos, pc->di, v_cpos);
 			if (ret) {
 				com_err(whoami, ret,
 					"while marking duplicate cluster "
@@ -393,6 +403,7 @@ static errcode_t process_dup_clusters(struct process_extents_context *pc,
 		}
 
 		p_cpos++;
+		v_cpos++;
 		clusters--;
 	}
 
@@ -410,7 +421,7 @@ static int process_inode_chains(ocfs2_filesys *fs, uint64_t gd_blkno,
 
 	pc->ret = process_dup_clusters(pc,
 				       ocfs2_blocks_to_clusters(fs, gd_blkno),
-				       clusters);
+				       clusters, 0);
 
 	return pc->ret ? OCFS2_CHAIN_ERROR | OCFS2_CHAIN_ABORT : 0;
 }
@@ -429,7 +440,8 @@ static int process_inode_extents(ocfs2_filesys *fs,
 	pc->ret = process_dup_clusters(pc,
 				       ocfs2_blocks_to_clusters(fs,
 								rec->e_blkno),
-				       rec->e_leaf_clusters);
+				       rec->e_leaf_clusters,
+				       rec->e_cpos);
 
 	return pc->ret ? OCFS2_EXTENT_ERROR | OCFS2_EXTENT_ABORT : 0;
 }
@@ -534,7 +546,8 @@ static int process_xattr_buckets(ocfs2_filesys *fs,
 	pc->ret = process_dup_clusters(pc,
 				       ocfs2_blocks_to_clusters(fs,
 								rec->e_blkno),
-				       rec->e_leaf_clusters);
+				       rec->e_leaf_clusters,
+				       rec->e_cpos);
 	if (pc->ret)
 		goto out;
 
@@ -1009,6 +1022,37 @@ static int print_func(struct dup_cluster *dc, struct dup_inode *di,
 	return 0;
 }
 
+/*
+ * Check whether we can create refcount for the file.
+ * So a file is valid only if:
+ * 1. It has no refcount tree.
+ * 2. It has the same tree as others.
+ * Store refcount_loc if we find one.
+ *
+ * if there is other file that does't have the same tree, set refcount_loc
+ * to UINT64_MAX and stop the search.
+ */
+static int find_refcount_func(struct dup_cluster *dc, struct dup_inode *di,
+			      struct dup_cluster_owner *dco, void *priv_data)
+{
+	uint64_t *refcount_loc = priv_data;
+
+	if (!di->di_refcount_loc)
+		return 0;
+
+	if (!*refcount_loc) {
+		*refcount_loc = di->di_refcount_loc;
+		return 0;
+	}
+
+	if (di->di_refcount_loc != *refcount_loc) {
+		*refcount_loc = UINT64_MAX;
+		return 1;
+	}
+
+	return 0;
+}
+
 /* Context for fix_dups_func() */
 struct fix_dup_context {
 	o2fsck_state *fd_ost;
@@ -1358,12 +1402,92 @@ static int fix_dups_func(struct dup_cluster *dc, struct dup_inode *di,
 	return ret;
 }
 
+struct create_refcount {
+	o2fsck_state *cr_ost;
+	uint64_t cr_refcount_loc;
+	errcode_t cr_err;
+};
+
+static int create_refcount_func(struct dup_cluster *dc, struct dup_inode *di,
+				struct dup_cluster_owner *dco, void *priv_data)
+{
+	errcode_t ret;
+	struct create_refcount *cr = priv_data;
+	ocfs2_filesys *fs = cr->cr_ost->ost_fs;
+
+	if (!di->di_refcount_loc) {
+		ret = ocfs2_attach_refcount_tree(fs, di->di_ino,
+						 cr->cr_refcount_loc);
+		if (ret) {
+			com_err(whoami, ret,
+				"while attaching file %"PRIu64" to"
+				"refcount tree %"PRIu64, di->di_ino,
+				cr->cr_refcount_loc);
+			goto out;
+		}
+		di->di_refcount_loc = cr->cr_refcount_loc;
+	}
+
+	ret = ocfs2_change_refcount_flag(fs, di->di_ino,
+					 dco->dco_cpos, 1, dc->dc_cluster,
+					 OCFS2_EXT_REFCOUNTED, 0);
+	if (ret) {
+		com_err(whoami, ret,
+			"while mark extent refcounted at %u in file %"PRIu64,
+			dco->dco_cpos, di->di_ino);
+		goto out;
+	}
+
+	ret = ocfs2_increase_refcount(fs, di->di_ino, dc->dc_cluster, 1);
+	if (ret)
+		com_err(whoami, ret,
+			"while increasing refcount at %u for file %"PRIu64,
+			dc->dc_cluster, di->di_ino);
+
+out:
+	cr->cr_err = ret;
+	return ret;
+}
+/*
+ * Create refcount record for all the files sharing the same clusters.
+ * Create a new refcount tree if all the files don't have it(refcount_loc = 0).
+ * If a file don't have refcount tree, attach it to that tree.
+ */
+static errcode_t o2fsck_create_refcount(o2fsck_state *ost,
+					struct dup_context *dct,
+					struct dup_cluster *dc,
+					uint64_t refcount_loc)
+{
+	errcode_t ret = 0;
+	struct create_refcount cr = {
+		.cr_ost = ost,
+		.cr_refcount_loc = refcount_loc,
+	};
+
+	if (!refcount_loc) {
+		ret = ocfs2_create_refcount_tree(ost->ost_fs,
+						 &refcount_loc);
+		if (ret) {
+			com_err(whoami, ret,
+				"while allocating a new refcount block");
+			goto out;
+		}
+		cr.cr_refcount_loc = refcount_loc;
+	}
+
+	for_each_owner(dct, dc, create_refcount_func, &cr);
+
+	ret = cr.cr_err;
+out:
+	return ret;
+}
+
 static errcode_t o2fsck_pass1d(o2fsck_state *ost, struct dup_context *dct)
 {
 	errcode_t ret = 0;
 	struct dup_cluster *dc;
 	struct rb_node *node = rb_first(&dct->dup_clusters);
-	uint64_t dups;
+	uint64_t dups, refcount_loc;
 	struct fix_dup_context fd = {
 		.fd_ost = ost,
 		.fd_dct = dct,
@@ -1383,6 +1507,20 @@ static errcode_t o2fsck_pass1d(o2fsck_state *ost, struct dup_context *dct)
 		       "inodes:\n",
 		       dc->dc_cluster);
 		for_each_owner(dct, dc, print_func, NULL);
+
+		/* We try create refcount tree first. */
+		refcount_loc = 0;
+		for_each_owner(dct, dc, find_refcount_func, &refcount_loc);
+		if (refcount_loc != UINT64_MAX &&
+		    prompt(ost, PY, PR_DUP_CLUSTERS_ADD_REFCOUNT,
+			   "Create refcount record for it?")) {
+			ret = o2fsck_create_refcount(ost, dct, dc,
+						     refcount_loc);
+			if (ret)
+				break;
+			continue;
+		}
+
 		for_each_owner(dct, dc, fix_dups_func, &fd);
 		if (fd.fd_err) {
 			ret = fd.fd_err;
-- 
1.5.5




More information about the Ocfs2-tools-devel mailing list