[Ocfs2-tools-devel] [PATCH 39/50] fsck.ocfs2: Check refcount of clusters.

Tao Ma tao.ma at oracle.com
Mon Jan 11 07:31:25 PST 2010


As we have added the refcounted clusters into an rb-tree,
we can now rebuild the refcount tree according to
these results.

Signed-off-by: Tao Ma <tao.ma at oracle.com>
---
 fsck.ocfs2/fsck.ocfs2.checks.8.in |   12 +
 fsck.ocfs2/include/refcount.h     |    1 +
 fsck.ocfs2/pass1.c                |    3 +
 fsck.ocfs2/refcount.c             |  550 +++++++++++++++++++++++++++++++++++++
 include/ocfs2/ocfs2.h             |    6 +
 libocfs2/refcount.c               |   12 +-
 6 files changed, 578 insertions(+), 6 deletions(-)

diff --git a/fsck.ocfs2/fsck.ocfs2.checks.8.in b/fsck.ocfs2/fsck.ocfs2.checks.8.in
index b1e7013..47e1873 100644
--- a/fsck.ocfs2/fsck.ocfs2.checks.8.in
+++ b/fsck.ocfs2/fsck.ocfs2.checks.8.in
@@ -657,6 +657,18 @@ refering an invalid refcount block.
 
 Answering yes remove this refcount block and clear refcount flag from this file.
 
+.SS "REFCOUNT_REC_REDUNDANT"
+A refcount record is used to store the refcount for physical clusters. A
+refcount record was found that has no physical clusters corresponding to it.
+
+Answering yes removes the refcount record.
+
+.SS "REFCOUNT_COUNT_INVALID"
+A refcount record is used to store the refcount for physical clusters. A refcount
+record was found which claims the wrong refcount for some physical clusters.
+
+Answering yes updates the corresponding refcount record.
+
 \" pass1b.c
 
 .SS "DUP_CLUSTERS_SYSFILE_CLONE"
diff --git a/fsck.ocfs2/include/refcount.h b/fsck.ocfs2/include/refcount.h
index 4e4f901..b44dd95 100644
--- a/fsck.ocfs2/include/refcount.h
+++ b/fsck.ocfs2/include/refcount.h
@@ -28,5 +28,6 @@ errcode_t o2fsck_mark_clusters_refcounted(o2fsck_state *ost,
 					  uint64_t p_cpos,
 					  uint32_t clusters,
 					  uint32_t v_cpos);
+errcode_t o2fsck_check_refcounted_clusters(o2fsck_state *ost);
 #endif /* __O2FSCK_REFCOUNT_H__ */
 
diff --git a/fsck.ocfs2/pass1.c b/fsck.ocfs2/pass1.c
index e605aca..5e16a01 100644
--- a/fsck.ocfs2/pass1.c
+++ b/fsck.ocfs2/pass1.c
@@ -1436,6 +1436,9 @@ errcode_t o2fsck_pass1(o2fsck_state *ost)
 
 	mark_local_allocs(ost);
 	mark_truncate_logs(ost);
+	ret = o2fsck_check_refcounted_clusters(ost);
+	if (ret)
+		com_err(whoami, ret, "while checking refcounted clusters");
 	write_cluster_alloc(ost);
 	write_inode_alloc(ost);
 
diff --git a/fsck.ocfs2/refcount.c b/fsck.ocfs2/refcount.c
index 62dfdce..db7606d 100644
--- a/fsck.ocfs2/refcount.c
+++ b/fsck.ocfs2/refcount.c
@@ -55,6 +55,18 @@ struct refcount_tree {
 	uint64_t rf_blkno;
 	struct list_head files_list;
 	int is_valid;
+	char *root_buf;
+	/*
+	 * leaf_buf, rec, index points to a refcount record we are checking.
+	 * r_offset is the offset in refcount rec we have checked already.
+	 */
+	char *leaf_buf;
+	struct ocfs2_refcount_rec rec;
+	int index;
+	int r_offset;
+
+	/* the position we have checked against this tree. */
+	uint64_t p_cend;
 };
 
 static errcode_t check_rb(o2fsck_state *ost, uint64_t blkno,
@@ -474,3 +486,541 @@ add_clusters:
 	refcount_extent_insert(file, extent);
 	return 0;
 }
+
+/*
+ * Given a refcount tree, find the lowest p_cpos among all the files sharing
+ * it, plus the length of that run and how many files reference it.
+ * The caller removes the range later with remove_refcounted_extent().
+ */
+static int get_refcounted_extent(struct refcount_tree *tree,
+				 uint64_t *p_cpos,
+				 uint32_t *p_clusters,
+				 uint32_t *p_refcount)
+{
+	struct refcount_extent *extent;
+	struct refcount_file *file;
+	struct list_head *p, *next;
+	struct rb_node *node;
+	uint64_t cpos = UINT64_MAX;
+	uint32_t clusters, refcount;
+	int found = 0;
+
+	list_for_each_safe(p, next, &tree->files_list) {
+		file = list_entry(p, struct refcount_file, list);
+		node = rb_first(&file->ref_extents);
+
+		/*
+		 * If the file has no extent, go to next file.
+		 * XXX: We can improve it here by removing the empty file.
+		 */
+		if (!node)
+			continue;
+
+		found = 1;
+
+		extent = rb_entry(node, struct refcount_extent, ext_node);
+		if (extent->p_cpos < cpos) {
+			/* We meet with a new start. */
+			clusters = cpos - extent->p_cpos < extent->clusters ?
+				   cpos - extent->p_cpos : extent->clusters;
+			cpos = extent->p_cpos;
+			refcount = 1;
+		} else if (extent->p_cpos == cpos) {
+			clusters = clusters < extent->clusters ?
+				   clusters : extent->clusters;
+			refcount++;
+		} else if (extent->p_cpos < cpos + clusters) {
+			/*
+			 * extent->p_cpos > cpos, change clusters accordingly.
+			 */
+			clusters = extent->p_cpos - cpos;
+		}
+	}
+
+	if (!found)
+		return 0;
+
+	*p_cpos = cpos;
+	*p_clusters = clusters;
+	*p_refcount = refcount;
+
+	return 1;
+}
+
+static void remove_refcounted_extent(struct refcount_tree *tree,
+				     uint64_t cpos,
+				     uint32_t clusters)
+{
+	struct refcount_extent *extent;
+	struct refcount_file *file;
+	struct list_head *p, *next;
+	struct rb_node *node;
+
+	/* Remove the tuple from the refcounted file. */
+	list_for_each_safe(p, next, &tree->files_list) {
+		file = list_entry(p, struct refcount_file, list);
+		node = rb_first(&file->ref_extents);
+
+		/* If the file has no extent, go to next file. */
+		if (!node)
+			continue;
+
+		extent = rb_entry(node, struct refcount_extent, ext_node);
+		assert(extent->p_cpos >= cpos);
+
+		if (cpos + clusters <= extent->p_cpos)
+			continue;
+
+		assert(extent->p_cpos + extent->clusters >= cpos + clusters);
+
+		if (cpos + clusters == extent->p_cpos + extent->clusters) {
+			rb_erase(&extent->ext_node, &file->ref_extents);
+			ocfs2_free(&extent);
+		} else {
+			extent->clusters =
+				(extent->p_cpos + extent->clusters) -
+				(cpos + clusters);
+			extent->p_cpos = cpos + clusters;
+		}
+	}
+}
+
+/*
+ * Check all the files in the tree and, if a file contains the range
+ * (p_cpos, len) with the refcounted flag set, clear that flag.
+ */
+static errcode_t o2fsck_clear_refcount(o2fsck_state *ost,
+				       struct refcount_tree *tree,
+				       uint64_t p_cpos,
+				       uint32_t len)
+{
+	errcode_t ret = 0;
+	struct refcount_extent *extent;
+	struct refcount_file *file;
+	struct list_head *p, *next;
+	struct rb_node *node;
+	uint32_t v_start;
+
+	list_for_each_safe(p, next, &tree->files_list) {
+		file = list_entry(p, struct refcount_file, list);
+		node = rb_first(&file->ref_extents);
+
+		/* If the file has no extent, go to next file. */
+		if (!node)
+			continue;
+
+		extent = rb_entry(node, struct refcount_extent, ext_node);
+		assert(extent->p_cpos >= p_cpos);
+
+		if (p_cpos + len <= extent->p_cpos)
+			continue;
+
+		assert(extent->p_cpos + extent->clusters >= p_cpos + len);
+
+		v_start = p_cpos - extent->p_cpos + extent->v_cpos;
+
+		ret = ocfs2_change_refcount_flag(ost->ost_fs, file->i_blkno,
+						 v_start, len, p_cpos,
+						 0, OCFS2_EXT_REFCOUNTED);
+		if (ret) {
+			com_err(whoami, ret, "while clearing refcount flag at "
+				"%u in file %"PRIu64, v_start, file->i_blkno);
+			break;
+		}
+	}
+
+	return ret;
+}
+
+/*
+ * o2fsck_refcount_punch_hole and o2fsck_change_refcount are just wrappers
+ * for the corresponding libocfs2 functions with one addition: re-read
+ * root refcount block since we may have changed the tree during the operation.
+ */
+static errcode_t o2fsck_refcount_punch_hole(o2fsck_state *ost,
+					    struct refcount_tree *tree,
+					    uint64_t p_cpos, uint32_t len)
+{
+	errcode_t ret;
+
+	ret = ocfs2_refcount_punch_hole(ost->ost_fs, tree->rf_blkno,
+					p_cpos, len);
+	if (ret) {
+		com_err(whoami, ret, "while punching hole in "
+			"(%"PRIu64", %u) in refcount tree %"PRIu64,
+			p_cpos, len, tree->rf_blkno);
+		goto out;
+	}
+
+	/* re-read the root blkno since we may have changed it somehow. */
+	ret = ocfs2_read_refcount_block(ost->ost_fs,
+					tree->rf_blkno, tree->root_buf);
+
+out:
+	return ret;
+}
+
+static errcode_t o2fsck_change_refcount(o2fsck_state *ost,
+					struct refcount_tree *tree,
+					uint64_t p_cpos, uint32_t len,
+					uint32_t refcount)
+{
+	errcode_t ret;
+
+	ret = ocfs2_change_refcount(ost->ost_fs, tree->rf_blkno,
+				    p_cpos, len, refcount);
+	if (ret) {
+		com_err(whoami, ret, "while changing refcount in "
+			"(%"PRIu64", %u) in refcount tree %"PRIu64" to %u",
+			p_cpos, len, tree->rf_blkno, refcount);
+		goto out;
+	}
+
+	/* re-read the root blkno since we may have changed it somehow. */
+	ret = ocfs2_read_refcount_block(ost->ost_fs,
+					tree->rf_blkno, tree->root_buf);
+
+out:
+	return ret;
+
+}
+
+
+static errcode_t o2fsck_check_clusters_in_refcount(o2fsck_state *ost,
+						   struct refcount_tree *tree,
+						   uint64_t p_cpos,
+						   uint32_t clusters,
+						   uint32_t refcount)
+{
+	errcode_t ret = 0;
+	uint32_t rec_len;
+	struct ocfs2_refcount_rec tmp_rec, *rec = &tree->rec;
+	uint64_t cpos, range;
+	unsigned int len;
+	int index;
+
+	if (!clusters)
+		return 0;
+
+	/*
+	 * the previous check ended at tree->p_cend, and now we get
+	 * p_cpos, so any refcount record between p_cend and p_cpos
+	 * should be considered as redundant.
+	 */
+	cpos = tree->p_cend;
+	range = p_cpos - cpos;
+	while (range) {
+		len = range > UINT_MAX ? UINT_MAX : range;
+
+		ret = ocfs2_get_refcount_rec(ost->ost_fs, tree->root_buf,
+				     cpos, len, &tmp_rec,
+				     &index, tree->leaf_buf);
+		if (ret) {
+			com_err(whoami, ret, "while getting refcount rec at "
+				"%"PRIu64" in tree %"PRIu64,
+				cpos, tree->rf_blkno);
+			goto out;
+		}
+
+		if ((tmp_rec.r_refcount || tmp_rec.r_clusters != len) &&
+		    prompt(ost, PY, PR_REFCOUNT_REC_REDUNDANT,
+			   "refcount records among clusters (%"PRIu64
+			   ", %u) are found with no physical clusters "
+			   "corresponding to them. Remove them?", cpos, len)) {
+			ret = o2fsck_refcount_punch_hole(ost, tree, cpos, len);
+			if (ret) {
+				com_err(whoami, ret, "while punching "
+					"hole in (%"PRIu64", %u) in refcount "
+					"tree %"PRIu64,
+					cpos, len, tree->rf_blkno);
+				goto out;
+			}
+
+			/*
+			 * We have punched a hole at (cpos, len),
+			 * so fix the recorded index and r_offset in the tree
+			 * if we have read them.
+			 */
+			if (tree->index != -1) {
+				if (cpos + len >=
+				    rec->r_cpos + rec->r_clusters) {
+					tree->r_offset = 0;
+					tree->index = -1;
+				} else
+					tree->r_offset =
+						cpos + len - rec->r_cpos;
+			}
+		}
+
+		cpos += len;
+		range -= len;
+	}
+
+again:
+	if (tree->index == -1) {
+		ret = ocfs2_get_refcount_rec(ost->ost_fs, tree->root_buf,
+					     p_cpos, clusters, rec,
+					     &tree->index, tree->leaf_buf);
+		if (ret) {
+			com_err(whoami, ret, "while getting refcount rec at "
+				"%"PRIu64" in tree %"PRIu64,
+				p_cpos, tree->rf_blkno);
+			goto out;
+		}
+	}
+
+	/*
+	 * Actually ocfs2_get_refcount_rec will fake some refcount record
+	 * in case it can't find p_cpos in the refcount tree. So we really
+	 * shouldn't meet with a case rec->r_cpos + tree->r_offset > p_cpos.
+	 * And as for rec->r_cpos + tree->r_offset < p_cpos, it should already
+	 * be resolved by the previous punch_hole.
+	 */
+	assert(rec->r_cpos + tree->r_offset == p_cpos);
+
+	rec_len = ocfs2_min(p_cpos + clusters,
+			    (uint64_t)rec->r_cpos + rec->r_clusters) - p_cpos;
+	if (rec->r_refcount != refcount) {
+		if (prompt(ost, PY, PR_REFCOUNT_COUNT_INVALID,
+			   "clusters %"PRIu64 " with len %u have %u refcount "
+			   "while there are %u files point to them. "
+			   "Correct the refcount value?",
+			   p_cpos, rec_len, rec->r_refcount, refcount)) {
+			ret = o2fsck_change_refcount(ost, tree,
+						     p_cpos, rec_len, refcount);
+			if (ret) {
+				com_err(whoami, ret, "while updating refcount "
+					"%u at %"PRIu64" len %u in tree "
+					"%"PRIu64, refcount, p_cpos,
+					rec_len, tree->rf_blkno);
+				goto out;
+			}
+		} else {
+			/*
+			 * XXX:
+			 * Do we need to ask user for adding them to dup?
+			 */
+			o2fsck_mark_clusters_allocated(ost, p_cpos, rec_len);
+			ret = o2fsck_refcount_punch_hole(ost, tree,
+							 p_cpos, rec_len);
+			if (ret) {
+				com_err(whoami, ret, "while punching "
+					"hole at %"PRIu64"in refcount "
+					"tree %"PRIu64, p_cpos,
+					tree->rf_blkno);
+				goto out;
+			}
+
+			ret = o2fsck_clear_refcount(ost, tree, p_cpos, rec_len);
+			if (ret) {
+				com_err(whoami, ret,
+					"while clearing refcount for "
+					"cluster %"PRIu64" len %u in %"PRIu64,
+					p_cpos, rec_len, tree->rf_blkno);
+				goto out;
+			}
+		}
+	}
+
+	if (p_cpos + clusters <= rec->r_cpos + rec->r_clusters) {
+		/* we have finished checking (p_cpos, clusters). */
+		if (p_cpos + clusters < rec->r_cpos + rec->r_clusters) {
+			/*
+			 * this refcount_rec still have more clusters.
+			 * record the end of the r_clusters.
+			 * the next (p_cpos, clusters) will still use
+			 * this refcount_rec.
+			 */
+			tree->r_offset = p_cpos + clusters - rec->r_cpos;
+		} else {
+			tree->index = -1;
+			tree->r_offset = 0 ;
+		}
+		goto out;
+	}
+
+	/*
+	 * now we have finished checking current refcount_rec,
+	 * p_cpos + clusters > rec->r_cpos + rec->r_clusters,
+	 * need to read next refcount_rec.
+	 */
+	clusters += p_cpos;
+	p_cpos = rec->r_cpos + rec->r_clusters;
+	clusters -= p_cpos;
+	tree->r_offset = 0;
+	tree->index = -1;
+	goto again;
+
+out:
+	tree->p_cend = p_cpos + clusters;
+	return ret;
+}
+
+/*
+ * Given a refcount tree, check the refcounted clusters and their refcount.
+ */
+static errcode_t o2fsck_check_refcount(o2fsck_state *ost,
+				       struct refcount_tree *tree)
+{
+	int val;
+	errcode_t ret;
+	uint64_t p_cpos, p_cpos_checked = 0;
+	uint32_t clusters, refcount;
+	uint64_t chk_p_cpos, orig_p_cpos;
+	uint32_t chk_clusters, orig_clusters;
+
+	ret = ocfs2_malloc_block(ost->ost_fs->fs_io, &tree->root_buf);
+	if (ret) {
+		com_err(whoami, ret, "while allocating a block-sized buffer "
+			"for a refcount block");
+		goto out;
+	}
+
+	ret = ocfs2_malloc_block(ost->ost_fs->fs_io, &tree->leaf_buf);
+	if (ret) {
+		com_err(whoami, ret, "while allocating a block-sized buffer "
+			"for a refcount block");
+		goto out;
+	}
+
+	ret = ocfs2_read_refcount_block(ost->ost_fs,
+					tree->rf_blkno, tree->root_buf);
+	if (ret) {
+		com_err(whoami, ret, "reading root refcount block at %"PRIu64,
+			tree->rf_blkno);
+		goto out;
+	}
+
+	tree->index = -1;
+	tree->r_offset = 0;
+
+	while (get_refcounted_extent(tree, &orig_p_cpos,
+				     &orig_clusters, &refcount)) {
+		o2fsck_mark_clusters_allocated(ost, orig_p_cpos, orig_clusters);
+
+		chk_p_cpos = orig_p_cpos;
+		chk_clusters = orig_clusters;
+
+		while (chk_clusters) {
+			p_cpos = chk_p_cpos;
+			clusters = chk_clusters;
+			if (ost->ost_duplicate_clusters) {
+				while (clusters) {
+					ocfs2_bitmap_test(
+						ost->ost_duplicate_clusters,
+						p_cpos, &val);
+					if (val)
+						break;
+
+					clusters--;
+					p_cpos++;
+				}
+				/*
+				 * If there is a cluster which is already set
+				 * by other owner, p_cpos now points to it.
+				 * Otherwise p_cpos points to the end.
+				 *
+				 * So we check the refcounted clusters
+				 * [chk_p_cpos, p_cpos) and then
+				 * punch a hole in refcount tree at p_cpos.
+				 */
+				clusters = p_cpos - chk_p_cpos;
+			}
+
+			ret = o2fsck_check_clusters_in_refcount(ost,
+								tree,
+								chk_p_cpos,
+								clusters,
+								refcount);
+			if (ret) {
+				com_err(whoami, ret, "while checking "
+					"refcounted clusters");
+				goto out;
+			}
+			p_cpos_checked = chk_p_cpos + clusters;
+
+			if (chk_clusters > clusters) {
+				/*
+				 * We haven't finished our check and the reason
+				 * is that p_cpos is set in dup_clusters, so
+				 * punch a hole, clear the refcount flag for
+				 * p_cpos and continue our check.
+				 */
+				ret = o2fsck_refcount_punch_hole(ost, tree,
+							p_cpos_checked, 1);
+				if (ret) {
+					com_err(whoami, ret, "while punching "
+						"hole at %"PRIu64"in refcount "
+						"tree %"PRIu64, p_cpos_checked,
+						tree->rf_blkno);
+					goto out;
+				}
+				ret = o2fsck_clear_refcount(ost, tree,
+							    p_cpos_checked, 1);
+				if (ret) {
+					com_err(whoami, ret,
+						"while clearing refcount for "
+						"cluster %"PRIu64" in %"PRIu64,
+						p_cpos_checked, tree->rf_blkno);
+					goto out;
+				}
+				p_cpos_checked++;
+			}
+
+			chk_clusters = chk_p_cpos + chk_clusters -
+					p_cpos_checked;
+			chk_p_cpos = p_cpos_checked;
+		}
+		remove_refcounted_extent(tree, orig_p_cpos, orig_clusters);
+	}
+
+	/*
+	 * Remove all the refcount recs past p_cpos_checked from the tree since
+	 * there is no corresponding refcounted clusters.
+	 */
+	ret = ocfs2_refcount_punch_hole(ost->ost_fs, tree->rf_blkno,
+					p_cpos_checked,
+					UINT_MAX - p_cpos_checked);
+	if (ret)
+		com_err(whoami, ret, "while deleting redundant refcount rec");
+out:
+	if (tree->root_buf)
+		ocfs2_free(&tree->root_buf);
+	if (tree->leaf_buf)
+		ocfs2_free(&tree->leaf_buf);
+	return ret;
+}
+
+errcode_t o2fsck_check_refcounted_clusters(o2fsck_state *ost)
+{
+	errcode_t ret = 0;
+	struct refcount_tree *tree;
+	struct rb_node *node;
+	struct list_head *p, *next;
+	struct refcount_file *file;
+
+	if (!ocfs2_refcount_tree(OCFS2_RAW_SB(ost->ost_fs->fs_super)))
+		return 0;
+
+	while ((node = rb_first(&ost->ost_refcount_trees)) != NULL) {
+		tree = rb_entry(node, struct refcount_tree, ref_node);
+
+		if (tree->is_valid) {
+			ret = o2fsck_check_refcount(ost, tree);
+			if (ret)
+				goto out;
+		}
+
+		list_for_each_safe(p, next, &tree->files_list) {
+			file = list_entry(p, struct refcount_file, list);
+			node = rb_first(&file->ref_extents);
+			assert(!node);
+			list_del(&file->list);
+			ocfs2_free(&file);
+		}
+		rb_erase(&tree->ref_node, &ost->ost_refcount_trees);
+		ocfs2_free(&tree);
+	}
+out:
+	return ret;
+}
diff --git a/include/ocfs2/ocfs2.h b/include/ocfs2/ocfs2.h
index 2f24edd..65e10e7 100644
--- a/include/ocfs2/ocfs2.h
+++ b/include/ocfs2/ocfs2.h
@@ -439,6 +439,12 @@ errcode_t ocfs2_refcount_punch_hole(ocfs2_filesys *fs, uint64_t rf_blkno,
 errcode_t ocfs2_change_refcount(ocfs2_filesys *fs, uint64_t rf_blkno,
 				uint64_t p_start, uint32_t len,
 				uint32_t refcount);
+int ocfs2_get_refcount_rec(ocfs2_filesys *fs,
+			   char *ref_root_buf,
+			   uint64_t cpos, unsigned int len,
+			   struct ocfs2_refcount_rec *ret_rec,
+			   int *index,
+			   char *ret_buf);
 errcode_t ocfs2_swap_dir_entries_from_cpu(void *buf, uint64_t bytes);
 errcode_t ocfs2_swap_dir_entries_to_cpu(void *buf, uint64_t bytes);
 void ocfs2_swap_dir_trailer(struct ocfs2_dir_block_trailer *trailer);
diff --git a/libocfs2/refcount.c b/libocfs2/refcount.c
index d8fe16e..58afe32 100644
--- a/libocfs2/refcount.c
+++ b/libocfs2/refcount.c
@@ -280,12 +280,12 @@ out:
  *    and end at a small value between cpos+len and start of the next record.
  *    This fake record has r_refcount = 0.
  */
-static int ocfs2_get_refcount_rec(ocfs2_filesys *fs,
-				  char *ref_root_buf,
-				  uint64_t cpos, unsigned int len,
-				  struct ocfs2_refcount_rec *ret_rec,
-				  int *index,
-				  char *ret_buf)
+int ocfs2_get_refcount_rec(ocfs2_filesys *fs,
+			   char *ref_root_buf,
+			   uint64_t cpos, unsigned int len,
+			   struct ocfs2_refcount_rec *ret_rec,
+			   int *index,
+			   char *ret_buf)
 {
 	int ret = 0, i, found;
 	uint32_t low_cpos;
-- 
1.5.5




More information about the Ocfs2-tools-devel mailing list