[Ocfs2-tools-devel] [PATCH 39/50] fsck.ocfs2: Check refcount of clusters.
Tao Ma
tao.ma at oracle.com
Mon Jan 11 07:31:25 PST 2010
As we have added refcounted clusters into the rb-tree,
we can now rebuild the refcount tree according to
these results.
Signed-off-by: Tao Ma <tao.ma at oracle.com>
---
fsck.ocfs2/fsck.ocfs2.checks.8.in | 12 +
fsck.ocfs2/include/refcount.h | 1 +
fsck.ocfs2/pass1.c | 3 +
fsck.ocfs2/refcount.c | 550 +++++++++++++++++++++++++++++++++++++
include/ocfs2/ocfs2.h | 6 +
libocfs2/refcount.c | 12 +-
6 files changed, 578 insertions(+), 6 deletions(-)
diff --git a/fsck.ocfs2/fsck.ocfs2.checks.8.in b/fsck.ocfs2/fsck.ocfs2.checks.8.in
index b1e7013..47e1873 100644
--- a/fsck.ocfs2/fsck.ocfs2.checks.8.in
+++ b/fsck.ocfs2/fsck.ocfs2.checks.8.in
@@ -657,6 +657,18 @@ refering an invalid refcount block.
Answering yes remove this refcount block and clear refcount flag from this file.
+.SS "REFCOUNT_REC_REDUNDANT"
+A refcount record is used to store the refcount for physical clusters. A
+refcount record was found that has no physical clusters corresponding to it.
+
+Answering yes removes the refcount record.
+
+.SS "REFCOUNT_COUNT_INVALID"
+A refcount record is used to store the refcount for physical clusters. A
+record was found which claims the wrong refcount for some physical clusters.
+
+Answering yes updates the corresponding refcount record.
+
\" pass1b.c
.SS "DUP_CLUSTERS_SYSFILE_CLONE"
diff --git a/fsck.ocfs2/include/refcount.h b/fsck.ocfs2/include/refcount.h
index 4e4f901..b44dd95 100644
--- a/fsck.ocfs2/include/refcount.h
+++ b/fsck.ocfs2/include/refcount.h
@@ -28,5 +28,6 @@ errcode_t o2fsck_mark_clusters_refcounted(o2fsck_state *ost,
uint64_t p_cpos,
uint32_t clusters,
uint32_t v_cpos);
+errcode_t o2fsck_check_refcounted_clusters(o2fsck_state *ost);
#endif /* __O2FSCK_REFCOUNT_H__ */
diff --git a/fsck.ocfs2/pass1.c b/fsck.ocfs2/pass1.c
index e605aca..5e16a01 100644
--- a/fsck.ocfs2/pass1.c
+++ b/fsck.ocfs2/pass1.c
@@ -1436,6 +1436,9 @@ errcode_t o2fsck_pass1(o2fsck_state *ost)
mark_local_allocs(ost);
mark_truncate_logs(ost);
+ ret = o2fsck_check_refcounted_clusters(ost);
+ if (ret)
+ com_err(whoami, ret, "while checking refcounted clusters");
write_cluster_alloc(ost);
write_inode_alloc(ost);
diff --git a/fsck.ocfs2/refcount.c b/fsck.ocfs2/refcount.c
index 62dfdce..db7606d 100644
--- a/fsck.ocfs2/refcount.c
+++ b/fsck.ocfs2/refcount.c
@@ -55,6 +55,18 @@ struct refcount_tree {
uint64_t rf_blkno;
struct list_head files_list;
int is_valid;
+ char *root_buf;
+ /*
+ * leaf_buf, rec and index point to the refcount record we are checking.
+ * r_offset is the offset within that record we have already checked.
+ */
+ char *leaf_buf;
+ struct ocfs2_refcount_rec rec;
+ int index;
+ int r_offset;
+
+ /* the position we have checked against this tree. */
+ uint64_t p_cend;
};
static errcode_t check_rb(o2fsck_state *ost, uint64_t blkno,
@@ -474,3 +486,541 @@ add_clusters:
refcount_extent_insert(file, extent);
return 0;
}
+
+/*
+ * Look at the first recorded extent of every file on tree->files_list and
+ * work out the lowest starting cluster among them, the number of clusters
+ * from that start that can be handled as one run, and how many files have
+ * their first extent starting exactly there.
+ *
+ * Returns 0 when no file on the list has an extent left. Otherwise returns
+ * 1 and stores the result in *p_cpos, *p_clusters and *p_refcount.
+ *
+ * Nothing is removed here: the file list and the per-file extent trees are
+ * only read.
+ */
+static int get_refcounted_extent(struct refcount_tree *tree,
+ uint64_t *p_cpos,
+ uint32_t *p_clusters,
+ uint32_t *p_refcount)
+{
+ struct refcount_extent *extent;
+ struct refcount_file *file;
+ struct list_head *p, *next;
+ struct rb_node *node;
+ uint64_t cpos = UINT64_MAX;
+ uint32_t clusters, refcount;
+ int found = 0;
+
+ list_for_each_safe(p, next, &tree->files_list) {
+ file = list_entry(p, struct refcount_file, list);
+ node = rb_first(&file->ref_extents);
+
+ /*
+ * If the file has no extent, go to next file.
+ * XXX: We can improve it here by removing the empty file.
+ */
+ if (!node)
+ continue;
+
+ found = 1;
+
+ extent = rb_entry(node, struct refcount_extent, ext_node);
+ if (extent->p_cpos < cpos) {
+ /*
+ * New lowest start: the run is this extent, cut short
+ * where the previous lowest start began. The count of
+ * files restarts at 1.
+ */
+ clusters = cpos - extent->p_cpos < extent->clusters ?
+ cpos - extent->p_cpos : extent->clusters;
+ cpos = extent->p_cpos;
+ refcount = 1;
+ } else if (extent->p_cpos == cpos) {
+ /*
+ * Same start as the current lowest: keep the shorter
+ * length and count one more file.
+ */
+ clusters = clusters < extent->clusters ?
+ clusters : extent->clusters;
+ refcount++;
+ } else if (extent->p_cpos < cpos + clusters) {
+ /*
+ * extent->p_cpos > cpos but inside the current run:
+ * end the run where this extent begins.
+ */
+ clusters = extent->p_cpos - cpos;
+ }
+ }
+
+ if (!found)
+ return 0;
+
+ *p_cpos = cpos;
+ *p_clusters = clusters;
+ *p_refcount = refcount;
+
+ return 1;
+}
+
+/*
+ * Drop the cluster run (cpos, clusters) from the front of every file on
+ * tree->files_list whose first recorded extent overlaps it.
+ *
+ * An extent that ends exactly at cpos + clusters is erased from the file's
+ * rb-tree and freed; a longer one is shortened so that it starts at
+ * cpos + clusters. Files with no extent, or whose first extent starts at or
+ * after cpos + clusters, are left alone.
+ */
+static void remove_refcounted_extent(struct refcount_tree *tree,
+ uint64_t cpos,
+ uint32_t clusters)
+{
+ struct refcount_extent *extent;
+ struct refcount_file *file;
+ struct list_head *p, *next;
+ struct rb_node *node;
+
+ /* Remove the tuple from the refcounted file. */
+ list_for_each_safe(p, next, &tree->files_list) {
+ file = list_entry(p, struct refcount_file, list);
+ node = rb_first(&file->ref_extents);
+
+ /* If the file has no extent, go to next file. */
+ if (!node)
+ continue;
+
+ extent = rb_entry(node, struct refcount_extent, ext_node);
+ /* No first extent may start before the run being removed. */
+ assert(extent->p_cpos >= cpos);
+
+ /* This file's first extent lies entirely past the run. */
+ if (cpos + clusters <= extent->p_cpos)
+ continue;
+
+ /* An overlapping extent must reach at least the end of the run. */
+ assert(extent->p_cpos + extent->clusters >= cpos + clusters);
+
+ if (cpos + clusters == extent->p_cpos + extent->clusters) {
+ /* Fully consumed: unlink and free it. */
+ rb_erase(&extent->ext_node, &file->ref_extents);
+ ocfs2_free(&extent);
+ } else {
+ /*
+ * Partly consumed: keep the tail. The length is
+ * computed first because it uses the old p_cpos.
+ */
+ extent->clusters =
+ (extent->p_cpos + extent->clusters) -
+ (cpos + clusters);
+ extent->p_cpos = cpos + clusters;
+ }
+ }
+}
+
+/*
+ * Walk all the files on tree->files_list and, for each one whose first
+ * recorded extent overlaps the physical range (p_cpos, len), call
+ * ocfs2_change_refcount_flag() on the matching virtual range with
+ * OCFS2_EXT_REFCOUNTED as the last argument (presumably the flag to clear,
+ * going by this function's name - confirm against libocfs2).
+ *
+ * Returns 0 if every call succeeded. On the first failure the error is
+ * reported with com_err, the walk stops, and that error is returned.
+ */
+static errcode_t o2fsck_clear_refcount(o2fsck_state *ost,
+ struct refcount_tree *tree,
+ uint64_t p_cpos,
+ uint32_t len)
+{
+ errcode_t ret = 0;
+ struct refcount_extent *extent;
+ struct refcount_file *file;
+ struct list_head *p, *next;
+ struct rb_node *node;
+ uint32_t v_start;
+
+ list_for_each_safe(p, next, &tree->files_list) {
+ file = list_entry(p, struct refcount_file, list);
+ node = rb_first(&file->ref_extents);
+
+ /* If the file has no extent, go to next file. */
+ if (!node)
+ continue;
+
+ extent = rb_entry(node, struct refcount_extent, ext_node);
+ assert(extent->p_cpos >= p_cpos);
+
+ /* This file's first extent lies entirely past the range. */
+ if (p_cpos + len <= extent->p_cpos)
+ continue;
+
+ assert(extent->p_cpos + extent->clusters >= p_cpos + len);
+
+ /*
+ * Translate the physical start into this file's virtual
+ * offset using the extent's (p_cpos, v_cpos) pair.
+ */
+ v_start = p_cpos - extent->p_cpos + extent->v_cpos;
+
+ ret = ocfs2_change_refcount_flag(ost->ost_fs, file->i_blkno,
+ v_start, len, p_cpos,
+ 0, OCFS2_EXT_REFCOUNTED);
+ if (ret) {
+ com_err(whoami, ret, "while clearing refcount flag at "
+ "%u in file %"PRIu64, v_start, file->i_blkno);
+ break;
+ }
+ }
+
+ return ret;
+}
+
+/*
+ * o2fsck_refcount_punch_hole and o2fsck_change_refcount are just wrappers
+ * for the corresponding libocfs2 functions with one addition: after a
+ * successful call they re-read the root refcount block into tree->root_buf,
+ * since the operation may have changed the tree.
+ *
+ * This one punches (p_cpos, len) out of the refcount tree rooted at
+ * tree->rf_blkno. A failure of the libocfs2 call is reported with com_err
+ * and returned; otherwise the result of the re-read is returned.
+ */
+static errcode_t o2fsck_refcount_punch_hole(o2fsck_state *ost,
+ struct refcount_tree *tree,
+ uint64_t p_cpos, uint32_t len)
+{
+ errcode_t ret;
+
+ ret = ocfs2_refcount_punch_hole(ost->ost_fs, tree->rf_blkno,
+ p_cpos, len);
+ if (ret) {
+ com_err(whoami, ret, "while punching hole in "
+ "(%"PRIu64", %u) in refcount tree %"PRIu64,
+ p_cpos, len, tree->rf_blkno);
+ goto out;
+ }
+
+ /*
+ * Re-read the root block since we may have changed it somehow.
+ * A failure here is returned without a com_err message.
+ */
+ ret = ocfs2_read_refcount_block(ost->ost_fs,
+ tree->rf_blkno, tree->root_buf);
+
+out:
+ return ret;
+}
+
+/*
+ * Wrapper around ocfs2_change_refcount(): set the refcount of the range
+ * (p_cpos, len) in the tree rooted at tree->rf_blkno to "refcount", then
+ * re-read the root refcount block into tree->root_buf.
+ *
+ * A failure of the libocfs2 call is reported with com_err and returned;
+ * otherwise the result of the re-read is returned.
+ */
+static errcode_t o2fsck_change_refcount(o2fsck_state *ost,
+ struct refcount_tree *tree,
+ uint64_t p_cpos, uint32_t len,
+ uint32_t refcount)
+{
+ errcode_t ret;
+
+ ret = ocfs2_change_refcount(ost->ost_fs, tree->rf_blkno,
+ p_cpos, len, refcount);
+ if (ret) {
+ com_err(whoami, ret, "while changing refcount in "
+ "(%"PRIu64", %u) in refcount tree %"PRIu64" to %u",
+ p_cpos, len, tree->rf_blkno, refcount);
+ goto out;
+ }
+
+ /*
+ * Re-read the root block since we may have changed it somehow.
+ * A failure here is returned without a com_err message.
+ */
+ ret = ocfs2_read_refcount_block(ost->ost_fs,
+ tree->rf_blkno, tree->root_buf);
+
+out:
+ return ret;
+
+}
+
+
+/*
+ * Compare the refcount tree against one run of refcounted clusters.
+ *
+ * (p_cpos, clusters) is a run of physical clusters that "refcount" files
+ * point to. Two things are checked:
+ *  - refcount records between tree->p_cend (where the previous call
+ *    stopped) and p_cpos have no clusters behind them, so the user is
+ *    offered their removal;
+ *  - every record covering part of the run must carry r_refcount equal to
+ *    "refcount". On a mismatch the user may have the record corrected;
+ *    if that is declined the clusters are marked allocated, the record is
+ *    punched out and the refcount flag is cleared on the files.
+ *
+ * tree->rec, tree->index and tree->r_offset cache the record in use between
+ * calls (index == -1 means nothing is cached). tree->p_cend is updated on
+ * the way out, including on error paths.
+ *
+ * Returns 0 on success, or immediately when clusters is 0; otherwise the
+ * first error, which has already been reported with com_err.
+ */
+static errcode_t o2fsck_check_clusters_in_refcount(o2fsck_state *ost,
+ struct refcount_tree *tree,
+ uint64_t p_cpos,
+ uint32_t clusters,
+ uint32_t refcount)
+{
+ errcode_t ret = 0;
+ uint32_t rec_len;
+ struct ocfs2_refcount_rec tmp_rec, *rec = &tree->rec;
+ uint64_t cpos, range;
+ unsigned int len;
+ int index;
+
+ if (!clusters)
+ return 0;
+
+ /*
+ * the previous check ended at tree->p_cend, and now we get
+ * p_cpos, so any refcount record between p_cend and p_cpos
+ * should be considered as redundant.
+ */
+ cpos = tree->p_cend;
+ range = p_cpos - cpos;
+ while (range) {
+ /* The lookup takes an unsigned int length, so go in chunks. */
+ len = range > UINT_MAX ? UINT_MAX : range;
+
+ ret = ocfs2_get_refcount_rec(ost->ost_fs, tree->root_buf,
+ cpos, len, &tmp_rec,
+ &index, tree->leaf_buf);
+ if (ret) {
+ com_err(whoami, ret, "while getting refcount rec at "
+ "%"PRIu64" in tree %"PRIu64,
+ cpos, tree->rf_blkno);
+ goto out;
+ }
+
+ /*
+ * Only a record with refcount 0 spanning the whole chunk
+ * counts as "nothing recorded here".
+ */
+ if ((tmp_rec.r_refcount || tmp_rec.r_clusters != len) &&
+ prompt(ost, PY, PR_REFCOUNT_REC_REDUNDANT,
+ "refcount records among clusters (%"PRIu64
+ ", %u) are found with no physical clusters "
+ "corresponding to them. Remove them?", cpos, len)) {
+ ret = o2fsck_refcount_punch_hole(ost, tree, cpos, len);
+ if (ret) {
+ com_err(whoami, ret, "while punching "
+ "hole in (%"PRIu64", %u) in refcount "
+ "tree %"PRIu64,
+ cpos, len, tree->rf_blkno);
+ goto out;
+ }
+
+ /*
+ * We have punched a hole at (cpos, len),
+ * so fix the recorded index and r_offset in the tree
+ * if we have read them.
+ */
+ if (tree->index != -1) {
+ if (cpos + len >=
+ rec->r_cpos + rec->r_clusters) {
+ tree->r_offset = 0;
+ tree->index = -1;
+ } else
+ tree->r_offset =
+ cpos + len - rec->r_cpos;
+ }
+ }
+
+ cpos += len;
+ range -= len;
+ }
+
+again:
+ /* Nothing cached: look up the record covering p_cpos. */
+ if (tree->index == -1) {
+ ret = ocfs2_get_refcount_rec(ost->ost_fs, tree->root_buf,
+ p_cpos, clusters, rec,
+ &tree->index, tree->leaf_buf);
+ if (ret) {
+ com_err(whoami, ret, "while getting refcount rec at "
+ "%"PRIu64" in tree %"PRIu64,
+ p_cpos, tree->rf_blkno);
+ goto out;
+ }
+ }
+
+ /*
+ * Actually ocfs2_get_refcount_rec will fake some refcount record
+ * in case it can't find p_cpos in the refcount tree. So we really
+ * shouldn't meet with a case rec->r_cpos + tree->r_offset > p_cpos.
+ * And as for rec->r_cpos + tree->r_offset < p_cpos, it should already
+ * be resolved by the previous punch_hole.
+ *
+ * NOTE(review): if the "Remove them?" prompt above was declined while
+ * a record was cached, r_offset is not advanced and this assert looks
+ * like it could fire - confirm.
+ */
+ assert(rec->r_cpos + tree->r_offset == p_cpos);
+
+ /* Part of (p_cpos, clusters) that the current record covers. */
+ rec_len = ocfs2_min(p_cpos + clusters,
+ (uint64_t)rec->r_cpos + rec->r_clusters) - p_cpos;
+ if (rec->r_refcount != refcount) {
+ if (prompt(ost, PY, PR_REFCOUNT_COUNT_INVALID,
+ "clusters %"PRIu64 " with len %u have %u refcount "
+ "while there are %u files point to them. "
+ "Correct the refcount value?",
+ p_cpos, rec_len, rec->r_refcount, refcount)) {
+ ret = o2fsck_change_refcount(ost, tree,
+ p_cpos, rec_len, refcount);
+ if (ret) {
+ com_err(whoami, ret, "while updating refcount "
+ "%u at %"PRIu64" len %u in tree "
+ "%"PRIu64, refcount, p_cpos,
+ rec_len, tree->rf_blkno);
+ goto out;
+ }
+ } else {
+ /*
+ * XXX:
+ * Do we need to ask user for adding them to dup?
+ */
+ o2fsck_mark_clusters_allocated(ost, p_cpos, rec_len);
+ ret = o2fsck_refcount_punch_hole(ost, tree,
+ p_cpos, rec_len);
+ if (ret) {
+ com_err(whoami, ret, "while punching "
+ "hole at %"PRIu64" in refcount "
+ "tree %"PRIu64, p_cpos,
+ tree->rf_blkno);
+ goto out;
+ }
+
+ ret = o2fsck_clear_refcount(ost, tree, p_cpos, rec_len);
+ if (ret) {
+ com_err(whoami, ret,
+ "while clearing refcount for "
+ "cluster %"PRIu64" len %u in %"PRIu64,
+ p_cpos, rec_len, tree->rf_blkno);
+ goto out;
+ }
+ }
+ }
+
+ if (p_cpos + clusters <= rec->r_cpos + rec->r_clusters) {
+ /* we have finished checking (p_cpos, clusters). */
+ if (p_cpos + clusters < rec->r_cpos + rec->r_clusters) {
+ /*
+ * this refcount_rec still have more clusters.
+ * record the end of the r_clusters.
+ * the next (p_cpos, clusters) will still use
+ * this refcount_rec.
+ */
+ tree->r_offset = p_cpos + clusters - rec->r_cpos;
+ } else {
+ tree->index = -1;
+ tree->r_offset = 0 ;
+ }
+ goto out;
+ }
+
+ /*
+ * now we have finished checking current refcount_rec,
+ * p_cpos + clusters > rec->r_cpos + rec->r_clusters,
+ * need to read next refcount_rec.
+ * The three statements below move p_cpos to the end of this record
+ * while keeping p_cpos + clusters unchanged.
+ */
+ clusters += p_cpos;
+ p_cpos = rec->r_cpos + rec->r_clusters;
+ clusters -= p_cpos;
+ tree->r_offset = 0;
+ tree->index = -1;
+ goto again;
+
+out:
+ /* Remember where this call stopped for the next redundancy scan. */
+ tree->p_cend = p_cpos + clusters;
+ return ret;
+}
+
+/*
+ * Given a refcount tree, check the refcounted clusters and their refcount.
+ *
+ * Two block-sized buffers are allocated for the root and a leaf refcount
+ * block and freed again before returning. Runs of refcounted clusters are
+ * then taken from the files sharing the tree, lowest start first; each run
+ * is marked allocated, checked against the tree, and removed from the
+ * files' extent lists. A cluster already set in ost_duplicate_clusters is
+ * not checked: it is punched out of the refcount tree and its refcount flag
+ * is cleared instead.
+ *
+ * Finally every refcount record past the last checked cluster is removed.
+ *
+ * Returns 0 on success or the first error, reported with com_err.
+ */
+static errcode_t o2fsck_check_refcount(o2fsck_state *ost,
+ struct refcount_tree *tree)
+{
+ int val;
+ errcode_t ret;
+ uint64_t p_cpos, p_cpos_checked = 0;
+ uint32_t clusters, refcount;
+ uint64_t chk_p_cpos, orig_p_cpos;
+ uint32_t chk_clusters, orig_clusters;
+
+ ret = ocfs2_malloc_block(ost->ost_fs->fs_io, &tree->root_buf);
+ if (ret) {
+ com_err(whoami, ret, "while allocating a block-sized buffer "
+ "for a refcount block");
+ goto out;
+ }
+
+ ret = ocfs2_malloc_block(ost->ost_fs->fs_io, &tree->leaf_buf);
+ if (ret) {
+ com_err(whoami, ret, "while allocating a block-sized buffer "
+ "for a refcount block");
+ goto out;
+ }
+
+ ret = ocfs2_read_refcount_block(ost->ost_fs,
+ tree->rf_blkno, tree->root_buf);
+ if (ret) {
+ com_err(whoami, ret, "reading root refcount block at %"PRIu64,
+ tree->rf_blkno);
+ goto out;
+ }
+
+ /* No refcount record is cached yet. */
+ tree->index = -1;
+ tree->r_offset = 0;
+
+ while (get_refcounted_extent(tree, &orig_p_cpos,
+ &orig_clusters, &refcount)) {
+ o2fsck_mark_clusters_allocated(ost, orig_p_cpos, orig_clusters);
+
+ chk_p_cpos = orig_p_cpos;
+ chk_clusters = orig_clusters;
+
+ while (chk_clusters) {
+ p_cpos = chk_p_cpos;
+ clusters = chk_clusters;
+ if (ost->ost_duplicate_clusters) {
+ while (clusters) {
+ /*
+ * Start from "not duplicated" so that
+ * a failed lookup cannot leave val
+ * unset or stale.
+ */
+ val = 0;
+ ocfs2_bitmap_test(
+ ost->ost_duplicate_clusters,
+ p_cpos, &val);
+ if (val)
+ break;
+
+ clusters--;
+ p_cpos++;
+ }
+ /*
+ * If there is a cluster which is already set
+ * by another owner, p_cpos now points to it.
+ * Otherwise p_cpos points to the end.
+ *
+ * So we check the refcounted clusters
+ * [chk_p_cpos, p_cpos) and then
+ * punch a hole in refcount tree at p_cpos.
+ */
+ clusters = p_cpos - chk_p_cpos;
+ }
+
+ ret = o2fsck_check_clusters_in_refcount(ost,
+ tree,
+ chk_p_cpos,
+ clusters,
+ refcount);
+ if (ret) {
+ com_err(whoami, ret, "while checking "
+ "refcounted clusters");
+ goto out;
+ }
+ p_cpos_checked = chk_p_cpos + clusters;
+
+ if (chk_clusters > clusters) {
+ /*
+ * We haven't finished our check and the reason
+ * is that p_cpos is set in dup_clusters, so
+ * punch a hole, clear the refcount flag for
+ * p_cpos and continue our check.
+ */
+ ret = o2fsck_refcount_punch_hole(ost, tree,
+ p_cpos_checked, 1);
+ if (ret) {
+ com_err(whoami, ret, "while punching "
+ "hole at %"PRIu64" in refcount "
+ "tree %"PRIu64, p_cpos_checked,
+ tree->rf_blkno);
+ goto out;
+ }
+ ret = o2fsck_clear_refcount(ost, tree,
+ p_cpos_checked, 1);
+ if (ret) {
+ com_err(whoami, ret,
+ "while clearing refcount for "
+ "cluster %"PRIu64" in %"PRIu64,
+ p_cpos_checked, tree->rf_blkno);
+ goto out;
+ }
+ /* Step over the duplicated cluster. */
+ p_cpos_checked++;
+ }
+
+ /* Whatever lies past p_cpos_checked is still to do. */
+ chk_clusters = chk_p_cpos + chk_clusters -
+ p_cpos_checked;
+ chk_p_cpos = p_cpos_checked;
+ }
+ remove_refcounted_extent(tree, orig_p_cpos, orig_clusters);
+ }
+
+ /*
+ * Remove all the refcount recs past p_cpos_checked from the tree since
+ * there are no corresponding refcounted clusters.
+ */
+ ret = ocfs2_refcount_punch_hole(ost->ost_fs, tree->rf_blkno,
+ p_cpos_checked,
+ UINT_MAX - p_cpos_checked);
+ if (ret)
+ com_err(whoami, ret, "while deleting redundant refcount rec");
+out:
+ if (tree->root_buf)
+ ocfs2_free(&tree->root_buf);
+ if (tree->leaf_buf)
+ ocfs2_free(&tree->leaf_buf);
+ return ret;
+}
+
+/*
+ * Check every refcount tree recorded in ost->ost_refcount_trees, then free
+ * the bookkeeping for it.
+ *
+ * Does nothing and returns 0 when the superblock does not have the refcount
+ * tree feature. Otherwise each tree is taken from the rb-tree in turn: a
+ * tree with is_valid set is checked with o2fsck_check_refcount(), its file
+ * entries are unlinked and freed, and the tree itself is erased and freed.
+ *
+ * Returns 0 on success or the first error from o2fsck_check_refcount().
+ * NOTE(review): on that error path the function returns before freeing the
+ * failing tree or any tree still left in ost->ost_refcount_trees.
+ */
+errcode_t o2fsck_check_refcounted_clusters(o2fsck_state *ost)
+{
+ errcode_t ret = 0;
+ struct refcount_tree *tree;
+ struct rb_node *node;
+ struct list_head *p, *next;
+ struct refcount_file *file;
+
+ if (!ocfs2_refcount_tree(OCFS2_RAW_SB(ost->ost_fs->fs_super)))
+ return 0;
+
+ /* Each pass erases the first tree, so this drains the rb-tree. */
+ while ((node = rb_first(&ost->ost_refcount_trees)) != NULL) {
+ tree = rb_entry(node, struct refcount_tree, ref_node);
+
+ if (tree->is_valid) {
+ ret = o2fsck_check_refcount(ost, tree);
+ if (ret)
+ goto out;
+ }
+
+ list_for_each_safe(p, next, &tree->files_list) {
+ file = list_entry(p, struct refcount_file, list);
+ /* Every file is expected to have no extents left here. */
+ node = rb_first(&file->ref_extents);
+ assert(!node);
+ list_del(&file->list);
+ ocfs2_free(&file);
+ }
+ rb_erase(&tree->ref_node, &ost->ost_refcount_trees);
+ ocfs2_free(&tree);
+ }
+out:
+ return ret;
+}
diff --git a/include/ocfs2/ocfs2.h b/include/ocfs2/ocfs2.h
index 2f24edd..65e10e7 100644
--- a/include/ocfs2/ocfs2.h
+++ b/include/ocfs2/ocfs2.h
@@ -439,6 +439,12 @@ errcode_t ocfs2_refcount_punch_hole(ocfs2_filesys *fs, uint64_t rf_blkno,
errcode_t ocfs2_change_refcount(ocfs2_filesys *fs, uint64_t rf_blkno,
uint64_t p_start, uint32_t len,
uint32_t refcount);
+int ocfs2_get_refcount_rec(ocfs2_filesys *fs,
+ char *ref_root_buf,
+ uint64_t cpos, unsigned int len,
+ struct ocfs2_refcount_rec *ret_rec,
+ int *index,
+ char *ret_buf);
errcode_t ocfs2_swap_dir_entries_from_cpu(void *buf, uint64_t bytes);
errcode_t ocfs2_swap_dir_entries_to_cpu(void *buf, uint64_t bytes);
void ocfs2_swap_dir_trailer(struct ocfs2_dir_block_trailer *trailer);
diff --git a/libocfs2/refcount.c b/libocfs2/refcount.c
index d8fe16e..58afe32 100644
--- a/libocfs2/refcount.c
+++ b/libocfs2/refcount.c
@@ -280,12 +280,12 @@ out:
* and end at a small value between cpos+len and start of the next record.
* This fake record has r_refcount = 0.
*/
-static int ocfs2_get_refcount_rec(ocfs2_filesys *fs,
- char *ref_root_buf,
- uint64_t cpos, unsigned int len,
- struct ocfs2_refcount_rec *ret_rec,
- int *index,
- char *ret_buf)
+int ocfs2_get_refcount_rec(ocfs2_filesys *fs,
+ char *ref_root_buf,
+ uint64_t cpos, unsigned int len,
+ struct ocfs2_refcount_rec *ret_rec,
+ int *index,
+ char *ret_buf)
{
int ret = 0, i, found;
uint32_t low_cpos;
--
1.5.5
More information about the Ocfs2-tools-devel
mailing list