[Ocfs2-tools-devel] [PATCH 2/3] fsck.ocfs2: Add extent list check for discontig bg.

Tao Ma tao.ma at oracle.com
Wed Jul 21 00:31:37 PDT 2010


So we will do some basic sanity check for discontig block group.
The check includes:
1. l_tree_depth must be set to 0.
2. l_count must be set to ocfs2_extent_recs_per_gd.
3. if l_next_free_rec is broken and the extent record list looks
   sane, update l_next_free_rec.
4. if any extent record has an invalid e_cpos, drop the group.
5. if there is only one rec which has invalid e_leaf_clusters,
   update it according to other extent records.
6. if one extent record looks broken while all the others look
   sane and complete, remove it.

Signed-off-by: Tao Ma <tao.ma at oracle.com>
---
 fsck.ocfs2/fsck.ocfs2.checks.8.in |   59 +++++++++++++
 fsck.ocfs2/pass0.c                |  172 +++++++++++++++++++++++++++++++++++++
 2 files changed, 231 insertions(+), 0 deletions(-)

diff --git a/fsck.ocfs2/fsck.ocfs2.checks.8.in b/fsck.ocfs2/fsck.ocfs2.checks.8.in
index 5bea432..3855230 100644
--- a/fsck.ocfs2/fsck.ocfs2.checks.8.in
+++ b/fsck.ocfs2/fsck.ocfs2.checks.8.in
@@ -295,6 +295,65 @@ what was found by totalling up the group descriptors.
 Answering yes updates the c_total and c_free members of the header to reflect
 what was found in the group descriptors in the chain.
 
+.SS "DISCONTIG_BG_DEPTH"
+A discontiguous block group has an extent list which records all the clusters
+allocated to it. Only a tree depth of 0 is supported for that list, but a
+discontiguous block group was found whose tree depth is greater than 0.
+
+Answering yes updates the l_tree_depth of the extent list to 0.
+
+.SS "DISCONTIG_BG_COUNT"
+A discontiguous block group has an extent list which records all the clusters
+allocated to it. The total record number is stored in bg_list.l_count.
+It is found that the count has exceeded the limit of the group.
+
+Answering yes updates the l_count to the maximum extent record number for the
+extent list.
+
+.SS "DISCONTIG_BG_REC_RANGE"
+An extent record in a discontig block group was found which claims to reference
+a region of clusters which partially extends beyond the number of clusters
+in the volume.
+
+Answering yes will remove the whole discontig block group.
+
+.SS "DISCONTIG_BG_CORRUPT_LEAFS"
+More than one extent record in a discontig block group was found to have
+broken clusters allocated.
+
+Answering yes will remove the whole discontig block group.
+
+.SS "DISCONTIG_BG_CLUSTERS"
+Extent records in a discontig block group were found having more clusters
+allocated than a block group can handle.
+
+Answering yes will remove the whole discontig block group.
+
+.SS "DISCONTIG_BG_NEXT_FREE_REC"
+The l_next_free_rec field of a discontig block group was found broken and
+didn't indicate all the extent records in the group.
+
+Answering yes will update it with the real extent record number we have.
+
+.SS "DISCONTIG_BG_LIST_CORRUPT"
+We use an extent list to store allocated clusters in a discontig block group.
+Both l_next_free_rec and the total clusters in the extent list were found
+broken and we can't fix them in place.
+
+Answering yes will remove the whole discontig block group.
+
+.SS "DISCONTIG_BG_REMOVE_REC"
+One extent record in a discontig block group was found broken while
+others look sane.
+
+Answering yes will remove the corrupted extent record.
+
+.SS "DISCONTIG_BG_LEAF_CLUSTERS"
+One extent record in a discontig block group was found having more clusters
+allocated than a block group can handle.
+
+Answering yes will update it to the value calculated from the other extent records.
+
 \" pass1.c
 
 .SS "INODE_ALLOC_REPAIR"
diff --git a/fsck.ocfs2/pass0.c b/fsck.ocfs2/pass0.c
index aac0561..81fbf72 100644
--- a/fsck.ocfs2/pass0.c
+++ b/fsck.ocfs2/pass0.c
@@ -155,6 +155,173 @@ out:
 	return ret;
 }
 
+/*
+ * Sanity check the extent list of a discontiguous block group.
+ *
+ * Repairs the user agrees to are made in place and reported through
+ * *changed.  Damage that can't be repaired sets *clear_ref, again only if
+ * the user agrees, so that the caller can drop the group.
+ */
+static void check_discontig_bg(o2fsck_state *ost, int cpg,
+			       struct ocfs2_group_desc *bg,
+			       int *changed, int *clear_ref)
+{
+	uint64_t blkno = bg->bg_blkno;
+	int max_recs = ocfs2_extent_recs_per_gd(ost->ost_fs->fs_blocksize);
+	int next_free, i, total_clusters = 0, cpy, fix_pos = -1;
+	struct ocfs2_extent_rec *rec;
+
+	if (bg->bg_list.l_tree_depth &&
+	    prompt(ost, PY, PR_DISCONTIG_BG_DEPTH,
+		   "Discontig Group descriptor at block %"PRIu64" has "
+		   "a tree depth %u which is greathen than 0. "
+		   "Change it to 0?", blkno, bg->bg_list.l_tree_depth)) {
+		bg->bg_list.l_tree_depth = 0;
+		*changed = 1;
+	}
+
+	if (bg->bg_list.l_count > max_recs &&
+	    prompt(ost, PY, PR_DISCONTIG_BG_COUNT,
+		   "Discontig Group descriptor at block %"PRIu64" has "
+		   "a extent count %u which is greathen than %u. "
+		   "Change it?", blkno, bg->bg_list.l_count, max_recs)) {
+		bg->bg_list.l_count = max_recs;
+		*changed = 1;
+	}
+
+	/* A corrupt l_count may have been left alone; cap the scan anyway. */
+	next_free = bg->bg_list.l_next_free_rec;
+	if (next_free > bg->bg_list.l_count)
+		next_free = bg->bg_list.l_count;
+	if (next_free > max_recs)
+		next_free = max_recs;
+
+	for (i = 0; i < next_free; i++) {
+		rec = &bg->bg_list.l_recs[i];
+
+		/*
+		 * We treat e_blkno = 0 and e_leaf_clusters = 0 as the
+		 * end of the extent list so that we can find the proper
+		 * l_next_free_rec.
+		 */
+		if (!rec->e_blkno && !rec->e_leaf_clusters)
+			break;
+
+		if (ocfs2_block_out_of_range(ost->ost_fs, rec->e_blkno)) {
+			if (prompt(ost, PY, PR_DISCONTIG_BG_REC_RANGE,
+				   "Discontig block group %"PRIu64" in chain "
+				   "%d at inode %"PRIu64" contains a reference "
+				   "block %"PRIu64" which is out "
+				   "of range. Drop this group?",
+				   blkno, bg->bg_chain,
+				   (uint64_t)bg->bg_parent_dinode,
+				   (uint64_t)rec->e_blkno))
+				*clear_ref = 1;
+			return;
+		}
+
+		if (rec->e_leaf_clusters > cpg) {
+			if (fix_pos >= 0) {
+				if (prompt(ost, PY,
+					   PR_DISCONTIG_BG_CORRUPT_LEAFS,
+					   "Discontig block group %"PRIu64" "
+					   "in chain %d at inode %"PRIu64" "
+					   "have too many errors in "
+					   "e_leaf_clusters. "
+					   "Drop this group?",
+					   blkno, bg->bg_chain,
+					   (uint64_t)bg->bg_parent_dinode))
+					*clear_ref = 1;
+				return;
+			}
+			fix_pos = i;
+		} else
+			total_clusters += rec->e_leaf_clusters;
+	}
+
+	if (total_clusters > cpg) {
+		if (prompt(ost, PY, PR_DISCONTIG_BG_CLUSTERS,
+		    "Discontig Group descriptor at block %"PRIu64" has "
+		    "a extent list which has %u clusters allocated, which "
+		    "is greater than %u. Drop the group?",
+		    blkno, total_clusters, cpg))
+			*clear_ref = 1;
+		return;
+	}
+
+	if (i != next_free) {
+		/*
+		 * l_next_free_rec is broken since we find empty extent rec
+		 * before we reach it.
+		 */
+		if (total_clusters == cpg) {
+			/*
+			 * OK, the whole extent list is sane,
+			 * so change l_next_free_rec accordingly.
+			 */
+			if (prompt(ost, PY, PR_DISCONTIG_BG_NEXT_FREE_REC,
+				   "Discontig Group descriptor at block "
+				   "%"PRIu64" has a free extent rec %u "
+				   "while the list only have %u. Change it?",
+				    blkno, bg->bg_list.l_next_free_rec,
+				    i)) {
+				bg->bg_list.l_next_free_rec = i;
+				*changed = 1;
+			}
+		} else {
+			/*
+			 * We have to drop the group now since both
+			 * l_next_free_rec and the extent list have errors.
+			 */
+			if (prompt(ost, PY, PR_DISCONTIG_BG_LIST_CORRUPT,
+				   "Discontig block group %"PRIu64" in chain "
+				   "%u at inode %"PRIu64" have errors in "
+				   "l_next_free_rec and the extent list. "
+				   "Drop this group?",
+				   blkno, bg->bg_chain,
+				   (uint64_t)bg->bg_parent_dinode))
+				*clear_ref = 1;
+			return;
+		}
+	}
+
+	if (fix_pos >= 0 && fix_pos < bg->bg_list.l_count) {
+		rec = &bg->bg_list.l_recs[fix_pos];
+
+		if (total_clusters == cpg) {
+			/*
+			 * The other recs already add up to cpg, so the bad
+			 * rec is bogus: close the gap and zero the last slot.
+			 */
+			if (prompt(ost, PY, PR_DISCONTIG_BG_REMOVE_REC,
+				   "Discontig Group descriptor at block "
+				   "%"PRIu64" has a extent rec %d which has "
+				   "leaf clusters %u while other extent recs "
+				   "look sane. Remove it?",
+				    blkno, fix_pos, rec->e_leaf_clusters)) {
+				/* Overlapping ranges: memmove, not memcpy. */
+				cpy = (next_free - fix_pos - 1) * sizeof(*rec);
+				memmove(rec, rec + 1, cpy);
+				memset(&bg->bg_list.l_recs[next_free - 1],
+				       0, sizeof(*rec));
+				bg->bg_list.l_next_free_rec--;
+				*changed = 1;
+			}
+		} else {
+			if (prompt(ost, PY, PR_DISCONTIG_BG_LEAF_CLUSTERS,
+				   "Discontig Group descriptor at block "
+				   "%"PRIu64" has a extent rec %d which has "
+				   "leaf clusters %u while it should has %u. "
+				   "Change it?",
+				    blkno, fix_pos, rec->e_leaf_clusters,
+				    cpg - total_clusters)) {
+				rec->e_leaf_clusters = cpg - total_clusters;
+				*changed = 1;
+			}
+		}
+	}
+}
+
 static errcode_t repair_group_desc(o2fsck_state *ost,
 				   struct ocfs2_dinode *di,
 				   struct chain_state *cs,
@@ -251,6 +418,11 @@ static errcode_t repair_group_desc(o2fsck_state *ost,
 		changed = 1;
 	}
 
+	if (ocfs2_gd_is_discontig(bg))
+		check_discontig_bg(ost, cs->cs_cpg, bg, &changed, clear_ref);
+	if (*clear_ref)
+		goto out;
+
 	/* XXX check bg_bits vs cpg/bpc. */
 
 	if (changed) {
-- 
1.7.1.GIT




More information about the Ocfs2-tools-devel mailing list