[Ocfs2-tools-devel] [PATCH 4/8] Fix superblock from backup in case of invalid ECC

Goldwyn Rodrigues rgoldwyn at gmail.com
Mon Aug 1 10:58:38 PDT 2011


First try to open the filesystem with metaecc checks enabled. If that
fails with a bad ECC error, reopen it without ECC checks and verify the
backup superblocks. If the backups are good, recover the superblock from
them. If not, fail.

Algorithm proposed by Sunil Mushran.

Signed-off-by: Goldwyn Rodrigues <rgoldwyn at suse.de>
---
 fsck.ocfs2/fsck.c                 |  116 +++++++++++++++++++++++++++++++++---
 fsck.ocfs2/fsck.ocfs2.checks.8.in |    9 +++
 2 files changed, 115 insertions(+), 10 deletions(-)

diff --git a/fsck.ocfs2/fsck.c b/fsck.ocfs2/fsck.c
index e032dfd..b004e67 100644
--- a/fsck.ocfs2/fsck.c
+++ b/fsck.ocfs2/fsck.c
@@ -79,6 +79,8 @@ static o2fsck_state _ost;
 static int cluster_locked = 0;

 static void mark_magical_clusters(o2fsck_state *ost);
+static errcode_t recover_backup_super(o2fsck_state *ost, char* device,
+		int sb_num);

 static void handle_signal(int sig)
 {
@@ -229,6 +231,75 @@ errcode_t o2fsck_state_reinit(ocfs2_filesys *fs,
o2fsck_state *ost)
 	return 0;
 }

+static errcode_t check_backup_super(o2fsck_state *ost)
+{
+	errcode_t ret = OCFS2_ET_CORRUPT_SUPERBLOCK;;
+	int num, i;
+	char *bak_buf, *tmp;
+	uint64_t blocks[OCFS2_MAX_BACKUP_SUPERBLOCKS];
+	ocfs2_filesys *fs = ost->ost_fs;
+	struct ocfs2_dinode *bak_di = NULL, *tmp_di = NULL;
+	struct ocfs2_super_block *bak, *tmp_sb, *sup;
+
+	num = ocfs2_get_backup_super_offsets(fs, blocks, ARRAY_SIZE(blocks));
+	if (!num)
+		goto fail;
+
+	ret = ocfs2_malloc_block(fs->fs_io, &tmp);
+	if (ret)
+		goto fail;
+
+	ret = ocfs2_malloc_block(fs->fs_io, &bak_buf);
+	if (ret)
+		goto fail;
+
+	/* Read all backup superblocks and compare */
+	ret = ocfs2_read_backup_super(fs, 0, bak_buf);
+	if (ret) {
+		com_err(whoami, ret, "while reading backup super block\n");
+		goto fail;
+	}
+	bak_di = (struct ocfs2_dinode *)bak_buf;
+	bak = OCFS2_RAW_SB(bak_di);
+	sup = OCFS2_RAW_SB(fs->fs_super);
+
+	/* Check for different values (tunefs modifiable) in backup*/
+	if ((sup->s_feature_compat != bak->s_feature_compat) ||
+		(sup->s_feature_incompat != bak->s_feature_incompat) ||
+		(sup->s_feature_ro_compat != bak->s_feature_ro_compat) ||
+		(sup->s_max_slots != bak->s_max_slots) ||
+		memcmp(sup->s_uuid, bak->s_uuid, OCFS2_VOL_UUID_LEN) ||
+		memcmp(sup->s_label, bak->s_label, OCFS2_VOL_UUID_LEN))
+		goto fail;
+
+	/* Compare and check all backups */
+	for (i = 1; i < num; i++) {
+		ret = ocfs2_read_backup_super(fs, i, tmp);
+		if (ret)
+			goto fail;
+		tmp_di = (struct ocfs2_dinode *)tmp;
+		tmp_sb = OCFS2_RAW_SB(tmp_di);
+		/* Validate backups ECC */
+		ret = ocfs2_block_check_validate(tmp_di,
+				fs->fs_blocksize, &tmp_di->i_check);
+		if (ret)
+			goto fail;
+		tmp_di->i_blkno = OCFS2_SUPER_BLOCK_BLKNO;
+		if (memcmp(bak, tmp_sb, sizeof(struct ocfs2_super_block))) {
+			ret = OCFS2_ET_CORRUPT_SUPERBLOCK;
+			goto fail;
+		}
+	}
+
+	ret = 0;
+fail:
+	if (tmp)
+		ocfs2_free(&tmp);
+	if (bak_buf)
+		ocfs2_free(&bak_buf);
+	return ret;
+}
+
 static errcode_t check_superblock(o2fsck_state *ost)
 {
 	struct ocfs2_dinode *di = ost->ost_fs->fs_super;
@@ -412,21 +483,44 @@ static errcode_t open_and_check(o2fsck_state
*ost, char *filename,
 				uint64_t blksize)
 {
 	errcode_t ret;
+	int flag = 0;

+retry:
 	ret = ocfs2_open(filename, open_flags, blkno, blksize, &ost->ost_fs);
-	if (ret) {
-		com_err(whoami, ret, "while opening \"%s\"", filename);
+	if (!ret) {
+		ret = check_superblock(ost);
 		goto out;
 	}

-	ret = check_superblock(ost);
-	if (ret) {
-		printf("fsck saw unrecoverable errors in the super block and "
-		       "will not continue.\n");
+	/* Cannot recognize the error, bail */
+	if (flag || (ret != OCFS2_ET_BAD_ECC)) {
+		com_err(whoami, ret, "while opening \"%s\"", filename);
 		goto out;
 	}

+	/* Open without ECC to get fs */
+	ret = ocfs2_open(filename, open_flags | OCFS2_FLAG_NO_ECC_CHECKS,
+			blkno, blksize, &ost->ost_fs);
+	if (ret)
+		goto out;
+
+	ret = check_backup_super(ost);
+	if (ret)
+		goto out;
+	
+	/* Backups are fine. Recover from a backup now */
+	printf("Superblock corrupted. Attempting backup superblock\n");
+	ocfs2_free(ost->ost_fs);
+	ost->ost_fs = NULL;
+	ret = recover_backup_super(ost, filename, 1);
+	if (!ret && (flag++ < 1))
+		goto retry;
+
 out:
+	if (ret)
+		printf("fsck saw unrecoverable errors in the super block"
+			" and the backup superblock. fsck will"
+			" not continue.\n");
 	return ret;
 }

@@ -469,10 +563,12 @@ static errcode_t
maybe_replay_journals(o2fsck_state *ost, char *filename,
 	if (!replayed)
 		goto out;

-	ret = ocfs2_close(ost->ost_fs);
-	if (ret) {
-		com_err(whoami, ret, "while closing \"%s\"", filename);
-		goto out;
+	if (ost->ost_fs) {
+		ret = ocfs2_close(ost->ost_fs);
+		if (ret) {
+			com_err(whoami, ret, "while closing \"%s\"", filename);
+			goto out;
+		}
 	}

 	ret = open_and_check(ost, filename, open_flags, blkno, blksize);
diff --git a/fsck.ocfs2/fsck.ocfs2.checks.8.in
b/fsck.ocfs2/fsck.ocfs2.checks.8.in
index e706ea5..d6289f5 100644
--- a/fsck.ocfs2/fsck.ocfs2.checks.8.in
+++ b/fsck.ocfs2/fsck.ocfs2.checks.8.in
@@ -1137,6 +1137,15 @@ index entry will cause lookups on this name to fail.

 Answering yes will rebuild the directory index, restoring the missing entry.

+.SS "SUPERBLOCK_INVALID_ECC"
+The superblock has an incorrect Error Correcting Code (ECC). ECC is capable
+of correcting corruption of up to 1 bit per block. Anything beyond that
+indicates corruption that ECC cannot repair. In this case the filesystem
+reports an error with the read operation.
+
+Answering yes will recalculate the ECC and write the superblock with the
+calculated ECC.
+
 .SH "SEE ALSO"
 .BR fsck.ocfs2(8)

-- 
1.7.6



More information about the Ocfs2-tools-devel mailing list