[Ocfs2-tools-devel] [PATCH 3/3] Fix ECC of the superblock v2

Sunil Mushran sunil.mushran at oracle.com
Thu Jun 23 18:26:21 PDT 2011


I am wondering whether the following code flow is better.

open_and_check()
{
   int open = 0;

   repeat:
   open++;

   ocfs2_open();

   check_superblock();
   if (fail) {
     if (open > 1)
       goto fail;
     recover_superblock()
     goto repeat;
   }
}

On 06/22/2011 07:45 AM, Goldwyn Rodrigues wrote:
> Algorithm:
> If ECC for super block fails
>   1. Read backup superblocks, and check if they are alike
>   2. Compare with super block for tunefs modifiable parameters. If same,
>   		use backup as superblock
>   3. If backup failed, correct ECC of superblock and write.

> Changes:
> 	Incorporated backup superblock retrieval
>
> Signed-off-by: Goldwyn Rodrigues<rgoldwyn at suse.de>
> ---
>   fsck.ocfs2/fsck.c                 |  131 +++++++++++++++++++++++++++++++++++-
>   fsck.ocfs2/fsck.ocfs2.checks.8.in |    9 +++
>   2 files changed, 136 insertions(+), 4 deletions(-)
>
> diff --git a/fsck.ocfs2/fsck.c b/fsck.ocfs2/fsck.c
> index ea072c6..0a76319 100644
> --- a/fsck.ocfs2/fsck.c
> +++ b/fsck.ocfs2/fsck.c
> @@ -79,6 +79,7 @@ static o2fsck_state _ost;
>   static int cluster_locked = 0;
>
>   static void mark_magical_clusters(o2fsck_state *ost);
> +static errcode_t write_out_superblock(o2fsck_state *ost);
>
>   static void handle_signal(int sig)
>   {
> @@ -229,21 +230,143 @@ errcode_t o2fsck_state_reinit(ocfs2_filesys
> *fs, o2fsck_state *ost)
>   	return 0;
>   }
>
> +static errcode_t check_backup_super(o2fsck_state *ost,
> +		ocfs2_filesys **ret_fs)
> +{
> +	errcode_t ret = OCFS2_ET_CORRUPT_SUPERBLOCK;;
> +	int num, i;
> +	char *bak_buf, *tmp;
> +	uint64_t blocks[OCFS2_MAX_BACKUP_SUPERBLOCKS];
> +	ocfs2_filesys *fs = ost->ost_fs, *bak_fs = NULL;
> +	struct ocfs2_dinode *bak_di = NULL, *tmp_di = NULL;
> +	struct ocfs2_super_block *bak, *tmp_sb, *sup;
> +
> +	num = ocfs2_get_backup_super_offsets(fs, blocks, ARRAY_SIZE(blocks));
> +	if (!num)
> +		goto fail;
> +
> +	ret = ocfs2_malloc_block(fs->fs_io,&tmp);
> +	if (ret)
> +		goto fail;
> +
> +	ret = ocfs2_malloc_block(fs->fs_io,&bak_buf);
> +	if (ret)
> +		goto fail;
> +
> +	/* Read all backup superblocks and compare */
> +	ret = ocfs2_read_blocks(fs, blocks[i], 1, bak_buf);

i = 0?

> +	if (ret) {
> +		com_err(whoami, ret, "while reading backup super block\n");
> +		goto fail;
> +	}
> +	bak_di = (struct ocfs2_dinode *)bak_buf;
> +	ocfs2_swap_inode_to_cpu(fs, bak_di);
> +	bak = OCFS2_RAW_SB(bak_di);
> +
> +	for (i = 1; i<  num; i++) {
> +		ret = ocfs2_read_blocks(fs, blocks[i], 1, tmp);
> +		if (ret)
> +			goto fail;
> +		tmp_di = (struct ocfs2_dinode *)tmp;
> +		tmp_di->i_blkno = OCFS2_SUPER_BLOCK_BLKNO;
> +		ocfs2_swap_inode_to_cpu(fs, tmp_di);
> +		tmp_sb = OCFS2_RAW_SB(tmp_di);
> +		if (memcmp(bak, tmp_sb, sizeof(struct ocfs2_super_block))) {
> +			ret = OCFS2_ET_CORRUPT_SUPERBLOCK;
> +			goto fail;
> +		}
> +	}

Any reason you are not using ocfs2_read_backup_super()? It does the
swapping for you.

> +
> +	ret = ocfs2_open(fs->fs_devname, fs->fs_flags, blocks[0],
> +			fs->fs_blocksize,&bak_fs);
> +	if (ret)
> +		goto fail;
> +
> +	/* Validate backups ECC */
> +	bak_di = bak_fs->fs_super;
> +	ret = ocfs2_block_check_validate(bak_fs->fs_super,
> +			fs->fs_blocksize,&bak_di->i_check);
> +	if (ret) {
> +		com_err(whoami, ret, "while opening backup superblock\n");
> +		ocfs2_freefs(bak_fs);
> +		goto fail;
> +	}
> +
> +	/* Check for different values (tunefs modifiable) in backup*/

Comment could be more descriptive.

/*
Even if the backups are all good, there is a chance they were not
updated by tunefs. So we compare the attributes that tunefs updates.
If they do not match, then we assume that the main superblock is the
best one available. The backups are refreshed at the end.
*/

> +	sup = OCFS2_RAW_SB(fs->fs_super);
> +	bak = OCFS2_RAW_SB(bak_fs->fs_super);
> +
> +	if ((sup->s_feature_compat != bak->s_feature_compat) ||
> +		(sup->s_feature_incompat != bak->s_feature_incompat) ||
> +		(sup->s_feature_ro_compat != bak->s_feature_ro_compat) ||
> +		(sup->s_max_slots != bak->s_max_slots) ||
> +		memcmp(sup->s_uuid, bak->s_uuid, OCFS2_VOL_UUID_LEN) ||
> +		memcmp(sup->s_label, bak->s_label, OCFS2_VOL_UUID_LEN)) {
> +		ocfs2_freefs(bak_fs);
> +		goto fail;
> +	}
> +	ret = 0;
> +	*ret_fs = bak_fs;
> +fail:
> +	if (tmp)
> +		ocfs2_free(&tmp);
> +	return ret;
> +}
> +
>   static errcode_t check_superblock(o2fsck_state *ost)
>   {
> -	struct ocfs2_dinode *di = ost->ost_fs->fs_super;
> -	struct ocfs2_super_block *sb = OCFS2_RAW_SB(di);
> +	char *buf;
> +	struct ocfs2_dinode *di;
> +	ocfs2_filesys *fs = ost->ost_fs, *bak_fs = NULL;
> +	struct ocfs2_super_block *sb = OCFS2_RAW_SB(fs->fs_super);
>   	errcode_t ret = 0;
>
> + 	ret = ocfs2_malloc_block(fs->fs_io,&buf);
> +	if (ret)
> +		goto fail;
> +
> +	memcpy(buf, (char *)fs->fs_super, fs->fs_blocksize);
> +	di = (struct ocfs2_dinode *)buf;
> +
>   	if (sb->s_max_slots == 0) {
>   		printf("The superblock max_slots field is set to 0.\n");
>   		ret = OCFS2_ET_CORRUPT_SUPERBLOCK;
>   	}
>
> -	ost->ost_fs_generation = di->i_fs_generation;
> +	if (ocfs2_meta_ecc(OCFS2_RAW_SB(ost->ost_fs->fs_super))) {
> +		ret = ocfs2_block_check_validate(
> +				(char *)di, fs->fs_blocksize,&di->i_check);
> +		if (!ret) {
> +			/* Just in case hamming fixed anything, copy back*/
> +			memcpy(fs->fs_super, di, fs->fs_blocksize);
> +			goto out;
> +		}
>
> -	/* XXX do we want checking for different revisions of ocfs2? */
> +		ret = check_backup_super(ost,&bak_fs);
> +
> +		if (ret) {
> +			if (!prompt(ost, PN, PR_SUPERBLOCK_INVALID_ECC,
> +					"Superblock has invalid ECC. Fix?"))
> +				goto fail;
> +		} else {
> +			/* Use backup copy of the super block and write */
> +			fprintf(stderr, "%s: primary superblock failed ECC."
> +				" Using backup superblock.\n", whoami);

Checkout recover_backup_super(). The code is already there with a
prompt code.


> +			ocfs2_free(ost->ost_fs);
> +			fs = ost->ost_fs = bak_fs;
> +			fs->fs_super->i_blkno = OCFS2_SUPER_BLOCK_BLKNO;
> +		}
> +		ret = write_out_superblock(ost);
> +		if (ret)
> +			com_err(whoami, ret, "while writing superblock\n");
> +	}
>
> +out:
> +	ost->ost_fs_generation = di->i_fs_generation;
> +	/* XXX do we want checking for different revisions of ocfs2? */
> +fail:
> +	if (buf)
> +		ocfs2_free(&buf);
>   	return ret;
>   }
>
> diff --git a/fsck.ocfs2/fsck.ocfs2.checks.8.in
> b/fsck.ocfs2/fsck.ocfs2.checks.8.in
> index e706ea5..d6289f5 100644
> --- a/fsck.ocfs2/fsck.ocfs2.checks.8.in
> +++ b/fsck.ocfs2/fsck.ocfs2.checks.8.in
> @@ -1137,6 +1137,15 @@ index entry will cause lookups on this name to fail.
>
>   Answering yes will rebuild the directory index, restoring the missing entry.
>
> +.SS "SUPERBLOCK_INVALID_ECC"
> +The superblock has incorrect Error Correcting Code (ECC). ECC is capable of
> +correcting corruption upto 1 bit per block.Any corruptions higher that this
> +may indicate corruption. In this case the filesystem reports an error with
> +the read operation.
> +
> +Answering yes will recalculate the ECC and write the superblock with the
> +calculated ECC.
> +
>   .SH "SEE ALSO"
>   .BR fsck.ocfs2(8)
>



More information about the Ocfs2-tools-devel mailing list