[Ocfs2-tools-devel] [PATCH 3/3] Fix ECC of the superblock v2
Goldwyn Rodrigues
rgoldwyn at gmail.com
Mon Jun 27 07:42:58 PDT 2011
On Thu, Jun 23, 2011 at 8:26 PM, Sunil Mushran <sunil.mushran at oracle.com> wrote:
> I am wondering whether the following code flow is better.
>
> open_and_check()
> {
> int open = 0;
>
> repeat:
> open++;
>
> ocfs2_open();
>
> check_superblock();
> if (fail) {
> if (open > 1)
> goto fail;
> recover_superblock()
> goto repeat;
> }
> }
I was thinking more on the lines of recovering from backup when the
ocfs2_open fails as well (think super's signature corruption). More in
the lines of -
open_and_check()
{
ocfs2_open();
if fail
goto recover_from_backup;
check_superblock - checks super block only
if fail
goto recover_from_backup;
else
return;
recover_from_backup:
check backups with opened superblock (if exists)
recover from backup
}
Of course it will have prompts et cetra.
>
> On 06/22/2011 07:45 AM, Goldwyn Rodrigues wrote:
>>
>> Algorithm:
>> If ECC for super block fails
>> 1. Read backup superblocks, and check if they are alike
>> 2. Compare with super block for tunefs modifiable parameters. If same,
>> use backup as superblock
>> 3. If backup failed, correct ECC of superblock and write.
>
>> Changes:
>> Incorporated backup superblock retrieval
>>
>> Signed-off-by: Goldwyn Rodrigues<rgoldwyn at suse.de>
>> ---
>> fsck.ocfs2/fsck.c | 131
>> +++++++++++++++++++++++++++++++++++-
>> fsck.ocfs2/fsck.ocfs2.checks.8.in | 9 +++
>> 2 files changed, 136 insertions(+), 4 deletions(-)
>>
>> diff --git a/fsck.ocfs2/fsck.c b/fsck.ocfs2/fsck.c
>> index ea072c6..0a76319 100644
>> --- a/fsck.ocfs2/fsck.c
>> +++ b/fsck.ocfs2/fsck.c
>> @@ -79,6 +79,7 @@ static o2fsck_state _ost;
>> static int cluster_locked = 0;
>>
>> static void mark_magical_clusters(o2fsck_state *ost);
>> +static errcode_t write_out_superblock(o2fsck_state *ost);
>>
>> static void handle_signal(int sig)
>> {
>> @@ -229,21 +230,143 @@ errcode_t o2fsck_state_reinit(ocfs2_filesys
>> *fs, o2fsck_state *ost)
>> return 0;
>> }
>>
>> +static errcode_t check_backup_super(o2fsck_state *ost,
>> + ocfs2_filesys **ret_fs)
>> +{
>> + errcode_t ret = OCFS2_ET_CORRUPT_SUPERBLOCK;;
>> + int num, i;
>> + char *bak_buf, *tmp;
>> + uint64_t blocks[OCFS2_MAX_BACKUP_SUPERBLOCKS];
>> + ocfs2_filesys *fs = ost->ost_fs, *bak_fs = NULL;
>> + struct ocfs2_dinode *bak_di = NULL, *tmp_di = NULL;
>> + struct ocfs2_super_block *bak, *tmp_sb, *sup;
>> +
>> + num = ocfs2_get_backup_super_offsets(fs, blocks,
>> ARRAY_SIZE(blocks));
>> + if (!num)
>> + goto fail;
>> +
>> + ret = ocfs2_malloc_block(fs->fs_io,&tmp);
>> + if (ret)
>> + goto fail;
>> +
>> + ret = ocfs2_malloc_block(fs->fs_io,&bak_buf);
>> + if (ret)
>> + goto fail;
>> +
>> + /* Read all backup superblocks and compare */
>> + ret = ocfs2_read_blocks(fs, blocks[i], 1, bak_buf);
>
> i = 0?
oops. blame it on copy-paste.
>
>> + if (ret) {
>> + com_err(whoami, ret, "while reading backup super
>> block\n");
>> + goto fail;
>> + }
>> + bak_di = (struct ocfs2_dinode *)bak_buf;
>> + ocfs2_swap_inode_to_cpu(fs, bak_di);
>> + bak = OCFS2_RAW_SB(bak_di);
>> +
>> + for (i = 1; i< num; i++) {
>> + ret = ocfs2_read_blocks(fs, blocks[i], 1, tmp);
>> + if (ret)
>> + goto fail;
>> + tmp_di = (struct ocfs2_dinode *)tmp;
>> + tmp_di->i_blkno = OCFS2_SUPER_BLOCK_BLKNO;
>> + ocfs2_swap_inode_to_cpu(fs, tmp_di);
>> + tmp_sb = OCFS2_RAW_SB(tmp_di);
>> + if (memcmp(bak, tmp_sb, sizeof(struct ocfs2_super_block)))
>> {
>> + ret = OCFS2_ET_CORRUPT_SUPERBLOCK;
>> + goto fail;
>> + }
>> + }
>
> Any reason you are not using ocfs2_read_backup_super()? It does the
> swapping for you.
Hmm.. did not realize a function like this exists.
>
>> +
>> + ret = ocfs2_open(fs->fs_devname, fs->fs_flags, blocks[0],
>> + fs->fs_blocksize,&bak_fs);
>> + if (ret)
>> + goto fail;
>> +
>> + /* Validate backups ECC */
>> + bak_di = bak_fs->fs_super;
>> + ret = ocfs2_block_check_validate(bak_fs->fs_super,
>> + fs->fs_blocksize,&bak_di->i_check);
>> + if (ret) {
>> + com_err(whoami, ret, "while opening backup superblock\n");
>> + ocfs2_freefs(bak_fs);
>> + goto fail;
>> + }
>> +
>> + /* Check for different values (tunefs modifiable) in backup*/
>
> Comment could be more descriptive.
>
> /*
> Even if the backups are all good, there is a chance they were not
> updated by tunefs. So we compare the attributes that tunefs updates.
> If they do not match, then we assume that the main superblock is the
> best one available. The backups are refreshed at the end.
> */
Sure..
>
>> + sup = OCFS2_RAW_SB(fs->fs_super);
>> + bak = OCFS2_RAW_SB(bak_fs->fs_super);
>> +
>> + if ((sup->s_feature_compat != bak->s_feature_compat) ||
>> + (sup->s_feature_incompat != bak->s_feature_incompat) ||
>> + (sup->s_feature_ro_compat != bak->s_feature_ro_compat) ||
>> + (sup->s_max_slots != bak->s_max_slots) ||
>> + memcmp(sup->s_uuid, bak->s_uuid, OCFS2_VOL_UUID_LEN) ||
>> + memcmp(sup->s_label, bak->s_label, OCFS2_VOL_UUID_LEN)) {
>> + ocfs2_freefs(bak_fs);
>> + goto fail;
>> + }
>> + ret = 0;
>> + *ret_fs = bak_fs;
>> +fail:
>> + if (tmp)
>> + ocfs2_free(&tmp);
>> + return ret;
>> +}
>> +
>> static errcode_t check_superblock(o2fsck_state *ost)
>> {
>> - struct ocfs2_dinode *di = ost->ost_fs->fs_super;
>> - struct ocfs2_super_block *sb = OCFS2_RAW_SB(di);
>> + char *buf;
>> + struct ocfs2_dinode *di;
>> + ocfs2_filesys *fs = ost->ost_fs, *bak_fs = NULL;
>> + struct ocfs2_super_block *sb = OCFS2_RAW_SB(fs->fs_super);
>> errcode_t ret = 0;
>>
>> + ret = ocfs2_malloc_block(fs->fs_io,&buf);
>> + if (ret)
>> + goto fail;
>> +
>> + memcpy(buf, (char *)fs->fs_super, fs->fs_blocksize);
>> + di = (struct ocfs2_dinode *)buf;
>> +
>> if (sb->s_max_slots == 0) {
>> printf("The superblock max_slots field is set to 0.\n");
>> ret = OCFS2_ET_CORRUPT_SUPERBLOCK;
>> }
>>
>> - ost->ost_fs_generation = di->i_fs_generation;
>> + if (ocfs2_meta_ecc(OCFS2_RAW_SB(ost->ost_fs->fs_super))) {
>> + ret = ocfs2_block_check_validate(
>> + (char *)di,
>> fs->fs_blocksize,&di->i_check);
>> + if (!ret) {
>> + /* Just in case hamming fixed anything, copy
>> back*/
>> + memcpy(fs->fs_super, di, fs->fs_blocksize);
>> + goto out;
>> + }
>>
>> - /* XXX do we want checking for different revisions of ocfs2? */
>> + ret = check_backup_super(ost,&bak_fs);
>> +
>> + if (ret) {
>> + if (!prompt(ost, PN, PR_SUPERBLOCK_INVALID_ECC,
>> + "Superblock has invalid ECC.
>> Fix?"))
>> + goto fail;
>> + } else {
>> + /* Use backup copy of the super block and write */
>> + fprintf(stderr, "%s: primary superblock failed
>> ECC."
>> + " Using backup superblock.\n", whoami);
>
> Checkout recover_backup_super(). The code is already there with a
> prompt code.
Yes, thats sounds better.
>
>
>> + ocfs2_free(ost->ost_fs);
>> + fs = ost->ost_fs = bak_fs;
>> + fs->fs_super->i_blkno = OCFS2_SUPER_BLOCK_BLKNO;
>> + }
>> + ret = write_out_superblock(ost);
>> + if (ret)
>> + com_err(whoami, ret, "while writing
>> superblock\n");
>> + }
>>
>> +out:
>> + ost->ost_fs_generation = di->i_fs_generation;
>> + /* XXX do we want checking for different revisions of ocfs2? */
>> +fail:
>> + if (buf)
>> + ocfs2_free(&buf);
>> return ret;
>> }
>>
>> diff --git a/fsck.ocfs2/fsck.ocfs2.checks.8.in
>> b/fsck.ocfs2/fsck.ocfs2.checks.8.in
>> index e706ea5..d6289f5 100644
>> --- a/fsck.ocfs2/fsck.ocfs2.checks.8.in
>> +++ b/fsck.ocfs2/fsck.ocfs2.checks.8.in
>> @@ -1137,6 +1137,15 @@ index entry will cause lookups on this name to
>> fail.
>>
>> Answering yes will rebuild the directory index, restoring the missing
>> entry.
>>
>> +.SS "SUPERBLOCK_INVALID_ECC"
>> +The superblock has incorrect Error Correcting Code (ECC). ECC is capable
>> of
>> +correcting corruption upto 1 bit per block.Any corruptions higher that
>> this
>> +may indicate corruption. In this case the filesystem reports an error
>> with
>> +the read operation.
>> +
>> +Answering yes will recalculate the ECC and write the superblock with the
>> +calculated ECC.
>> +
>> .SH "SEE ALSO"
>> .BR fsck.ocfs2(8)
>>
>
--
Goldwyn
More information about the Ocfs2-tools-devel
mailing list