[Ocfs2-tools-devel] [PATCH 1/2 v4] fsck: supporting fixing inode alloc group desc
piaojun
piaojun at huawei.com
Fri Feb 23 00:43:16 PST 2018
Hi Eric,
On 2018/2/23 16:38, Eric Ren wrote:
> Hi,
>
> On 02/14/2018 11:38 AM, piaojun wrote:
>> When inode_alloc's gd is corrupted, we may reinitialize it and then set
>> its bitmap by iterating all files of root dir.
>>
>> How to make corrupted gd?
>> 1. Find the gd blkno of inode_alloc with debugfs.ocfs2:
>> # debugfs.ocfs2 -R "stat //inode_alloc:0000" /dev/mapper/xxxx
>> ...
>> ## Block# Total Used Free Contig Size
>> 0 167424 1024 3 1021 1021 4032
>> 2. clear the gd with 'dd' command:
>> # dd if=/dev/zero of=/dev/mapper/xxxx bs=4k count=1 seek=167424
>> oflag=direct
>>
>> How to fix corrupted gd?
>> 1. Identify the corrupted gd by generation and magic.
>> 2. Initialize the corrupted gd with ocfs2_init_group_desc().
>> 3. Iterate all files in root dir, and set inode_alloc's bitmap by inode
>> blknum.
>> 4. Write back the good gd to disk.
>>
>> Currently we could only fix the situation that there is one gd in each
>> chain, because we can hardly rebuild the gd far from chain header. The
>> key problem is that we can not trust gd anymore as they have been
>> corrupted. So we must rely on the ocfs2_chain_list struct to restore
>> all gds.
>>
>> Signed-off-by: Jun Piao <piaojun at huawei.com>
>> ---
>> fsck.ocfs2/pass0.c | 182 +++++++++++++++++++++++++++++++++++++++++++++++++++++
>> 1 file changed, 182 insertions(+)
>>
>> diff --git a/fsck.ocfs2/pass0.c b/fsck.ocfs2/pass0.c
>> index bfd11fb..25cb779 100644
>> --- a/fsck.ocfs2/pass0.c
>> +++ b/fsck.ocfs2/pass0.c
>> @@ -1308,6 +1308,184 @@ static errcode_t verify_bitmap_descs(o2fsck_state *ost,
>> return ret;
>> }
>>
>> +struct walk_path {
>> + const char *argv0;
>> + char *path;
>> + ocfs2_filesys *fs;
>> + struct ocfs2_group_desc *bgs;
>> + int corrupted_bgs;
>> +};
>> +
>> +static int set_bitmap_func(struct ocfs2_dir_entry *dentry,
>> + uint64_t blocknr,
>> + int offset,
>> + int blocksize,
>> + char *buf,
>> + void *priv_data)
>> +{
>> + struct walk_path *wp = priv_data;
>> + struct ocfs2_group_desc *bg;
>> + __le64 inode = dentry->inode;
>> + __le64 bg_blkno;
>> + errcode_t ret;
>> + int len;
>> + int reti = 0;
>> + int i = 0;
>> + char *old_path, *path = NULL;
>> +
>> + if (!strncmp(dentry->name, ".", dentry->name_len) ||
>> + !strncmp(dentry->name, "..", dentry->name_len))
>> + return 0;
>> +
>> + ret = ocfs2_malloc0(PATH_MAX, &path);
>> + if (ret) {
>> + com_err(wp->argv0, ret,
>> + "while allocating path memory in %s\n", wp->path);
>> + return OCFS2_DIRENT_ABORT;
>> + }
>> +
>> + len = strlen(wp->path);
>> + memcpy(path, wp->path, len);
>> + memcpy(path + len, dentry->name, dentry->name_len);
>> + if (dentry->file_type == OCFS2_FT_DIR)
>> + path[len + dentry->name_len] = '/';
>> +
>> + /* set group desc bitmap */
>> + for (i = 0; i < wp->corrupted_bgs; i++) {
>> + bg = &wp->bgs[i];
>> + bg_blkno = bg->bg_blkno;
>> + if (inode > bg_blkno && inode <= bg_blkno + bg->bg_bits) {
>> + ocfs2_set_bit(inode - bg_blkno, bg->bg_bitmap);
>> + bg->bg_free_bits_count--;
>> + }
>> + }
>> +
>> + if (dentry->file_type == OCFS2_FT_DIR) {
>> + old_path = wp->path;
>> + wp->path = path;
>> + ret = ocfs2_dir_iterate(wp->fs, inode, 0, NULL,
>> + set_bitmap_func, wp);
>> + if (ret) {
>> + com_err(wp->argv0, ret, "while walking %s", wp->path);
>> + reti = OCFS2_DIRENT_ABORT;
>> + }
>> + wp->path = old_path;
>> + }
>> +
>> + ocfs2_free(&path);
>> +
>> + return reti;
>> +}
>> +
>> +static errcode_t verify_group_desc(o2fsck_state *ost,
>> + struct ocfs2_dinode *di, int type)
>> +{
>> + uint16_t bits;
>> + uint64_t blkno;
>> + errcode_t ret = 0;
>> + int corrupted_bgs = 0, i;
>> + struct ocfs2_chain_list *cl = &di->id2.i_chain;
>> + struct ocfs2_chain_rec *rec;
>> + struct ocfs2_group_desc *bgs = NULL;
>> +
>> + ret = ocfs2_malloc_blocks(ost->ost_fs->fs_io,
>> + cl->cl_next_free_rec, &bgs);
>> + if (ret) {
>> + com_err(whoami, ret, "while allocating block group descriptors");
>> + goto out;
>> + }
>> + memset(bgs, 0, ost->ost_fs->fs_blocksize * cl->cl_next_free_rec);
>> +
>> + /*
>> + * Currently we could only fix the situation that there is one gd
>> + * in each chain, because we can hardly rebuild the gd far from
>> + * chain header. The key problem is that we can not trust gd
>> + * anymore as they have been corrupted. So we must rely on
>> + * the ocfs2_chain_list struct to restore all gds.
>> + */
>
> As talked in v3 as below:
>
> ===
>
>> I still think it's not proper to silently assume that there is only one gd in each chain.
>> So, could we give warning for user's attention by check how many
>> gds the allocator has? If number of gds > the number of chain recs, give some warnings?
>
>> Eric
>
> Agreed, we could give the user a choice of whether to fix this problem if
> the number of gds > the number of chain recs.
>
> ===
>
> I cannot see where you enforce this?
I added a note to the prompt message to inform users, so that they can choose
whether to fix this problem:
"Note that we could only fix the "
"situation that there is one gd in each chain",
thanks,
Jun
>
> Eric
>
>> + for (i = 0; i < cl->cl_next_free_rec; i++) {
>> + rec = &cl->cl_recs[i];
>> + blkno = rec->c_blkno;
>> + bits = rec->c_total;
>> +
>> + ret = ocfs2_read_group_desc(ost->ost_fs, blkno,
>> + (char *)&bgs[corrupted_bgs]);
>> + if ((ret == OCFS2_ET_BAD_GROUP_DESC_MAGIC) ||
>> + (!ret && bgs[corrupted_bgs].bg_generation != ost->ost_fs_generation)) {
>> + if (!prompt(ost, PY, PR_GROUP_EXPECTED_DESC,
>> + "Block %"PRIu64" should be a group "
>> + "descriptor for the bitmap chain allocator "
>> + "but it was corrupted. Reinitialize it as "
>> + "a group desc and link it into the bitmap "
>> + "allocator? Note that we could only fix the "
>> + "situation that there is one gd in each chain",
>> + blkno))
>> + continue;
>> + ocfs2_init_group_desc(ost->ost_fs,
>> + &bgs[corrupted_bgs],
>> + blkno, ost->ost_fs_generation,
>> + di->i_blkno, bits, i, 1);
>> + corrupted_bgs++;
>> + } else if (ret) {
>> + com_err(whoami, ret, "while reading a block bitmap "
>> + "group descriptor from block %"PRIu64,
>> + blkno);
>> + }
>> + }
>> +
>> + /* traverse all inodes, and set group desc bitmap */
>> + if (corrupted_bgs) {
>> + /* Walk root dir */
>> + struct walk_path wp;
>> + uint64_t root_blkno;
>> + char *path = NULL;
>> +
>> + switch (type) {
>> + case GLOBAL_INODE_ALLOC_SYSTEM_INODE:
>> + path = "//";
>> + root_blkno = ost->ost_fs->fs_sysdir_blkno;
>> + break;
>> + case INODE_ALLOC_SYSTEM_INODE:
>> + path = "/";
>> + root_blkno = ost->ost_fs->fs_root_blkno;
>> + break;
>> + default:
>> + ret = OCFS2_ET_INTERNAL_FAILURE;
>> + com_err(whoami, ret, "while verifying group desc");
>> + goto out;
>> + }
>> +
>> + wp.argv0 = whoami;
>> + wp.path = path;
>> + wp.fs = ost->ost_fs;
>> + wp.bgs = bgs;
>> + wp.corrupted_bgs = corrupted_bgs;
>> + ret = ocfs2_dir_iterate(ost->ost_fs,
>> + root_blkno, 0, NULL,
>> + set_bitmap_func, &wp);
>> + if (ret) {
>> + com_err(whoami, ret, "while walking root dir");
>> + goto out;
>> + }
>> + }
>> +
>> + /* write back fixed bgs */
>> + for (i = 0; i < corrupted_bgs; i++) {
>> + ret = ocfs2_write_group_desc(ost->ost_fs,
>> + bgs[i].bg_blkno,
>> + (char *)&bgs[i]);
>> + if (ret) {
>> + com_err(whoami, ret, "while writing a block group "
>> + "descriptor at block %"PRIu64, blkno);
>> + ost->ost_saw_error = 1;
>> + }
>> + }
>> +
>> +out:
>> + ocfs2_free(&bgs);
>> + return ret;
>> +}
>> +
>> /* this returns an error if it didn't leave the allocators in a state that
>> * the iterators will be able to work with. There is probably some room
>> * for more resiliance here. */
>> @@ -1483,6 +1661,10 @@ errcode_t o2fsck_pass0(o2fsck_state *ost)
>> verbosef("Caching inode alloc failed, err %d\n",
>> (int)ret);
>>
>> + ret = verify_group_desc(ost, di, type);
>> + if (ret)
>> + goto out;
>> +
>> ret = verify_chain_alloc(ost, di,
>> blocks + ost->ost_fs->fs_blocksize,
>> blocks +
>
> .
>
More information about the Ocfs2-tools-devel
mailing list