[Ocfs2-tools-devel] [PATCH 1/2] fsck: supporting fixing inode alloc group desc
piaojun
piaojun at huawei.com
Mon Jan 29 23:16:54 PST 2018
Hi Eric,
On 2018/1/30 12:32, Eric Ren wrote:
> Hi Jun,
>
> On 01/17/2018 07:48 PM, piaojun wrote:
>> when inode_alloc's gd is corrupted, we may reinitialize it and then set
>> its bitmap by iterating all files of root dir.
>
> Kind reminder: add "patch v2" in subject and your changes since v1 when
> you re-send.
>
> We know it's not easy to improve fsck tool, and do it correctly. Could you elaborate
> a little bit about how you make a corrupted group descriptor and how well your patch
> works?
>
How to make corrupted gd?
1. Find the gd blkno of inode_alloc with debugfs.ocfs2:
# debugfs.ocfs2 -R "stat //inode_alloc:0000" /dev/mapper/360022a11000d938407102acf00000155
...
## Block# Total Used Free Contig Size
0 167424 1024 3 1021 1021 4032
2. clear the gd with 'dd' command:
# dd if=/dev/zero of=/dev/mapper/360022a11000d938407102acf00000155 bs=4k count=1 seek=167424 oflag=direct
How to fix corrupted gd?
1. Identify the corrupted gd by generation and magic.
2. Initialize the corrupted gd with ocfs2_init_group_desc().
3. Iterate all files in root dir, and set inode_alloc's bitmap by inode blknum.
4. Write back the good gd to disk.
>>
>> Signed-off-by: Jun Piao <piaojun at huawei.com>
>> ---
>> fsck.ocfs2/pass0.c | 173 +++++++++++++++++++++++++++++++++++++++++++++++++++++
>> 1 file changed, 173 insertions(+)
>>
>> diff --git a/fsck.ocfs2/pass0.c b/fsck.ocfs2/pass0.c
>> index bfd11fb..29673ed 100644
>> --- a/fsck.ocfs2/pass0.c
>> +++ b/fsck.ocfs2/pass0.c
>> @@ -1308,6 +1308,175 @@ static errcode_t verify_bitmap_descs(o2fsck_state *ost,
>> return ret;
>> }
>>
>> +struct walk_path {
>> + const char *argv0;
>> + char *path;
>> + ocfs2_filesys *fs;
>> + struct ocfs2_group_desc *bgs;
>> + int corrupted_bgs;
>> +};
>> +
>> +static int set_bitmap_func(struct ocfs2_dir_entry *dentry,
>> + uint64_t blocknr,
>> + int offset,
>> + int blocksize,
>> + char *buf,
>> + void *priv_data)
>> +{
>> + struct walk_path *wp = priv_data;
>> + struct ocfs2_group_desc *bg;
>> + __le64 inode = dentry->inode;
>> + __le64 bg_blkno;
>> + errcode_t ret;
>> + int len;
>> + int reti = 0;
> reti -> rc? hha
>> + int i = 0;
>> + char *old_path, *path = NULL;
>> +
>> + if (!strncmp(dentry->name, ".", dentry->name_len) ||
>> + !strncmp(dentry->name, "..", dentry->name_len))
>> + return 0;
>> +
>> + ret = ocfs2_malloc0(PATH_MAX, &path);
>> + if (ret) {
>> + com_err(wp->argv0, ret,
>> + "while allocating path memory in %s\n", wp->path);
>> + return OCFS2_DIRENT_ABORT;
>> + }
>> +
>> + len = strlen(wp->path);
>> + memcpy(path, wp->path, len);
>> + memcpy(path + len, dentry->name, dentry->name_len);
>> + if (dentry->file_type == OCFS2_FT_DIR)
>> + path[len + dentry->name_len] = '/';
>> +
>> + /* set group desc bitmap */
>> + for (i = 0; i < wp->corrupted_bgs; i++) {
>> + bg = &wp->bgs[i];
>> + bg_blkno = bg->bg_blkno;
>> + if (inode > bg_blkno && inode <= bg_blkno + bg->bg_bits) {
>> + ocfs2_set_bit(inode - bg_blkno, bg->bg_bitmap);
>> + bg->bg_free_bits_count--;
>> + }
>> + }
>> +
>> + if (dentry->file_type == OCFS2_FT_DIR) {
>> + old_path = wp->path;
>> + wp->path = path;
>> + ret = ocfs2_dir_iterate(wp->fs, inode, 0, NULL,
>> + set_bitmap_func, wp);
>> + if (ret) {
>> + com_err(wp->argv0, ret, "while walking %s", wp->path);
>> + reti = OCFS2_DIRENT_ABORT;
>> + }
>> + wp->path = old_path;
>> + }
>> +
>> + ocfs2_free(&path);
>
> path could be NULL, right? The same applies to the other appearance of ocfs2_free().
>
NULL path means that -ENOMEM error happens.
>> +
>> + return reti;
>> +}
>> +
>> +static errcode_t verify_group_desc(o2fsck_state *ost,
>> + struct ocfs2_dinode *di, int type)
>> +{
>> + uint16_t bits;
>> + uint64_t blkno;
>> + errcode_t ret = 0;
>> + int corrupted_bgs = 0, i;
>> + struct ocfs2_chain_list *cl = &di->id2.i_chain;
>> + struct ocfs2_chain_rec *rec;
>> + struct ocfs2_group_desc *bgs = NULL;
>> +
>> + ret = ocfs2_malloc_blocks(ost->ost_fs->fs_io,
>> + cl->cl_next_free_rec, &bgs);
>> + if (ret) {
>> + com_err(whoami, ret, "while allocating block group descriptors");
>> + goto out;
>> + }
>> + memset(bgs, 0, ost->ost_fs->fs_blocksize * cl->cl_next_free_rec);
>> +
>> + for (i = 0; i < cl->cl_next_free_rec; i++) {
>> + rec = &cl->cl_recs[i];
>
> One thing I doubt about:
>
> In this loop, it checks every first group descriptor of each chain, right? How about the
> rest gd on the chain?
>
Currently we could only fix the situation that there is one gd in each
chain, because we can hardly rebuild the gd far from chain header. For
example, when gd4 is corrupted, we can find its son gd247.
rec rec rec rec ... rec
gd1 gd2 gd3 gd4 ... gd243
| | | | ...
gd gd gd gd247
>> + blkno = rec->c_blkno;
>> + bits = rec->c_total;
>> +
>> + ret = ocfs2_read_group_desc(ost->ost_fs, blkno,
>> + (char *)&bgs[corrupted_bgs]);
>> + if ((ret == OCFS2_ET_BAD_GROUP_DESC_MAGIC) ||
>> + (!ret && bgs[corrupted_bgs].bg_generation != ost->ost_fs_generation)) {
>> + if (!prompt(ost, PY, PR_GROUP_EXPECTED_DESC,
>> + "Block %"PRIu64" should be a group "
>> + "descriptor for the bitmap chain allocator "
>> + "but it was corrupted. Reinitialize it as "
>> + "a group desc and link it into the bitmap "
>> + "allocator?", blkno))
>> + continue;
>> + ocfs2_init_group_desc(ost->ost_fs,
>> + &bgs[corrupted_bgs],
>> + blkno, ost->ost_fs_generation,
>> + di->i_blkno, bits, i, 1);
> In the last review:
> """
>
> Hmm, regarding the last parameter of ocfs2_init_group_desc(...,
> suballoc), is it correct to always
> set 0 no matter it's global_inode_alloc or inode_alloc?
> """
>
> global_inode_alloc is also sub-alloctor? oh, I think so.
>
>
> Can fsck already fix global bitmap group desc?
>
Yes, it could.
thanks,
Jun
>
> The others looks good to me. Thanks.
>
> Eric
>
>
>> + corrupted_bgs++;
>> + } else if (ret) {
>> + com_err(whoami, ret, "while reading a block bitmap "
>> + "group descriptor from block %"PRIu64,
>> + blkno);
>> + }
>> + }
>> +
>> + /* traverse all inodes, and set group desc bitmap */
>> + if (corrupted_bgs) {
>> + /* Walk root dir */
>> + struct walk_path wp;
>> + uint64_t root_blkno;
>> + char *path = NULL;
>> +
>> + switch (type) {
>> + case GLOBAL_INODE_ALLOC_SYSTEM_INODE:
>> + path = "//";
>> + root_blkno = ost->ost_fs->fs_sysdir_blkno;
>> + break;
>> + case INODE_ALLOC_SYSTEM_INODE:
>> + path = "/";
>> + root_blkno = ost->ost_fs->fs_root_blkno;
>> + break;
>> + default:
>> + ret = OCFS2_ET_INTERNAL_FAILURE;
>> + com_err(whoami, ret, "while verifying group desc");
>> + goto out;
>> + }
>> +
>> + wp.argv0 = whoami;
>> + wp.path = path;
>> + wp.fs = ost->ost_fs;
>> + wp.bgs = bgs;
>> + wp.corrupted_bgs = corrupted_bgs;
>> + ret = ocfs2_dir_iterate(ost->ost_fs,
>> + root_blkno, 0, NULL,
>> + set_bitmap_func, &wp);
>> + if (ret) {
>> + com_err(whoami, ret, "while walking root dir");
>> + goto out;
>> + }
>> + }
>> +
>> + /* write back fixed bgs */
>> + for (i = 0; i < corrupted_bgs; i++) {
>> + ret = ocfs2_write_group_desc(ost->ost_fs,
>> + bgs[i].bg_blkno,
>> + (char *)&bgs[i]);
>> + if (ret) {
>> + com_err(whoami, ret, "while writing a block group "
>> + "descriptor at block %"PRIu64, blkno);
>> + ost->ost_saw_error = 1;
>> + }
>> + }
>> +
>> +out:
>> + ocfs2_free(&bgs);
>> + return ret;
>> +}
>> +
>> /* this returns an error if it didn't leave the allocators in a state that
>> * the iterators will be able to work with. There is probably some room
>> * for more resiliance here. */
>> @@ -1483,6 +1652,10 @@ errcode_t o2fsck_pass0(o2fsck_state *ost)
>> verbosef("Caching inode alloc failed, err %d\n",
>> (int)ret);
>>
>> + ret = verify_group_desc(ost, di, type);
>> + if (ret)
>> + goto out;
>> +
>> ret = verify_chain_alloc(ost, di,
>> blocks + ost->ost_fs->fs_blocksize,
>> blocks +
>
> .
>
More information about the Ocfs2-tools-devel
mailing list