[Ocfs2-tools-devel] [PATCH 1/2 v3] fsck: supporting fixing inode alloc group desc

Eric Ren zren at suse.com
Tue Feb 6 20:33:15 PST 2018


Jun,

On 02/02/2018 05:20 PM, piaojun wrote:
> When inode_alloc's gd is corrupted, we may reinitialize it and then set
> its bitmap by iterating all files of root dir.
>
> How to create a corrupted gd?
> 1. Find the gd blkno of inode_alloc with debugfs.ocfs2:
>      # debugfs.ocfs2 -R "stat //inode_alloc:0000" /dev/mapper/xxxx
>      ...
>      ##   Block#            Total    Used     Free     Contig   Size
>      0    167424            1024     3        1021     1021     4032
> 2. clear the gd with 'dd' command:
>      # dd if=/dev/zero of=/dev/mapper/xxxx bs=4k count=1 seek=167424
>      oflag=direct
>
> How to fix corrupted gd?
> 1. Identify the corrupted gd by generation and magic.
> 2. Initialize the corrupted gd with ocfs2_init_group_desc().
> 3. Iterate all files in root dir, and set inode_alloc's bitmap by inode
>     blknum.
> 4. Write back the good gd to disk.
>
> Currently we can only fix the situation where there is one gd in each
> chain, because we can hardly rebuild a gd far from the chain header. The
> key problem is that we cannot trust the gds anymore, as they have been
> corrupted. So we must rely on the ocfs2_chain_list struct to restore
> all gds.

I still think it's not proper to silently assume that there is only one
gd in each chain.
So, could we warn the user by checking how many gds the allocator has?
If the number of gds is greater than the number of chain recs, could we
print a warning?

Eric
>
> Signed-off-by: Jun Piao <piaojun at huawei.com>
> ---
>   fsck.ocfs2/pass0.c | 180 +++++++++++++++++++++++++++++++++++++++++++++++++++++
>   1 file changed, 180 insertions(+)
>
> diff --git a/fsck.ocfs2/pass0.c b/fsck.ocfs2/pass0.c
> index bfd11fb..6fe1f2f 100644
> --- a/fsck.ocfs2/pass0.c
> +++ b/fsck.ocfs2/pass0.c
> @@ -1308,6 +1308,182 @@ static errcode_t verify_bitmap_descs(o2fsck_state *ost,
>   	return ret;
>   }
>
> +struct walk_path {
> +	const char *argv0;
> +	char *path;
> +	ocfs2_filesys *fs;
> +	struct ocfs2_group_desc *bgs;
> +	int corrupted_bgs;
> +};
> +
> +static int set_bitmap_func(struct ocfs2_dir_entry *dentry,
> +			  uint64_t blocknr,
> +			  int offset,
> +			  int blocksize,
> +			  char *buf,
> +			  void *priv_data)
> +{
> +	struct walk_path *wp = priv_data;
> +	struct ocfs2_group_desc *bg;
> +	__le64 inode = dentry->inode;
> +	__le64 bg_blkno;
> +	errcode_t ret;
> +	int len;
> +	int reti = 0;
> +	int i = 0;
> +	char *old_path, *path = NULL;
> +
> +	if (!strncmp(dentry->name, ".", dentry->name_len) ||
> +	    !strncmp(dentry->name, "..", dentry->name_len))
> +		return 0;
> +
> +	ret = ocfs2_malloc0(PATH_MAX, &path);
> +	if (ret) {
> +		com_err(wp->argv0, ret,
> +			"while allocating path memory in %s\n", wp->path);
> +		return OCFS2_DIRENT_ABORT;
> +	}
> +
> +	len = strlen(wp->path);
> +	memcpy(path, wp->path, len);
> +	memcpy(path + len, dentry->name, dentry->name_len);
> +	if (dentry->file_type == OCFS2_FT_DIR)
> +		path[len + dentry->name_len] = '/';
> +
> +	/* set group desc bitmap */
> +	for (i = 0; i < wp->corrupted_bgs; i++) {
> +		bg = &wp->bgs[i];
> +		bg_blkno = bg->bg_blkno;
> +		if (inode > bg_blkno && inode <= bg_blkno + bg->bg_bits) {
> +			ocfs2_set_bit(inode - bg_blkno, bg->bg_bitmap);
> +			bg->bg_free_bits_count--;
> +		}
> +	}
> +
> +	if (dentry->file_type == OCFS2_FT_DIR) {
> +		old_path = wp->path;
> +		wp->path = path;
> +		ret = ocfs2_dir_iterate(wp->fs, inode, 0, NULL,
> +					set_bitmap_func, wp);
> +		if (ret) {
> +			com_err(wp->argv0, ret, "while walking %s", wp->path);
> +			reti = OCFS2_DIRENT_ABORT;
> +		}
> +		wp->path = old_path;
> +	}
> +
> +	ocfs2_free(&path);
> +
> +	return reti;
> +}
> +
> +static errcode_t verify_group_desc(o2fsck_state *ost,
> +				     struct ocfs2_dinode *di, int type)
> +{
> +	uint16_t bits;
> +	uint64_t blkno;
> +	errcode_t ret = 0;
> +	int corrupted_bgs = 0, i;
> +	struct ocfs2_chain_list *cl = &di->id2.i_chain;
> +	struct ocfs2_chain_rec *rec;
> +	struct ocfs2_group_desc *bgs = NULL;
> +
> +	ret = ocfs2_malloc_blocks(ost->ost_fs->fs_io,
> +			cl->cl_next_free_rec, &bgs);
> +	if (ret) {
> +		com_err(whoami, ret, "while allocating block group descriptors");
> +		goto out;
> +	}
> +	memset(bgs, 0, ost->ost_fs->fs_blocksize * cl->cl_next_free_rec);
> +
> +	/*
> +	 * Currently we could only fix the situation that there is one gd
> +	 * in each chain, because we can hardly rebuild the gd far from
> +	 * chain header. The key problem is that we can not trust gd
> +	 * anymore as they have been corrupted. So we must relay on
> +	 * the ocfs2_chain_list struct to restore all gds.
> +	 */
> +	for (i = 0; i < cl->cl_next_free_rec; i++) {
> +		rec = &cl->cl_recs[i];
> +		blkno = rec->c_blkno;
> +		bits = rec->c_total;
> +
> +		ret = ocfs2_read_group_desc(ost->ost_fs, blkno,
> +				(char *)&bgs[corrupted_bgs]);
> +		if ((ret == OCFS2_ET_BAD_GROUP_DESC_MAGIC) ||
> +		    (!ret && bgs[corrupted_bgs].bg_generation != ost->ost_fs_generation)) {
> +			if (!prompt(ost, PY, PR_GROUP_EXPECTED_DESC,
> +			    "Block %"PRIu64" should be a group "
> +			    "descriptor for the bitmap chain allocator "
> +			    "but it was corrupted.  Reinitialize it as "
> +			    "a group desc and link it into the bitmap "
> +			    "allocator?", blkno))
> +				continue;
> +			ocfs2_init_group_desc(ost->ost_fs,
> +					&bgs[corrupted_bgs],
> +					blkno, ost->ost_fs_generation,
> +					di->i_blkno, bits, i, 1);
> +			corrupted_bgs++;
> +		} else if (ret) {
> +			com_err(whoami, ret, "while reading a block bitmap "
> +				"group descriptor from block %"PRIu64,
> +				blkno);
> +		}
> +	}
> +
> +	/* traverse all inodes, and set group desc bitmap */
> +	if (corrupted_bgs) {
> +		/* Walk root dir */
> +		struct walk_path wp;
> +		uint64_t root_blkno;
> +		char *path = NULL;
> +
> +		switch (type) {
> +		case GLOBAL_INODE_ALLOC_SYSTEM_INODE:
> +			path = "//";
> +			root_blkno = ost->ost_fs->fs_sysdir_blkno;
> +			break;
> +		case INODE_ALLOC_SYSTEM_INODE:
> +			path = "/";
> +			root_blkno = ost->ost_fs->fs_root_blkno;
> +			break;
> +		default:
> +			ret = OCFS2_ET_INTERNAL_FAILURE;
> +			com_err(whoami, ret, "while verifying group desc");
> +			goto out;
> +		}
> +
> +		wp.argv0 = whoami;
> +		wp.path = path;
> +		wp.fs = ost->ost_fs;
> +		wp.bgs = bgs;
> +		wp.corrupted_bgs = corrupted_bgs;
> +		ret = ocfs2_dir_iterate(ost->ost_fs,
> +				root_blkno, 0, NULL,
> +				set_bitmap_func, &wp);
> +		if (ret) {
> +			com_err(whoami, ret, "while walking root dir");
> +			goto out;
> +		}
> +	}
> +
> +	/* write back fixed bgs */
> +	for (i = 0; i < corrupted_bgs; i++) {
> +		ret = ocfs2_write_group_desc(ost->ost_fs,
> +				bgs[i].bg_blkno,
> +				(char *)&bgs[i]);
> +		if (ret) {
> +			com_err(whoami, ret, "while writing a block group "
> +				"descriptor at block %"PRIu64, blkno);
> +			ost->ost_saw_error = 1;
> +		}
> +	}
> +
> +out:
> +	ocfs2_free(&bgs);
> +	return ret;
> +}
> +
>   /* this returns an error if it didn't leave the allocators in a state that
>    * the iterators will be able to work with.  There is probably some room
>    * for more resiliance here. */
> @@ -1483,6 +1659,10 @@ errcode_t o2fsck_pass0(o2fsck_state *ost)
>   			verbosef("Caching inode alloc failed, err %d\n",
>   				 (int)ret);
>
> +		ret = verify_group_desc(ost, di, type);
> +		if (ret)
> +			goto out;
> +
>   		ret = verify_chain_alloc(ost, di,
>   					 blocks + ost->ost_fs->fs_blocksize,
>   					 blocks +




More information about the Ocfs2-tools-devel mailing list