[Ocfs2-devel] [PATCH] ocfs2: fix error handling during mounting stage

heming.zhao at suse.com heming.zhao at suse.com
Wed Apr 6 15:14:05 UTC 2022


On 4/6/22 17:27, Joseph Qi wrote:
> 
> On 4/5/22 12:40 AM, Heming Zhao wrote:
>> This is draft version, It only passed compiling & mount/umount test.
>> I want to know if my idea is acceptable for maintainers.
>>
>> There left one issue: mounting crash when no dlm connection. I still
>> can't find out a better solution without directly free inode memory.
>> So I marked the related function (ocfs2_release_system_inodes) with
>> comment: "/* FIXME crash when no journal */"
>>
>> Signed-off-by: Heming Zhao <heming.zhao at suse.com>
>> ---
>>   fs/ocfs2/reservations.c |   4 +-
>>   fs/ocfs2/reservations.h |   2 +-
>>   fs/ocfs2/super.c        | 156 +++++++++++++++++++++++++---------------
>>   3 files changed, 99 insertions(+), 63 deletions(-)
>>
>> diff --git a/fs/ocfs2/reservations.c b/fs/ocfs2/reservations.c
>> index 769e466887b0..a9d1296d736d 100644
>> --- a/fs/ocfs2/reservations.c
>> +++ b/fs/ocfs2/reservations.c
>> @@ -198,7 +198,7 @@ void ocfs2_resv_set_type(struct ocfs2_alloc_reservation *resv,
>>   	resv->r_flags |= flags;
>>   }
>>   
>> -int ocfs2_resmap_init(struct ocfs2_super *osb,
>> +void ocfs2_resmap_init(struct ocfs2_super *osb,
>>   		      struct ocfs2_reservation_map *resmap)
>>   {
>>   	memset(resmap, 0, sizeof(*resmap));
>> @@ -207,8 +207,6 @@ int ocfs2_resmap_init(struct ocfs2_super *osb,
>>   	resmap->m_reservations = RB_ROOT;
>>   	/* m_bitmap_len is initialized to zero by the above memset. */
>>   	INIT_LIST_HEAD(&resmap->m_lru);
>> -
>> -	return 0;
>>   }
>>   
>>   static void ocfs2_resv_mark_lru(struct ocfs2_reservation_map *resmap,
>> diff --git a/fs/ocfs2/reservations.h b/fs/ocfs2/reservations.h
>> index 677c50663595..6e5559d6940d 100644
>> --- a/fs/ocfs2/reservations.h
>> +++ b/fs/ocfs2/reservations.h
>> @@ -81,7 +81,7 @@ void ocfs2_resv_discard(struct ocfs2_reservation_map *resmap,
>>    * Only possible return value other than '0' is -ENOMEM for failure to
>>    * allocation mirror bitmap.
>>    */
>> -int ocfs2_resmap_init(struct ocfs2_super *osb,
>> +void ocfs2_resmap_init(struct ocfs2_super *osb,
>>   		      struct ocfs2_reservation_map *resmap);
>>   
>>   /**
>> diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
>> index 477cdf94122e..04d7bc7e1110 100644
>> --- a/fs/ocfs2/super.c
>> +++ b/fs/ocfs2/super.c
>> @@ -112,6 +112,7 @@ static int ocfs2_initialize_super(struct super_block *sb,
>>   				  struct buffer_head *bh,
>>   				  int sector_size,
>>   				  struct ocfs2_blockcheck_stats *stats);
>> +static void ocfs2_uninitialize_super(struct ocfs2_super *osb);
>>   static int ocfs2_get_sector(struct super_block *sb,
>>   			    struct buffer_head **bh,
>>   			    int block,
>> @@ -458,6 +459,7 @@ static int ocfs2_init_global_system_inodes(struct ocfs2_super *osb)
>>   			continue;
>>   		new = ocfs2_get_system_file_inode(osb, i, osb->slot_num);
>>   		if (!new) {
>> +			/* FIXME crash when no journal */
> 
> I don't have better idea at moment, so we may factor out part of journal init
> and put bofore.

I will do the revert commit da5e7c87827e8 job in next patch verion (v1).

> 
>>   			ocfs2_release_system_inodes(osb);
>>   			status = ocfs2_is_soft_readonly(osb) ? -EROFS : -EINVAL;
>>   			mlog_errno(status);
>> @@ -488,6 +490,7 @@ static int ocfs2_init_local_system_inodes(struct ocfs2_super *osb)
>>   			continue;
>>   		new = ocfs2_get_system_file_inode(osb, i, osb->slot_num);
>>   		if (!new) {
>> +			/* FIXME crash when no journal */
>>   			ocfs2_release_system_inodes(osb);
>>   			status = ocfs2_is_soft_readonly(osb) ? -EROFS : -EINVAL;
>>   			mlog(ML_ERROR, "status=%d, sysfile=%d, slot=%d\n",
>> @@ -989,28 +992,27 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
>>   
>>   	if (!ocfs2_parse_options(sb, data, &parsed_options, 0)) {
>>   		status = -EINVAL;
>> -		goto read_super_error;
>> +		goto finally;
>>   	}
>>   
>>   	/* probe for superblock */
>>   	status = ocfs2_sb_probe(sb, &bh, &sector_size, &stats);
>>   	if (status < 0) {
>>   		mlog(ML_ERROR, "superblock probe failed!\n");
>> -		goto read_super_error;
>> +		goto finally;
>>   	}
>>   
>>   	status = ocfs2_initialize_super(sb, bh, sector_size, &stats);
>> -	osb = OCFS2_SB(sb);
>> -	if (status < 0) {
>> -		mlog_errno(status);
>> -		goto read_super_error;
>> -	}
>>   	brelse(bh);
>>   	bh = NULL;
>> +	if (status < 0) {
>> +		goto finally;
>> +	}
>> +	osb = OCFS2_SB(sb);

please note I move "osb = xx" & "if (xx)" after brelse(bh);
So bh don't need to free in goto label place.

>>   
>>   	if (!ocfs2_check_set_options(sb, &parsed_options)) {
>>   		status = -EINVAL;
>> -		goto read_super_error;
>> +		goto super_out;
>>   	}
>>   	osb->s_mount_opt = parsed_options.mount_opt;
>>   	osb->s_atime_quantum = parsed_options.atime_quantum;
>> @@ -1027,7 +1029,7 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
>>   
>>   	status = ocfs2_verify_userspace_stack(osb, &parsed_options);
>>   	if (status)
>> -		goto read_super_error;
>> +		goto super_out;
>>   
>>   	sb->s_magic = OCFS2_SUPER_MAGIC;
>>   
>> @@ -1041,7 +1043,7 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
>>   			status = -EACCES;
>>   			mlog(ML_ERROR, "Readonly device detected but readonly "
>>   			     "mount was not specified.\n");
>> -			goto read_super_error;
>> +			goto super_out;
>>   		}
>>   
>>   		/* You should not be able to start a local heartbeat
>> @@ -1050,7 +1052,7 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
>>   			status = -EROFS;
>>   			mlog(ML_ERROR, "Local heartbeat specified on readonly "
>>   			     "device.\n");
>> -			goto read_super_error;
>> +			goto super_out;
>>   		}
>>   
>>   		status = ocfs2_check_journals_nolocks(osb);
>> @@ -1061,7 +1063,7 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
>>   				     "unavailable.\n");
>>   			else
>>   				mlog_errno(status);
>> -			goto read_super_error;
>> +			goto super_out;
>>   		}
>>   
>>   		ocfs2_set_ro_flag(osb, 1);
>> @@ -1079,7 +1081,7 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
>>   	status = ocfs2_verify_heartbeat(osb);
>>   	if (status < 0) {
>>   		mlog_errno(status);
>> -		goto read_super_error;
>> +		goto super_out;
>>   	}
>>   
>>   	osb->osb_debug_root = debugfs_create_dir(osb->uuid_str,
>> @@ -1094,15 +1096,14 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
>>   
>>   	status = ocfs2_mount_volume(sb);
>>   	if (status < 0)
>> -		goto read_super_error;
>> +		goto debugfs_out;
>>   
>>   	if (osb->root_inode)
>>   		inode = igrab(osb->root_inode);
>>   
>>   	if (!inode) {
>>   		status = -EIO;
>> -		mlog_errno(status);
>> -		goto read_super_error;
>> +		goto journal_out;
>>   	}
>>   
>>   	osb->osb_dev_kset = kset_create_and_add(sb->s_id, NULL,
>> @@ -1110,7 +1111,7 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
>>   	if (!osb->osb_dev_kset) {
>>   		status = -ENOMEM;
>>   		mlog(ML_ERROR, "Unable to create device kset %s.\n", sb->s_id);
>> -		goto read_super_error;
>> +		goto journal_out;
>>   	}
>>   
>>   	/* Create filecheck sysfs related directories/files at
>> @@ -1119,14 +1120,13 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
>>   		status = -ENOMEM;
>>   		mlog(ML_ERROR, "Unable to create filecheck sysfs directory at "
>>   			"/sys/fs/ocfs2/%s/filecheck.\n", sb->s_id);
>> -		goto read_super_error;
>> +		goto journal_out;
>>   	}
>>   
>>   	root = d_make_root(inode);
>>   	if (!root) {
>>   		status = -ENOMEM;
>> -		mlog_errno(status);
>> -		goto read_super_error;
>> +		goto journal_out;
>>   	}
>>   
>>   	sb->s_root = root;
>> @@ -1178,17 +1178,22 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
>>   
>>   	return status;
>>   
>> -read_super_error:
>> -	brelse(bh);
>> +journal_out:
>> +	mlog_errno(status);
>> +	atomic_set(&osb->vol_state, VOLUME_DISABLED);
>> +	wake_up(&osb->osb_mount_event);
>> +	ocfs2_dismount_volume(sb, 1);
>>   
>> -	if (status)
>> -		mlog_errno(status);
>> +	return status;
>>   
>> -	if (osb) {
>> -		atomic_set(&osb->vol_state, VOLUME_DISABLED);
>> -		wake_up(&osb->osb_mount_event);
>> -		ocfs2_dismount_volume(sb, 1);
>> -	}
>> +debugfs_out:
>> +	debugfs_remove_recursive(osb->osb_debug_root);
>> +super_out:
>> +	/* FIXME crash when no journal */
>> +	ocfs2_release_system_inodes(osb);
>> +	ocfs2_uninitialize_super(osb);
>> +finally:
> 
> Missing brelse(bh) since it will goto here if ocfs2_sb_probe()
> fails.

No. please also check my previous comment. if ocfs2_sb_probe() fails, bh is
released before return. and in normal code flow, bh will be released after
ocfs2_initialize_super() return. So no need to call brelse(bh) in error
handling place.

> Suggest we name the label as out_xxx, and I have to take more
> time to review the above changes.

OK, will change in next patch version (v1).

> 
>> +	mlog_errno(status);
>>   
>>   	return status;
>>   }
>> @@ -1803,11 +1808,10 @@ static int ocfs2_get_sector(struct super_block *sb,
>>   static int ocfs2_mount_volume(struct super_block *sb)
>>   {
>>   	int status = 0;
>> -	int unlock_super = 0;
>>   	struct ocfs2_super *osb = OCFS2_SB(sb);
>>   
>>   	if (ocfs2_is_hard_readonly(osb))
>> -		goto leave;
>> +		goto finally;
>>   
>>   	mutex_init(&osb->obs_trim_fs_mutex);
>>   
>> @@ -1817,44 +1821,54 @@ static int ocfs2_mount_volume(struct super_block *sb)
>>   		if (status == -EBADR && ocfs2_userspace_stack(osb))
>>   			mlog(ML_ERROR, "couldn't mount because cluster name on"
>>   			" disk does not match the running cluster name.\n");
>> -		goto leave;
>> +		goto finally;
>>   	}
>>   
>>   	status = ocfs2_super_lock(osb, 1);
>>   	if (status < 0) {
>>   		mlog_errno(status);
>> -		goto leave;
>> +		goto dlm_out;
>>   	}
>> -	unlock_super = 1;
>>   
>>   	/* This will load up the node map and add ourselves to it. */
>>   	status = ocfs2_find_slot(osb);
>>   	if (status < 0) {
>>   		mlog_errno(status);
>> -		goto leave;
>> +		goto super_lock;
>>   	}
>>   
>>   	/* load all node-local system inodes */
>>   	status = ocfs2_init_local_system_inodes(osb);
>>   	if (status < 0) {
>>   		mlog_errno(status);
>> -		goto leave;
>> +		goto super_lock;
>>   	}
>>   
>>   	status = ocfs2_check_volume(osb);
>>   	if (status < 0) {
>>   		mlog_errno(status);
>> -		goto leave;
>> +		goto system_inodes;
>>   	}
>>   
>>   	status = ocfs2_truncate_log_init(osb);
>> -	if (status < 0)
>> +	if (status < 0) {
>>   		mlog_errno(status);
>> +		goto journal_shutdown;
>> +	}
>>   
>> -leave:
>> -	if (unlock_super)
>> -		ocfs2_super_unlock(osb, 1);
>> +	ocfs2_super_unlock(osb, 1);
>> +	return status;
>>   
>> +journal_shutdown:
>> +	ocfs2_journal_shutdown(osb);
>> +system_inodes:
>> +	/* FIXME crash when no journal */
>> +	ocfs2_release_system_inodes(osb);
>> +super_lock:
>> +	ocfs2_super_unlock(osb, 1);
>> +dlm_out:
>> +	ocfs2_dlm_shutdown(osb, 0);
>> +finally:
>>   	return status;
>>   }
>>   
>> @@ -2110,17 +2124,13 @@ static int ocfs2_initialize_super(struct super_block *sb,
>>   
>>   	init_waitqueue_head(&osb->osb_mount_event);
>>   
>> -	status = ocfs2_resmap_init(osb, &osb->osb_la_resmap);
>> -	if (status) {
>> -		mlog_errno(status);
>> -		goto bail;
>> -	}
>> +	ocfs2_resmap_init(osb, &osb->osb_la_resmap);
>>   
>>   	osb->vol_label = kmalloc(OCFS2_MAX_VOL_LABEL_LEN, GFP_KERNEL);
>>   	if (!osb->vol_label) {
>>   		mlog(ML_ERROR, "unable to alloc vol label\n");
>>   		status = -ENOMEM;
>> -		goto bail;
>> +		goto recovery_map;
>>   	}
>>   
>>   	osb->slot_recovery_generations =
>> @@ -2129,7 +2139,7 @@ static int ocfs2_initialize_super(struct super_block *sb,
>>   	if (!osb->slot_recovery_generations) {
>>   		status = -ENOMEM;
>>   		mlog_errno(status);
>> -		goto bail;
>> +		goto vol_label;
>>   	}
>>   
>>   	init_waitqueue_head(&osb->osb_wipe_event);
>> @@ -2139,7 +2149,7 @@ static int ocfs2_initialize_super(struct super_block *sb,
>>   	if (!osb->osb_orphan_wipes) {
>>   		status = -ENOMEM;
>>   		mlog_errno(status);
>> -		goto bail;
>> +		goto slot_recovery_gen;
>>   	}
>>   
>>   	osb->osb_rf_lock_tree = RB_ROOT;
>> @@ -2155,13 +2165,13 @@ static int ocfs2_initialize_super(struct super_block *sb,
>>   		mlog(ML_ERROR, "couldn't mount because of unsupported "
>>   		     "optional features (%x).\n", i);
>>   		status = -EINVAL;
>> -		goto bail;
>> +		goto orphan_wipes;
>>   	}
>>   	if (!sb_rdonly(osb->sb) && (i = OCFS2_HAS_RO_COMPAT_FEATURE(osb->sb, ~OCFS2_FEATURE_RO_COMPAT_SUPP))) {
>>   		mlog(ML_ERROR, "couldn't mount RDWR because of "
>>   		     "unsupported optional features (%x).\n", i);
>>   		status = -EINVAL;
>> -		goto bail;
>> +		goto orphan_wipes;
>>   	}
>>   
>>   	if (ocfs2_clusterinfo_valid(osb)) {
>> @@ -2182,7 +2192,7 @@ static int ocfs2_initialize_super(struct super_block *sb,
>>   			     "cluster stack label (%s) \n",
>>   			     osb->osb_cluster_stack);
>>   			status = -EINVAL;
>> -			goto bail;
>> +			goto orphan_wipes;
>>   		}
>>   		memcpy(osb->osb_cluster_name,
>>   			OCFS2_RAW_SB(di)->s_cluster_info.ci_cluster,
>> @@ -2208,7 +2218,7 @@ static int ocfs2_initialize_super(struct super_block *sb,
>>   		mlog(ML_ERROR, "Volume has invalid cluster size (%d)\n",
>>   		     osb->s_clustersize);
>>   		status = -EINVAL;
>> -		goto bail;
>> +		goto orphan_wipes;
>>   	}
>>   
>>   	total_blocks = ocfs2_clusters_to_blocks(osb->sb,
>> @@ -2220,14 +2230,14 @@ static int ocfs2_initialize_super(struct super_block *sb,
>>   		mlog(ML_ERROR, "Volume too large "
>>   		     "to mount safely on this system");
>>   		status = -EFBIG;
>> -		goto bail;
>> +		goto orphan_wipes;
>>   	}
>>   
>>   	if (ocfs2_setup_osb_uuid(osb, di->id2.i_super.s_uuid,
>>   				 sizeof(di->id2.i_super.s_uuid))) {
>>   		mlog(ML_ERROR, "Out of memory trying to setup our uuid.\n");
>>   		status = -ENOMEM;
>> -		goto bail;
>> +		goto orphan_wipes;
>>   	}
>>   
>>   	strlcpy(osb->vol_label, di->id2.i_super.s_label,
>> @@ -2247,7 +2257,7 @@ static int ocfs2_initialize_super(struct super_block *sb,
>>   	if (!osb->osb_dlm_debug) {
>>   		status = -ENOMEM;
>>   		mlog_errno(status);
>> -		goto bail;
>> +		goto uuid_str;
>>   	}
>>   
>>   	atomic_set(&osb->vol_state, VOLUME_INIT);
>> @@ -2256,7 +2266,7 @@ static int ocfs2_initialize_super(struct super_block *sb,
>>   	status = ocfs2_init_global_system_inodes(osb);
>>   	if (status < 0) {
>>   		mlog_errno(status);
>> -		goto bail;
>> +		goto dlm_out;
>>   	}
>>   
>>   	/*
>> @@ -2267,7 +2277,7 @@ static int ocfs2_initialize_super(struct super_block *sb,
>>   	if (!inode) {
>>   		status = -EINVAL;
>>   		mlog_errno(status);
>> -		goto bail;
>> +		goto system_inodes;
>>   	}
>>   
>>   	osb->bitmap_blkno = OCFS2_I(inode)->ip_blkno;
>> @@ -2280,19 +2290,47 @@ static int ocfs2_initialize_super(struct super_block *sb,
>>   	status = ocfs2_init_slot_info(osb);
>>   	if (status < 0) {
>>   		mlog_errno(status);
>> -		goto bail;
>> +		goto system_inodes;
>>   	}
>>   
>>   	osb->ocfs2_wq = alloc_ordered_workqueue("ocfs2_wq", WQ_MEM_RECLAIM);
>>   	if (!osb->ocfs2_wq) {
>>   		status = -ENOMEM;
>>   		mlog_errno(status);
>> +		goto slot_info;
>>   	}
>>   
>> +	return status;
>> +
>> +slot_info:
>> +	ocfs2_free_slot_info(osb);
>> +system_inodes:
>> +	/* FIXME crash when no journal */
>> +	ocfs2_release_system_inodes(osb);
>> +dlm_out:
>> +	ocfs2_put_dlm_debug(osb->osb_dlm_debug);
>> +uuid_str:
>> +	kfree(osb->uuid_str);
>> +orphan_wipes:
>> +	kfree(osb->osb_orphan_wipes);
>> +slot_recovery_gen:
>> +	kfree(osb->slot_recovery_generations);
>> +vol_label:
>> +	kfree(osb->vol_label);
>> +recovery_map:
>> +	kfree(osb->recovery_map);
>>   bail:
>> +	kfree(osb);
>>   	return status;
>>   }
> Could we split ocfs2_initialize_super() changes as a single patch?
> Also suggest we name the label as out_xxx.
> 

NO problem.

>>   
>> +static void ocfs2_uninitialize_super(struct ocfs2_super *osb)
>> +{
>> +	kfree(osb->recovery_map);
>> +	ocfs2_delete_osb(osb);
>> +	kfree(osb);
>> +}
> 
> Since ocfs2_initialize_super() does more than above, I don't think
> the name is quite suitable here.
> If we the above operations are enough, I suggest just open code in
> error handling.
> 

No problem, will change to directly call these three line codes.
I checked many times and thought they're enough.

Thank you for your kindly review. please wait for patch v1.

- Heming




More information about the Ocfs2-devel mailing list