[Ocfs2-devel] [PATCH v1 1/5] ocfs2: partly revert da5e7c87827e8 for mounting crash issue
Joseph Qi
joseph.qi at linux.alibaba.com
Sat Apr 9 13:11:25 UTC 2022
Suggest rename the subject, something like
ocfs2: fix mount crash if journal is not alloced
On 4/8/22 6:30 PM, Heming Zhao wrote:
> After commit da5e7c87827e8 ("ocfs2: cleanup journal init and shutdown"),
> journal init later than before, it makes NULL pointer access in free
> routine.
>
> Crash flow:
>
> ocfs2_fill_super
> + ocfs2_mount_volume
> | + ocfs2_dlm_init //fail & return, osb->journal is NULL.
> | + ...
> | + ocfs2_check_volume //no chance to init osb->journal
> |
> + ...
> + ocfs2_dismount_volume
> ocfs2_release_system_inodes
> ...
> evict
> ...
> ocfs2_clear_inode
> ocfs2_checkpoint_inode
> ocfs2_ci_fully_checkpointed
> time_after(journal->j_trans_id, ci->ci_last_trans)
> + journal is empty, crash!
>
> For fixing, there are three solutions:
>
> 1> Revert commit da5e7c87827e8
>
> For avoiding kernel crash, this make sense for us. We only concerned
> whether there has any non-system inode access before dlm init. The
> answer is NO. And all journal replay/recovery handling happen after
> dlm & journal init done. So this method is not graceful but workable.
>
> 2> Add osb->journal check in free inode routine (eg ocfs2_clear_inode)
>
> The fix code is special for mounting phase, but it will continue
> working after mounting stage. In another word, this method adds useless
> code in normal inode free flow.
>
> 3> Do directly free inode in mounting phase
>
> This method is brutal/complex and may introduce unsafe code, currently
> maintainer didn't like.
>
> At last, we chose method <1> and partly reverted commit da5e7c87827e8.
> We reverted journal init code, and kept cleanup code flow.
>
> Fixes: da5e7c87827e8 ("ocfs2: cleanup journal init and shutdown")
> Signed-off-by: Heming Zhao <heming.zhao at suse.com>
> ---
> fs/ocfs2/inode.c | 4 ++--
> fs/ocfs2/journal.c | 21 +--------------------
> fs/ocfs2/super.c | 33 +++++++++++++++++++++++++++++++++
> 3 files changed, 36 insertions(+), 22 deletions(-)
>
> diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
> index 5739dc301569..bb116c39b581 100644
> --- a/fs/ocfs2/inode.c
> +++ b/fs/ocfs2/inode.c
> @@ -125,6 +125,7 @@ struct inode *ocfs2_iget(struct ocfs2_super *osb, u64 blkno, unsigned flags,
> struct inode *inode = NULL;
> struct super_block *sb = osb->sb;
> struct ocfs2_find_inode_args args;
> + journal_t *journal = osb->journal->j_journal;
>
> trace_ocfs2_iget_begin((unsigned long long)blkno, flags,
> sysfile_type);
> @@ -171,11 +172,10 @@ struct inode *ocfs2_iget(struct ocfs2_super *osb, u64 blkno, unsigned flags,
> * part of the transaction - the inode could have been reclaimed and
> * now it is reread from disk.
> */
> - if (osb->journal) {
> + if (journal) {
> transaction_t *transaction;
> tid_t tid;
> struct ocfs2_inode_info *oi = OCFS2_I(inode);
> - journal_t *journal = osb->journal->j_journal;
>
> read_lock(&journal->j_state_lock);
> if (journal->j_running_transaction)
> diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
> index 1887a2708709..afb85de3bb60 100644
> --- a/fs/ocfs2/journal.c
> +++ b/fs/ocfs2/journal.c
> @@ -815,30 +815,11 @@ int ocfs2_journal_init(struct ocfs2_super *osb, int *dirty)
> int status = -1;
> struct inode *inode = NULL; /* the journal inode */
> journal_t *j_journal = NULL;
> - struct ocfs2_journal *journal = NULL;
> + struct ocfs2_journal *journal = osb->journal;
> struct ocfs2_dinode *di = NULL;
> struct buffer_head *bh = NULL;
> int inode_lock = 0;
>
> - /* initialize our journal structure */
> - journal = kzalloc(sizeof(struct ocfs2_journal), GFP_KERNEL);
> - if (!journal) {
> - mlog(ML_ERROR, "unable to alloc journal\n");
> - status = -ENOMEM;
> - goto done;
> - }
> - osb->journal = journal;
> - journal->j_osb = osb;
> -
> - atomic_set(&journal->j_num_trans, 0);
> - init_rwsem(&journal->j_trans_barrier);
> - init_waitqueue_head(&journal->j_checkpointed);
> - spin_lock_init(&journal->j_lock);
> - journal->j_trans_id = 1UL;
> - INIT_LIST_HEAD(&journal->j_la_cleanups);
> - INIT_WORK(&journal->j_recovery_work, ocfs2_complete_recovery);
> - journal->j_state = OCFS2_JOURNAL_FREE;
> -
> /* already have the inode for our journal */
> inode = ocfs2_get_system_file_inode(osb, JOURNAL_SYSTEM_INODE,
> osb->slot_num);
> diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
> index 477cdf94122e..5f63a2333e52 100644
> --- a/fs/ocfs2/super.c
> +++ b/fs/ocfs2/super.c
> @@ -2015,6 +2015,7 @@ static int ocfs2_initialize_super(struct super_block *sb,
> int i, cbits, bbits;
> struct ocfs2_dinode *di = (struct ocfs2_dinode *)bh->b_data;
> struct inode *inode = NULL;
> + struct ocfs2_journal *journal;
> struct ocfs2_super *osb;
> u64 total_blocks;
>
> @@ -2195,6 +2196,32 @@ static int ocfs2_initialize_super(struct super_block *sb,
>
> get_random_bytes(&osb->s_next_generation, sizeof(u32));
>
> + /* FIXME
> + * This should be done in ocfs2_journal_init(), but unknown
> + * ordering issues will cause the filesystem to crash.
> + * If anyone wants to figure out what part of the code
> + * refers to osb->journal before ocfs2_journal_init() is run,
> + * be my guest.
> + */
> + /* initialize our journal structure */
> + journal = kzalloc(sizeof(struct ocfs2_journal), GFP_KERNEL);
> + if (!journal) {
> + mlog(ML_ERROR, "unable to alloc journal\n");
> + status = -ENOMEM;
> + goto bail;
> + }
> + osb->journal = journal;
> + journal->j_osb = osb;
> +
> + atomic_set(&journal->j_num_trans, 0);
> + init_rwsem(&journal->j_trans_barrier);
> + init_waitqueue_head(&journal->j_checkpointed);
> + spin_lock_init(&journal->j_lock);
> + journal->j_trans_id = 1UL;
> + INIT_LIST_HEAD(&journal->j_la_cleanups);
> + INIT_WORK(&journal->j_recovery_work, ocfs2_complete_recovery);
> + journal->j_state = OCFS2_JOURNAL_FREE;
> +
We may fold these into a new function, such as ocfs2_journal_alloc().
Thanks,
Joseph
> INIT_WORK(&osb->dquot_drop_work, ocfs2_drop_dquot_refs);
> init_llist_head(&osb->dquot_drop_list);
>
> @@ -2483,6 +2510,12 @@ static void ocfs2_delete_osb(struct ocfs2_super *osb)
>
> kfree(osb->osb_orphan_wipes);
> kfree(osb->slot_recovery_generations);
> + /* FIXME
> + * This belongs in journal shutdown, but because we have to
> + * allocate osb->journal at the middle of ocfs2_initialize_super(),
> + * we free it here.
> + */
> + kfree(osb->journal);
> kfree(osb->local_alloc_copy);
> kfree(osb->uuid_str);
> kfree(osb->vol_label);
More information about the Ocfs2-devel
mailing list