[Ocfs2-devel] [PATCH v3] ocfs2/dlm: don't handle migrate lockres if already in shutdown

Changwei Ge ge.changwei at h3c.com
Mon Mar 5 00:36:53 PST 2018


Looks good to me.
Reviewed-by: Changwei Ge <ge.changwei at h3c.com>

On 2018/3/5 11:45, piaojun wrote:
> We should not handle migrate lockres if we are already in
> 'DLM_CTXT_IN_SHUTDOWN', as that will cause lockres to remain after leaving
> the dlm domain. Eventually, other nodes will get stuck in an infinite loop
> when requesting locks from us.
> 
> The problem is caused by concurrent umount between nodes. Before
> receiving N1's DLM_BEGIN_EXIT_DOMAIN_MSG, N2 has picked up N1 as the
> migrate target. So N2 will continue sending lockres to N1 even though N1
> has left the domain.
> 
>      N1                             N2 (owner)
>                                     touch file
> 
> access the file,
> and get pr lock
> 
>                                     begin leave domain and
>                                     pick up N1 as new owner
> 
> begin leave domain and
> migrate all lockres done
> 
>                                     begin migrate lockres to N1
> 
> end leave domain, but
> the lockres left
> unexpectedly, because
> migrate task has passed
> 
> Signed-off-by: Jun Piao <piaojun at huawei.com>
> Reviewed-by: Yiwen Jiang <jiangyiwen at huawei.com>
> Reviewed-by: Joseph Qi <jiangqi903 at gmail.com>
> ---
>   fs/ocfs2/dlm/dlmdomain.c   | 14 --------------
>   fs/ocfs2/dlm/dlmdomain.h   | 25 ++++++++++++++++++++++++-
>   fs/ocfs2/dlm/dlmrecovery.c |  9 +++++++++
>   3 files changed, 33 insertions(+), 15 deletions(-)
> 
> diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c
> index e1fea14..25b76f0 100644
> --- a/fs/ocfs2/dlm/dlmdomain.c
> +++ b/fs/ocfs2/dlm/dlmdomain.c
> @@ -675,20 +675,6 @@ static void dlm_leave_domain(struct dlm_ctxt *dlm)
>   	spin_unlock(&dlm->spinlock);
>   }
> 
> -int dlm_shutting_down(struct dlm_ctxt *dlm)
> -{
> -	int ret = 0;
> -
> -	spin_lock(&dlm_domain_lock);
> -
> -	if (dlm->dlm_state == DLM_CTXT_IN_SHUTDOWN)
> -		ret = 1;
> -
> -	spin_unlock(&dlm_domain_lock);
> -
> -	return ret;
> -}
> -
>   void dlm_unregister_domain(struct dlm_ctxt *dlm)
>   {
>   	int leave = 0;
> diff --git a/fs/ocfs2/dlm/dlmdomain.h b/fs/ocfs2/dlm/dlmdomain.h
> index fd6122a..8a92814 100644
> --- a/fs/ocfs2/dlm/dlmdomain.h
> +++ b/fs/ocfs2/dlm/dlmdomain.h
> @@ -28,7 +28,30 @@
>   extern spinlock_t dlm_domain_lock;
>   extern struct list_head dlm_domains;
> 
> -int dlm_shutting_down(struct dlm_ctxt *dlm);
> +static inline int dlm_joined(struct dlm_ctxt *dlm)
> +{
> +	int ret = 0;
> +
> +	spin_lock(&dlm_domain_lock);
> +	if (dlm->dlm_state == DLM_CTXT_JOINED)
> +		ret = 1;
> +	spin_unlock(&dlm_domain_lock);
> +
> +	return ret;
> +}
> +
> +static inline int dlm_shutting_down(struct dlm_ctxt *dlm)
> +{
> +	int ret = 0;
> +
> +	spin_lock(&dlm_domain_lock);
> +	if (dlm->dlm_state == DLM_CTXT_IN_SHUTDOWN)
> +		ret = 1;
> +	spin_unlock(&dlm_domain_lock);
> +
> +	return ret;
> +}
> +
>   void dlm_fire_domain_eviction_callbacks(struct dlm_ctxt *dlm,
>   					int node_num);
> 
> diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c
> index ec8f758..505ab42 100644
> --- a/fs/ocfs2/dlm/dlmrecovery.c
> +++ b/fs/ocfs2/dlm/dlmrecovery.c
> @@ -1378,6 +1378,15 @@ int dlm_mig_lockres_handler(struct o2net_msg *msg, u32 len, void *data,
>   	if (!dlm_grab(dlm))
>   		return -EINVAL;
> 
> +	if (!dlm_joined(dlm)) {
> +		mlog(ML_ERROR, "Domain %s not joined! "
> +			  "lockres %.*s, master %u\n",
> +			  dlm->name, mres->lockname_len,
> +			  mres->lockname, mres->master);
> +		dlm_put(dlm);
> +		return -EINVAL;
> +	}
> +
>   	BUG_ON(!(mres->flags & (DLM_MRES_RECOVERY|DLM_MRES_MIGRATION)));
> 
>   	real_master = mres->master;
> 



More information about the Ocfs2-devel mailing list