[Ocfs2-devel] [PATCH v2 3/3] ocfs2/dlm: continue to purge recovery lockres when recovery master goes down
Gechangwei
ge.changwei at h3c.com
Thu Jan 5 18:22:57 PST 2017
On 2017/1/6 10:08, piaojun wrote:
>
> On 2017/1/5 15:44, Gechangwei wrote:
>> On 2017/1/5 15:28, gechangwei 12382 (Cloud) wrote:
>>
>> Hi Jun,
>> I suspect a defect is hiding in your patch.
>>
>>
>>> We found a situation in which the dlm becomes blocked, caused by the continuous breakdown of recovery masters, as described below. To solve this problem, we should purge the recovery lock as soon as we detect that the recovery master has gone down.
>>>
>>> N3                     N2                    N1 (reco master)
>>>                        go down
>>>                                              pick up recovery lock and
>>>                                              begin recovering for N2
>>>
>>>                                              go down
>>>
>>> pick up recovery
>>> lock failed, then
>>> purge it:
>>> dlm_purge_lockres
>>> ->DROPPING_REF is set
>>>
>>> send deref to N1 failed,
>>> recovery lock is not purged
>>>
>>> find N1 go down, begin
>>> recovering for N1, but
>>> blocked in dlm_do_recovery
>>> as DROPPING_REF is set:
>>> dlm_do_recovery
>>> ->dlm_pick_recovery_master
>>>   ->dlmlock
>>>    ->dlm_get_lock_resource
>>>     ->__dlm_wait_on_lockres_flags(tmpres,
>>>         DLM_LOCK_RES_DROPPING_REF);
>>>
>>> Fixes: 8c0343968163 ("ocfs2/dlm: clear DROPPING_REF flag when the master goes down")
>>>
>>> Signed-off-by: Jun Piao <piaojun at huawei.com>
>>> Reviewed-by: Joseph Qi <joseph.qi at huawei.com>
>>> Reviewed-by: Jiufei Xue <xuejiufei at huawei.com>
>>> ---
>>> fs/ocfs2/dlm/dlmcommon.h   |  2 ++
>>> fs/ocfs2/dlm/dlmmaster.c   | 38 +++------------------------------
>>> fs/ocfs2/dlm/dlmrecovery.c | 29 +++++++++++++++++++-------
>>> fs/ocfs2/dlm/dlmthread.c   | 52 ++++++++++++++++++++++++++++++++++++++++++----
>>> 4 files changed, 74 insertions(+), 47 deletions(-)
>>>
>>> diff --git a/fs/ocfs2/dlm/dlmcommon.h b/fs/ocfs2/dlm/dlmcommon.h
>>> index 004f2cb..3e3e9ba8 100644
>>> --- a/fs/ocfs2/dlm/dlmcommon.h
>>> +++ b/fs/ocfs2/dlm/dlmcommon.h
>>> @@ -1004,6 +1004,8 @@ int dlm_finalize_reco_handler(struct o2net_msg *msg, u32 len, void *data,
>>> int dlm_do_master_requery(struct dlm_ctxt *dlm, struct dlm_lock_resource *res,
>>> u8 nodenum, u8 *real_master);
>>>
>>> +void __dlm_do_purge_lockres(struct dlm_ctxt *dlm,
>>> + struct dlm_lock_resource *res);
>>>
>>> int dlm_dispatch_assert_master(struct dlm_ctxt *dlm,
>>> struct dlm_lock_resource *res,
>>> diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c
>>> index 311404f..1d87e0f 100644
>>> --- a/fs/ocfs2/dlm/dlmmaster.c
>>> +++ b/fs/ocfs2/dlm/dlmmaster.c
>>> @@ -2425,52 +2425,20 @@ int dlm_deref_lockres_done_handler(struct o2net_msg *msg, u32 len, void *data,
>>> mlog(ML_NOTICE, "%s:%.*s: node %u sends deref done "
>>> "but it is already derefed!\n", dlm->name,
>>> res->lockname.len, res->lockname.name, node);
>>> - dlm_lockres_put(res);
>>> ret = 0;
>>> goto done;
>>> }
>>> -
>>> - if (!list_empty(&res->purge)) {
>>> - mlog(0, "%s: Removing res %.*s from purgelist\n",
>>> - dlm->name, res->lockname.len, res->lockname.name);
>>> - list_del_init(&res->purge);
>>> - dlm_lockres_put(res);
>>> - dlm->purge_count--;
>>> - }
>>> -
>>> - if (!__dlm_lockres_unused(res)) {
>>> - mlog(ML_ERROR, "%s: res %.*s in use after deref\n",
>>> - dlm->name, res->lockname.len, res->lockname.name);
>>> - __dlm_print_one_lock_resource(res);
>>> - BUG();
>>> - }
>>> -
>>> - __dlm_unhash_lockres(dlm, res);
>>> -
>>> - spin_lock(&dlm->track_lock);
>>> - if (!list_empty(&res->tracking))
>>> - list_del_init(&res->tracking);
>>> - else {
>>> - mlog(ML_ERROR, "%s: Resource %.*s not on the Tracking list\n",
>>> - dlm->name, res->lockname.len, res->lockname.name);
>>> - __dlm_print_one_lock_resource(res);
>>> - }
>>> - spin_unlock(&dlm->track_lock);
>>> -
>>> - /* lockres is not in the hash now. drop the flag and wake up
>>> - * any processes waiting in dlm_get_lock_resource.
>>> - */
>>> - res->state &= ~DLM_LOCK_RES_DROPPING_REF;
>>> + __dlm_do_purge_lockres(dlm, res);
>>> spin_unlock(&res->spinlock);
>>> wake_up(&res->wq);
>>>
>>> - dlm_lockres_put(res);
>>> -
>>> spin_unlock(&dlm->spinlock);
>>>
>>> ret = 0;
>>>
>>> done:
>>> + if (res)
>>> + dlm_lockres_put(res);
>>> dlm_put(dlm);
>>> return ret;
>>> }
>>> diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c
>>> index f6b3138..dd5cb8b 100644
>>> --- a/fs/ocfs2/dlm/dlmrecovery.c
>>> +++ b/fs/ocfs2/dlm/dlmrecovery.c
>>> @@ -2343,6 +2343,7 @@ static void dlm_do_local_recovery_cleanup(struct dlm_ctxt *dlm, u8 dead_node)
>>> struct dlm_lock_resource *res;
>>> int i;
>>> struct hlist_head *bucket;
>>> + struct hlist_node *tmp;
>>> struct dlm_lock *lock;
>>>
>>>
>>> @@ -2365,7 +2366,7 @@ static void dlm_do_local_recovery_cleanup(struct dlm_ctxt *dlm, u8 dead_node)
>>> */
>>> for (i = 0; i < DLM_HASH_BUCKETS; i++) {
>>> bucket = dlm_lockres_hash(dlm, i);
>>> - hlist_for_each_entry(res, bucket, hash_node) {
>>> + hlist_for_each_entry_safe(res, tmp, bucket, hash_node) {
>>> /* always prune any $RECOVERY entries for dead nodes,
>>> * otherwise hangs can occur during later recovery */
>>> if (dlm_is_recovery_lock(res->lockname.name,
>>> @@ -2386,8 +2387,17 @@ static void dlm_do_local_recovery_cleanup(struct dlm_ctxt *dlm, u8 dead_node)
>>> break;
>>> }
>>> }
>>> - dlm_lockres_clear_refmap_bit(dlm, res,
>>> - dead_node);
>>> +
>>> + if ((res->owner == dead_node) &&
>>> + (res->state & DLM_LOCK_RES_DROPPING_REF)) {
>>> + dlm_lockres_get(res);
>>> + __dlm_do_purge_lockres(dlm, res);
>>> + spin_unlock(&res->spinlock);
>>> + wake_up(&res->wq);
>>> + dlm_lockres_put(res);
>>> + continue;
>>> + } else if (res->owner == dlm->node_num)
>>> + dlm_lockres_clear_refmap_bit(dlm, res, dead_node);
>>> spin_unlock(&res->spinlock);
>>> continue;
>>> }
>>> @@ -2398,14 +2408,17 @@ static void dlm_do_local_recovery_cleanup(struct dlm_ctxt *dlm, u8 dead_node)
>>> if (res->state & DLM_LOCK_RES_DROPPING_REF) {
>>> mlog(0, "%s:%.*s: owned by "
>>> "dead node %u, this node was "
>>> - "dropping its ref when it died. "
>>> - "continue, dropping the flag.\n",
>>> + "dropping its ref when master died. "
>>> + "continue, purging the lockres.\n",
>>> dlm->name, res->lockname.len,
>>> res->lockname.name, dead_node);
>>> + dlm_lockres_get(res);
>>> + __dlm_do_purge_lockres(dlm, res);
>>> + spin_unlock(&res->spinlock);
>>> + wake_up(&res->wq);
>>> + dlm_lockres_put(res);
>>> + continue;
>>> }
>>> - res->state &= ~DLM_LOCK_RES_DROPPING_REF;
>>> - dlm_move_lockres_to_recovery_list(dlm,
>>> - res);
>> Here you just removed the line above, so the DLM has no chance to
>> select this lock resource for recovery. Is that right?
>>
>>
>>
> Here we purge the lockres directly, instead of moving it to the recovery
> list, when DLM_LOCK_RES_DROPPING_REF is set, so as to avoid the deadlock.
>
> Thanks,
> Jun
I agree with your approach to handling this scenario. But not all dlm lock
resources have DLM_LOCK_RES_DROPPING_REF set.
So we must still set those without DLM_LOCK_RES_DROPPING_REF to
DLM_LOCK_RES_RECOVERING through dlm_move_lockres_to_recovery_list(),
or we will face broken exclusion problems.
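Roughly, I would expect the dead-owner branch to end up like the sketch
below. This is simplified by me for illustration, not the exact patched
code; locking context as in dlm_do_local_recovery_cleanup(), i.e. both
dlm->spinlock and res->spinlock held:

	if (res->state & DLM_LOCK_RES_DROPPING_REF) {
		/* A deref to the dead master was in flight; this lockres
		 * can never be recovered now, so purge it right here to
		 * unblock waiters in dlm_get_lock_resource(). */
		dlm_lockres_get(res);
		__dlm_do_purge_lockres(dlm, res);
		spin_unlock(&res->spinlock);
		wake_up(&res->wq);
		dlm_lockres_put(res);
	} else {
		/* No deref in flight: this lockres still has to be
		 * recovered, so mark it DLM_LOCK_RES_RECOVERING via
		 * dlm_move_lockres_to_recovery_list() to keep recovery
		 * exclusive against other users of the resource. */
		dlm_move_lockres_to_recovery_list(dlm, res);
		spin_unlock(&res->spinlock);
	}

That way the hang is fixed without losing the RECOVERING protection on the
normal recovery path.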
BR.
Changwei
>
>>> } else if (res->owner == dlm->node_num) {
>>> dlm_free_dead_locks(dlm, res, dead_node);
>>> __dlm_lockres_calc_usage(dlm, res);
>>> diff --git a/fs/ocfs2/dlm/dlmthread.c b/fs/ocfs2/dlm/dlmthread.c
>>> index ce39722..838a06d 100644
>>> --- a/fs/ocfs2/dlm/dlmthread.c
>>> +++ b/fs/ocfs2/dlm/dlmthread.c
>>> @@ -160,6 +160,52 @@ void dlm_lockres_calc_usage(struct dlm_ctxt *dlm,
>>> spin_unlock(&dlm->spinlock);
>>> }
>>>
>>> +/*
>>> + * Do the real purge work:
>>> + * unhash the lockres, and
>>> + * clear flag DLM_LOCK_RES_DROPPING_REF.
>>> + * It requires dlm and lockres spinlock to be taken.
>>> + */
>>> +void __dlm_do_purge_lockres(struct dlm_ctxt *dlm,
>>> + struct dlm_lock_resource *res)
>>> +{
>>> + assert_spin_locked(&dlm->spinlock);
>>> + assert_spin_locked(&res->spinlock);
>>> +
>>> + if (!list_empty(&res->purge)) {
>>> + mlog(0, "%s: Removing res %.*s from purgelist\n",
>>> + dlm->name, res->lockname.len, res->lockname.name);
>>> + list_del_init(&res->purge);
>>> + dlm_lockres_put(res);
>>> + dlm->purge_count--;
>>> + }
>>> +
>>> + if (!__dlm_lockres_unused(res)) {
>>> + mlog(ML_ERROR, "%s: res %.*s in use after deref\n",
>>> + dlm->name, res->lockname.len, res->lockname.name);
>>> + __dlm_print_one_lock_resource(res);
>>> + BUG();
>>> + }
>>> +
>>> + __dlm_unhash_lockres(dlm, res);
>>> +
>>> + spin_lock(&dlm->track_lock);
>>> + if (!list_empty(&res->tracking))
>>> + list_del_init(&res->tracking);
>>> + else {
>>> + mlog(ML_ERROR, "%s: Resource %.*s not on the Tracking list\n",
>>> + dlm->name, res->lockname.len, res->lockname.name);
>>> + __dlm_print_one_lock_resource(res);
>>> + }
>>> + spin_unlock(&dlm->track_lock);
>>> +
>>> + /*
>>> + * lockres is not in the hash now. drop the flag and wake up
>>> + * any processes waiting in dlm_get_lock_resource.
>>> + */
>>> + res->state &= ~DLM_LOCK_RES_DROPPING_REF;
>>> +}
>>> +
>>> static void dlm_purge_lockres(struct dlm_ctxt *dlm,
>>> struct dlm_lock_resource *res)
>>> {
>>> @@ -176,10 +222,8 @@ static void dlm_purge_lockres(struct dlm_ctxt *dlm,
>>>
>>> if (!master) {
>>> if (res->state & DLM_LOCK_RES_DROPPING_REF) {
>>> - mlog(ML_NOTICE, "%s: res %.*s already in "
>>> - "DLM_LOCK_RES_DROPPING_REF state\n",
>>> - dlm->name, res->lockname.len,
>>> - res->lockname.name);
>>> + mlog(ML_NOTICE, "%s: res %.*s already in DLM_LOCK_RES_DROPPING_REF state\n",
>>> + dlm->name, res->lockname.len, res->lockname.name);
>>> spin_unlock(&res->spinlock);
>>> return;
>>> }
>>> --
>>> 1.8.4.3
>>>
>>>