[Ocfs2-devel] [PATCH v2 3/3] ocfs2/dlm: continue to purge recovery lockres when recovery master goes down
piaojun
piaojun at huawei.com
Thu Jan 5 18:07:27 PST 2017
On 2017/1/5 15:44, Gechangwei wrote:
> On 2017/1/5 15:28, gechangwei 12382 (Cloud) wrote:
>
> Hi Jun,
> I suppose that a defect is hidden in your patch.
>
>
>> We found a DLM-blocked situation caused by the continuous breakdown of recovery masters, as described below.
>> To solve this problem, we should purge the recovery lock once we detect that the recovery master has gone down.
>>
>>          N3                       N2                    N1(reco master)
>>                                   go down
>>                                                         pick up recovery lock and
>>                                                         begin recovering for N2
>>
>>                                                         go down
>>
>> pick up recovery
>> lock failed, then
>> purge it:
>> dlm_purge_lockres
>> ->DROPPING_REF is set
>>
>> send deref to N1 failed,
>> recovery lock is not purged
>>
>> find N1 go down, begin
>> recovering for N1, but
>> blocked in dlm_do_recovery
>> as DROPPING_REF is set:
>> dlm_do_recovery
>> ->dlm_pick_recovery_master
>>   ->dlmlock
>>     ->dlm_get_lock_resource
>>       ->__dlm_wait_on_lockres_flags(tmpres,
>>             DLM_LOCK_RES_DROPPING_REF);
>>
>> Fixes: 8c0343968163 ("ocfs2/dlm: clear DROPPING_REF flag when the master goes down")
>>
>> Signed-off-by: Jun Piao <piaojun at huawei.com>
>> Reviewed-by: Joseph Qi <joseph.qi at huawei.com>
>> Reviewed-by: Jiufei Xue <xuejiufei at huawei.com>
>> ---
>> fs/ocfs2/dlm/dlmcommon.h | 2 ++
>> fs/ocfs2/dlm/dlmmaster.c | 38 +++------------------------------
>> fs/ocfs2/dlm/dlmrecovery.c | 29 +++++++++++++++++++-------
>> fs/ocfs2/dlm/dlmthread.c | 52 ++++++++++++++++++++++++++++++++++++++++++----
>> 4 files changed, 74 insertions(+), 47 deletions(-)
>>
>> diff --git a/fs/ocfs2/dlm/dlmcommon.h b/fs/ocfs2/dlm/dlmcommon.h
>> index 004f2cb..3e3e9ba8 100644
>> --- a/fs/ocfs2/dlm/dlmcommon.h
>> +++ b/fs/ocfs2/dlm/dlmcommon.h
>> @@ -1004,6 +1004,8 @@ int dlm_finalize_reco_handler(struct o2net_msg *msg, u32 len, void *data,
>> int dlm_do_master_requery(struct dlm_ctxt *dlm, struct dlm_lock_resource *res,
>> u8 nodenum, u8 *real_master);
>>
>> +void __dlm_do_purge_lockres(struct dlm_ctxt *dlm,
>> + struct dlm_lock_resource *res);
>>
>> int dlm_dispatch_assert_master(struct dlm_ctxt *dlm,
>> struct dlm_lock_resource *res,
>> diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c
>> index 311404f..1d87e0f 100644
>> --- a/fs/ocfs2/dlm/dlmmaster.c
>> +++ b/fs/ocfs2/dlm/dlmmaster.c
>> @@ -2425,52 +2425,20 @@ int dlm_deref_lockres_done_handler(struct o2net_msg *msg, u32 len, void *data,
>> mlog(ML_NOTICE, "%s:%.*s: node %u sends deref done "
>> "but it is already derefed!\n", dlm->name,
>> res->lockname.len, res->lockname.name, node);
>> - dlm_lockres_put(res);
>> ret = 0;
>> goto done;
>> }
>> -
>> - if (!list_empty(&res->purge)) {
>> - mlog(0, "%s: Removing res %.*s from purgelist\n",
>> - dlm->name, res->lockname.len, res->lockname.name);
>> - list_del_init(&res->purge);
>> - dlm_lockres_put(res);
>> - dlm->purge_count--;
>> - }
>> -
>> - if (!__dlm_lockres_unused(res)) {
>> - mlog(ML_ERROR, "%s: res %.*s in use after deref\n",
>> - dlm->name, res->lockname.len, res->lockname.name);
>> - __dlm_print_one_lock_resource(res);
>> - BUG();
>> - }
>> -
>> - __dlm_unhash_lockres(dlm, res);
>> -
>> - spin_lock(&dlm->track_lock);
>> - if (!list_empty(&res->tracking))
>> - list_del_init(&res->tracking);
>> - else {
>> - mlog(ML_ERROR, "%s: Resource %.*s not on the Tracking list\n",
>> - dlm->name, res->lockname.len, res->lockname.name);
>> - __dlm_print_one_lock_resource(res);
>> - }
>> - spin_unlock(&dlm->track_lock);
>> -
>> - /* lockres is not in the hash now. drop the flag and wake up
>> - * any processes waiting in dlm_get_lock_resource.
>> - */
>> - res->state &= ~DLM_LOCK_RES_DROPPING_REF;
>> + __dlm_do_purge_lockres(dlm, res);
>> spin_unlock(&res->spinlock);
>> wake_up(&res->wq);
>>
>> - dlm_lockres_put(res);
>> -
>> spin_unlock(&dlm->spinlock);
>>
>> ret = 0;
>>
>> done:
>> + if (res)
>> + dlm_lockres_put(res);
>> dlm_put(dlm);
>> return ret;
>> }
>> diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c
>> index f6b3138..dd5cb8b 100644
>> --- a/fs/ocfs2/dlm/dlmrecovery.c
>> +++ b/fs/ocfs2/dlm/dlmrecovery.c
>> @@ -2343,6 +2343,7 @@ static void dlm_do_local_recovery_cleanup(struct dlm_ctxt *dlm, u8 dead_node)
>> struct dlm_lock_resource *res;
>> int i;
>> struct hlist_head *bucket;
>> + struct hlist_node *tmp;
>> struct dlm_lock *lock;
>>
>>
>> @@ -2365,7 +2366,7 @@ static void dlm_do_local_recovery_cleanup(struct dlm_ctxt *dlm, u8 dead_node)
>> */
>> for (i = 0; i < DLM_HASH_BUCKETS; i++) {
>> bucket = dlm_lockres_hash(dlm, i);
>> - hlist_for_each_entry(res, bucket, hash_node) {
>> + hlist_for_each_entry_safe(res, tmp, bucket, hash_node) {
>> /* always prune any $RECOVERY entries for dead nodes,
>> * otherwise hangs can occur during later recovery */
>> if (dlm_is_recovery_lock(res->lockname.name,
>> @@ -2386,8 +2387,17 @@ static void dlm_do_local_recovery_cleanup(struct dlm_ctxt *dlm, u8 dead_node)
>> break;
>> }
>> }
>> - dlm_lockres_clear_refmap_bit(dlm, res,
>> - dead_node);
>> +
>> + if ((res->owner == dead_node) &&
>> + (res->state & DLM_LOCK_RES_DROPPING_REF)) {
>> + dlm_lockres_get(res);
>> + __dlm_do_purge_lockres(dlm, res);
>> + spin_unlock(&res->spinlock);
>> + wake_up(&res->wq);
>> + dlm_lockres_put(res);
>> + continue;
>> + } else if (res->owner == dlm->node_num)
>> + dlm_lockres_clear_refmap_bit(dlm, res, dead_node);
>> spin_unlock(&res->spinlock);
>> continue;
>> }
>> @@ -2398,14 +2408,17 @@ static void dlm_do_local_recovery_cleanup(struct dlm_ctxt *dlm, u8 dead_node)
>> if (res->state & DLM_LOCK_RES_DROPPING_REF) {
>> mlog(0, "%s:%.*s: owned by "
>> "dead node %u, this node was "
>> - "dropping its ref when it died. "
>> - "continue, dropping the flag.\n",
>> + "dropping its ref when master died. "
>> + "continue, purging the lockres.\n",
>> dlm->name, res->lockname.len,
>> res->lockname.name, dead_node);
>> + dlm_lockres_get(res);
>> + __dlm_do_purge_lockres(dlm, res);
>> + spin_unlock(&res->spinlock);
>> + wake_up(&res->wq);
>> + dlm_lockres_put(res);
>> + continue;
>> }
>> - res->state &= ~DLM_LOCK_RES_DROPPING_REF;
>> - dlm_move_lockres_to_recovery_list(dlm,
>> - res);
>
> Here you just removed the above line, so DLM has no chance to select this lock
> resource to be recovered. Is that right?
>
>
>
Here we purge the lockres directly, instead of moving it to the recovery list,
when DLM_LOCK_RES_DROPPING_REF is set, in order to avoid that deadlock.
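
To make the intent explicit, below is a condensed restatement of the new
branch in dlm_do_local_recovery_cleanup() from the hunk above, with comments
added (a sketch for illustration, not the literal patch text):

    if (res->state & DLM_LOCK_RES_DROPPING_REF) {
            /* The deref sent to the dead recovery master can never be
             * answered, so purge the lockres right here instead of
             * moving it to the recovery list, which would leave
             * DROPPING_REF set and block dlm_do_recovery() in
             * __dlm_wait_on_lockres_flags(). */
            dlm_lockres_get(res);
            __dlm_do_purge_lockres(dlm, res); /* unhash, clear DROPPING_REF */
            spin_unlock(&res->spinlock);
            wake_up(&res->wq);  /* wake waiters in dlm_get_lock_resource() */
            dlm_lockres_put(res);
            continue;           /* nothing left to recover for this lockres */
    }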
Thanks,
Jun
>> } else if (res->owner == dlm->node_num) {
>> dlm_free_dead_locks(dlm, res, dead_node);
>> __dlm_lockres_calc_usage(dlm, res);
>> diff --git a/fs/ocfs2/dlm/dlmthread.c b/fs/ocfs2/dlm/dlmthread.c
>> index ce39722..838a06d 100644
>> --- a/fs/ocfs2/dlm/dlmthread.c
>> +++ b/fs/ocfs2/dlm/dlmthread.c
>> @@ -160,6 +160,52 @@ void dlm_lockres_calc_usage(struct dlm_ctxt *dlm,
>> spin_unlock(&dlm->spinlock);
>> }
>>
>> +/*
>> + * Do the real purge work:
>> + * unhash the lockres, and
>> + * clear flag DLM_LOCK_RES_DROPPING_REF.
>> + * It requires dlm and lockres spinlock to be taken.
>> + */
>> +void __dlm_do_purge_lockres(struct dlm_ctxt *dlm,
>> + struct dlm_lock_resource *res)
>> +{
>> + assert_spin_locked(&dlm->spinlock);
>> + assert_spin_locked(&res->spinlock);
>> +
>> + if (!list_empty(&res->purge)) {
>> + mlog(0, "%s: Removing res %.*s from purgelist\n",
>> + dlm->name, res->lockname.len, res->lockname.name);
>> + list_del_init(&res->purge);
>> + dlm_lockres_put(res);
>> + dlm->purge_count--;
>> + }
>> +
>> + if (!__dlm_lockres_unused(res)) {
>> + mlog(ML_ERROR, "%s: res %.*s in use after deref\n",
>> + dlm->name, res->lockname.len, res->lockname.name);
>> + __dlm_print_one_lock_resource(res);
>> + BUG();
>> + }
>> +
>> + __dlm_unhash_lockres(dlm, res);
>> +
>> + spin_lock(&dlm->track_lock);
>> + if (!list_empty(&res->tracking))
>> + list_del_init(&res->tracking);
>> + else {
>> + mlog(ML_ERROR, "%s: Resource %.*s not on the Tracking list\n",
>> + dlm->name, res->lockname.len, res->lockname.name);
>> + __dlm_print_one_lock_resource(res);
>> + }
>> + spin_unlock(&dlm->track_lock);
>> +
>> + /*
>> + * lockres is not in the hash now. drop the flag and wake up
>> + * any processes waiting in dlm_get_lock_resource.
>> + */
>> + res->state &= ~DLM_LOCK_RES_DROPPING_REF;
>> +}
>> +
>> static void dlm_purge_lockres(struct dlm_ctxt *dlm,
>> struct dlm_lock_resource *res)
>> {
>> @@ -176,10 +222,8 @@ static void dlm_purge_lockres(struct dlm_ctxt *dlm,
>>
>> if (!master) {
>> if (res->state & DLM_LOCK_RES_DROPPING_REF) {
>> - mlog(ML_NOTICE, "%s: res %.*s already in "
>> - "DLM_LOCK_RES_DROPPING_REF state\n",
>> - dlm->name, res->lockname.len,
>> - res->lockname.name);
>> + mlog(ML_NOTICE, "%s: res %.*s already in DLM_LOCK_RES_DROPPING_REF state\n",
>> + dlm->name, res->lockname.len, res->lockname.name);
>> spin_unlock(&res->spinlock);
>> return;
>> }
>> --
>> 1.8.4.3
>>
>>
>> _______________________________________________
>> Ocfs2-devel mailing list
>> Ocfs2-devel at oss.oracle.com
>> https://oss.oracle.com/mailman/listinfo/ocfs2-devel
>
>