[Ocfs2-devel] [PATCH 1/1] ocfs2 fix o2dlm dlm run purgelist(bug 9094491) - rev3

Srinivas Eeda srinivas.eeda at oracle.com
Wed Jun 23 10:25:36 PDT 2010


Sunil, Joel, Wengang. Thanks for reviewing the patch and your comments.

On 6/23/2010 10:00 AM, Sunil Mushran wrote:
> Signed-off-by: Sunil Mushran<sunil.mushran at oracle.com>
>
>
> On 06/22/2010 10:48 PM, Srinivas Eeda wrote:
>> There are two problems in dlm_run_purgelist
>>
>> 1. If a lockres is found to be in use, dlm_run_purgelist keeps trying 
>> to purge
>> the same lockres instead of trying the next lockres.
>>
>> 2. When a lockres is found unused, dlm_run_purgelist releases lockres 
>> spinlock
>> before setting DLM_LOCK_RES_DROPPING_REF and calls dlm_purge_lockres.
>> spinlock is reacquired but in this window lockres can get reused. 
>> This leads
>> to BUG.
>>
>> This patch modifies dlm_run_purgelist to skip lockres if it's in use 
>> and purge
>>   next lockres. It also sets DLM_LOCK_RES_DROPPING_REF before 
>> releasing the
>> lockres spinlock protecting it from getting reused.
>>
>> Signed-off-by: Srinivas Eeda<srinivas.eeda at oracle.com>
>> ---
>>   fs/ocfs2/dlm/dlmthread.c |   79 
>> +++++++++++++++++++--------------------------
>>   1 files changed, 33 insertions(+), 46 deletions(-)
>>
>> diff --git a/fs/ocfs2/dlm/dlmthread.c b/fs/ocfs2/dlm/dlmthread.c
>> index 11a6d1f..6822f9a 100644
>> --- a/fs/ocfs2/dlm/dlmthread.c
>> +++ b/fs/ocfs2/dlm/dlmthread.c
>> @@ -152,45 +152,25 @@ void dlm_lockres_calc_usage(struct dlm_ctxt *dlm,
>>       spin_unlock(&dlm->spinlock);
>>   }
>>
>> -static int dlm_purge_lockres(struct dlm_ctxt *dlm,
>> +static void dlm_purge_lockres(struct dlm_ctxt *dlm,
>>                    struct dlm_lock_resource *res)
>>   {
>>       int master;
>>       int ret = 0;
>>
>> -    spin_lock(&res->spinlock);
>> -    if (!__dlm_lockres_unused(res)) {
>> -        mlog(0, "%s:%.*s: tried to purge but not unused\n",
>> -             dlm->name, res->lockname.len, res->lockname.name);
>> -        __dlm_print_one_lock_resource(res);
>> -        spin_unlock(&res->spinlock);
>> -        BUG();
>> -    }
>> -
>> -    if (res->state&  DLM_LOCK_RES_MIGRATING) {
>> -        mlog(0, "%s:%.*s: Delay dropref as this lockres is "
>> -             "being remastered\n", dlm->name, res->lockname.len,
>> -             res->lockname.name);
>> -        /* Re-add the lockres to the end of the purge list */
>> -        if (!list_empty(&res->purge)) {
>> -            list_del_init(&res->purge);
>> -            list_add_tail(&res->purge,&dlm->purge_list);
>> -        }
>> -        spin_unlock(&res->spinlock);
>> -        return 0;
>> -    }
>> +    assert_spin_locked(&dlm->spinlock);
>> +    assert_spin_locked(&res->spinlock);
>>
>>       master = (res->owner == dlm->node_num);
>>
>> -    if (!master)
>> -        res->state |= DLM_LOCK_RES_DROPPING_REF;
>> -    spin_unlock(&res->spinlock);
>>
>>       mlog(0, "purging lockres %.*s, master = %d\n", res->lockname.len,
>>            res->lockname.name, master);
>>
>>       if (!master) {
>> +        res->state |= DLM_LOCK_RES_DROPPING_REF;
>>           /* drop spinlock...  retake below */
>> +        spin_unlock(&res->spinlock);
>>           spin_unlock(&dlm->spinlock);
>>
>>           spin_lock(&res->spinlock);
>> @@ -208,31 +188,35 @@ static int dlm_purge_lockres(struct dlm_ctxt *dlm,
>>           mlog(0, "%s:%.*s: dlm_deref_lockres returned %d\n",
>>                dlm->name, res->lockname.len, res->lockname.name, ret);
>>           spin_lock(&dlm->spinlock);
>> +        spin_lock(&res->spinlock);
>>       }
>>
>> -    spin_lock(&res->spinlock);
>>       if (!list_empty(&res->purge)) {
>>           mlog(0, "removing lockres %.*s:%p from purgelist, "
>>                "master = %d\n", res->lockname.len, res->lockname.name,
>>                res, master);
>>           list_del_init(&res->purge);
>> -        spin_unlock(&res->spinlock);
>>           dlm_lockres_put(res);
>>           dlm->purge_count--;
>> -    } else
>> -        spin_unlock(&res->spinlock);
>> +    }
>> +
>> +    if (!__dlm_lockres_unused) {
>> +        mlog(ML_ERROR, "found lockres %s:%.*s: in use after deref\n",
>> +             dlm->name, res->lockname.len, res->lockname.name);
>> +        __dlm_print_one_lock_resource(res);
>> +        BUG();
>> +    }
>>
>>       __dlm_unhash_lockres(res);
>>
>>       /* lockres is not in the hash now.  drop the flag and wake up
>>        * any processes waiting in dlm_get_lock_resource. */
>>       if (!master) {
>> -        spin_lock(&res->spinlock);
>>           res->state&= ~DLM_LOCK_RES_DROPPING_REF;
>>           spin_unlock(&res->spinlock);
>>           wake_up(&res->wq);
>> -    }
>> -    return 0;
>> +    } else
>> +        spin_unlock(&res->spinlock);
>>   }
>>
>>   static void dlm_run_purge_list(struct dlm_ctxt *dlm,
>> @@ -251,17 +235,7 @@ static void dlm_run_purge_list(struct dlm_ctxt 
>> *dlm,
>>           lockres = list_entry(dlm->purge_list.next,
>>                        struct dlm_lock_resource, purge);
>>
>> -        /* Status of the lockres *might* change so double
>> -         * check. If the lockres is unused, holding the dlm
>> -         * spinlock will prevent people from getting and more
>> -         * refs on it -- there's no need to keep the lockres
>> -         * spinlock. */
>>           spin_lock(&lockres->spinlock);
>> -        unused = __dlm_lockres_unused(lockres);
>> -        spin_unlock(&lockres->spinlock);
>> -
>> -        if (!unused)
>> -            continue;
>>
>>           purge_jiffies = lockres->last_used +
>>               msecs_to_jiffies(DLM_PURGE_INTERVAL_MS);
>> @@ -273,15 +247,28 @@ static void dlm_run_purge_list(struct dlm_ctxt 
>> *dlm,
>>                * in tail order, we can stop at the first
>>                * unpurgable resource -- anyone added after
>>                * him will have a greater last_used value */
>> +            spin_unlock(&lockres->spinlock);
>>               break;
>>           }
>>
>> +        /* Status of the lockres *might* change so double
>> +         * check. If the lockres is unused, holding the dlm
>> +         * spinlock will prevent people from getting and more
>> +         * refs on it. */
>> +        unused = __dlm_lockres_unused(lockres);
>> +        if (!unused ||
>> +            (lockres->state&  DLM_LOCK_RES_MIGRATING)) {
>> +            mlog(0, "lockres %s:%.*s: is in use or "
>> +                 "being remastered\n", dlm->name,
>> +                 lockres->lockname.len, lockres->lockname.name);
>> +            list_move_tail(&dlm->purge_list,&lockres->purge);
>> +            spin_unlock(&lockres->spinlock);
>> +            continue;
>> +        }
>> +
>>           dlm_lockres_get(lockres);
>>
>> -        /* This may drop and reacquire the dlm spinlock if it
>> -         * has to do migration. */
>> -        if (dlm_purge_lockres(dlm, lockres))
>> -            BUG();
>> +        dlm_purge_lockres(dlm, lockres);
>>
>>           dlm_lockres_put(lockres);
>>
>>    
>



More information about the Ocfs2-devel mailing list