[Ocfs2-devel] [PATCH 2/2] ocfs2: o2dlm fix race in purge lockres and newlock (orabug 9094491)

Srinivas Eeda srinivas.eeda at oracle.com
Fri Jun 18 09:32:07 PDT 2010


On 6/17/2010 7:11 PM, Sunil Mushran wrote:
> This patch looks ok on the surface. Should be usable. But checking into
> the repo is another matter.
ok, won't checkin but will give this as one-off fix for now.
>
> My problem is that this flag is very much like inflight_locks but
> is not the same. inflight_locks are taken only on lockres' that are
> mastered by the node. This new flag does the same but for non
> mastered locks too. A better solution will be to merge the two.
> And that means cleaning up inflight_locks.
will look into this.
>
> The reason for the NAK for check in is that the code is adding
> more messiness to an area that is already very messy.
>
> Sunil
>
> On 06/15/2010 09:43 PM, Srinivas Eeda wrote:
>> This patch fixes the following hole.
>> dlmlock tries to create a new lock on a lockres that is on purge 
>> list. It calls
>> dlm_get_lockresource and later adds a lock to blocked list. But in 
>> this window,
>> dlm_thread can purge the lockres and unhash it. This will cause a 
>> BUG, as when
>> the AST comes back from the master lockres is not found
>>
>> This patch marks the lockres with a new state DLM_LOCK_RES_IN_USE 
>> which would
>> protect lockres from dlm_thread purging it.
>>
>> Signed-off-by: Srinivas Eeda<srinivas.eeda at oracle.com>
>> ---
>>   fs/ocfs2/dlm/dlmcommon.h |    1 +
>>   fs/ocfs2/dlm/dlmlock.c   |    4 ++++
>>   fs/ocfs2/dlm/dlmmaster.c |    5 ++++-
>>   fs/ocfs2/dlm/dlmthread.c |    1 +
>>   4 files changed, 10 insertions(+), 1 deletions(-)
>>
>> diff --git a/fs/ocfs2/dlm/dlmcommon.h b/fs/ocfs2/dlm/dlmcommon.h
>> index 0102be3..6cf3c08 100644
>> --- a/fs/ocfs2/dlm/dlmcommon.h
>> +++ b/fs/ocfs2/dlm/dlmcommon.h
>> @@ -279,6 +279,7 @@ static inline void __dlm_set_joining_node(struct 
>> dlm_ctxt *dlm,
>>   #define DLM_LOCK_RES_DIRTY                0x00000008
>>   #define DLM_LOCK_RES_IN_PROGRESS          0x00000010
>>   #define DLM_LOCK_RES_MIGRATING            0x00000020
>> +#define DLM_LOCK_RES_IN_USE               0x00000100
>>   #define DLM_LOCK_RES_DROPPING_REF         0x00000040
>>   #define DLM_LOCK_RES_BLOCK_DIRTY          0x00001000
>>   #define DLM_LOCK_RES_SETREF_INPROG        0x00002000
>> diff --git a/fs/ocfs2/dlm/dlmlock.c b/fs/ocfs2/dlm/dlmlock.c
>> index 7333377..501ac40 100644
>> --- a/fs/ocfs2/dlm/dlmlock.c
>> +++ b/fs/ocfs2/dlm/dlmlock.c
>> @@ -134,6 +134,8 @@ static enum dlm_status dlmlock_master(struct 
>> dlm_ctxt *dlm,
>>       if (status != DLM_NORMAL&&
>>       lock->ml.node != dlm->node_num) {
>>           /* erf.  state changed after lock was dropped. */
>> +        /* DLM_LOCK_RES_IN_USE is set in dlm_get_lock_resource */
>> +        res->state&= ~DLM_LOCK_RES_IN_USE;
>>           spin_unlock(&res->spinlock);
>>           dlm_error(status);
>>           return status;
>> @@ -180,6 +182,7 @@ static enum dlm_status dlmlock_master(struct 
>> dlm_ctxt *dlm,
>>               kick_thread = 1;
>>           }
>>       }
>> +    res->state&= ~DLM_LOCK_RES_IN_USE;
>>       /* reduce the inflight count, this may result in the lockres
>>        * being purged below during calc_usage */
>>       if (lock->ml.node == dlm->node_num)
>> @@ -246,6 +249,7 @@ static enum dlm_status dlmlock_remote(struct 
>> dlm_ctxt *dlm,
>>
>>       spin_lock(&res->spinlock);
>>       res->state&= ~DLM_LOCK_RES_IN_PROGRESS;
>> +    res->state&= ~DLM_LOCK_RES_IN_USE;
>>       lock->lock_pending = 0;
>>       if (status != DLM_NORMAL) {
>>           if (status == DLM_RECOVERING&&
>> diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c
>> index 9289b43..f0f2d97 100644
>> --- a/fs/ocfs2/dlm/dlmmaster.c
>> +++ b/fs/ocfs2/dlm/dlmmaster.c
>> @@ -719,6 +719,7 @@ lookup:
>>       if (tmpres) {
>>           int dropping_ref = 0;
>>
>> +        tmpres->state |= DLM_LOCK_RES_IN_USE;
>>           spin_unlock(&dlm->spinlock);
>>
>>           spin_lock(&tmpres->spinlock);
>> @@ -731,8 +732,10 @@ lookup:
>>           if (tmpres->owner == dlm->node_num) {
>>               BUG_ON(tmpres->state&  DLM_LOCK_RES_DROPPING_REF);
>>               dlm_lockres_grab_inflight_ref(dlm, tmpres);
>> -        } else if (tmpres->state&  DLM_LOCK_RES_DROPPING_REF)
>> +        } else if (tmpres->state&  DLM_LOCK_RES_DROPPING_REF) {
>> +            tmpres->state&= ~DLM_LOCK_RES_IN_USE;
>>               dropping_ref = 1;
>> +        }
>>           spin_unlock(&tmpres->spinlock);
>>
>>           /* wait until done messaging the master, drop our ref to allow
>> diff --git a/fs/ocfs2/dlm/dlmthread.c b/fs/ocfs2/dlm/dlmthread.c
>> index fb0be6c..2b82e0b 100644
>> --- a/fs/ocfs2/dlm/dlmthread.c
>> +++ b/fs/ocfs2/dlm/dlmthread.c
>> @@ -93,6 +93,7 @@ int __dlm_lockres_has_locks(struct 
>> dlm_lock_resource *res)
>>   int __dlm_lockres_unused(struct dlm_lock_resource *res)
>>   {
>>       if (!__dlm_lockres_has_locks(res)&&
>> +        !(res->state&  DLM_LOCK_RES_IN_USE)&&
>>       (list_empty(&res->dirty)&&  !(res->state&  DLM_LOCK_RES_DIRTY))) {
>>           /* try not to scan the bitmap unless the first two
>>            * conditions are already true */
>>    
>



More information about the Ocfs2-devel mailing list