[Ocfs2-devel] [PATCH 3/3] ocfs2/dlm: Do not migrate resource to a node that is leaving the domain

Sunil Mushran sunil.mushran at oracle.com
Tue May 10 10:17:31 PDT 2011


Yes, this series addresses that problem.

On 05/09/2011 11:19 PM, Wengang Wang wrote:
> Hi Sunil,
>
> This series of patches fixes a problem in the mmap (and possibly
> other) tests in ocfs2-test.
> The problem is that when all three nodes unmount at the same time,
> two of them keep migrating a lockres to each other, so the umounts on
> these two nodes hang. I have no idea why dlm_run_purge_list has no
> chance to purge it. Your patches fix it :)
>
> thanks,
> wengang.
>
> On 11-04-26 16:03, Sunil Mushran wrote:
>> During dlm domain shutdown, o2dlm has to free all the lock resources. Ones that
>> have no locks and no references are freed. Ones that have locks and/or references
>> are migrated to another node.
>>
>> The first task in migration is finding a target. Currently we scan the lock
>> resource and find one node that either has a lock or a reference. This is not
>> very efficient in a parallel umount case as we might end up migrating the
>> lock resource to a node which itself may have to migrate it to a third node.
>>
>> The patch scans the dlm->exit_domain_map to ensure the target node is not
>> leaving the domain. If no valid target node is found, o2dlm does not migrate
>> the resource but instead waits for the unlock and deref messages that will
>> allow it to free the resource.
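>>
>> For illustration only, the target eligibility test amounts to the
>> following sketch. The helper is hypothetical and not part of this
>> patch; the real logic is open-coded in dlm_pick_migration_target():
>>
>>   static int dlm_is_usable_target(struct dlm_ctxt *dlm, u8 node)
>>   {
>>   	/* Skip self, nodes not in the domain, and nodes leaving it */
>>   	if (node == dlm->node_num)
>>   		return 0;
>>   	if (!test_bit(node, dlm->domain_map))
>>   		return 0;
>>   	if (test_bit(node, dlm->exit_domain_map))
>>   		return 0;
>>   	return 1;
>>   }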
>>
>> Signed-off-by: Sunil Mushran <sunil.mushran at oracle.com>
>> ---
>>   fs/ocfs2/dlm/dlmdomain.c |   10 ++-
>>   fs/ocfs2/dlm/dlmmaster.c |  139 ++++++++++++++++-----------------------------
>>   2 files changed, 57 insertions(+), 92 deletions(-)
>>
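>> Illustrative note, not part of the diff below: with this change, the
>> bucket scan in dlm_migrate_all_locks() restarts only when
>> dlm_empty_lockres() has actually dropped dlm->spinlock; otherwise it
>> advances to the next resource. A simplified sketch of the resulting
>> loop, with a hypothetical next_lockres() iterator standing in for the
>> hash bucket walk:
>>
>>   redo_bucket:
>>   	while ((res = next_lockres(dlm, &iter)) != NULL) {
>>   		dropped = dlm_empty_lockres(dlm, res);
>>   		spin_lock(&res->spinlock);
>>   		if (dropped)
>>   			__dlm_lockres_calc_usage(dlm, res);
>>   		else
>>   			iter = res->hash_node.next;
>>   		spin_unlock(&res->spinlock);
>>   		dlm_lockres_put(res);
>>   		if (dropped) {
>>   			/* dlm->spinlock was dropped; rescan the bucket */
>>   			cond_resched_lock(&dlm->spinlock);
>>   			goto redo_bucket;
>>   		}
>>   	}
>>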
>> diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c
>> index 3aff23f..6ed6b95 100644
>> --- a/fs/ocfs2/dlm/dlmdomain.c
>> +++ b/fs/ocfs2/dlm/dlmdomain.c
>> @@ -451,14 +451,18 @@ redo_bucket:
>>   			dropped = dlm_empty_lockres(dlm, res);
>>
>>   			spin_lock(&res->spinlock);
>> -			__dlm_lockres_calc_usage(dlm, res);
>> -			iter = res->hash_node.next;
>> +			if (dropped)
>> +				__dlm_lockres_calc_usage(dlm, res);
>> +			else
>> +				iter = res->hash_node.next;
>>   			spin_unlock(&res->spinlock);
>>
>>   			dlm_lockres_put(res);
>>
>> -			if (dropped)
>> +			if (dropped) {
>> +				cond_resched_lock(&dlm->spinlock);
>>   				goto redo_bucket;
>> +			}
>>   		}
>>   		cond_resched_lock(&dlm->spinlock);
>>   		num += n;
>> diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c
>> index 3e59ff9..4499d86 100644
>> --- a/fs/ocfs2/dlm/dlmmaster.c
>> +++ b/fs/ocfs2/dlm/dlmmaster.c
>> @@ -2396,8 +2396,7 @@ static int dlm_is_lockres_migrateable(struct dlm_ctxt *dlm,
>>
>>
>>   static int dlm_migrate_lockres(struct dlm_ctxt *dlm,
>> -			       struct dlm_lock_resource *res,
>> -			       u8 target)
>> +			       struct dlm_lock_resource *res, u8 target)
>>   {
>>   	struct dlm_master_list_entry *mle = NULL;
>>   	struct dlm_master_list_entry *oldmle = NULL;
>> @@ -2411,25 +2410,15 @@ static int dlm_migrate_lockres(struct dlm_ctxt *dlm,
>>   	if (!dlm_grab(dlm))
>>   		return -EINVAL;
>>
>> +	BUG_ON(target == O2NM_MAX_NODES);
>> +
>>   	name = res->lockname.name;
>>   	namelen = res->lockname.len;
>>
>> -	mlog(0, "%s: Migrating %.*s to %u\n", dlm->name, namelen, name, target);
>> -
>> -	/* Ensure this lockres is a proper candidate for migration */
>> -	spin_lock(&res->spinlock);
>> -	ret = dlm_is_lockres_migrateable(dlm, res);
>> -	spin_unlock(&res->spinlock);
>> -
>> -	/* No work to do */
>> -	if (!ret)
>> -		goto leave;
>> -
>> -	/*
>> -	 * preallocate up front
>> -	 * if this fails, abort
>> -	 */
>> +	mlog(0, "%s: Migrating %.*s to node %u\n", dlm->name, namelen, name,
>> +	     target);
>>
>> +	/* preallocate up front. if this fails, abort */
>>   	ret = -ENOMEM;
>>   	mres = (struct dlm_migratable_lockres *) __get_free_page(GFP_NOFS);
>>   	if (!mres) {
>> @@ -2445,35 +2434,10 @@ static int dlm_migrate_lockres(struct dlm_ctxt *dlm,
>>   	ret = 0;
>>
>>   	/*
>> -	 * find a node to migrate the lockres to
>> -	 */
>> -
>> -	spin_lock(&dlm->spinlock);
>> -	/* pick a new node */
>> -	if (!test_bit(target, dlm->domain_map) ||
>> -	    target >= O2NM_MAX_NODES) {
>> -		target = dlm_pick_migration_target(dlm, res);
>> -	}
>> -	mlog(0, "%s: res %.*s, Node %u chosen for migration\n", dlm->name,
>> -	     namelen, name, target);
>> -
>> -	if (target >= O2NM_MAX_NODES ||
>> -	    !test_bit(target, dlm->domain_map)) {
>> -		/* target chosen is not alive */
>> -		ret = -EINVAL;
>> -	}
>> -
>> -	if (ret) {
>> -		spin_unlock(&dlm->spinlock);
>> -		goto fail;
>> -	}
>> -
>> -	mlog(0, "continuing with target = %u\n", target);
>> -
>> -	/*
>>   	 * clear any existing master requests and
>>   	 * add the migration mle to the list
>>   	 */
>> +	spin_lock(&dlm->spinlock);
>>   	spin_lock(&dlm->master_lock);
>>   	ret = dlm_add_migration_mle(dlm, res, mle, &oldmle, name,
>>   				    namelen, target, dlm->node_num);
>> @@ -2514,6 +2478,7 @@ fail:
>>   			dlm_put_mle(mle);
>>   		} else if (mle) {
>>   			kmem_cache_free(dlm_mle_cache, mle);
>> +			mle = NULL;
>>   		}
>>   		goto leave;
>>   	}
>> @@ -2632,7 +2597,6 @@ leave:
>>   	if (wake)
>>   		wake_up(&res->wq);
>>
>> -	/* TODO: cleanup */
>>   	if (mres)
>>   		free_page((unsigned long)mres);
>>
>> @@ -2657,28 +2621,28 @@ leave:
>>    */
>>   int dlm_empty_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *res)
>>   {
>> -	int mig, ret;
>> +	int ret;
>>   	int lock_dropped = 0;
>> +	u8 target = O2NM_MAX_NODES;
>>
>>   	assert_spin_locked(&dlm->spinlock);
>>
>>   	spin_lock(&res->spinlock);
>> -	mig = dlm_is_lockres_migrateable(dlm, res);
>> +	if (dlm_is_lockres_migrateable(dlm, res))
>> +		target = dlm_pick_migration_target(dlm, res);
>>   	spin_unlock(&res->spinlock);
>> -	if (!mig)
>> +
>> +	if (target == O2NM_MAX_NODES)
>>   		goto leave;
>>
>>   	/* Wheee! Migrate lockres here! Will sleep so drop spinlock. */
>>   	spin_unlock(&dlm->spinlock);
>>   	lock_dropped = 1;
>> -	while (1) {
>> -		ret = dlm_migrate_lockres(dlm, res, O2NM_MAX_NODES);
>> -		if (ret >= 0)
>> -			break;
>> -		mlog(0, "%s: res %.*s, Migrate failed, retrying\n", dlm->name,
>> -		     res->lockname.len, res->lockname.name);
>> -		msleep(DLM_MIGRATION_RETRY_MS);
>> -	}
>> +	ret = dlm_migrate_lockres(dlm, res, target);
>> +	if (ret)
>> +		mlog(0, "%s: res %.*s, Migrate to node %u failed with %d\n",
>> +		     dlm->name, res->lockname.len, res->lockname.name,
>> +		     target, ret);
>>   	spin_lock(&dlm->spinlock);
>>   leave:
>>   	return lock_dropped;
>> @@ -2862,61 +2826,58 @@ static void dlm_remove_nonlocal_locks(struct dlm_ctxt *dlm,
>>   	}
>>   }
>>
>> -/* for now this is not too intelligent.  we will
>> - * need stats to make this do the right thing.
>> - * this just finds the first lock on one of the
>> - * queues and uses that node as the target. */
>> +/*
>> + * Pick a node to migrate the lock resource to. This function selects a
>> + * potential target based first on the locks and then on refmap. It skips
>> + * nodes that are in the process of exiting the domain.
>> + */
>>   static u8 dlm_pick_migration_target(struct dlm_ctxt *dlm,
>>   				    struct dlm_lock_resource *res)
>>   {
>> -	int i;
>> +	enum dlm_lockres_list idx;
>>   	struct list_head *queue = &res->granted;
>>   	struct dlm_lock *lock;
>>   	int nodenum;
>>
>>   	assert_spin_locked(&dlm->spinlock);
>> +	assert_spin_locked(&res->spinlock);
>>
>> -	spin_lock(&res->spinlock);
>> -	for (i=0; i<3; i++) {
>> +	/* Go through all the locks */
>> +	for (idx = DLM_GRANTED_LIST; idx <= DLM_BLOCKED_LIST; idx++) {
>> +		queue = dlm_list_idx_to_ptr(res, idx);
>>   		list_for_each_entry(lock, queue, list) {
>> -			/* up to the caller to make sure this node
>> -			 * is alive */
>> -			if (lock->ml.node != dlm->node_num) {
>> -				spin_unlock(&res->spinlock);
>> -				return lock->ml.node;
>> -			}
>> +			if (lock->ml.node == dlm->node_num)
>> +				continue;
>> +			if (test_bit(lock->ml.node, dlm->exit_domain_map))
>> +				continue;
>> +			if (!test_bit(lock->ml.node, dlm->domain_map))
>> +				continue;
>> +			nodenum = lock->ml.node;
>> +			goto bail;
>>   		}
>> -		queue++;
>> -	}
>> -
>> -	nodenum = find_next_bit(res->refmap, O2NM_MAX_NODES, 0);
>> -	if (nodenum < O2NM_MAX_NODES) {
>> -		spin_unlock(&res->spinlock);
>> -		return nodenum;
>>   	}
>> -	spin_unlock(&res->spinlock);
>> -	mlog(0, "have not found a suitable target yet! checking domain map\n");
>>
>> -	/* ok now we're getting desperate.  pick anyone alive. */
>> +	/* Go thru the refmap */
>>   	nodenum = -1;
>>   	while (1) {
>> -		nodenum = find_next_bit(dlm->domain_map,
>> -					O2NM_MAX_NODES, nodenum+1);
>> -		mlog(0, "found %d in domain map\n", nodenum);
>> +		nodenum = find_next_bit(res->refmap, O2NM_MAX_NODES,
>> +					nodenum + 1);
>>   		if (nodenum >= O2NM_MAX_NODES)
>>   			break;
>> -		if (nodenum != dlm->node_num) {
>> -			mlog(0, "picking %d\n", nodenum);
>> -			return nodenum;
>> -		}
>> +		if (nodenum == dlm->node_num)
>> +			continue;
>> +		if (test_bit(nodenum, dlm->exit_domain_map))
>> +			continue;
>> +		if (!test_bit(nodenum, dlm->domain_map))
>> +			continue;
>> +		goto bail;
>>   	}
>>
>> -	mlog(0, "giving up.  no master to migrate to\n");
>> -	return DLM_LOCK_RES_OWNER_UNKNOWN;
>> +	nodenum = O2NM_MAX_NODES;
>> +bail:
>> +	return nodenum;
>>   }
>>
>> -
>> -
>>   /* this is called by the new master once all lockres
>>    * data has been received */
>>   static int dlm_do_migrate_request(struct dlm_ctxt *dlm,
>> -- 
>> 1.7.1
>>
>>
>> _______________________________________________
>> Ocfs2-devel mailing list
>> Ocfs2-devel at oss.oracle.com
>> http://oss.oracle.com/mailman/listinfo/ocfs2-devel



