[Ocfs2-devel] [PATCH 3/3] ocfs2/dlm: Do not migrate resource to a node that is leaving the domain

Mark Fasheh mfasheh at suse.com
Thu May 5 15:24:17 PDT 2011


On Tue, Apr 26, 2011 at 04:03:25PM -0700, Sunil Mushran wrote:
> During dlm domain shutdown, o2dlm has to free all the lock resources. Ones that
> have no locks and references are freed. Ones that have locks and/or references
> are migrated to another node.
> 
> The first task in migration is finding a target. Currently we scan the lock
> resource and find one node that either has a lock or a reference. This is not
> very efficient in a parallel umount case as we might end up migrating the
> lock resource to a node which itself may have to migrate it to a third node.
> 
> The patch scans the dlm->exit_domain_map to ensure the target node is not
> leaving the domain. If no valid target node is found, o2dlm does not migrate
> the resource but instead waits for the unlock and deref messages that will
> allow it to free the resource.
> 
> Signed-off-by: Sunil Mushran <sunil.mushran at oracle.com>
> ---
>  fs/ocfs2/dlm/dlmdomain.c |   10 ++-
>  fs/ocfs2/dlm/dlmmaster.c |  139 ++++++++++++++++-----------------------------
>  2 files changed, 57 insertions(+), 92 deletions(-)
> 
> diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c
> index 3aff23f..6ed6b95 100644
> --- a/fs/ocfs2/dlm/dlmdomain.c
> +++ b/fs/ocfs2/dlm/dlmdomain.c
> @@ -451,14 +451,18 @@ redo_bucket:
>  			dropped = dlm_empty_lockres(dlm, res);
>  
>  			spin_lock(&res->spinlock);
> -			__dlm_lockres_calc_usage(dlm, res);
> -			iter = res->hash_node.next;
> +			if (dropped)
> +				__dlm_lockres_calc_usage(dlm, res);
> +			else
> +				iter = res->hash_node.next;
>  			spin_unlock(&res->spinlock);
>  
>  			dlm_lockres_put(res);
>  
> -			if (dropped)
> +			if (dropped) {
> +				cond_resched_lock(&dlm->spinlock);
>  				goto redo_bucket;
> +			}
>  		}
>  		cond_resched_lock(&dlm->spinlock);
>  		num += n;
> diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c
> index 3e59ff9..4499d86 100644
> --- a/fs/ocfs2/dlm/dlmmaster.c
> +++ b/fs/ocfs2/dlm/dlmmaster.c
> @@ -2396,8 +2396,7 @@ static int dlm_is_lockres_migrateable(struct dlm_ctxt *dlm,
>  
>  
>  static int dlm_migrate_lockres(struct dlm_ctxt *dlm,
> -			       struct dlm_lock_resource *res,
> -			       u8 target)
> +			       struct dlm_lock_resource *res, u8 target)
>  {
>  	struct dlm_master_list_entry *mle = NULL;
>  	struct dlm_master_list_entry *oldmle = NULL;
> @@ -2411,25 +2410,15 @@ static int dlm_migrate_lockres(struct dlm_ctxt *dlm,
>  	if (!dlm_grab(dlm))
>  		return -EINVAL;
>  
> +	BUG_ON(target == O2NM_MAX_NODES);
> +
>  	name = res->lockname.name;
>  	namelen = res->lockname.len;
>  
> -	mlog(0, "%s: Migrating %.*s to %u\n", dlm->name, namelen, name, target);
> -
> -	/* Ensure this lockres is a proper candidate for migration */
> -	spin_lock(&res->spinlock);
> -	ret = dlm_is_lockres_migrateable(dlm, res);
> -	spin_unlock(&res->spinlock);
> -
> -	/* No work to do */
> -	if (!ret)
> -		goto leave;
> -
> -	/*
> -	 * preallocate up front
> -	 * if this fails, abort
> -	 */
> +	mlog(0, "%s: Migrating %.*s to node %u\n", dlm->name, namelen, name,
> +	     target);
>  
> +	/* preallocate up front. if this fails, abort */
>  	ret = -ENOMEM;
>  	mres = (struct dlm_migratable_lockres *) __get_free_page(GFP_NOFS);
>  	if (!mres) {
> @@ -2445,35 +2434,10 @@ static int dlm_migrate_lockres(struct dlm_ctxt *dlm,
>  	ret = 0;
>  
>  	/*
> -	 * find a node to migrate the lockres to
> -	 */
> -
> -	spin_lock(&dlm->spinlock);
> -	/* pick a new node */
> -	if (!test_bit(target, dlm->domain_map) ||
> -	    target >= O2NM_MAX_NODES) {
> -		target = dlm_pick_migration_target(dlm, res);
> -	}
> -	mlog(0, "%s: res %.*s, Node %u chosen for migration\n", dlm->name,
> -	     namelen, name, target);
> -
> -	if (target >= O2NM_MAX_NODES ||
> -	    !test_bit(target, dlm->domain_map)) {
> -		/* target chosen is not alive */
> -		ret = -EINVAL;
> -	}
> -
> -	if (ret) {
> -		spin_unlock(&dlm->spinlock);
> -		goto fail;
> -	}
> -
> -	mlog(0, "continuing with target = %u\n", target);
> -
> -	/*
>  	 * clear any existing master requests and
>  	 * add the migration mle to the list
>  	 */
> +	spin_lock(&dlm->spinlock);
>  	spin_lock(&dlm->master_lock);
>  	ret = dlm_add_migration_mle(dlm, res, mle, &oldmle, name,
>  				    namelen, target, dlm->node_num);
> @@ -2514,6 +2478,7 @@ fail:
>  			dlm_put_mle(mle);
>  		} else if (mle) {
>  			kmem_cache_free(dlm_mle_cache, mle);
> +			mle = NULL;
>  		}
>  		goto leave;
>  	}
> @@ -2632,7 +2597,6 @@ leave:
>  	if (wake)
>  		wake_up(&res->wq);
>  
> -	/* TODO: cleanup */
>  	if (mres)
>  		free_page((unsigned long)mres);
>  
> @@ -2657,28 +2621,28 @@ leave:
>   */
>  int dlm_empty_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *res)
>  {
> -	int mig, ret;
> +	int ret;
>  	int lock_dropped = 0;
> +	u8 target = O2NM_MAX_NODES;
>  
>  	assert_spin_locked(&dlm->spinlock);
>  
>  	spin_lock(&res->spinlock);
> -	mig = dlm_is_lockres_migrateable(dlm, res);
> +	if (dlm_is_lockres_migrateable(dlm, res))
> +		target = dlm_pick_migration_target(dlm, res);
>  	spin_unlock(&res->spinlock);
> -	if (!mig)
> +
> +	if (target == O2NM_MAX_NODES)
>  		goto leave;
>  
>  	/* Wheee! Migrate lockres here! Will sleep so drop spinlock. */
>  	spin_unlock(&dlm->spinlock);
>  	lock_dropped = 1;
> -	while (1) {
> -		ret = dlm_migrate_lockres(dlm, res, O2NM_MAX_NODES);
> -		if (ret >= 0)
> -			break;
> -		mlog(0, "%s: res %.*s, Migrate failed, retrying\n", dlm->name,
> -		     res->lockname.len, res->lockname.name);
> -		msleep(DLM_MIGRATION_RETRY_MS);
> -	}
> +	ret = dlm_migrate_lockres(dlm, res, target);
> +	if (ret)
> +		mlog(0, "%s: res %.*s, Migrate to node %u failed with %d\n",
> +		     dlm->name, res->lockname.len, res->lockname.name,
> +		     target, ret);
>  	spin_lock(&dlm->spinlock);
>  leave:
>  	return lock_dropped;
> @@ -2862,61 +2826,58 @@ static void dlm_remove_nonlocal_locks(struct dlm_ctxt *dlm,
>  	}
>  }
>  
> -/* for now this is not too intelligent.  we will
> - * need stats to make this do the right thing.
> - * this just finds the first lock on one of the
> - * queues and uses that node as the target. */
> +/*
> + * Pick a node to migrate the lock resource to. This function selects a
> + * potential target based first on the locks and then on refmap. It skips
> + * nodes that are in the process of exiting the domain.
> + */
>  static u8 dlm_pick_migration_target(struct dlm_ctxt *dlm,
>  				    struct dlm_lock_resource *res)
>  {
> -	int i;
> +	enum dlm_lockres_list idx;
>  	struct list_head *queue = &res->granted;
>  	struct dlm_lock *lock;
>  	int nodenum;
>  
>  	assert_spin_locked(&dlm->spinlock);
> +	assert_spin_locked(&res->spinlock);
>  
> -	spin_lock(&res->spinlock);
> -	for (i=0; i<3; i++) {
> +	/* Go through all the locks */
> +	for (idx = DLM_GRANTED_LIST; idx <= DLM_BLOCKED_LIST; idx++) {
> +		queue = dlm_list_idx_to_ptr(res, idx);
>  		list_for_each_entry(lock, queue, list) {
> -			/* up to the caller to make sure this node
> -			 * is alive */
> -			if (lock->ml.node != dlm->node_num) {
> -				spin_unlock(&res->spinlock);
> -				return lock->ml.node;
> -			}
> +			if (lock->ml.node == dlm->node_num)
> +				continue;
> +			if (test_bit(lock->ml.node, dlm->exit_domain_map))
> +				continue;
> +			if (!test_bit(lock->ml.node, dlm->domain_map))
> +				continue;
> +			nodenum = lock->ml.node;
> +			goto bail;
>  		}
> -		queue++;
> -	}
> -
> -	nodenum = find_next_bit(res->refmap, O2NM_MAX_NODES, 0);
> -	if (nodenum < O2NM_MAX_NODES) {
> -		spin_unlock(&res->spinlock);
> -		return nodenum;
>  	}
> -	spin_unlock(&res->spinlock);
> -	mlog(0, "have not found a suitable target yet! checking domain map\n");
>  
> -	/* ok now we're getting desperate.  pick anyone alive. */
> +	/* Go thru the refmap */
>  	nodenum = -1;
>  	while (1) {
> -		nodenum = find_next_bit(dlm->domain_map,
> -					O2NM_MAX_NODES, nodenum+1);
> -		mlog(0, "found %d in domain map\n", nodenum);
> +		nodenum = find_next_bit(res->refmap, O2NM_MAX_NODES,
> +					nodenum + 1);
>  		if (nodenum >= O2NM_MAX_NODES)
>  			break;
> -		if (nodenum != dlm->node_num) {
> -			mlog(0, "picking %d\n", nodenum);
> -			return nodenum;
> -		}
> +		if (nodenum == dlm->node_num)
> +			continue;
> +		if (test_bit(nodenum, dlm->exit_domain_map))
> +			continue;
> +		if (!test_bit(lock->ml.node, dlm->domain_map))
> +			continue;

If the lock's owning node isn't in the domain map, we're just ignoring it? I
guess I'm not following what the last 'if (!test_bit(lock->ml.node,
dlm->domain_map))' line is trying to do.
	--Mark

PS: I'm rusty on fs/ocfs2/dlm stuff as you can tell :)

--
Mark Fasheh



More information about the Ocfs2-devel mailing list