[Ocfs2-devel] [PATCH 6/8] ocfs2/dlm: Add message DLM_QUERY_HBREGION

Wengang Wang wen.gang.wang at oracle.com
Wed Jul 28 09:21:57 PDT 2010


Hi Sunil,

Why don't we merge the two new messages into the existing DLM_QUERY_JOIN_MSG?
Is it because that message would become too large?

On 10-07-23 16:55, Sunil Mushran wrote:
> Signed-off-by: Sunil Mushran <sunil.mushran at oracle.com>
> ---
>  fs/ocfs2/cluster/ocfs2_nodemanager.h |    2 +
>  fs/ocfs2/dlm/dlmcommon.h             |   12 ++-
>  fs/ocfs2/dlm/dlmdomain.c             |  222 +++++++++++++++++++++++++++++++++-
>  3 files changed, 234 insertions(+), 2 deletions(-)
> 
> diff --git a/fs/ocfs2/cluster/ocfs2_nodemanager.h b/fs/ocfs2/cluster/ocfs2_nodemanager.h
> index 5b9854b..1829c01 100644
> --- a/fs/ocfs2/cluster/ocfs2_nodemanager.h
> +++ b/fs/ocfs2/cluster/ocfs2_nodemanager.h
> @@ -36,4 +36,6 @@
>  /* host name, group name, cluster name all 64 bytes */
>  #define O2NM_MAX_NAME_LEN        64    // __NEW_UTS_LEN
>  
> +#define O2NM_MAX_HBREGIONS	16
> +
>  #endif /* _OCFS2_NODEMANAGER_H */
> diff --git a/fs/ocfs2/dlm/dlmcommon.h b/fs/ocfs2/dlm/dlmcommon.h
> index a13292a..2c05138 100644
> --- a/fs/ocfs2/dlm/dlmcommon.h
> +++ b/fs/ocfs2/dlm/dlmcommon.h
> @@ -445,7 +445,8 @@ enum {
>  	DLM_LOCK_REQUEST_MSG,	 /* 515 */
>  	DLM_RECO_DATA_DONE_MSG,	 /* 516 */
>  	DLM_BEGIN_RECO_MSG,	 /* 517 */
> -	DLM_FINALIZE_RECO_MSG	 /* 518 */
> +	DLM_FINALIZE_RECO_MSG,	 /* 518 */
> +	DLM_QUERY_HBREGION,	 /* 519 */
>  };
>  
>  struct dlm_reco_node_data
> @@ -727,6 +728,15 @@ struct dlm_cancel_join
>  	u8 domain[O2NM_MAX_NAME_LEN];
>  };
>  
> +struct dlm_query_hbregion {
> +	u8 qhb_node;
> +	u8 qhb_numregions;
> +	u8 qhb_namelen;
> +	u8 pad1;
> +	u8 qhb_domain[O2NM_MAX_NAME_LEN];
> +	u8 qhb_hbregions[O2HB_MAX_REGION_NAME_LEN * O2NM_MAX_HBREGIONS];
> +};
> +
>  struct dlm_exit_domain
>  {
>  	u8 node_idx;
> diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c
> index 2408b9f..3521a00 100644
> --- a/fs/ocfs2/dlm/dlmdomain.c
> +++ b/fs/ocfs2/dlm/dlmdomain.c
> @@ -128,10 +128,13 @@ static DECLARE_WAIT_QUEUE_HEAD(dlm_domain_events);
>   * will have a negotiated version with the same major number and a minor
>   * number equal or smaller.  The dlm_ctxt->dlm_locking_proto field should
>   * be used to determine what a running domain is actually using.
> + *
> + * New in version 1.1:
> + *	- Message DLM_QUERY_HBREGION added to support global heartbeat
>   */
>  static const struct dlm_protocol_version dlm_protocol = {
>  	.pv_major = 1,
> -	.pv_minor = 0,
> +	.pv_minor = 1,
>  };
>  
>  #define DLM_DOMAIN_BACKOFF_MS 200
> @@ -142,6 +145,8 @@ static int dlm_assert_joined_handler(struct o2net_msg *msg, u32 len, void *data,
>  				     void **ret_data);
>  static int dlm_cancel_join_handler(struct o2net_msg *msg, u32 len, void *data,
>  				   void **ret_data);
> +static int dlm_query_hbregion_handler(struct o2net_msg *msg, u32 len,
> +				      void *data, void **ret_data);
>  static int dlm_exit_domain_handler(struct o2net_msg *msg, u32 len, void *data,
>  				   void **ret_data);
>  static int dlm_protocol_compare(struct dlm_protocol_version *existing,
> @@ -918,6 +923,205 @@ static int dlm_assert_joined_handler(struct o2net_msg *msg, u32 len, void *data,
>  	return 0;
>  }
>  
> +static int dlm_match_hbregions(struct dlm_ctxt *dlm,
> +			       struct dlm_query_hbregion *qhb)
> +{
> +	char *local = NULL, *remote = qhb->qhb_hbregions;
> +	char *l, *r;
> +	int localnr, i, j, foundit;
> +	int status = 0;
> +
> +	if (!o2hb_global_heartbeat_active()) {
> +		if (qhb->qhb_numregions) {
> +			mlog(ML_ERROR, "Domain %s: Joining node %d has global "
> +			     "heartbeat enabled but local node %d does not\n",
> +			     qhb->qhb_domain, qhb->qhb_node, dlm->node_num);
> +			status = -EINVAL;
> +		}
> +		goto bail;
> +	}
> +
> +	if (o2hb_global_heartbeat_active() && !qhb->qhb_numregions) {
> +		mlog(ML_ERROR, "Domain %s: Local node %d has global "
> +		     "heartbeat enabled but joining node %d does not\n",
> +		     qhb->qhb_domain, dlm->node_num, qhb->qhb_node);
> +		status = -EINVAL;
> +		goto bail;
> +	}
> +
> +	r = remote;
> +	for (i = 0; i < qhb->qhb_numregions; ++i) {
> +		mlog(ML_NOTICE, "Region %.*s\n", O2HB_MAX_REGION_NAME_LEN, r);
> +		r += O2HB_MAX_REGION_NAME_LEN;
> +	}
> +
> +	local = kmalloc(sizeof(qhb->qhb_hbregions), GFP_KERNEL);
> +	if (!local) {
> +		status = -ENOMEM;
> +		goto bail;
> +	}
> +
> +	localnr = o2hb_get_all_regions(local, O2NM_MAX_HBREGIONS);
> +
> +	/* compare local regions with remote */
> +	l = local;
> +	for (i = 0; i < localnr; ++i) {
> +		foundit = 0;
> +		r = remote;
> +		for (j = 0; j <= qhb->qhb_numregions; ++j) {
> +			if (!memcmp(l, r, O2HB_MAX_REGION_NAME_LEN)) {
> +				foundit = 1;
> +				break;
> +			}
> +			r += O2HB_MAX_REGION_NAME_LEN;
> +		}
> +		if (!foundit) {
> +			status = -EINVAL;
> +			mlog(ML_ERROR, "Domain %s: Region '%.*s' registered "
> +			     "in local node %d but not in joining node %d\n",
> +			     qhb->qhb_domain, O2HB_MAX_REGION_NAME_LEN, l,
> +			     dlm->node_num, qhb->qhb_node);
> +			goto bail;
> +		}
> +		l += O2HB_MAX_REGION_NAME_LEN;
> +	}
> +
> +	/* compare remote with local regions */
> +	r = remote;
> +	for (i = 0; i < qhb->qhb_numregions; ++i) {
> +		foundit = 0;
> +		l = local;
> +		for (j = 0; j < localnr; ++j) {
> +			if (!memcmp(r, l, O2HB_MAX_REGION_NAME_LEN)) {
> +				foundit = 1;
> +				break;
> +			}
> +			l += O2HB_MAX_REGION_NAME_LEN;
> +		}
> +		if (!foundit) {
> +			status = -EINVAL;
> +			mlog(ML_ERROR, "Domain %s: Region '%.*s' registered "
> +			     "in joining node %d but not in local node %d\n",
> +			     qhb->qhb_domain, O2HB_MAX_REGION_NAME_LEN, r,
> +			     qhb->qhb_node, dlm->node_num);
> +			goto bail;
> +		}
> +		r += O2HB_MAX_REGION_NAME_LEN;
> +	}

Why do we need to compare again? Isn't just checking
"qhb->qhb_numregions == localnr" sufficient?

> +
> +bail:
> +	kfree(local);
> +
> +	return status;
> +}
> +
> +static int dlm_send_hbregions(struct dlm_ctxt *dlm, unsigned long *node_map)
> +{
> +	struct dlm_query_hbregion *qhb = NULL;
> +	int status, ret = 0, i;
> +	char *p;
> +
> +	if (find_next_bit(node_map, O2NM_MAX_NODES, 0) >= O2NM_MAX_NODES)
> +		goto bail;
> +
> +	qhb = kmalloc(sizeof(struct dlm_query_hbregion), GFP_KERNEL);
> +	if (!qhb) {
> +		ret = -ENOMEM;
> +		mlog_errno(ret);
> +		goto bail;
> +	}
> +
> +	memset(qhb, 0, sizeof(struct dlm_query_hbregion));
> +
> +	qhb->qhb_node = dlm->node_num;
> +	qhb->qhb_namelen = strlen(dlm->name);
> +	memcpy(qhb->qhb_domain, dlm->name, qhb->qhb_namelen);
> +	/* if local hb, the numregions will be zero */
> +	if (o2hb_global_heartbeat_active())
> +		qhb->qhb_numregions = o2hb_get_all_regions(qhb->qhb_hbregions,
> +							   O2NM_MAX_HBREGIONS);
> +
> +	p = qhb->qhb_hbregions;
> +	for (i = 0; i < qhb->qhb_numregions; ++i, p += O2HB_MAX_REGION_NAME_LEN)
> +		mlog(ML_NOTICE, "Region %.*s\n", O2HB_MAX_REGION_NAME_LEN, p);
> +
> +	i = -1;
> +	while ((i = find_next_bit(node_map, O2NM_MAX_NODES,
> +				  i + 1)) < O2NM_MAX_NODES) {
> +		if (i == dlm->node_num)
> +			continue;
> +
> +		mlog(ML_NOTICE, "Sending hbregion to node %d\n", i);
> +

Is this NOTICE log (and the others above and below) needed?
I'm guessing you were using them for debugging purposes :-P

regards,
wengang.
> +		ret = o2net_send_message(DLM_QUERY_HBREGION, DLM_MOD_KEY, qhb,
> +					 sizeof(struct dlm_query_hbregion),
> +					 i, &status);



More information about the Ocfs2-devel mailing list