[Ocfs2-tools-devel] [PATCH 3/3] ocfs2_controld.pcmk: Complete implementation of kill_stack_node()

Joel Becker Joel.Becker at oracle.com
Thu Aug 21 14:55:47 PDT 2008


On Thu, Aug 21, 2008 at 02:34:19PM -0700, Mark Fasheh wrote:
> On Thu, Aug 21, 2008 at 02:21:08PM -0700, Joel Becker wrote:
> > On Thu, Aug 21, 2008 at 10:15:05AM -0700, Mark Fasheh wrote:
> > > This plugs ocfs2_controld.pcmk into the pacemaker-provided stonith library,
> > > so that kill_stack_node() can initiate fencing of a misbehaving node.
> > > 
> > > Signed-off-by: Andrew Beekhof <abeekhof at suse.de>
> > > Signed-off-by: Mark Fasheh <mfasheh at suse.com>
> > 
> > 	I like this!  One comment:
> > 
> > > +	stonithd_ci = connection_add(stonithd_fd, process_stonithd,
> > > +				     dead_stonithd);	
> > > +	return TRUE;
> > 
> > 	connection_add() can return an error (ci<0).  That needs to be
> > checked.
> 
> Ahh, right you are. Updated patch follows.
> 	--Mark
> 
> --
> Mark Fasheh
> 
> From: Andrew Beekhof <abeekhof at suse.de>
> 
> [PATCH] ocfs2_controld.pcmk: Complete implementation of kill_stack_node()
> 
> This plugs ocfs2_controld.pcmk into the pacemaker-provided stonith library,
> so that kill_stack_node() can initiate fencing of a misbehaving node.
> 
> Signed-off-by: Andrew Beekhof <abeekhof at suse.de>
> Signed-off-by: Mark Fasheh <mfasheh at suse.com>

Sobby!

> ---
>  ocfs2_controld/Makefile    |    2 +-
>  ocfs2_controld/pacemaker.c |  137 +++++++++++++++++++++++++++++++++++++++++++-
>  2 files changed, 135 insertions(+), 4 deletions(-)
> 
> diff --git a/ocfs2_controld/Makefile b/ocfs2_controld/Makefile
> index a7bc98a..6c88c74 100644
> --- a/ocfs2_controld/Makefile
> +++ b/ocfs2_controld/Makefile
> @@ -66,7 +66,7 @@ DIST_FILES =				\
>  ocfs2_controld.pcmk: $(PCMK_DAEMON_OBJS) $(LIBO2CB_DEPS)
>  	$(LINK) $(GLIB_LIBS) $(LIBO2CB_LIBS) $(COM_ERR_LIBS) \
>  		$(OPENAIS_LIBS) $(COROSYNC_LIBS) \
> -		$(DLMCONTROL_LIBS) -lcrmcluster
> +		$(DLMCONTROL_LIBS) -lcrmcluster -lstonithd
>  
>  ocfs2_controld.cman: $(CMAN_DAEMON_OBJS) $(LIBO2CB_DEPS)
>  	$(LINK) $(LIBO2CB_LIBS) $(COM_ERR_LIBS) $(OPENAIS_LIBS) \
> diff --git a/ocfs2_controld/pacemaker.c b/ocfs2_controld/pacemaker.c
> index eaa861a..88c675a 100644
> --- a/ocfs2_controld/pacemaker.c
> +++ b/ocfs2_controld/pacemaker.c
> @@ -22,6 +22,7 @@
>  
>  #include <crm/crm.h>
>  #include <crm/common/cluster.h>
> +#include <fencing/stonithd_api.h>
>  
>  #include "ocfs2-kernel/kernel-list.h"
>  #include "o2cb/o2cb.h"
> @@ -35,6 +36,7 @@
>  
>  int			our_nodeid = 0;
>  static int		pcmk_ci;
> +static int		stonithd_ci;
>  static char *		clustername = "pacemaker";
>  extern struct list_head mounts;
>  const char *stackname = "pcmk";
> @@ -45,10 +47,21 @@ char *local_node_uname = NULL;
>  int kill_stack_node(int nodeid)
>  {
>  	int error = 1;
> +	stonith_ops_t st_op;
> +	char *target = nodeid2name(nodeid);
>  
> -	log_debug("killing node %d", nodeid);
> +	log_debug("killing node %d (aka. %s)", nodeid, target);
> +
> +	if(target) {
> +		st_op.timeout = 150;
> +		st_op.node_uuid = NULL;
> +		st_op.private_data = NULL;
> +		st_op.node_name = target;
> +		st_op.optype = POWEROFF;
> +
> +		error = stonithd_node_fence(&st_op);
> +	}
>  
> -	/* error = cman_kill_node(ch_admin, nodeid); */
>  	if (error)
>  		log_debug("Unable to kill node %d, %d %d", nodeid, error,
>  			  errno);
> @@ -108,6 +121,9 @@ static void dead_pcmk(int ci)
>  
>  void exit_stack(void)
>  {
> +	log_debug("closing stonithd connection");
> +	stonithd_signoff();
> +
>  	log_debug("closing pacemaker connection");
>  	if (ais_fd_async) {
>  		close(ais_fd_async);
> @@ -242,6 +258,120 @@ bail:
>  	return;
>  }
>  
> +static void dead_stonithd(int ci)
> +{
> +	if (ci != stonithd_ci) {
> +		log_error("Unknown connection %d", ci);
> +		return;
> +	}
> +
> +	log_error("stonithd connection died");
> +	shutdown_daemon();
> +	connection_dead(ci);
> +}
> +
> +static void process_stonithd(int ci)
> +{
> +	IPC_Channel *stonithd_ch = stonithd_input_IPC_channel();
> +
> +	while (stonithd_op_result_ready()) {
> +		if (stonithd_ch->ch_status != IPC_CONNECT) {
> +			/* The message which was pending for us is that
> +			 * the IPC status is now IPC_DISCONNECT */
> +			break;
> +		}
> +
> +		if (ST_FAIL == stonithd_receive_ops_result(FALSE)) {
> +			log_error("stonithd_receive_ops_result() failed");
> +		}
> +	}
> +
> +	if (stonithd_ch->ch_status != IPC_CONNECT)
> +		dead_stonithd(stonithd_ci);
> +}
> +
> +static void result_stonithd(stonith_ops_t *op)
> +{
> +	if (op == NULL) {
> +		log_error("Called with a NULL op!");
> +		return;
> +	}
> +	
> +	log_debug("Stonithd result: call=%d, optype=%d, node_name=%s, result=%d, node_list=%s, action=%s",
> +		  op->call_id, op->optype, op->node_name, op->op_result,
> +		  (char *)op->node_list, op->private_data);
> +
> +	switch(op->op_result) {
> +		case STONITH_SUCCEEDED:
> +			break;
> +		case STONITH_CANNOT:
> +		case STONITH_TIMEOUT:
> +		case STONITH_GENERIC:
> +			log_error("Stonith of %s failed (%d)",
> +				  op->node_name, op->op_result);
> +			break;
> +		default:
> +			log_error("Unsupported action result: %d", op->op_result);
> +	}
> +}
> +
> +static gboolean setup_stonith(void)
> +{
> +	int lpc = 0;
> +	int rc = ST_OK;
> +	int stonithd_fd;
> +	const char *reason = NULL;
> +	IPC_Channel *stonithd_ch = NULL;
> +
> +	for(lpc = 0; lpc < 30; lpc++) {
> +		log_debug("Attempting connection to fencing daemon...");
> +
> +		sleep(1);
> +		rc = stonithd_signon("ocfs2-tools");
> +		if(rc == ST_OK)
> +			break;
> +
> +		log_error("Sign-in failed: pausing and trying again in 2s...");
> +		sleep(1);
> +	}
> +
> +	if(rc != ST_OK) {
> +		reason = "Sign-in failed";
> +		goto bail;
> +	}
> +
> +	rc = stonithd_set_stonith_ops_callback(result_stonithd);
> +	if(rc != ST_OK) {
> +		reason = "Setup failed";
> +		goto bail;
> +	}
> +
> +	stonithd_ch = stonithd_input_IPC_channel();
> +	if(stonithd_ch == NULL) {
> +		reason = "No connection";
> +		goto bail;
> +	}
> +	stonithd_fd = stonithd_ch->ops->get_recv_select_fd(stonithd_ch);
> +	if(stonithd_ch <= 0) {
> +		reason = "No fd";
> +		goto bail;
> +	}
> +
> +	stonithd_ci = connection_add(stonithd_fd, process_stonithd,
> +				     dead_stonithd);
> +	if (stonithd_ci < 0) {
> +		log_error("Unable to add stonithd client: %s",
> +			  strerror(-stonithd_ci));
> +		goto bail;
> +	}
> +
> +	return TRUE;
> +
> +bail:
> +	log_error("Unable to add stonithd client: %s", reason);
> +	return FALSE;
> +}
> +
>  int setup_stack(void)
>  {
>  	int retries = 0;
> @@ -250,6 +380,7 @@ int setup_stack(void)
>  	int rc = SA_AIS_OK;
>  	struct utsname name;
>  
> +	crm_log_init("ocfs2_controld", LOG_INFO, FALSE, TRUE, 0, NULL);
>  	crm_peer_init();
>  
>  	if (local_node_uname == NULL) {
> @@ -300,7 +431,7 @@ retry:
>  	log_debug("Local node id: %d", our_nodeid);
>  
>  	pcmk_ci = connection_add(ais_fd_async, process_pcmk, dead_pcmk);
> -	if (pcmk_ci >= 0)
> +	if (pcmk_ci >= 0 && setup_stonith())
>  		return ais_fd_async;
>  
>  	log_error("Unable to add pacemaker client: %s", strerror(-pcmk_ci));
> -- 
> 1.5.4.1
> 

-- 

Life's Little Instruction Book #15

	"Own a great stereo system."

Joel Becker
Principal Software Developer
Oracle
E-mail: joel.becker at oracle.com
Phone: (650) 506-8127



More information about the Ocfs2-tools-devel mailing list