[Ocfs2-tools-devel] [PATCH 3/3] ocfs2_controld.pcmk: Complete implementation of kill_stack_node()
Joel Becker
Joel.Becker at oracle.com
Thu Aug 21 14:55:47 PDT 2008
On Thu, Aug 21, 2008 at 02:34:19PM -0700, Mark Fasheh wrote:
> On Thu, Aug 21, 2008 at 02:21:08PM -0700, Joel Becker wrote:
> > On Thu, Aug 21, 2008 at 10:15:05AM -0700, Mark Fasheh wrote:
> > > This plugs ocfs2_controld.pcmk into the pacemaker-provided stonith library,
> > > so that kill_stack_node() can initiate fencing of a misbehaving node.
> > >
> > > Signed-off-by: Andrew Beekhof <abeekhof at suse.de>
> > > Signed-off-by: Mark Fasheh <mfasheh at suse.com>
> >
> > I like this! One comment:
> >
> > > + stonithd_ci = connection_add(stonithd_fd, process_stonithd,
> > > + dead_stonithd);
> > > + return TRUE;
> >
> > connection_add() can return an error (ci<0). That needs to be
> > checked.
>
> Ahh, right you are. Updated patch follows.
> --Mark
>
> --
> Mark Fasheh
>
> From: Andrew Beekhof <abeekhof at suse.de>
>
> [PATCH] ocfs2_controld.pcmk: Complete implementation of kill_stack_node()
>
> This plugs ocfs2_controld.pcmk into the pacemaker-provided stonith library,
> so that kill_stack_node() can initiate fencing of a misbehaving node.
>
> Signed-off-by: Andrew Beekhof <abeekhof at suse.de>
> Signed-off-by: Mark Fasheh <mfasheh at suse.com>
Sobby!
> ---
> ocfs2_controld/Makefile | 2 +-
> ocfs2_controld/pacemaker.c | 137 +++++++++++++++++++++++++++++++++++++++++++-
> 2 files changed, 135 insertions(+), 4 deletions(-)
>
> diff --git a/ocfs2_controld/Makefile b/ocfs2_controld/Makefile
> index a7bc98a..6c88c74 100644
> --- a/ocfs2_controld/Makefile
> +++ b/ocfs2_controld/Makefile
> @@ -66,7 +66,7 @@ DIST_FILES = \
> ocfs2_controld.pcmk: $(PCMK_DAEMON_OBJS) $(LIBO2CB_DEPS)
> $(LINK) $(GLIB_LIBS) $(LIBO2CB_LIBS) $(COM_ERR_LIBS) \
> $(OPENAIS_LIBS) $(COROSYNC_LIBS) \
> - $(DLMCONTROL_LIBS) -lcrmcluster
> + $(DLMCONTROL_LIBS) -lcrmcluster -lstonithd
>
> ocfs2_controld.cman: $(CMAN_DAEMON_OBJS) $(LIBO2CB_DEPS)
> $(LINK) $(LIBO2CB_LIBS) $(COM_ERR_LIBS) $(OPENAIS_LIBS) \
> diff --git a/ocfs2_controld/pacemaker.c b/ocfs2_controld/pacemaker.c
> index eaa861a..88c675a 100644
> --- a/ocfs2_controld/pacemaker.c
> +++ b/ocfs2_controld/pacemaker.c
> @@ -22,6 +22,7 @@
>
> #include <crm/crm.h>
> #include <crm/common/cluster.h>
> +#include <fencing/stonithd_api.h>
>
> #include "ocfs2-kernel/kernel-list.h"
> #include "o2cb/o2cb.h"
> @@ -35,6 +36,7 @@
>
> int our_nodeid = 0;
> static int pcmk_ci;
> +static int stonithd_ci;
> static char * clustername = "pacemaker";
> extern struct list_head mounts;
> const char *stackname = "pcmk";
> @@ -45,10 +47,21 @@ char *local_node_uname = NULL;
> int kill_stack_node(int nodeid)
> {
> int error = 1;
> + stonith_ops_t st_op;
> + char *target = nodeid2name(nodeid);
>
> - log_debug("killing node %d", nodeid);
> + log_debug("killing node %d (aka. %s)", nodeid, target);
> +
> + if(target) {
> + st_op.timeout = 150;
> + st_op.node_uuid = NULL;
> + st_op.private_data = NULL;
> + st_op.node_name = target;
> + st_op.optype = POWEROFF;
> +
> + error = stonithd_node_fence(&st_op);
> + }
>
> - /* error = cman_kill_node(ch_admin, nodeid); */
> if (error)
> log_debug("Unable to kill node %d, %d %d", nodeid, error,
> errno);
> @@ -108,6 +121,9 @@ static void dead_pcmk(int ci)
>
> void exit_stack(void)
> {
> + log_debug("closing stonithd connection");
> + stonithd_signoff();
> +
> log_debug("closing pacemaker connection");
> if (ais_fd_async) {
> close(ais_fd_async);
> @@ -242,6 +258,120 @@ bail:
> return;
> }
>
> +static void dead_stonithd(int ci)
> +{
> + if (ci != stonithd_ci) {
> + log_error("Unknown connection %d", ci);
> + return;
> + }
> +
> + log_error("stonithd connection died");
> + shutdown_daemon();
> + connection_dead(ci);
> +}
> +
> +static void process_stonithd(int ci)
> +{
> + IPC_Channel *stonithd_ch = stonithd_input_IPC_channel();
> +
> + while (stonithd_op_result_ready()) {
> + if (stonithd_ch->ch_status != IPC_CONNECT) {
> + /* The message which was pending for us is that
> + * the IPC status is now IPC_DISCONNECT */
> + break;
> + }
> +
> + if (ST_FAIL == stonithd_receive_ops_result(FALSE)) {
> + log_error("stonithd_receive_ops_result() failed");
> + }
> + }
> +
> + if (stonithd_ch->ch_status != IPC_CONNECT)
> + dead_stonithd(stonithd_ci);
> +}
> +
> +static void result_stonithd(stonith_ops_t *op)
> +{
> + if (op == NULL) {
> + log_error("Called with a NULL op!");
> + return;
> + }
> +
> + log_debug("Stonithd result: call=%d, optype=%d, node_name=%s, result=%d, node_list=%s, action=%s",
> + op->call_id, op->optype, op->node_name, op->op_result,
> + (char *)op->node_list, op->private_data);
> +
> + switch(op->op_result) {
> + case STONITH_SUCCEEDED:
> + break;
> + case STONITH_CANNOT:
> + case STONITH_TIMEOUT:
> + case STONITH_GENERIC:
> + log_error("Stonith of %s failed (%d)",
> + op->node_name, op->op_result);
> + break;
> + default:
> + log_error("Unsupported action result: %d", op->op_result);
> + }
> +}
> +
> +static gboolean setup_stonith(void)
> +{
> + int lpc = 0;
> + int rc = ST_OK;
> + int stonithd_fd;
> + const char *reason = NULL;
> + IPC_Channel *stonithd_ch = NULL;
> +
> + for(lpc = 0; lpc < 30; lpc++) {
> + log_debug("Attempting connection to fencing daemon...");
> +
> + sleep(1);
> + rc = stonithd_signon("ocfs2-tools");
> + if(rc == ST_OK)
> + break;
> +
> + log_error("Sign-in failed: pausing and trying again in 2s...");
> + sleep(1);
> + }
> +
> + if(rc != ST_OK) {
> + reason = "Sign-in failed";
> + goto bail;
> + }
> +
> + rc = stonithd_set_stonith_ops_callback(result_stonithd);
> + if(rc != ST_OK) {
> + reason = "Setup failed";
> + goto bail;
> + }
> +
> + stonithd_ch = stonithd_input_IPC_channel();
> + if(stonithd_ch == NULL) {
> + reason = "No connection";
> + goto bail;
> + }
> + stonithd_fd = stonithd_ch->ops->get_recv_select_fd(stonithd_ch);
> + if(stonithd_ch <= 0) {
> + reason = "No fd";
> + goto bail;
> + }
> +
> + stonithd_ci = connection_add(stonithd_fd, process_stonithd,
> + dead_stonithd);
> + if (stonithd_ci < 0) {
> + log_error("Unable to add stonithd client: %s",
> + strerror(-stonithd_ci));
> + goto bail;
> + }
> +
> + return TRUE;
> +
> +bail:
> + log_error("Unable to add stonithd client: %s", reason);
> + return FALSE;
> +}
> +
> int setup_stack(void)
> {
> int retries = 0;
> @@ -250,6 +380,7 @@ int setup_stack(void)
> int rc = SA_AIS_OK;
> struct utsname name;
>
> + crm_log_init("ocfs2_controld", LOG_INFO, FALSE, TRUE, 0, NULL);
> crm_peer_init();
>
> if (local_node_uname == NULL) {
> @@ -300,7 +431,7 @@ retry:
> log_debug("Local node id: %d", our_nodeid);
>
> pcmk_ci = connection_add(ais_fd_async, process_pcmk, dead_pcmk);
> - if (pcmk_ci >= 0)
> + if (pcmk_ci >= 0 && setup_stonith())
> return ais_fd_async;
>
> log_error("Unable to add pacemaker client: %s", strerror(-pcmk_ci));
> --
> 1.5.4.1
>
--
Life's Little Instruction Book #15
"Own a great stereo system."
Joel Becker
Principal Software Developer
Oracle
E-mail: joel.becker at oracle.com
Phone: (650) 506-8127
More information about the Ocfs2-tools-devel mailing list