[Ocfs2-tools-devel] [PATCH 3/3] ocfs2_controld.pcmk: Complete implementation of kill_stack_node()

Mark Fasheh mfasheh at suse.com
Thu Aug 21 10:15:05 PDT 2008


This plugs ocfs2_controld.pcmk into the pacemaker-provided stonith library,
so that kill_stack_node() can initiate fencing of a misbehaving node.

Signed-off-by: Andrew Beekhof <abeekhof at suse.de>
Signed-off-by: Mark Fasheh <mfasheh at suse.com>
---
 ocfs2_controld/Makefile    |    2 +-
 ocfs2_controld/pacemaker.c |  131 +++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 129 insertions(+), 4 deletions(-)

diff --git a/ocfs2_controld/Makefile b/ocfs2_controld/Makefile
index a7bc98a..6c88c74 100644
--- a/ocfs2_controld/Makefile
+++ b/ocfs2_controld/Makefile
@@ -66,7 +66,7 @@ DIST_FILES =				\
 ocfs2_controld.pcmk: $(PCMK_DAEMON_OBJS) $(LIBO2CB_DEPS)
 	$(LINK) $(GLIB_LIBS) $(LIBO2CB_LIBS) $(COM_ERR_LIBS) \
 		$(OPENAIS_LIBS) $(COROSYNC_LIBS) \
-		$(DLMCONTROL_LIBS) -lcrmcluster
+		$(DLMCONTROL_LIBS) -lcrmcluster -lstonithd
 
 ocfs2_controld.cman: $(CMAN_DAEMON_OBJS) $(LIBO2CB_DEPS)
 	$(LINK) $(LIBO2CB_LIBS) $(COM_ERR_LIBS) $(OPENAIS_LIBS) \
diff --git a/ocfs2_controld/pacemaker.c b/ocfs2_controld/pacemaker.c
index eaa861a..246e1df 100644
--- a/ocfs2_controld/pacemaker.c
+++ b/ocfs2_controld/pacemaker.c
@@ -22,6 +22,7 @@
 
 #include <crm/crm.h>
 #include <crm/common/cluster.h>
+#include <fencing/stonithd_api.h>
 
 #include "ocfs2-kernel/kernel-list.h"
 #include "o2cb/o2cb.h"
@@ -35,6 +36,7 @@
 
 int			our_nodeid = 0;
 static int		pcmk_ci;
+static int		stonithd_ci;
 static char *		clustername = "pacemaker";
 extern struct list_head mounts;
 const char *stackname = "pcmk";
@@ -45,10 +47,21 @@ char *local_node_uname = NULL;
 int kill_stack_node(int nodeid)
 {
 	int error = 1;
+	stonith_ops_t st_op;
+	char *target = nodeid2name(nodeid);
 
-	log_debug("killing node %d", nodeid);
+	log_debug("killing node %d (aka. %s)", nodeid, target);
+
+	if(target) {
+		st_op.timeout = 150;
+		st_op.node_uuid = NULL;
+		st_op.private_data = NULL;
+		st_op.node_name = target;
+		st_op.optype = POWEROFF;
+
+		error = stonithd_node_fence(&st_op);
+	}
 
-	/* error = cman_kill_node(ch_admin, nodeid); */
 	if (error)
 		log_debug("Unable to kill node %d, %d %d", nodeid, error,
 			  errno);
@@ -108,6 +121,9 @@ static void dead_pcmk(int ci)
 
 void exit_stack(void)
 {
+	log_debug("closing stonithd connection");
+	stonithd_signoff();
+
 	log_debug("closing pacemaker connection");
 	if (ais_fd_async) {
 		close(ais_fd_async);
@@ -242,6 +258,114 @@ bail:
 	return;
 }
 
+static void dead_stonithd(int ci)
+{
+	/* Death handler given to connection_add() for the stonithd fd. */
+	if (ci == stonithd_ci) {
+		log_error("stonithd connection died");
+		shutdown_daemon();
+		connection_dead(ci);
+	} else {
+		log_error("Unknown connection %d", ci);
+	}
+}
+
+static void process_stonithd(int ci)
+{
+	IPC_Channel *stonithd_ch = stonithd_input_IPC_channel();
+
+	/* Handler passed to connection_add() for the stonithd fd: drain every
+	 * ready result.  A NULL channel is handled like a disconnected one. */
+	while (stonithd_ch != NULL && stonithd_op_result_ready()) {
+		if (stonithd_ch->ch_status != IPC_CONNECT) {
+			/* The pending "message" is the disconnect itself */
+			break;
+		}
+		if (ST_FAIL == stonithd_receive_ops_result(FALSE)) {
+			log_error("stonithd_receive_ops_result() failed");
+		}
+	}
+
+	if (stonithd_ch == NULL || stonithd_ch->ch_status != IPC_CONNECT)
+		dead_stonithd(stonithd_ci);
+}
+
+static void result_stonithd(stonith_ops_t *op)
+{
+	/* Callback handed to stonithd_set_stonith_ops_callback(): it only
+	 * logs the outcome of a fencing operation. */
+	if (!op) {
+		log_error("Called with a NULL op!");
+		return;
+	}
+
+	log_debug("Stonithd result: call=%d, optype=%d, node_name=%s, result=%d, node_list=%s, action=%s",
+		  op->call_id, op->optype, op->node_name, op->op_result,
+		  (char *)op->node_list, op->private_data);
+
+	if (op->op_result == STONITH_SUCCEEDED)
+		return;
+	if (op->op_result == STONITH_CANNOT ||
+	    op->op_result == STONITH_TIMEOUT ||
+	    op->op_result == STONITH_GENERIC) {
+		log_error("Stonith of %s failed (%d)",
+			  op->node_name, op->op_result);
+	} else {
+		log_error("Unsupported action result: %d", op->op_result);
+	}
+}
+
+static gboolean setup_stonith(void)
+{
+	/* Sign on to stonithd and register its receive fd via connection_add().
+	 * Returns TRUE on success, FALSE (after logging the reason) otherwise. */
+	int lpc = 0;
+	int rc = ST_OK;
+	int stonithd_fd;
+	const char *reason = NULL;
+	IPC_Channel *stonithd_ch = NULL;
+
+	/* Up to 30 tries: sleep 1s before each and 1s more after a failure. */
+	for(lpc = 0; lpc < 30; lpc++) {
+		log_debug("Attempting connection to fencing daemon...");
+		sleep(1);
+		rc = stonithd_signon("ocfs2-tools");
+		if(rc == ST_OK)
+			break;
+		log_error("Sign-in failed: pausing and trying again in 2s...");
+		sleep(1);
+	}
+	if(rc != ST_OK) {
+		reason = "Sign-in failed";
+		goto bail;
+	}
+	rc = stonithd_set_stonith_ops_callback(result_stonithd);
+	if(rc != ST_OK) {
+		reason = "Setup failed";
+		goto bail;
+	}
+	stonithd_ch = stonithd_input_IPC_channel();
+	if(stonithd_ch == NULL) {
+		reason = "No connection";
+		goto bail;
+	}
+	stonithd_fd = stonithd_ch->ops->get_recv_select_fd(stonithd_ch);
+	if(stonithd_fd <= 0) {	/* check the fd, not the channel pointer */
+		reason = "No fd";
+		goto bail;
+	}
+	stonithd_ci = connection_add(stonithd_fd, process_stonithd, dead_stonithd);
+	if(stonithd_ci < 0) {	/* setup_stack() treats a negative index as failure */
+		reason = "connection_add failed";
+		goto bail;
+	}
+	return TRUE;
+
+bail:
+	log_error("Unable to add stonithd client: %s", reason);
+	return FALSE;
+}
+
 int setup_stack(void)
 {
 	int retries = 0;
@@ -250,6 +374,7 @@ int setup_stack(void)
 	int rc = SA_AIS_OK;
 	struct utsname name;
 
+	crm_log_init("ocfs2_controld", LOG_INFO, FALSE, TRUE, 0, NULL);
 	crm_peer_init();
 
 	if (local_node_uname == NULL) {
@@ -300,7 +425,7 @@ retry:
 	log_debug("Local node id: %d", our_nodeid);
 
 	pcmk_ci = connection_add(ais_fd_async, process_pcmk, dead_pcmk);
-	if (pcmk_ci >= 0)
+	if (pcmk_ci >= 0 && setup_stonith())
 		return ais_fd_async;
 
 	log_error("Unable to add pacemaker client: %s", strerror(-pcmk_ci));
-- 
1.5.4.1




More information about the Ocfs2-tools-devel mailing list