[Ocfs2-tools-devel] [PATCH 3/3] ocfs2_controld.pcmk: Complete implementation of kill_stack_node()
Mark Fasheh
mfasheh at suse.com
Thu Aug 21 10:15:05 PDT 2008
This plugs ocfs2_controld.pcmk into the pacemaker-provided stonith library,
so that kill_stack_node() can initiate fencing of a misbehaving node.
Signed-off-by: Andrew Beekhof <abeekhof at suse.de>
Signed-off-by: Mark Fasheh <mfasheh at suse.com>
---
ocfs2_controld/Makefile | 2 +-
ocfs2_controld/pacemaker.c | 131 +++++++++++++++++++++++++++++++++++++++++++-
2 files changed, 129 insertions(+), 4 deletions(-)
diff --git a/ocfs2_controld/Makefile b/ocfs2_controld/Makefile
index a7bc98a..6c88c74 100644
--- a/ocfs2_controld/Makefile
+++ b/ocfs2_controld/Makefile
@@ -66,7 +66,7 @@ DIST_FILES = \
ocfs2_controld.pcmk: $(PCMK_DAEMON_OBJS) $(LIBO2CB_DEPS)
$(LINK) $(GLIB_LIBS) $(LIBO2CB_LIBS) $(COM_ERR_LIBS) \
$(OPENAIS_LIBS) $(COROSYNC_LIBS) \
- $(DLMCONTROL_LIBS) -lcrmcluster
+ $(DLMCONTROL_LIBS) -lcrmcluster -lstonithd
ocfs2_controld.cman: $(CMAN_DAEMON_OBJS) $(LIBO2CB_DEPS)
$(LINK) $(LIBO2CB_LIBS) $(COM_ERR_LIBS) $(OPENAIS_LIBS) \
diff --git a/ocfs2_controld/pacemaker.c b/ocfs2_controld/pacemaker.c
index eaa861a..246e1df 100644
--- a/ocfs2_controld/pacemaker.c
+++ b/ocfs2_controld/pacemaker.c
@@ -22,6 +22,7 @@
#include <crm/crm.h>
#include <crm/common/cluster.h>
+#include <fencing/stonithd_api.h>
#include "ocfs2-kernel/kernel-list.h"
#include "o2cb/o2cb.h"
@@ -35,6 +36,7 @@
int our_nodeid = 0;
static int pcmk_ci;
+static int stonithd_ci;
static char * clustername = "pacemaker";
extern struct list_head mounts;
const char *stackname = "pcmk";
@@ -45,10 +47,21 @@ char *local_node_uname = NULL;
int kill_stack_node(int nodeid)
{
int error = 1;
+ stonith_ops_t st_op;
+ char *target = nodeid2name(nodeid);
- log_debug("killing node %d", nodeid);
+ log_debug("killing node %d (aka. %s)", nodeid, target);
+
+ if(target) {
+ st_op.timeout = 150;
+ st_op.node_uuid = NULL;
+ st_op.private_data = NULL;
+ st_op.node_name = target;
+ st_op.optype = POWEROFF;
+
+ error = stonithd_node_fence(&st_op);
+ }
- /* error = cman_kill_node(ch_admin, nodeid); */
if (error)
log_debug("Unable to kill node %d, %d %d", nodeid, error,
errno);
@@ -108,6 +121,9 @@ static void dead_pcmk(int ci)
void exit_stack(void)
{
+ log_debug("closing stonithd connection");
+ stonithd_signoff();
+
log_debug("closing pacemaker connection");
if (ais_fd_async) {
close(ais_fd_async);
@@ -242,6 +258,114 @@ bail:
return;
}
+/*
+ * Connection-death handler for the stonithd connection.
+ *
+ * A ci other than stonithd_ci is logged and ignored.  For stonithd_ci the
+ * loss is logged, shutdown_daemon() is called, and connection_dead() is
+ * then called for that connection.
+ */
+static void dead_stonithd(int ci)
+{
+	if (ci == stonithd_ci) {
+		log_error("stonithd connection died");
+		shutdown_daemon();
+		connection_dead(ci);
+		return;
+	}
+
+	/* Not ours: report it and leave the connection alone. */
+	log_error("Unknown connection %d", ci);
+}
+
+/*
+ * Input handler for the stonithd connection.
+ *
+ * Calls stonithd_receive_ops_result() for as long as
+ * stonithd_op_result_ready() reports a pending result, then hands the
+ * connection to dead_stonithd() if the IPC channel is no longer connected.
+ *
+ * ci is not used; the channel is looked up through the stonithd API and
+ * dead_stonithd() is always invoked with stonithd_ci.
+ */
+static void process_stonithd(int ci)
+{
+	IPC_Channel *stonithd_ch = stonithd_input_IPC_channel();
+
+	/* setup_stonith() treats a NULL channel as "No connection"; do the
+	 * same here instead of dereferencing a NULL pointer below. */
+	if (stonithd_ch == NULL) {
+		log_error("stonithd IPC channel is not available");
+		dead_stonithd(stonithd_ci);
+		return;
+	}
+
+	while (stonithd_op_result_ready()) {
+		if (stonithd_ch->ch_status != IPC_CONNECT) {
+			/* The message which was pending for us is that
+			 * the IPC status is now IPC_DISCONNECT */
+			break;
+		}
+
+		if (ST_FAIL == stonithd_receive_ops_result(FALSE)) {
+			log_error("stonithd_receive_ops_result() failed");
+		}
+	}
+
+	if (stonithd_ch->ch_status != IPC_CONNECT) {
+		dead_stonithd(stonithd_ci);
+	}
+}
+
+/*
+ * Operation-result callback (installed by setup_stonith() through
+ * stonithd_set_stonith_ops_callback()): logs the outcome of a fencing
+ * request.
+ *
+ * op may be NULL, in which case an error is logged and nothing else
+ * happens.  Any result other than STONITH_SUCCEEDED is logged as an error.
+ */
+static void result_stonithd(stonith_ops_t *op)
+{
+	const char *node_name;
+	const char *node_list;
+	const char *action;
+
+	if (op == NULL) {
+		log_error("Called with a NULL op!");
+		return;
+	}
+
+	/* kill_stack_node() submits requests with private_data == NULL, and
+	 * passing a NULL pointer for %s is undefined behaviour, so substitute
+	 * a placeholder for every string field that may be unset. */
+	node_name = op->node_name ? (const char *)op->node_name : "(null)";
+	node_list = op->node_list ? (const char *)op->node_list : "(null)";
+	action = op->private_data ? (const char *)op->private_data : "(null)";
+
+	log_debug("Stonithd result: call=%d, optype=%d, node_name=%s, result=%d, node_list=%s, action=%s",
+		  op->call_id, op->optype, node_name, op->op_result,
+		  node_list, action);
+
+	switch (op->op_result) {
+	case STONITH_SUCCEEDED:
+		break;
+	case STONITH_CANNOT:
+	case STONITH_TIMEOUT:
+	case STONITH_GENERIC:
+		log_error("Stonith of %s failed (%d)",
+			  node_name, op->op_result);
+		break;
+	default:
+		log_error("Unsupported action result: %d", op->op_result);
+		break;
+	}
+}
+
+/*
+ * Connect this daemon to stonithd.
+ *
+ * Signs on as "ocfs2-tools" (up to 30 attempts, with a 1s sleep before each
+ * attempt and another 1s after each failure), installs result_stonithd() as
+ * the operation-result callback, and registers the IPC channel's receive fd
+ * with connection_add() so that process_stonithd()/dead_stonithd() service
+ * it.  The resulting connection index is stored in stonithd_ci.
+ *
+ * Returns TRUE on success.  On any failure the reason is logged and FALSE
+ * is returned.
+ */
+static gboolean setup_stonith(void)
+{
+	int lpc = 0;
+	int rc = ST_OK;
+	int stonithd_fd;
+	const char *reason = NULL;
+	IPC_Channel *stonithd_ch = NULL;
+
+	for (lpc = 0; lpc < 30; lpc++) {
+		log_debug("Attempting connection to fencing daemon...");
+
+		sleep(1);
+		rc = stonithd_signon("ocfs2-tools");
+		if (rc == ST_OK) {
+			break;
+		}
+
+		log_error("Sign-in failed: pausing and trying again in 2s...");
+		sleep(1);
+	}
+
+	if (rc != ST_OK) {
+		reason = "Sign-in failed";
+		goto bail;
+	}
+
+	rc = stonithd_set_stonith_ops_callback(result_stonithd);
+	if (rc != ST_OK) {
+		reason = "Setup failed";
+		goto bail;
+	}
+
+	stonithd_ch = stonithd_input_IPC_channel();
+	if (stonithd_ch == NULL) {
+		reason = "No connection";
+		goto bail;
+	}
+
+	stonithd_fd = stonithd_ch->ops->get_recv_select_fd(stonithd_ch);
+	/* Check the fd that was just fetched; the channel pointer has
+	 * already been validated above. */
+	if (stonithd_fd <= 0) {
+		reason = "No fd";
+		goto bail;
+	}
+
+	stonithd_ci = connection_add(stonithd_fd, process_stonithd,
+				     dead_stonithd);
+	/* setup_stack() treats a negative connection_add() result as a
+	 * negated errno; apply the same convention here. */
+	if (stonithd_ci < 0) {
+		reason = strerror(-stonithd_ci);
+		goto bail;
+	}
+
+	return TRUE;
+
+bail:
+	log_error("Unable to add stonithd client: %s", reason);
+	return FALSE;
+}
+
int setup_stack(void)
{
int retries = 0;
@@ -250,6 +374,7 @@ int setup_stack(void)
int rc = SA_AIS_OK;
struct utsname name;
+ crm_log_init("ocfs2_controld", LOG_INFO, FALSE, TRUE, 0, NULL);
crm_peer_init();
if (local_node_uname == NULL) {
@@ -300,7 +425,7 @@ retry:
log_debug("Local node id: %d", our_nodeid);
pcmk_ci = connection_add(ais_fd_async, process_pcmk, dead_pcmk);
- if (pcmk_ci >= 0)
+ if (pcmk_ci >= 0 && setup_stonith())
return ais_fd_async;
log_error("Unable to add pacemaker client: %s", strerror(-pcmk_ci));
--
1.5.4.1
More information about the Ocfs2-tools-devel
mailing list