[Ocfs2-tools-commits] jlbec commits r1399 - in branches/cman-based:
o2cb_controld ocfs2_controld
svn-commits at oss.oracle.com
svn-commits at oss.oracle.com
Mon Aug 20 14:03:46 PDT 2007
Author: jlbec
Date: 2007-08-20 14:03:44 -0700 (Mon, 20 Aug 2007)
New Revision: 1399
Modified:
branches/cman-based/o2cb_controld/action.c
branches/cman-based/o2cb_controld/main.c
branches/cman-based/o2cb_controld/member_cman.c
branches/cman-based/o2cb_controld/o2cb_controld.h
branches/cman-based/ocfs2_controld/main.c
Log:
Provide proper signal handling and exit path for o2cb_controld. This is
the same signal handling setup as ocfs2_controld.
As of right now, there is nothing preventing o2cb_controld from exiting
on a signal. We probably don't want it to exit unless ocfs2_controld is
already offline. We'll fix that later.
Modified: branches/cman-based/o2cb_controld/action.c
===================================================================
--- branches/cman-based/o2cb_controld/action.c 2007-08-20 20:27:48 UTC (rev 1398)
+++ branches/cman-based/o2cb_controld/action.c 2007-08-20 21:03:44 UTC (rev 1399)
@@ -95,46 +95,97 @@
return 0;
}
-static void finalize_nodes(void)
+static void del_configfs_node_full(const char *cluster, const char *name)
{
+ errcode_t err;
+
+ log_debug("del_configfs_node \"%s\"", name);
+
+ err = o2cb_del_node(cluster, name);
+ if (err) {
+ com_err(prog_name, err,
+ "while deleting node \"%s\" in cluster\"%s\"",
+ name, cluster);
+ }
+}
+
+void del_configfs_node(const char *name)
+{
+ del_configfs_node_full(cluster_name, name);
+}
+
+static void finalize_nodes(const char *cluster)
+{
int i;
char **nodes = NULL;
errcode_t err;
- err = o2cb_list_nodes(cluster_name, &nodes);
+ err = o2cb_list_nodes((char *)cluster, &nodes);
if (err) {
if (err != O2CB_ET_SERVICE_UNAVAILABLE) {
com_err(prog_name, err,
"while listing nodes for cluster \"%s\"",
- cluster_name);
+ cluster);
}
return;
}
for (i = 0; nodes && nodes[i] && &(nodes[i]); i++)
- del_configfs_node(nodes[i]);
+ del_configfs_node_full(cluster, nodes[i]);
o2cb_free_nodes_list(nodes);
}
-void finalize_cluster(void)
+/*
+ * This can be called with NULL and it will query cman for the name
+ */
+void finalize_cluster(const char *cluster)
{
errcode_t err;
- if (fill_cluster_name())
- return;
+ if (!cluster) {
+ if (fill_cluster_name())
+ return;
+ cluster = cluster_name;
+ }
- log_debug("Cleaning up cluster \"%s\"", cluster_name);
+ log_debug("Cleaning up cluster \"%s\"", cluster);
- finalize_nodes();
+ finalize_nodes(cluster);
- err = o2cb_remove_cluster(cluster_name);
+ err = o2cb_remove_cluster(cluster);
if (err && (err != O2CB_ET_SERVICE_UNAVAILABLE))
com_err(prog_name, err,
- "Unable to de-configure cluster \"%s\"",
- cluster_name);
+ "Unable to de-configure cluster \"%s\"", cluster);
}
+/*
+ * This is used during startup. CMan is not connected, so we'll use
+ * o2cb to find any stale clusters.
+ */
+void remove_stale_clusters(void)
+{
+ errcode_t err;
+ int i;
+ char **clusters;
+
+ err = o2cb_list_clusters(&clusters);
+ if (err) {
+ /*
+ * We shouldn't get SERVICE_UNAVAILABLE, as o2cb_init()
+ * would have failed.
+ */
+ com_err(prog_name, err,
+ "while trying to find any stale clusters");
+ return;
+ }
+
+ for (i = 0; clusters && clusters[i] && &(clusters[i]); i++)
+ finalize_cluster(clusters[i]);
+
+ o2cb_free_cluster_list(clusters);
+}
+
static int initialize_cluster(void)
{
static int initialized = 0;
@@ -187,28 +238,17 @@
return rv;
}
-void del_configfs_node(const char *name)
-{
- errcode_t err;
- log_debug("del_configfs_node \"%s\"", name);
- err = o2cb_del_node(cluster_name, name);
- if (err) {
- com_err(prog_name, err,
- "while deleting node \"%s\" in cluster\"%s\"",
- name, cluster_name);
- }
-}
-
-
void initialize_o2cb(void)
{
errcode_t err;
+ initialize_o2cb_error_table();
+
err = o2cb_init();
if (err) {
- com_err(prog_name, err, "Cannot initialize o2cb\n");
+ com_err(prog_name, err, "while initializing o2cb");
exit(EXIT_FAILURE);
}
}
Modified: branches/cman-based/o2cb_controld/main.c
===================================================================
--- branches/cman-based/o2cb_controld/main.c 2007-08-20 20:27:48 UTC (rev 1398)
+++ branches/cman-based/o2cb_controld/main.c 2007-08-20 21:03:44 UTC (rev 1399)
@@ -28,12 +28,121 @@
#define LOCKFILE_NAME "/var/run/o2cb_controld.pid"
static int member_fd;
+static int sigpipe_write_fd, sigpipe_fd;
-static void sigterm_handler(int sig)
+static void handler(int signum)
{
- finalize_cluster();
+ log_debug("Caught signal %d", signum);
+ if (write(sigpipe_write_fd, &signum, sizeof(signum)) < sizeof(signum))
+ log_error("Problem writing signal: %s\n", strerror(-errno));
}
+static int handle_signal(void)
+{
+ int rc, caught_sig, abortp = 0;
+ static int segv_already = 0;
+
+ rc = read(sigpipe_fd, (char *)&caught_sig, sizeof(caught_sig));
+ if (rc < 0) {
+ rc = -errno;
+ log_error("Error reading from signal pipe: %s",
+ strerror(-rc));
+ goto out;
+ }
+
+ if (rc != sizeof(caught_sig)) {
+ rc = -EIO;
+ log_error("Error reading from signal pipe: %s",
+ strerror(-rc));
+ goto out;
+ }
+
+ switch (caught_sig) {
+ case SIGQUIT:
+ abortp = 1;
+ /* FALL THROUGH */
+
+ case SIGTERM:
+ case SIGINT:
+ case SIGHUP:
+ log_error("Caught signal %d, exiting", caught_sig);
+ rc = 1;
+#if 0 /* XXX: We probably need some safety check like this */
+ if (list_empty(&mounts)) {
+ log_error("Caught signal %d, exiting",
+ caught_sig);
+ rc = 1;
+ } else {
+ log_error("Caught signal %d, but mounts exist. Ignoring.",
+ caught_sig);
+ rc = 0;
+ }
+#endif
+ break;
+
+ case SIGSEGV:
+ log_error("Segmentation fault, exiting");
+ rc = 1;
+ if (segv_already) {
+ log_error("Segmentation fault loop detected");
+ abortp = 1;
+ } else
+ segv_already = 1;
+ break;
+
+ default:
+ log_error("Caught signal %d, ignoring", caught_sig);
+ rc = 0;
+ break;
+ }
+
+ if (rc && abortp)
+ abort();
+
+out:
+ return rc;
+}
+
+static int setup_sigpipe(void)
+{
+ int rc;
+ int signal_pipe[2];
+ struct sigaction act;
+
+ rc = pipe(signal_pipe);
+ if (rc) {
+ rc = -errno;
+ log_error("Unable to set up signal pipe: %s",
+ strerror(-rc));
+ goto out;
+ }
+
+ sigpipe_fd = signal_pipe[0];
+ sigpipe_write_fd = signal_pipe[1];
+
+ act.sa_sigaction = NULL;
+ act.sa_restorer = NULL;
+ sigemptyset(&act.sa_mask);
+ act.sa_handler = handler;
+#ifdef SA_INTERRUPT
+ act.sa_flags = SA_INTERRUPT;
+#endif
+
+ rc += sigaction(SIGTERM, &act, NULL);
+ rc += sigaction(SIGINT, &act, NULL);
+ rc += sigaction(SIGHUP, &act, NULL);
+ rc += sigaction(SIGQUIT, &act, NULL);
+ rc += sigaction(SIGSEGV, &act, NULL);
+ act.sa_handler = SIG_IGN;
+ rc += sigaction(SIGPIPE, &act, NULL); /* Get EPIPE instead */
+
+ if (rc)
+ log_error("Unable to set up signal handlers");
+
+out:
+ return rc;
+}
+
static int loop(void)
{
struct pollfd *pollfd;
@@ -49,28 +158,40 @@
pollfd[0].fd = member_fd;
pollfd[0].events = POLLIN;
- maxi = 0;
+ rv = setup_sigpipe();
+ if (rv)
+ goto out;
+ pollfd[1].fd = sigpipe_fd;
+ pollfd[1].events = POLLIN;
+ maxi = 1;
+
for (;;) {
rv = poll(pollfd, maxi + 1, -1);
if (rv == -1 && errno == EINTR)
continue;
if (rv < 0) {
log_error("poll errno %d", errno);
- goto out;
+ goto stop;
}
for (i = 0; i <= maxi; i++) {
if (pollfd[i].revents & POLLIN) {
- if (pollfd[i].fd == member_fd)
- process_member();
+ if (pollfd[i].fd == member_fd) {
+ rv = process_member();
+ if (rv)
+ goto stop;
+ } else if (pollfd[i].fd == sigpipe_fd) {
+ rv = handle_signal();
+ if (rv)
+ goto stop;
+ }
}
if (pollfd[i].revents & POLLHUP) {
if (pollfd[i].fd == member_fd) {
log_error("cluster is down, exiting");
- finalize_cluster();
- exit(1);
+ goto stop;
}
log_debug("closing fd %d", pollfd[i].fd);
close(pollfd[i].fd);
@@ -78,7 +199,11 @@
}
}
rv = 0;
- out:
+
+stop:
+ finalize_cluster(NULL);
+
+out:
free(pollfd);
return rv;
}
@@ -239,11 +364,7 @@
if (!daemon_debug_opt)
daemonize();
- else
- signal(SIGINT, sigterm_handler);
- signal(SIGTERM, sigterm_handler);
-
set_scheduler();
set_oom_adj(-16);
@@ -253,11 +374,8 @@
* If this daemon was killed and the cluster shut down, and
* then the cluster brought back up and this daemon restarted,
* there will be old configfs entries we need to clear out.
- * XXX: This can't work becasue we don't have cman to talk to. We
- * need a cleanup that doesn't use cman_get_cluster(). Probably
- * needs to look at o2cb_list_clusters() and go from there.
*/
- finalize_cluster();
+ remove_stale_clusters();
return loop();
}
Modified: branches/cman-based/o2cb_controld/member_cman.c
===================================================================
--- branches/cman-based/o2cb_controld/member_cman.c 2007-08-20 20:27:48 UTC (rev 1398)
+++ branches/cman-based/o2cb_controld/member_cman.c 2007-08-20 21:03:44 UTC (rev 1399)
@@ -172,8 +172,7 @@
/* do we want to try to forcibly clean some stuff up
in the kernel here? */
log_error("cluster is down, exiting");
- finalize_cluster();
- exit(1);
+ return 1;
}
return 0;
}
Modified: branches/cman-based/o2cb_controld/o2cb_controld.h
===================================================================
--- branches/cman-based/o2cb_controld/o2cb_controld.h 2007-08-20 20:27:48 UTC (rev 1398)
+++ branches/cman-based/o2cb_controld/o2cb_controld.h 2007-08-20 21:03:44 UTC (rev 1399)
@@ -82,8 +82,9 @@
int add_configfs_node(const char *name, int nodeid, char *addr, int addrlen,
int local);
void del_configfs_node(const char *name);
-void finalize_cluster(void);
+void finalize_cluster(const char *name);
void initialize_o2cb(void);
+void remove_stale_clusters(void);
/* member_xxx.c */
int setup_member(void);
Modified: branches/cman-based/ocfs2_controld/main.c
===================================================================
--- branches/cman-based/ocfs2_controld/main.c 2007-08-20 20:27:48 UTC (rev 1398)
+++ branches/cman-based/ocfs2_controld/main.c 2007-08-20 21:03:44 UTC (rev 1399)
@@ -54,9 +54,9 @@
static void handler(int signum)
{
- log_debug("Caught signal %d\n", signum);
+ log_debug("Caught signal %d", signum);
if (write(sigpipe_write_fd, &signum, sizeof(signum)) < sizeof(signum))
- log_error("Problem writing signal: %s\n", strerror(-errno));
+ log_error("Problem writing signal: %s", strerror(-errno));
}
static int handle_signal(void)
More information about the Ocfs2-tools-commits
mailing list