[Ocfs2-tools-commits] jlbec commits r1399 - in branches/cman-based: o2cb_controld ocfs2_controld

svn-commits at oss.oracle.com svn-commits at oss.oracle.com
Mon Aug 20 14:03:46 PDT 2007


Author: jlbec
Date: 2007-08-20 14:03:44 -0700 (Mon, 20 Aug 2007)
New Revision: 1399

Modified:
   branches/cman-based/o2cb_controld/action.c
   branches/cman-based/o2cb_controld/main.c
   branches/cman-based/o2cb_controld/member_cman.c
   branches/cman-based/o2cb_controld/o2cb_controld.h
   branches/cman-based/ocfs2_controld/main.c
Log:

Provide proper signal handling and exit path for o2cb_controld.  This is
the same signal handling setup as ocfs2_controld.

As of right now, there is nothing preventing o2cb_controld from exiting
on a signal.  We probably don't want it to exit unless ocfs2_controld is
already offline.  We'll fix that later.



Modified: branches/cman-based/o2cb_controld/action.c
===================================================================
--- branches/cman-based/o2cb_controld/action.c	2007-08-20 20:27:48 UTC (rev 1398)
+++ branches/cman-based/o2cb_controld/action.c	2007-08-20 21:03:44 UTC (rev 1399)
@@ -95,46 +95,97 @@
 	return 0;
 }
 
-static void finalize_nodes(void)
+static void del_configfs_node_full(const char *cluster, const char *name)
 {
+	errcode_t err;
+
+	log_debug("del_configfs_node \"%s\"", name);
+
+	err = o2cb_del_node(cluster, name);
+	if (err) {
+		com_err(prog_name, err,
+			"while deleting node \"%s\" in cluster\"%s\"",
+			name, cluster);
+	}
+}
+
+void del_configfs_node(const char *name)
+{
+	del_configfs_node_full(cluster_name, name);
+}
+
+static void finalize_nodes(const char *cluster)
+{
 	int i;
 	char **nodes = NULL;
 	errcode_t err;
 
-	err = o2cb_list_nodes(cluster_name, &nodes);
+	err = o2cb_list_nodes((char *)cluster, &nodes);
 	if (err) {
 		if (err != O2CB_ET_SERVICE_UNAVAILABLE) {
 			com_err(prog_name, err,
 				"while listing nodes for cluster \"%s\"",
-				cluster_name);
+				cluster);
 		}
 		return;
 	}
 
 	for (i = 0; nodes && nodes[i] && &(nodes[i]); i++)
-		del_configfs_node(nodes[i]);
+		del_configfs_node_full(cluster, nodes[i]);
 
 	o2cb_free_nodes_list(nodes);
 }
 
-void finalize_cluster(void)
+/*
+ * If cluster is NULL, the cluster name is queried from cman instead.
+ */
+void finalize_cluster(const char *cluster)
 {
 	errcode_t err;
 
-	if (fill_cluster_name())
-		return;
+	if (!cluster) {
+		if (fill_cluster_name())
+			return;
+		cluster = cluster_name;
+	}
 
-	log_debug("Cleaning up cluster \"%s\"", cluster_name);
+	log_debug("Cleaning up cluster \"%s\"", cluster);
 
-	finalize_nodes();
+	finalize_nodes(cluster);
 
-	err = o2cb_remove_cluster(cluster_name);
+	err = o2cb_remove_cluster(cluster);
 	if (err && (err != O2CB_ET_SERVICE_UNAVAILABLE))
 		com_err(prog_name, err,
-			"Unable to de-configure cluster \"%s\"",
-			cluster_name);
+			"Unable to de-configure cluster \"%s\"", cluster);
 }
 
+/*
+ * This is used during startup.  CMan is not connected, so we'll use
+ * o2cb to find any stale clusters.
+ */
+void remove_stale_clusters(void)
+{
+	errcode_t err;
+	int i;
+	char **clusters;
+
+	err = o2cb_list_clusters(&clusters);
+	if (err) {
+		/*
+		 * We shouldn't get SERVICE_UNAVAILABLE, as o2cb_init()
+		 * would have failed.
+		 */
+		com_err(prog_name, err,
+			"while trying to find any stale clusters");
+		return;
+	}
+
+	for (i = 0; clusters && clusters[i] && &(clusters[i]); i++)
+		finalize_cluster(clusters[i]);
+
+	o2cb_free_cluster_list(clusters);
+}
+
 static int initialize_cluster(void)
 {
 	static int initialized = 0;
@@ -187,28 +238,17 @@
 	return rv;
 }
 
-void del_configfs_node(const char *name)
-{
-	errcode_t err;
 
-	log_debug("del_configfs_node \"%s\"", name);
 
-	err = o2cb_del_node(cluster_name, name);
-	if (err) {
-		com_err(prog_name, err,
-			"while deleting node \"%s\" in cluster\"%s\"",
-			name, cluster_name);
-	}
-}
-
-
 void initialize_o2cb(void)
 {
 	errcode_t err;
 
+	initialize_o2cb_error_table();
+
 	err = o2cb_init();
 	if (err) {
-		com_err(prog_name, err, "Cannot initialize o2cb\n");
+		com_err(prog_name, err, "while initializing o2cb");
 		exit(EXIT_FAILURE);
 	}
 }

Modified: branches/cman-based/o2cb_controld/main.c
===================================================================
--- branches/cman-based/o2cb_controld/main.c	2007-08-20 20:27:48 UTC (rev 1398)
+++ branches/cman-based/o2cb_controld/main.c	2007-08-20 21:03:44 UTC (rev 1399)
@@ -28,12 +28,121 @@
 #define LOCKFILE_NAME			"/var/run/o2cb_controld.pid"
 
 static int member_fd;
+static int sigpipe_write_fd, sigpipe_fd;
 
-static void sigterm_handler(int sig)
+static void handler(int signum)
 {
-	finalize_cluster();
+	log_debug("Caught signal %d", signum);
+	if (write(sigpipe_write_fd, &signum, sizeof(signum)) < sizeof(signum))
+		log_error("Problem writing signal: %s\n", strerror(-errno));
 }
 
+static int handle_signal(void)
+{
+	int rc, caught_sig, abortp = 0;
+	static int segv_already = 0;
+
+	rc = read(sigpipe_fd, (char *)&caught_sig, sizeof(caught_sig));
+	if (rc < 0) {
+		rc = -errno;
+		log_error("Error reading from signal pipe: %s",
+			  strerror(-rc));
+		goto out;
+	}
+
+	if (rc != sizeof(caught_sig)) {
+		rc = -EIO;
+		log_error("Error reading from signal pipe: %s",
+			  strerror(-rc));
+		goto out;
+	}
+
+	switch (caught_sig) {
+		case SIGQUIT:
+			abortp = 1;
+			/* FALL THROUGH */
+
+		case SIGTERM:
+		case SIGINT:
+		case SIGHUP:
+			log_error("Caught signal %d, exiting", caught_sig);
+			rc = 1;
+#if 0  /* XXX: We probably need some safety check like this */
+			if (list_empty(&mounts)) {
+				log_error("Caught signal %d, exiting",
+					  caught_sig);
+				rc = 1;
+			} else {
+				log_error("Caught signal %d, but mounts exist.  Ignoring.",
+					  caught_sig);
+				rc = 0;
+			}
+#endif
+			break;
+
+		case SIGSEGV:
+			log_error("Segmentation fault, exiting");
+			rc = 1;
+			if (segv_already) {
+				log_error("Segmentation fault loop detected");
+				abortp = 1;
+			} else
+				segv_already = 1;
+			break;
+
+		default:
+			log_error("Caught signal %d, ignoring", caught_sig);
+			rc = 0;
+			break;
+	}
+
+	if (rc && abortp)
+		abort();
+
+out:
+	return rc;
+}
+
+static int setup_sigpipe(void)
+{
+	int rc;
+	int signal_pipe[2];
+	struct sigaction act;
+
+	rc = pipe(signal_pipe);
+	if (rc) {
+		rc = -errno;
+		log_error("Unable to set up signal pipe: %s",
+			  strerror(-rc));
+		goto out;
+	}
+
+	sigpipe_fd = signal_pipe[0];
+	sigpipe_write_fd = signal_pipe[1];
+
+	act.sa_sigaction = NULL;
+	act.sa_restorer = NULL;
+	sigemptyset(&act.sa_mask);
+	act.sa_handler = handler;
+#ifdef SA_INTERRUPT
+	act.sa_flags = SA_INTERRUPT;
+#endif
+
+	rc += sigaction(SIGTERM, &act, NULL);
+	rc += sigaction(SIGINT, &act, NULL);
+	rc += sigaction(SIGHUP, &act, NULL);
+	rc += sigaction(SIGQUIT, &act, NULL);
+	rc += sigaction(SIGSEGV, &act, NULL);
+	act.sa_handler = SIG_IGN;
+	rc += sigaction(SIGPIPE, &act, NULL);  /* Get EPIPE instead */
+
+	if (rc)
+		log_error("Unable to set up signal handlers");
+
+out:
+	return rc;
+}
+
 static int loop(void)
 {
 	struct pollfd *pollfd;
@@ -49,28 +158,40 @@
 	pollfd[0].fd = member_fd;
 	pollfd[0].events = POLLIN;
 
-	maxi = 0;
+	rv = setup_sigpipe();
+	if (rv)
+		goto out;
+	pollfd[1].fd = sigpipe_fd;
+	pollfd[1].events = POLLIN;
 
+	maxi = 1;
+
 	for (;;) {
 		rv = poll(pollfd, maxi + 1, -1);
 		if (rv == -1 && errno == EINTR)
 			continue;
 		if (rv < 0) {
 			log_error("poll errno %d", errno);
-			goto out;
+			goto stop;
 		}
 
 		for (i = 0; i <= maxi; i++) {
 			if (pollfd[i].revents & POLLIN) {
-				if (pollfd[i].fd == member_fd)
-					process_member();
+				if (pollfd[i].fd == member_fd) {
+					rv = process_member();
+					if (rv)
+						goto stop;
+				} else if (pollfd[i].fd == sigpipe_fd) {
+					rv = handle_signal();
+					if (rv)
+						goto stop;
+				}
 			}
 
 			if (pollfd[i].revents & POLLHUP) {
 				if (pollfd[i].fd == member_fd) {
 					log_error("cluster is down, exiting");
-					finalize_cluster();
-					exit(1);
+					goto stop;
 				}
 				log_debug("closing fd %d", pollfd[i].fd);
 				close(pollfd[i].fd);
@@ -78,7 +199,11 @@
 		}
 	}
 	rv = 0;
- out:
+
+stop:
+	finalize_cluster(NULL);
+
+out:
 	free(pollfd);
 	return rv;
 }
@@ -239,11 +364,7 @@
 
 	if (!daemon_debug_opt)
 		daemonize();
-        else
-                signal(SIGINT, sigterm_handler);
 
-	signal(SIGTERM, sigterm_handler);
-
 	set_scheduler();
 	set_oom_adj(-16);
 
@@ -253,11 +374,8 @@
 	 * If this daemon was killed and the cluster shut down, and
 	 * then the cluster brought back up and this daemon restarted,
 	 * there will be old configfs entries we need to clear out.
-	 * XXX: This can't work becasue we don't have cman to talk to.  We
-	 * need a cleanup that doesn't use cman_get_cluster().  Probably
-	 * needs to look at o2cb_list_clusters() and go from there.
 	 */
-	finalize_cluster();
+	remove_stale_clusters();
 
 	return loop();
 }

Modified: branches/cman-based/o2cb_controld/member_cman.c
===================================================================
--- branches/cman-based/o2cb_controld/member_cman.c	2007-08-20 20:27:48 UTC (rev 1398)
+++ branches/cman-based/o2cb_controld/member_cman.c	2007-08-20 21:03:44 UTC (rev 1399)
@@ -172,8 +172,7 @@
 		/* do we want to try to forcibly clean some stuff up
 		   in the kernel here? */
 		log_error("cluster is down, exiting");
-		finalize_cluster();
-		exit(1);
+		return 1;
 	}
 	return 0;
 }

Modified: branches/cman-based/o2cb_controld/o2cb_controld.h
===================================================================
--- branches/cman-based/o2cb_controld/o2cb_controld.h	2007-08-20 20:27:48 UTC (rev 1398)
+++ branches/cman-based/o2cb_controld/o2cb_controld.h	2007-08-20 21:03:44 UTC (rev 1399)
@@ -82,8 +82,9 @@
 int add_configfs_node(const char *name, int nodeid, char *addr, int addrlen,
                       int local);
 void del_configfs_node(const char *name);
-void finalize_cluster(void);
+void finalize_cluster(const char *name);
 void initialize_o2cb(void);
+void remove_stale_clusters(void);
 
 /* member_xxx.c */
 int setup_member(void);

Modified: branches/cman-based/ocfs2_controld/main.c
===================================================================
--- branches/cman-based/ocfs2_controld/main.c	2007-08-20 20:27:48 UTC (rev 1398)
+++ branches/cman-based/ocfs2_controld/main.c	2007-08-20 21:03:44 UTC (rev 1399)
@@ -54,9 +54,9 @@
 
 static void handler(int signum)
 {
-	log_debug("Caught signal %d\n", signum);
+	log_debug("Caught signal %d", signum);
 	if (write(sigpipe_write_fd, &signum, sizeof(signum)) < sizeof(signum))
-		log_error("Problem writing signal: %s\n", strerror(-errno));
+		log_error("Problem writing signal: %s", strerror(-errno));
 }
 
 static int handle_signal(void)




More information about the Ocfs2-tools-commits mailing list