[Ocfs2-tools-devel] [PATCH 20/39] ocfs2_controld: Leave groups

Fri Mar 14 16:52:43 PDT 2008

The daemon now handles leaving a group.  When a umount request is
received, it will leave the matching group.  When we are notified that
we've left, we'll tell the mount code.

In turn, the mount code handles normal and abnormal leaves.  In the
abnormal case, it will drop all nodes and then drop the group.

This allows us to reinstate bail_on_mounts() for the unplanned daemon
exit.  It just relies on simulating abnormal leaves.

Signed-off-by: Joel Becker <joel.becker at oracle.com>
---
 ocfs2_controld/cpg.c            |  116 ++++++++++++++++++++++++++-------
 ocfs2_controld/main.c           |   26 -------
 ocfs2_controld/mount.c          |  137 +++++++++++++++++++++++----------------
 ocfs2_controld/ocfs2_controld.h |    4 +-
 4 files changed, 175 insertions(+), 108 deletions(-)

diff --git a/ocfs2_controld/cpg.c b/ocfs2_controld/cpg.c
index 61276c6..c4e9322 100644
--- a/ocfs2_controld/cpg.c
+++ b/ocfs2_controld/cpg.c
@@ -221,6 +221,7 @@ static void handle_node_join(struct cpg_address *addr,
 					   cg->cg_cb_member_count,
 					   push_node_on_join,
 					   cg);
+			cg->cg_joined = 1;
 			cg->cg_set_cgroup(cg, cg->cg_user_data);
 		}
 	} else
@@ -228,6 +229,37 @@ static void handle_node_join(struct cpg_address *addr,
 
 }
 
+static void pop_nodes_on_leave(struct cpg_address *addr, void *user_data)
+{
+	struct cgroup *cg = user_data;
+
+	pop_node(cg, addr->nodeid);
+}
+
+static void finalize_group(struct cgroup *cg)
+{
+	/* First, tell our mounter */
+	cg->cg_set_cgroup(NULL, cg->cg_user_data);
+
+	for_each_node_list(cg->cg_members, cg->cg_member_count,
+			   pop_nodes_on_leave, cg);
+	/* We're not in members anymore */
+	pop_node(cg, our_nodeid);
+
+	if (cg->cg_node_count)
+		log_error("node count is not zero on group %.*s!",
+			  cg->cg_name.length, cg->cg_name.value);
+	if (!list_empty(&cg->cg_nodes))
+		log_error("node list is not empty on group %.*s!",
+			  cg->cg_name.length, cg->cg_name.value);
+
+	cpg_finalize(cg->cg_handle);
+	connection_dead(cg->cg_ci);
+
+	list_del(&cg->cg_list);
+	free(cg);
+}
+
 static void handle_node_leave(struct cpg_address *addr,
 			      void *user_data)
 {
@@ -238,7 +270,10 @@ static void handle_node_leave(struct cpg_address *addr,
 			log_debug("Node %d leaves group %.*s",
 				  addr->nodeid, cg->cg_name.length,
 				  cg->cg_name.value);
-			pop_node(cg, addr->nodeid);
+			if (addr->nodeid == our_nodeid)
+				finalize_group(cg);
+			else
+				pop_node(cg, addr->nodeid);
 			break;
 
 		case CPG_REASON_NODEDOWN:
@@ -520,15 +555,20 @@ static void process_cpg(int ci)
 
 static void dead_cpg(int ci)
 {
+	struct cgroup *cg;
+
 	if (ci == daemon_group.cg_ci) {
 		log_error("cpg connection died");
 		shutdown_daemon();
 
 		/* We can't talk to cpg anymore */
 		daemon_group.cg_handle = 0;
+		connection_dead(ci);
+	} else {
+		cg = client_to_group(ci);
+		if (cg)
+			finalize_group(cg);
 	}
-
-	connection_dead(ci);
 }
 
 static int start_join(struct cgroup *cg)
@@ -550,6 +590,44 @@ static int start_join(struct cgroup *cg)
 	return error;
 }
 
+static int start_leave(struct cgroup *cg)
+{
+	int i;
+	cpg_error_t error;
+
+	if (!cg->cg_handle)
+		return -EINVAL;
+
+	log_debug("leaving group %.*s", cg->cg_name.length,
+		  cg->cg_name.value);
+
+	for (i = 0; i < 10; i++) {
+		error = cpg_leave(cg->cg_handle,
+				  &cg->cg_name);
+		if (error == CPG_ERR_TRY_AGAIN) {
+			if (!i)
+				log_debug("cpg_leave retry");
+			sleep(1);
+			continue;
+		}
+
+		if (error == CPG_OK)
+			log_debug("cpg_leave succeeded");
+		else
+			log_error("cpg_leave error %d", error);
+
+		break;
+	}
+
+	if (error == CPG_OK)
+		return 0;
+	else if (error == CPG_ERR_TRY_AGAIN)
+		return -EAGAIN;
+	else
+		return -EIO;
+}
+
+
 static int init_group(struct cgroup *cg, const char *name)
 {
 	cpg_error_t error;
@@ -593,6 +671,17 @@ out:
 	return error;
 }
 
+int group_leave(struct cgroup *cg)
+{
+	if (!cg->cg_joined) {
+		log_error("Unable to leave unjoined group %.*s",
+			  cg->cg_name.length, cg->cg_name.value);
+		return -EINVAL;
+	}
+
+	return start_leave(cg);
+}
+
 int group_join(const char *name,
 	       void (*set_cgroup)(struct cgroup *cg, void *user_data),
 	       void (*node_down)(int nodeid, void *user_data),
@@ -637,29 +726,10 @@ int setup_cpg(void)
 
 void exit_cpg(void)
 {
-	int i;
-	cpg_error_t error;
-
 	if (!daemon_group.cg_handle)
 		return;
 
-	for (i = 0; i < 10; i++) {
-		error = cpg_leave(daemon_group.cg_handle,
-				  &daemon_group.cg_name);
-		if (error == CPG_ERR_TRY_AGAIN) {
-			if (!i)
-				log_debug("cpg_leave retry");
-			sleep(1);
-			continue;
-		}
-
-		if (error == CPG_OK)
-			log_debug("cpg_leave succeeded");
-		else
-			log_error("cpg_leave error %d", error);
-
-		break;
-	}
+	start_leave(&daemon_group);
 
 	log_debug("closing cpg connection");
 	cpg_finalize(daemon_group.cg_handle);
diff --git a/ocfs2_controld/main.c b/ocfs2_controld/main.c
index c5515f4..23fe97d 100644
--- a/ocfs2_controld/main.c
+++ b/ocfs2_controld/main.c
@@ -418,8 +418,6 @@ static void process_client(int ci)
 
 		case CM_UNMOUNT:
 		rv = do_unmount(ci, fd, argv[0], argv[1], argv[2]);
-		if (!rv)
-			hack_leave(argv[1]);
 		break;
 
 		case CM_STATUS:
@@ -448,28 +446,6 @@ static void process_client(int ci)
 	return;
 }
 
-#if 0
-/*
- * THIS FUNCTION CAUSES PROBLEMS.
- *
- * bail_on_mounts() is called when we are forced to exit via a signal or
- * cman dying on us.  As such, it removes regions from o2cb but does
- * not communicate with cman.  This can cause o2cb to self-fence or cman
- * to go nuts.  But hey, if you SIGKILL the daemon, you get what you pay
- * for.
- */
-static void bail_on_mounts(void)
-{
-	struct list_head *p, *t;
-	struct mountgroup *mg;
-
-	list_for_each_safe(p, t, &mounts) {
-		mg = list_entry(p, struct mountgroup, list);
-		clean_up_mountgroup(mg);
-	}
-}
-#endif
-
 static void process_listener(int ci)
 {
 	int fd, i;
@@ -572,9 +548,7 @@ stop:
 	if (!rv && have_mounts())
 		rv = 1;
 
-#if 0
 	bail_on_mounts();
-#endif
 
 	exit_cpg();
 	exit_cman();
diff --git a/ocfs2_controld/mount.c b/ocfs2_controld/mount.c
index c59752f..04617f9 100644
--- a/ocfs2_controld/mount.c
+++ b/ocfs2_controld/mount.c
@@ -51,9 +51,6 @@ struct mountgroup {
 	int			mg_mount_fd;
 	int			mg_mount_notified;
 
-	/* Interaction with cpg.c */
-	struct cgroup		*mg_cg;
-
 	int			mg_error;
 	char			mg_error_msg[128];
 };
@@ -224,7 +221,11 @@ static void remove_mountpoint(struct mountgroup *mg,
 		if (mg->mg_group) {
 			log_debug("calling LEAVE for group %s",
 				  mg->mg_uuid);
-			/* XXX leave the group */
+			if (group_leave(mg->mg_group)) {
+				log_error("Unable to leave group %s",
+					  mg->mg_uuid);
+				/* XXX what to do?  finalize?  Shutdown? */
+			}
 		} else {
 			/*
 			 * Join is in progress, let's leave when we get
@@ -238,37 +239,6 @@ static void remove_mountpoint(struct mountgroup *mg,
 		free(mp);
 }
 
-void hack_leave(char *uuid)
-{
-	struct mountgroup *mg;
-	struct mountpoint *mp;
-
-	mg = find_mg_by_uuid(uuid);
-	if (!mg) {
-		log_error("Unable to find mg for \"%s\"", uuid);
-		return;
-	}
-
-	if (!list_empty(&mg->mg_mountpoints))
-		return;
-
-	mp = mg->mg_mp_in_progress;
-	if (!mp) {
-		log_error("No mp in progress for \"%s\"", uuid);
-		return;
-	}
-
-	if (!mg->mg_leave_on_join) {
-		log_error("leave_on_join not set on \"%s\"", uuid);
-		return;
-	}
-
-	log_debug("leaving group %s", uuid);
-	free(mp);
-	list_del(&mg->mg_list);
-	free(mg);
-}
-
 static void add_mountpoint(struct mountgroup *mg, const char *device,
 			   const char *mountpoint, int ci, int fd)
 {
@@ -331,9 +301,9 @@ static void finish_join(struct mountgroup *mg, struct cgroup *cg)
 {
 	struct mountpoint *mp;
 
-	if (mg->mg_cg) {
+	if (mg->mg_group) {
 		log_error("cgroup passed, but one already exists! (mg %s, existing %p, new %p)",
-			  mg->mg_uuid, mg->mg_cg, cg);
+			  mg->mg_uuid, mg->mg_group, cg);
 		return;
 	}
 
@@ -346,7 +316,11 @@ static void finish_join(struct mountgroup *mg, struct cgroup *cg)
 
 	if (list_empty(&mp->mp_list)) {
 		if (mg->mg_leave_on_join) {
-			/* XXX Start leave */
+			if (group_leave(cg)) {
+				log_error("Unable to leave group %s",
+					  mg->mg_uuid);
+				/* XXX What to do? */
+			}
 		} else {
 			log_error("mountgroup %s is in the process of leaving, not joining",
 				  mg->mg_uuid);
@@ -360,34 +334,71 @@ static void finish_join(struct mountgroup *mg, struct cgroup *cg)
 	}
 
 	/* Ok, we've successfully joined the group */
-	mg->mg_cg = cg;
+	mg->mg_group = cg;
 	notify_mount_client(mg);
 }
 
+static void mount_node_down(int nodeid, void *user_data)
+{
+	struct mountgroup *mg = user_data;
+
+	log_debug("Node %d has left mountgroup %s", nodeid, mg->mg_uuid);
+
+	/* XXX Write to sysfs */
+}
+
+static void force_node_down(int nodeid, void *user_data)
+{
+	struct mountgroup *mg = user_data;
+
+	log_error("Forcing node %d down in group %s", nodeid, mg->mg_uuid);
+	mount_node_down(nodeid, mg);
+}
+
 static void finish_leave(struct mountgroup *mg)
 {
+	struct list_head *p, *n;
+	struct mountpoint *mp;
+
 	if (list_empty(&mg->mg_mountpoints) &&
 	    mg->mg_mp_in_progress) {
 		/* We're done */
 		notify_mount_client(mg);
 
 		/* This is possible due to leave_on_join */
-		if (!mg->mg_cg)
-			log_debug("mg_cg was NULL");
+		if (!mg->mg_group)
+			log_debug("mg_group was NULL");
 
 		free(mg->mg_mp_in_progress);
-		list_del(&mg->mg_list);
-		free(mg);
-		return;
+		goto out;
 	}
 
 	/* This leave is unexpected */
 
 	log_error("Unexpected leave of group %s", mg->mg_uuid);
-	if (!mg->mg_cg)
-		log_error("No mg_cg for group %s", mg->mg_uuid);
+	if (mg->mg_group)
+		for_each_node(mg->mg_group, force_node_down, mg);
+	else
+		log_error("No mg_group for group %s", mg->mg_uuid);
+
+	list_for_each_safe(p, n, &mg->mg_mountpoints) {
+		mp = list_entry(p, struct mountpoint, mp_list);
+		list_del(&mp->mp_list);
+		/* The in-progress mp may or may not be on the list */
+		if (mp != mg->mg_mp_in_progress)
+			free(mp);
+	}
+	/* So free the in-progress mp last */
+	if (mg->mg_mp_in_progress)
+		free(mg->mg_mp_in_progress);
 
-	/* XXX Do dire things */
+	/* If we had a client attached, let it know we died */
+	if (mg->mg_mount_ci != -1)
+		connection_dead(mg->mg_mount_ci);
+
+out:
+	list_del(&mg->mg_list);
+	free(mg);
 }
 
 /*
@@ -397,20 +408,20 @@ static void finish_leave(struct mountgroup *mg)
  * 1) We've asked to join a group for a new filesystem.
  *    - mg_mp_in_progress != NULL
  *    - length(mg_mountpoints) == 1
- *    - mg_cg == NULL
+ *    - mg_group == NULL
  *
  *    cg will be our now-joined group.
  *
  * 2) We've asked to leave a group upon the last unmount of a filesystem.
  *   - mg_mp_in_progress != NULL
  *   - mg_mountpoints is empty
- *   - mg_cg is only NULL if we had to set leave_on_join.
+ *   - mg_group is only NULL if we had to set leave_on_join.
  *
  *   cg is NULL.  We should complete our leave.
  *
  * 3) We've dropped out of the group unexpectedly.
  *   - mg_mountpoints is not empty.
- *   - mg_cg != NULL
+ *   - mg_group != NULL
  *
  *   cg is NULL.  We should basically crash.  This usually is handled by
  *   closing our sysfs fd.
@@ -425,13 +436,24 @@ static void mount_set_group(struct cgroup *cg, void *user_data)
 		finish_leave(mg);
 }
 
-static void mount_node_down(int nodeid, void *user_data)
+/*
+ * THIS FUNCTION CAUSES PROBLEMS.
+ *
+ * bail_on_mounts() is called when we are forced to exit via a signal or
+ * cman dying on us.  As such, it tells ocfs2 that nodes are down but does
+ * not communicate with cman or cpg.  This can cause ocfs2 to self-fence or
+ * cman to go nuts.  But hey, if you SIGKILL the daemon, you get what you
+ * pay for.
+ */
+void bail_on_mounts(void)
 {
-	struct mountgroup *mg = user_data;
-
-	log_debug("Node %d has left mountgroup %s", nodeid, mg->mg_uuid);
+	struct list_head *p, *n;
+	struct mountgroup *mg;
 
-	/* XXX Write to sysfs */
+	list_for_each_safe(p, n, &mounts) {
+		mg = list_entry(p, struct mountgroup, mg_list);
+		finish_leave(mg);
+	}
 }
 
 int start_mount(int ci, int fd, const char *uuid, const char *device,
@@ -481,7 +503,7 @@ int start_mount(int ci, int fd, const char *uuid, const char *device,
 			   mg->mg_uuid);
 
 		/*
-		 * Because we never started a join, mg->mg_cg is NULL.
+		 * Because we never started a join, mg->mg_group is NULL.
 		 * remove_mountpoint() will set up for leave_on_join, but
 		 * that actually never happens.  Thus, it is safe to
 		 * clear mp_in_progress.
@@ -661,7 +683,8 @@ int remove_mount(int ci, int fd, const char *uuid, const char *mountpoint)
 
 	mg = find_mg_by_uuid(uuid);
 	if (!mg) {
-		fill_error(&mg_error, ENOENT, "Unknown filesystem %s",
+		fill_error(&mg_error, ENOENT,
+			   "Filesystem %s is unknown or not mounted anywhere",
 			   uuid);
 		goto out;
 	}
diff --git a/ocfs2_controld/ocfs2_controld.h b/ocfs2_controld/ocfs2_controld.h
index e70e298..91de939 100644
--- a/ocfs2_controld/ocfs2_controld.h
+++ b/ocfs2_controld/ocfs2_controld.h
@@ -78,6 +78,7 @@ int group_join(const char *name,
 	       void (*set_cgroup)(struct cgroup *cg, void *user_data),
 	       void (*node_down)(int nodeid, void *user_data),
 	       void *user_data);
+int group_leave(struct cgroup *cg);
 
 /* mount.c */
 void init_mounts(void);
@@ -88,7 +89,6 @@ int complete_mount(int ci, int fd, const char *uuid, const char *errcode,
 		   const char *mountpoint);
 int remove_mount(int ci, int fd, const char *uuid, const char *mountpoint);
 void dead_mounter(int ci, int fd);
+void bail_on_mounts(void);
 
-/* This is a hack to test umount until cpg leave happens */
-void hack_leave(char *uuid);
 #endif
-- 
1.5.3.8