[Ocfs2-tools-devel] [PATCH 19/39] ocfs2_controld: Join a group on
filesystem mount.
Joel Becker
joel.becker at oracle.com
Fri Mar 14 16:52:42 PDT 2008
When a filesystem mounts for the first time, join a group in CPG. This
encompasses all the join code required, but does not handle leave yet.
Signed-off-by: Joel Becker <joel.becker at oracle.com>
---
ocfs2_controld/cman.c | 19 +++--
ocfs2_controld/cpg.c | 210 +++++++++++++++++++++++++++++++++++++--
ocfs2_controld/mount.c | 136 ++++++++++++++++++++++++-
ocfs2_controld/ocfs2_controld.h | 4 +
4 files changed, 347 insertions(+), 22 deletions(-)
diff --git a/ocfs2_controld/cman.c b/ocfs2_controld/cman.c
index 520bf99..c5950e1 100644
--- a/ocfs2_controld/cman.c
+++ b/ocfs2_controld/cman.c
@@ -51,7 +51,16 @@ static int cman_node_count;
int kill_cman(int nodeid)
{
- return cman_kill_node(ch_admin, nodeid);
+ int error;
+
+ log_debug("killing node %d", nodeid);
+
+ error = cman_kill_node(ch_admin, nodeid);
+ if (error)
+ log_debug("Unable to kill node %d, %d %d", nodeid, error,
+ errno);
+
+ return error;
}
static int is_member(cman_node_t *node_list, int count, int nodeid)
@@ -149,16 +158,12 @@ static void cman_callback(cman_handle_t h, void *private, int reason, int arg)
{
switch (reason) {
case CMAN_REASON_TRY_SHUTDOWN:
-#if 0
- if (list_empty(&mounts))
-#endif
+ if (!have_mounts())
cman_replyto_shutdown(ch, 1);
-#if 0
else {
log_debug("no to cman shutdown");
cman_replyto_shutdown(ch, 0);
}
-#endif
break;
case CMAN_REASON_STATECHANGE:
@@ -208,7 +213,7 @@ int setup_cman(void)
}
ch_admin = cman_admin_init(NULL);
- if (!ch) {
+ if (!ch_admin) {
log_error("cman_admin_init error %d", errno);
rv = -ENOTCONN;
goto fail_finish;
diff --git a/ocfs2_controld/cpg.c b/ocfs2_controld/cpg.c
index 0d99f18..61276c6 100644
--- a/ocfs2_controld/cpg.c
+++ b/ocfs2_controld/cpg.c
@@ -34,6 +34,11 @@
#include "ocfs2_controld.h"
+struct cnode { /* One node recorded on a cgroup's cg_nodes list */
+ struct list_head cn_list; /* Link used to chain this entry onto cg_nodes */
+ int cn_nodeid; /* ID of the node this entry stands for */
+};
+
struct cgroup {
struct list_head cg_list; /* List of all CPG groups */
@@ -42,10 +47,25 @@ struct cgroup {
int cg_fd;
int cg_ci;
+ /* CPG's idea of the group */
struct cpg_name cg_name;
struct cpg_address cg_members[CPG_MEMBERS_MAX];
int cg_member_count;
+ /*
+ * Our idea of the group.
+ * This lags cg_members until join/leave processing is complete.
+ */
+ struct list_head cg_nodes;
+ int cg_node_count;
+
+ /* Hooks for mounters */
+ void (*cg_set_cgroup)(struct cgroup *cg,
+ void *user_data);
+ void (*cg_node_down)(int nodeid,
+ void *user_data);
+ void *cg_user_data;
+
/* Callback state */
int cg_got_confchg;
struct cpg_address cg_cb_members[CPG_MEMBERS_MAX];
@@ -99,13 +119,126 @@ void for_each_node(struct cgroup *cg,
for_each_node_proxy, &fen);
}
+/*
+ * Look up a node in our own view of the group.
+ *
+ * Walks cg->cg_nodes and returns the entry whose cn_nodeid equals nodeid,
+ * or NULL when no entry matches.
+ */
+static struct cnode *find_node(struct cgroup *cg, int nodeid)
+{
+	struct list_head *pos;
+
+	list_for_each(pos, &cg->cg_nodes) {
+		struct cnode *entry = list_entry(pos, struct cnode, cn_list);
+
+		if (entry->cn_nodeid == nodeid)
+			return entry;
+	}
+
+	return NULL;
+}
+
+/*
+ * Add nodeid to our view of group cg.
+ *
+ * On success a new entry is linked onto cg->cg_nodes and cg_node_count is
+ * incremented.  If nodeid is already on the list, or the entry cannot be
+ * allocated, the problem is logged, shutdown_daemon() is called, and
+ * nothing is added.
+ */
+static void push_node(struct cgroup *cg, int nodeid)
+{
+	struct cnode *new_node;
+
+	if (find_node(cg, nodeid) != NULL) {
+		log_error("Node %d is already part of group %.*s", nodeid,
+			  cg->cg_name.length, cg->cg_name.value);
+		/* Our membership view is wrong, so we can't interact safely. */
+		shutdown_daemon();
+		return;
+	}
+
+	new_node = malloc(sizeof(*new_node));
+	if (new_node == NULL) {
+		log_error("Unable to allocate node structure, exiting");
+		/* Without tracking the group we can't interact safely. */
+		shutdown_daemon();
+		return;
+	}
+
+	new_node->cn_nodeid = nodeid;
+	list_add(&new_node->cn_list, &cg->cg_nodes);
+	cg->cg_node_count++;
+}
+
+/*
+ * Remove nodeid from our view of group cg.
+ *
+ * The matching entry is unlinked from cg->cg_nodes, freed, and
+ * cg_node_count is decremented.  A nodeid that is not on the list is only
+ * logged.  Should the count ever drop below zero, that is logged and the
+ * count is reset to zero.
+ */
+static void pop_node(struct cgroup *cg, int nodeid)
+{
+	struct cnode *cn = find_node(cg, nodeid);
+
+	if (cn) {
+		list_del(&cn->cn_list);
+		/*
+		 * push_node() malloc()ed this entry and nothing else
+		 * holds it once it is off the list, so release it here.
+		 */
+		free(cn);
+		cg->cg_node_count--;
+	} else {
+		log_error("Unable to find node %d in group %.*s", nodeid,
+			  cg->cg_name.length, cg->cg_name.value);
+	}
+
+	if (cg->cg_node_count < 0) {
+		log_error("cg_node_count went negative for group %.*s",
+			  cg->cg_name.length, cg->cg_name.value);
+		cg->cg_node_count = 0;
+	}
+}
+
+/*
+ * for_each_node_list() callback used when this node joins a group.
+ * user_data is the struct cgroup being filled; the member described by
+ * addr is logged and then added with push_node().
+ */
+static void push_node_on_join(struct cpg_address *addr,
+			      void *user_data)
+{
+	struct cgroup *group = user_data;
+
+	log_debug("Filling node %d to group %.*s", addr->nodeid,
+		  group->cg_name.length, group->cg_name.value);
+	push_node(group, addr->nodeid);
+}
+
+/*
+ * for_each_node_list() callback run for every entry in a group's joined
+ * list.  user_data is the struct cgroup whose membership changed.
+ *
+ * A join by another node simply adds that node.  A join by this node
+ * (addr->nodeid == our_nodeid) fills our node list from the callback's
+ * member list and then hands the group to the cg_set_cgroup hook.
+ */
+static void handle_node_join(struct cpg_address *addr,
+			     void *user_data)
+{
+	struct cgroup *group = user_data;
+
+	log_debug("Node %d joins group %.*s",
+		  addr->nodeid, group->cg_name.length, group->cg_name.value);
+
+	/* Somebody else joined: just track them. */
+	if (addr->nodeid != our_nodeid) {
+		push_node(group, addr->nodeid);
+		return;
+	}
+
+	if (group->cg_joined) {
+		log_error("This node has joined group %.*s more than once",
+			  group->cg_name.length, group->cg_name.value);
+		return;
+	}
+
+	log_debug("This node joins group %.*s",
+		  group->cg_name.length, group->cg_name.value);
+
+	/*
+	 * If I read group/daemon/cpg.c correctly, the join list cannot
+	 * hold more than one entry when we ourselves join.  It is therefore
+	 * safe to add every member of cg_cb_members; none of them will
+	 * show up a second time via cg_cb_joined.
+	 */
+	for_each_node_list(group->cg_cb_members, group->cg_cb_member_count,
+			   push_node_on_join, group);
+	group->cg_set_cgroup(group, group->cg_user_data);
+}
static void handle_node_leave(struct cpg_address *addr,
void *user_data)
{
+ struct cgroup *cg = user_data;
+
switch (addr->reason) {
case CPG_REASON_LEAVE:
- /* XXX Handle leave */
+ log_debug("Node %d leaves group %.*s",
+ addr->nodeid, cg->cg_name.length,
+ cg->cg_name.value);
+ pop_node(cg, addr->nodeid);
break;
case CPG_REASON_NODEDOWN:
@@ -133,17 +266,16 @@ static void handle_node_leave(struct cpg_address *addr,
static void group_change(struct cgroup *cg)
{
- log_debug("group %s confchg: members %d, left %d, joined %d",
- cg->cg_name.value, cg->cg_cb_member_count,
- cg->cg_cb_left_count, cg->cg_cb_joined_count);
+ log_debug("group %.*s confchg: members %d, left %d, joined %d",
+ cg->cg_name.length, cg->cg_name.value,
+ cg->cg_cb_member_count, cg->cg_cb_left_count,
+ cg->cg_cb_joined_count);
-#if 0
for_each_node_list(cg->cg_cb_joined, cg->cg_cb_joined_count,
- handle_node_join, NULL);
-#endif
+ handle_node_join, cg);
for_each_node_list(cg->cg_cb_left, cg->cg_cb_left_count,
- handle_node_leave, NULL);
+ handle_node_leave, cg);
}
static void handle_daemon_left(struct cpg_address *addr,
@@ -197,13 +329,22 @@ static void handle_daemon_left(struct cpg_address *addr,
static void handle_node_down(struct cpg_address *addr,
void *user_data)
{
+ struct list_head *p;
+ struct cgroup *cg;
+
if ((addr->reason != CPG_REASON_NODEDOWN) &&
(addr->reason != CPG_REASON_PROCDOWN))
return;
log_debug("node down %d", addr->nodeid);
- /* XXX For each mount group, process node down */
+ list_for_each(p, &group_list) {
+ cg = list_entry(p, struct cgroup, cg_list);
+ if (find_node(cg, addr->nodeid)) {
+ cg->cg_node_down(addr->nodeid, cg->cg_user_data);
+ pop_node(cg, addr->nodeid);
+ }
+ }
}
static void daemon_change(struct cgroup *cg)
@@ -254,18 +395,36 @@ static void process_configuration_change(struct cgroup *cg)
static struct cgroup *client_to_group(int ci)
{
+ struct list_head *p;
+ struct cgroup *cg;
+
if (ci == daemon_group.cg_ci)
return &daemon_group;
+ list_for_each(p, &group_list) {
+ cg = list_entry(p, struct cgroup, cg_list);
+ if (cg->cg_ci == ci)
+ return cg;
+ }
+
log_error("unknown client %d", ci);
return NULL;
}
static struct cgroup *handle_to_group(cpg_handle_t handle)
{
+ struct list_head *p;
+ struct cgroup *cg;
+
if (handle == daemon_group.cg_handle)
return &daemon_group;
+ list_for_each(p, &group_list) {
+ cg = list_entry(p, struct cgroup, cg_list);
+ if (cg->cg_handle == handle)
+ return cg;
+ }
+
log_error("unknown handle %llu", (unsigned long long)handle);
return NULL;
@@ -434,10 +593,43 @@ out:
return error;
}
+/*
+ * Create a new group called name and start joining it.
+ *
+ * A zeroed struct cgroup is allocated, its node list is initialised and
+ * the caller's hooks (set_cgroup, node_down) and user_data are stored in
+ * it before init_group() is called.  On success the group is added to
+ * group_list.
+ *
+ * Returns 0 on success, -ENOMEM if the group cannot be allocated, or the
+ * non-zero result of init_group(), in which case the group is freed.
+ */
+int group_join(const char *name,
+	       void (*set_cgroup)(struct cgroup *cg, void *user_data),
+	       void (*node_down)(int nodeid, void *user_data),
+	       void *user_data)
+{
+	struct cgroup *group;
+	int err;
+
+	group = malloc(sizeof(*group));
+	if (group == NULL) {
+		log_error("Unable to allocate cgroup structure");
+		return -ENOMEM;
+	}
+
+	memset(group, 0, sizeof(*group));
+	INIT_LIST_HEAD(&group->cg_nodes);
+
+	group->cg_set_cgroup = set_cgroup;
+	group->cg_node_down = node_down;
+	group->cg_user_data = user_data;
+
+	err = init_group(group, name);
+	if (err) {
+		free(group);
+		return err;
+	}
+
+	list_add(&group->cg_list, &group_list);
+	return 0;
+}
+
int setup_cpg(void)
{
cpg_error_t error;
+ INIT_LIST_HEAD(&group_list);
error = init_group(&daemon_group, "ocfs2_controld");
return error;
diff --git a/ocfs2_controld/mount.c b/ocfs2_controld/mount.c
index d849cc8..c59752f 100644
--- a/ocfs2_controld/mount.c
+++ b/ocfs2_controld/mount.c
@@ -51,6 +51,9 @@ struct mountgroup {
int mg_mount_fd;
int mg_mount_notified;
+ /* Interaction with cpg.c */
+ struct cgroup *mg_cg;
+
int mg_error;
char mg_error_msg[128];
};
@@ -324,6 +327,113 @@ static void add_mountpoint(struct mountgroup *mg, const char *device,
list_add(&mp->mp_list, &mg->mg_mountpoints);
}
+/*
+ * Complete the join of mountgroup mg to the now-joined group cg.
+ *
+ * The join is only accepted when mg has no group yet, a mountpoint is in
+ * progress and still linked on a list, and mg has at least one
+ * mountpoint.  In that case cg is stored in mg->mg_cg and the mount
+ * client is notified.  Every other state is logged (or, for
+ * leave-on-join, left as a TODO) and changes nothing.
+ */
+static void finish_join(struct mountgroup *mg, struct cgroup *cg)
+{
+	struct mountpoint *in_progress = mg->mg_mp_in_progress;
+
+	if (mg->mg_cg) {
+		log_error("cgroup passed, but one already exists! (mg %s, existing %p, new %p)",
+			  mg->mg_uuid, mg->mg_cg, cg);
+	} else if (!in_progress) {
+		log_error("No mountpoint in progress for mountgroup %s",
+			  mg->mg_uuid);
+	} else if (list_empty(&in_progress->mp_list)) {
+		/* XXX Start leave when mg_leave_on_join is set */
+		if (!mg->mg_leave_on_join)
+			log_error("mountgroup %s is in the process of leaving, not joining",
+				  mg->mg_uuid);
+	} else if (list_empty(&mg->mg_mountpoints)) {
+		log_error("No mountpoints on mountgroup %s", mg->mg_uuid);
+	} else {
+		/* Ok, we've successfully joined the group */
+		mg->mg_cg = cg;
+		notify_mount_client(mg);
+	}
+}
+
+/*
+ * Handle this node having left mountgroup mg's group.
+ *
+ * An expected leave has no mountpoints left and a mountpoint still in
+ * progress: the mount client is notified, then the in-progress
+ * mountpoint and mg itself are freed after mg is taken off its list.
+ * Anything else is an unexpected leave and is only logged for now.
+ */
+static void finish_leave(struct mountgroup *mg)
+{
+	if (!list_empty(&mg->mg_mountpoints) || !mg->mg_mp_in_progress) {
+		/* This leave is unexpected */
+		log_error("Unexpected leave of group %s", mg->mg_uuid);
+		if (mg->mg_cg == NULL)
+			log_error("No mg_cg for group %s", mg->mg_uuid);
+
+		/* XXX Do dire things */
+		return;
+	}
+
+	/* We're done */
+	notify_mount_client(mg);
+
+	/* A NULL mg_cg is possible here due to leave_on_join */
+	if (mg->mg_cg == NULL)
+		log_debug("mg_cg was NULL");
+
+	free(mg->mg_mp_in_progress);
+	list_del(&mg->mg_list);
+	free(mg);
+}
+
+/*
+ * Hook called when we join or leave a group.  user_data is the
+ * mountgroup.  There are three possible states:
+ *
+ * 1) We asked to join a group for a new filesystem.
+ *    - mg_mp_in_progress != NULL
+ *    - length(mg_mountpoints) == 1
+ *    - mg_cg == NULL
+ *
+ *    cg is our now-joined group.
+ *
+ * 2) We asked to leave a group upon the last unmount of a filesystem.
+ *    - mg_mp_in_progress != NULL
+ *    - mg_mountpoints is empty
+ *    - mg_cg is only NULL if we had to set leave_on_join.
+ *
+ *    cg is NULL.  We should complete our leave.
+ *
+ * 3) We dropped out of the group unexpectedly.
+ *    - mg_mountpoints is not empty.
+ *    - mg_cg != NULL
+ *
+ *    cg is NULL.  We should basically crash.  This usually is handled
+ *    by closing our sysfs fd.
+ */
+static void mount_set_group(struct cgroup *cg, void *user_data)
+{
+	struct mountgroup *group = user_data;
+
+	/* A NULL cg means we are out of the group (states 2 and 3). */
+	if (cg == NULL) {
+		finish_leave(group);
+		return;
+	}
+
+	finish_join(group, cg);
+}
+
+/*
+ * Node-down hook for a mountgroup.  user_data is the mountgroup that
+ * nodeid was part of.  For now the event is only logged.
+ */
+static void mount_node_down(int nodeid, void *user_data)
+{
+	struct mountgroup *group = user_data;
+
+	log_debug("Node %d has left mountgroup %s", nodeid, group->mg_uuid);
+
+	/* XXX Write to sysfs */
+}
+
int start_mount(int ci, int fd, const char *uuid, const char *device,
const char *mountpoint)
{
@@ -360,15 +470,29 @@ int start_mount(int ci, int fd, const char *uuid, const char *device,
if (mg->mg_error)
goto out;
- /* XXX This is where we do the asynchronous join
- *
- * Here we fire off a group join. The cpg infrastructure will
+ /*
+ * Fire off a group join. The cpg infrastructure will
* let us know when the group is joined, at which point we
* notify_mount_client(). If there's a failure, we notify as well.
- *
- * XXX: For now, let's pretend :-)
*/
- notify_mount_client(mg);
+ rc = group_join(mg->mg_uuid, mount_set_group, mount_node_down, mg);
+ if (rc) {
+ fill_error(mg, -rc, "Unable to start join to group %s",
+ mg->mg_uuid);
+
+ /*
+ * Because we never started a join, mg->mg_cg is NULL.
+ * remove_mountpoint() will set up for leave_on_join, but
+ * that actually never happens. Thus, it is safe to
+ * clear mp_in_progress.
+ */
+ remove_mountpoint(mg, mountpoint);
+ if (mg->mg_mp_in_progress) {
+ free(mg->mg_mp_in_progress);
+ mg->mg_mp_in_progress = NULL;
+ } else
+ log_error("First mount of %s failed a join, yet mp_in_progress was NULL", mg->mg_uuid);
+ }
out:
/*
diff --git a/ocfs2_controld/ocfs2_controld.h b/ocfs2_controld/ocfs2_controld.h
index fed98f9..e70e298 100644
--- a/ocfs2_controld/ocfs2_controld.h
+++ b/ocfs2_controld/ocfs2_controld.h
@@ -74,6 +74,10 @@ void for_each_node(struct cgroup *cg,
void (*func)(int nodeid,
void *user_data),
void *user_data);
+int group_join(const char *name,
+ void (*set_cgroup)(struct cgroup *cg, void *user_data),
+ void (*node_down)(int nodeid, void *user_data),
+ void *user_data);
/* mount.c */
void init_mounts(void);
--
1.5.3.8
More information about the Ocfs2-tools-devel
mailing list