[Ocfs2-tools-commits] jlbec commits r1374 - branches/cman-based/ocfs2_controld

Wed Aug 8 13:45:04 PDT 2007

Author: jlbec
Date: 2007-08-08 13:45:02 -0700 (Wed, 08 Aug 2007)
New Revision: 1374

Added:
   branches/cman-based/ocfs2_controld/action.c
   branches/cman-based/ocfs2_controld/client_proto.c
   branches/cman-based/ocfs2_controld/ocfs2_controld.h
Modified:
   branches/cman-based/ocfs2_controld/Makefile
   branches/cman-based/ocfs2_controld/group.c
   branches/cman-based/ocfs2_controld/main.c
   branches/cman-based/ocfs2_controld/member_cman.c
   branches/cman-based/ocfs2_controld/ocfs2_controld_internal.h
Log:

We do a lot of things.  The state machine for mount groups is much different,
we've reworked the client protocol.  Etc.  I'm really just committing to save
the work.



Modified: branches/cman-based/ocfs2_controld/Makefile
===================================================================

--- branches/cman-based/ocfs2_controld/Makefile	2007-07-25 07:26:25 UTC (rev 1373)
+++ branches/cman-based/ocfs2_controld/Makefile	2007-08-08 20:45:02 UTC (rev 1374)
@@ -26,7 +26,7 @@
 
 DEFINES = -DO2CB_FLAT_INCLUDES -DVERSION=\"$(VERSION)\"
 
-CFILES = group.c main.c member_cman.c
+CFILES = group.c main.c member_cman.c client_proto.c action.c
 
 
 HFILES = ocfs2_controld.h

Added: branches/cman-based/ocfs2_controld/action.c
===================================================================
--- branches/cman-based/ocfs2_controld/action.c	2007-07-25 07:26:25 UTC (rev 1373)
+++ branches/cman-based/ocfs2_controld/action.c	2007-08-08 20:45:02 UTC (rev 1374)
@@ -0,0 +1,911 @@
+/* -*- mode: c; c-basic-offset: 8; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
+ * Copyright (C) 2007 Oracle.  All rights reserved.
+ *
+ *  This copyrighted material is made available to anyone wishing to use,
+ *  modify, copy, or redistribute it subject to the terms and conditions
+ *  of the GNU General Public License v.2.
+ */
+
+/* Portions of this file are: */
+/******************************************************************************
+*******************************************************************************
+**
+**  Copyright (C) 2005 Red Hat, Inc.  All rights reserved.
+**
+**  This copyrighted material is made available to anyone wishing to use,
+**  modify, copy, or redistribute it subject to the terms and conditions
+**  of the GNU General Public License v.2.
+**
+*******************************************************************************
+******************************************************************************/
+
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <stdarg.h>
+#include <assert.h>
+
+#include "o2cb.h"
+#include "ocfs2_controld.h"
+#include "ocfs2_controld_internal.h"
+
+
+enum mountgroup_state {
+	MG_CREATED		= 1 << 0,
+	MG_JOIN_SENT		= 1 << 1,
+	MG_JOIN_START		= 1 << 2,
+	MG_JOIN_START_DONE	= 1 << 3,
+#define MG_JOINING	(MG_JOIN_SENT | MG_JOIN_START | MG_JOIN_START_DONE)
+	MG_JOINED		= 1 << 4,
+	MG_MOUNTED		= 1 << 5,
+#define MG_MEMBER	(MG_JOINED | MG_MOUNTED)
+	MG_LEAVE_SENT		= 1 << 6,
+	MG_LEAVE_START		= 1 << 7,
+	MG_LEAVE_START_DONE	= 1 << 8,
+#define MG_LEAVING	(MG_LEAVE_SENT | MG_LEAVE_START | MG_LEAVE_START_DONE)
+	MG_DEAD			= 1 << 9,
+};
+
+struct list_head mounts;
+
+static void fill_error(struct mountgroup *mg, int error, char *errfmt, ...)
+{
+	int rc;
+	va_list args;
+
+	if (mg->error)
+		return;
+
+	mg->error = error;
+
+	va_start(args, errfmt);
+	rc = vsnprintf(mg->error_msg, sizeof(mg->error_msg), errfmt, args);
+	va_end(args);
+
+	if (rc >= sizeof(mg->error_msg)) {
+		log_debug("Error message truncated");
+		mg->error_msg[sizeof(mg->error_msg) - 1] = '\0';
+	}
+}
+
+/*
+ * Return 1 if mg->state has any bit of test set (test may be a mask such
+ * as MG_JOINING), else 0, logging if the state is outside allowed as well.
+ */
+static int mg_statep(struct mountgroup *mg, enum mountgroup_state test,
+		     enum mountgroup_state allowed)
+{
+	if (mg->state & test)
+		return 1;
+	if (allowed && !(mg->state & allowed))
+		log_error("mountgroup %s is in state %d, testing for %d, allowed %d",
+			  mg->uuid, mg->state, test, allowed);
+	return 0;
+}
+
+static int mg_joining(struct mountgroup *mg)
+{
+	return mg_statep(mg, MG_JOINING, 0);
+}
+
+static int mg_leaving(struct mountgroup *mg)
+{
+	return mg_statep(mg, MG_LEAVING, 0);
+}
+
+static void notify_mount_client(struct mountgroup *mg)
+{
+	int error = mg->error;
+	char *error_msg = "OK";
+
+	if (error) {
+		if (mg->error_msg[0]) {
+			error_msg = mg->error_msg;
+		} else
+			error_msg = strerror(-error);
+		mg->error = 0;
+	}
+
+	log_group(mg, "notify_mount_client sending %d \"%s\"", -error,
+		  error_msg);
+
+	error = send_message(mg->mount_client_fd, CM_STATUS, error,
+			     error_msg);
+	if (error)
+		log_error("Unable to notify client, send_message failed with %d: %s",
+			  -error, strerror(-error));
+	else
+		mg->mount_client_notified = 1;
+
+	/*
+	 * XXX If we failed to notify the client, what can we do?  I'm
+	 * guessing that our main loop will get POLLHUP of some sort.
+	 */
+}
+
+/* Allocate a mountpoint for client ci and link it into mg; 0 or -ENOMEM. */
+static int create_mountpoint(struct mountgroup *mg, const char *mountpoint,
+			     int ci)
+{
+	struct mountpoint *mp;
+
+	mp = calloc(1, sizeof(*mp));
+	if (!mp)
+		return -ENOMEM;
+
+	/* calloc zeroed the buffer; leave the last byte as the terminator */
+	strncpy(mp->mountpoint, mountpoint, sizeof(mp->mountpoint) - 1);
+	mp->client = ci;
+	list_add(&mp->list, &mg->mountpoints);
+
+	return 0;
+}
+
+static struct mountpoint *find_mountpoint(struct mountgroup *mg,
+					  const char *mountpoint, int ci)
+{
+	struct mountpoint *mp;
+	struct list_head *p;
+	int found = 0;
+
+	list_for_each(p, &mg->mountpoints) {
+		mp = list_entry(p, struct mountpoint, list);
+		if (ci && (mp->client != ci))
+			continue;
+		if (!strcmp(mp->mountpoint, mountpoint)) {
+			found = 1;
+			break;
+		}
+	}
+
+	return found ? mp : NULL;
+}
+
+static void remove_failed_mountpoint(struct mountgroup *mg,
+				     const char *mountpoint, int ci)
+{
+	struct mountpoint *mp;
+
+	mp = find_mountpoint(mg, mountpoint, ci);
+	if (mp) {
+		list_del(&mp->list);
+		free(mp);
+	}
+	
+	assert(mp);
+}
+
+/*
+ * Add a mountpoint to an existing mountgroup; result goes in mg->error.
+ */
+static void add_another_mountpoint(struct mountgroup *mg,
+				   const char *mountpoint,
+				   const char *device, int ci)
+{
+	log_group(mg, "add_another_mountpoint %s device %s ci %d",
+		  mountpoint, device, ci);
+
+	if (strcmp(mg->device, device)) {
+		fill_error(mg, -EINVAL,
+			   "Trying to mount fs %s on device %s, but it already is mounted from device %s",
+			   mg->uuid, device, mg->device);
+		return;
+	}
+
+	if (find_mountpoint(mg, mountpoint, 0)) {
+		fill_error(mg, -EBUSY,
+			   "Filesystem %s is already mounted on %s",
+			   mg->uuid, mountpoint);
+		return;
+	}
+
+	if (mg->mount_client || mg->mount_client_fd || !mg->kernel_mount_done) {
+		fill_error(mg, -EBUSY, "Another mount is in process");
+		return;
+	}
+
+	if (create_mountpoint(mg, mountpoint, ci)) {
+		fill_error(mg, -ENOMEM,
+			   "Unable to allocate mountpoint structure");
+		return;
+	}
+
+	mg->mount_client = ci;
+
+	/*
+	 * This special error is returned to mount.ocfs2 to tell it that
+	 * the mount is a secondary one and no additional work is required.
+	 * It will just call mount(2).
+	 */
+	fill_error(mg, -EALREADY, "Kernel mounted, go ahead");
+}
+
+struct mountgroup *find_mg(const char *uuid)
+{
+	struct list_head *p;
+	struct mountgroup *mg;
+
+	list_for_each(p, &mounts) {
+		mg = list_entry(p, struct mountgroup, list);
+		if ((strlen(mg->uuid) == strlen(uuid)) &&
+		    !strncmp(mg->uuid, uuid, strlen(uuid)))
+			return mg;
+	}
+	return NULL;
+}
+
+static struct mountgroup *create_mg(const char *uuid, const char *mountpoint,
+				    int ci)
+{
+	struct mountgroup *mg = NULL;
+
+	mg = malloc(sizeof(struct mountgroup));
+	if (!mg)
+		goto out;
+
+	memset(mg, 0, sizeof(struct mountgroup));
+	INIT_LIST_HEAD(&mg->members);
+	INIT_LIST_HEAD(&mg->mountpoints);
+	mg->state = MG_CREATED;
+	strncpy(mg->uuid, uuid, sizeof(mg->uuid));
+
+	if (create_mountpoint(mg, mountpoint, ci)) {
+		free(mg);
+		mg = NULL;
+	}
+
+out:
+	return mg;
+}
+
+/*
+ * Handle MOUNT from client ci.  *mg_ret is set on 0 and on -EALREADY.
+ */
+int do_mount(int ci, int fd, const char *fstype, const char *uuid,
+	     const char *cluster, const char *device,
+	     const char *mountpoint, struct mountgroup **mg_ret)
+{
+	int rc = 0;
+	struct mountgroup mg_error = { .error = 0 }; /* Until we have a real one */
+	struct mountgroup *mg = &mg_error;
+
+	log_debug("mount: MOUNT %s %s %s %s %s",
+		  fstype, uuid, cluster, device, mountpoint);
+
+	if (strcmp(fstype, "ocfs2")) {
+		fill_error(mg, -EINVAL, "Unsupported fstype: %s", fstype);
+		goto out;
+	}
+
+	if (!strlen(cluster) || (strlen(cluster) != strlen(clustername)) ||
+	    strcmp(cluster, clustername)) {
+		fill_error(mg, -EINVAL,
+			   "Request for mount in cluster %s but we belong to %s",
+			  cluster, clustername);
+		goto out;
+	}
+
+	if (strlen(uuid) > MAXNAME) {
+		fill_error(mg, -ENAMETOOLONG, "UUID too long: %s", uuid);
+		goto out;
+	}
+
+	mg = find_mg(uuid);
+	if (mg) {
+		/* The caller uses *mg_ret on -EALREADY as well */
+		*mg_ret = mg;
+		add_another_mountpoint(mg, mountpoint, device, ci);
+		goto out;
+	}
+
+	/* Here we stop using &mg_error and start using our real one */
+	mg = create_mg(uuid, mountpoint, ci);
+	if (!mg) {
+		mg = &mg_error;  /* Well, almost */
+		fill_error(mg, -ENOMEM,
+			   "Unable to allocate mountgroup structure");
+		goto out;
+	}
+
+	mg->mount_client = ci;
+	strncpy(mg->type, fstype, sizeof(mg->type));
+	strncpy(mg->cluster, cluster, sizeof(mg->cluster));
+	strncpy(mg->device, device, sizeof(mg->device));
+	list_add(&mg->list, &mounts);
+
+	rc = group_join(gh, (char *)uuid);
+	if (rc) {
+		fill_error(mg, -errno, "Unable to start group join: %s",
+			   strerror(errno));
+
+		/*
+		 * Remove the mountpoint so we can free the mountgroup
+		 * at the bottom.
+		 */
+		remove_failed_mountpoint(mg, mountpoint, ci);
+		goto out;
+	}
+
+	mg->state = MG_JOIN_SENT;
+	*mg_ret = mg;
+	log_group(mg, "mount successfully started");
+
+out:
+	/*
+	 * Only reply on error.  If we're doing OK, the reply is delayed
+	 * until join completes (notify_mount_client()).
+	 */
+	if (mg->error) {
+		rc = mg->error;
+		send_message(fd, CM_STATUS, mg->error, mg->error_msg);
+		/* -EALREADY magic is sent, clear it */
+		if (mg->error == -EALREADY)
+			mg->error = 0;
+		else {
+			log_error("mount: %s", mg->error_msg);
+			if ((mg != &mg_error) &&
+			    list_empty(&mg->mountpoints)) {
+				log_debug("mount: freeing failed mountgroup");
+				list_del(&mg->list);
+				free(mg);
+			}
+		}
+	}
+
+	log_debug("do_mount returns %d", rc);
+	return rc;
+}
+
+int do_mount_result(struct mountgroup *mg, int ci, int another,
+		    const char *fstype, const char *uuid,
+		    const char *errcode, const char *mountpoint)
+{
+	int rc = 0;
+	int reply = 1;
+	char *ptr = NULL;
+	long err;
+
+	log_debug("mount: MRESULT %s %s %s %s",
+		  fstype, uuid, errcode, mountpoint);
+
+	assert(mg->mount_client == ci);
+	assert(!mg->error);
+
+	if (strcmp(fstype, "ocfs2")) {
+		fill_error(mg, -EINVAL, "Unsupported fstype: %s", fstype);
+		goto out;
+	}
+
+	if (strlen(uuid) > MAXNAME) {
+		fill_error(mg, -ENAMETOOLONG, "UUID too long: %s", uuid);
+		goto out;
+	}
+
+	if (strcmp(uuid, mg->uuid)) {
+		fill_error(mg, -EINVAL,
+			   "UUID %s does not match mountgroup %s", uuid,
+			   mg->uuid);
+		goto out;
+	}
+
+	/* XXX Check that mountpoint is valid */
+
+	err = strtol(errcode, &ptr, 10);
+	if (ptr && *ptr != '\0') {
+		fill_error(mg, -EINVAL, "Invalid error code string: %s",
+			   errcode);
+		goto out;
+	}
+	if ((err == LONG_MIN) || (err == LONG_MAX) || (err < INT_MIN) ||
+	    (err > INT_MAX)) {
+		fill_error(mg, -ERANGE, "Error code %ld out of range", err);
+		goto out;
+	}
+
+	if (another) {
+		if (err) {
+			remove_failed_mountpoint(mg, mountpoint, ci);
+			assert(!list_empty(&mg->mountpoints));
+		}
+		/*
+		 * rc is zero, we're responding to mount.ocfs2 that we
+		 * got the message.  There's nothing else to do.
+		 */
+		goto out;
+	}
+
+	mg->kernel_mount_done = 1;
+	mg->kernel_mount_error = err;
+
+	if (!err) {
+		/* Everyone's happy */
+		mg->mount_client = 0;
+		mg->mount_client_fd = 0;
+
+		goto out;
+	}
+
+	/*
+	 * We're failing an initial mount.  We keep mount_client_fd around
+	 * both to send the result of the LEAVE as well as to keep other
+	 * clients from trying to race us.  We don't reply to the client
+	 * until the LEAVE has completed.
+	 */
+	reply = 0;
+	remove_failed_mountpoint(mg, mountpoint, ci);
+	assert(list_empty(&mg->mountpoints));
+
+	/* We shouldn't get to MRESULT unless we're a member, but... */
+	if (!mg_statep(mg, MG_MEMBER, MG_MEMBER)) {
+		mg->group_leave_on_finish = 1;
+		goto out;
+	}
+
+	if (group_leave(gh, mg->uuid))
+		fill_error(mg, -errno, "Unable to start group leave: %s",
+			   strerror(errno));
+	else
+		mg->state = MG_LEAVE_SENT;
+
+out:
+	if (reply)
+		send_message(mg->mount_client_fd, CM_STATUS, mg->error,
+			     mg->error ? mg->error_msg : "OK");
+
+	return rc;
+}
+
+int do_unmount(int ci, int fd, const char *fstype, const char *uuid,
+	       const char *mountpoint)
+{
+	int rc = 0;
+	int reply = 1;
+	struct mountgroup mg_error = {
+		.error = 0,
+	};
+	struct mountgroup *mg;
+	struct mountpoint *mp;
+
+	log_debug("unmount: UMOUNT %s %s %s",
+		  fstype, uuid, mountpoint);
+
+	if (strcmp(fstype, "ocfs2")) {
+		fill_error(&mg_error, -EINVAL, "Unsupported fstype: %s",
+			   fstype);
+		goto out;
+	}
+
+	if (strlen(uuid) > MAXNAME) {
+		fill_error(&mg_error, -ENAMETOOLONG, "UUID too long: %s",
+			   uuid);
+		goto out;
+	}
+
+	/* Once we have our mg, we're done with &mg_error */
+	mg = find_mg(uuid);
+	if (!mg) {
+		fill_error(&mg_error, -ENOENT, "Unknown uuid %s", uuid);
+		goto out;
+	}
+
+	/* We shouldn't find the mg if the uuid isn't mounted *somewhere* */
+	assert(!list_empty(&mg->mountpoints));
+
+	mp = find_mountpoint(mg, mountpoint, 0);
+	if (!mp) {
+		fill_error(&mg_error, -ENOENT,
+			   "Filesystem %s is not mounted on %s", uuid,
+			   mountpoint);
+		goto out;
+	}
+
+	/* XXX Do we check kernel_mount_done? */
+
+	log_group(mg, "removing mountpoint %s", mountpoint);
+	list_del(&mp->list);
+	free(mp);
+
+	if (!list_empty(&mg->mountpoints)) {
+		log_group(mg, "mounts still remain");
+		goto out;
+	}
+
+	/*
+	 * We're clearing the last mount.  We must leave the group before
+	 * we let umount.ocfs2 complete.  Thus, we'll let
+	 * notify_mount_client() handle the rest.
+	 */
+
+	reply = 0;
+
+	/*
+	 * We shouldn't be allowing another client to connect before
+	 * we get to MG_MEMBER, but let's be safe
+	 */
+	if (!mg_statep(mg, MG_MEMBER, MG_MEMBER)) {
+		mg->group_leave_on_finish = 1;
+		goto out;
+	}
+
+	if (group_leave(gh, mg->uuid)) {
+		/* We spoke too soon! */
+		/* XXX How can a client clean this up? */
+		reply = 1;
+		fill_error(&mg_error, -errno, "Unable to leave group: %s",
+			   strerror(errno));
+	} else
+		mg->state = MG_LEAVE_SENT;
+
+out:
+	if (reply)
+		send_message(fd, CM_STATUS, mg_error.error,
+			     mg_error.error ? mg_error.error_msg : "OK");
+
+	return rc;
+}
+
+static struct mg_member *find_memb_nodeid(struct mountgroup *mg, int nodeid)
+{
+	struct list_head *p;
+	struct mg_member *memb;
+
+	list_for_each(p, &mg->members) {
+		memb = list_entry(p, struct mg_member, list);
+		if (memb->nodeid == nodeid)
+			return memb;
+	}
+	return NULL;
+}
+
+#define MEMBER_LINK_FORMAT	"/sys/kernel/config/cluster/%s/region/%s/%s"
+#define MEMBER_TARGET_FORMAT	"/sys/kernel/config/cluster/%s/node/%s"
+/*
+ * Take memb off mg's member list, remove its symlink, and free it.
+ * Returns the unlink() result (0 or -1); memb is freed either way.
+ */
+static int drop_member(struct mountgroup *mg, struct mg_member *memb)
+{
+	int rc;
+	char link[PATH_MAX+1];
+
+	/* XXX Can we just remove here, or should we wait for do_finish()? */
+	list_del(&memb->list);
+	memb->gone_event = mg->start_event_nr;
+	memb->gone_type = mg->start_type;
+	mg->memb_count--;
+
+	snprintf(link, PATH_MAX, MEMBER_LINK_FORMAT, clustername, mg->uuid,
+		 memb->name);
+	/* add_member() made this with symlink(); rmdir() can't remove it */
+	rc = unlink(link);
+	if (rc)
+		log_error("unlink of %s failed: %d", link, errno);
+
+	free(memb);
+	return rc;
+}
+
+/*
+ * Insert nodeid into mg->members (sorted) and symlink it; 0 or -errno.
+ */
+static int add_member(struct mountgroup *mg, int nodeid)
+{
+	int rc;
+	struct list_head *p;
+	char *node_name;
+	struct mg_member *memb, *test, *target = NULL;
+	char link[PATH_MAX+1], nodepath[PATH_MAX+1];
+
+	memb = malloc(sizeof(struct mg_member));
+	if (!memb) {
+		rc = -errno;
+		goto out;
+	}
+
+	memset(memb, 0, sizeof(struct mg_member));
+	memb->nodeid = nodeid;
+	node_name = nodeid2name(nodeid);
+	if (!node_name) {
+		log_error("Unable to determine name for node %d", nodeid);
+		rc = -EINVAL;
+		goto out_free;
+	}
+	strncpy(memb->name, node_name, NAME_MAX);
+
+	list_for_each(p, &mg->members) {
+		test = list_entry(p, struct mg_member, list);
+		if (memb->nodeid < test->nodeid) {
+			target = test;
+			break;
+		}
+	}
+
+	snprintf(link, PATH_MAX, MEMBER_LINK_FORMAT, clustername, mg->uuid,
+		 memb->name);
+	snprintf(nodepath, PATH_MAX, MEMBER_TARGET_FORMAT, clustername,
+		 memb->name);
+	rc = symlink(nodepath, link);
+	if (rc) {
+		rc = -errno;
+		goto out_free;
+	}
+
+	if (target)
+		list_add_tail(&memb->list, &target->list);
+	else
+		list_add_tail(&memb->list, &mg->members);
+	/* Count the member only once it is really on the list */
+	mg->memb_count++;
+out_free:
+	if (rc)
+		free(memb);
+
+out:
+	return rc;
+}
+
+static int is_member(struct mountgroup *mg, int nodeid)
+{
+	return find_memb_nodeid(mg, nodeid) != NULL;
+}
+
+static int path_exists(const char *path)
+{
+	struct stat buf;
+
+	if (stat(path, &buf) < 0) {
+		if (errno != ENOENT)
+			log_error("%s: stat failed: %d", path, errno);
+		return 0;
+	}
+	return 1;
+}
+
+static int create_path(char *path)
+{
+	mode_t old_umask;
+	int rv;
+
+	old_umask = umask(0022);
+	rv = mkdir(path, 0777);
+	umask(old_umask);
+
+	if (rv < 0) {
+		rv = -errno;
+		log_error("%s: mkdir failed: %d", path, -rv);
+		if (-rv == EEXIST)
+			rv = 0;
+	}
+	return rv;
+}
+
+#define REGION_FORMAT "/sys/kernel/config/cluster/%s/heartbeat/%s"
+/* Create the heartbeat region directory for mg if missing; 0 or -errno. */
+static int initialize_region(struct mountgroup *mg)
+{
+	int rc;
+	char path[PATH_MAX+1];
+
+	snprintf(path, PATH_MAX, REGION_FORMAT, clustername, mg->uuid);
+	if (path_exists(path))
+		return 0;
+
+	rc = create_path(path);
+	if (rc) {
+		/* rc is already negative, like other fill_error() callers pass */
+		fill_error(mg, rc, "Unable to create region %s", mg->uuid);
+		mg->group_leave_on_finish = 1;
+	}
+	return rc;
+}
+
+/* Remove mg's heartbeat region directory if present; 0 or -errno. */
+static int drop_region(struct mountgroup *mg)
+{
+	int rc;
+	char path[PATH_MAX+1];
+
+	snprintf(path, PATH_MAX, REGION_FORMAT, clustername, mg->uuid);
+	if (!path_exists(path))
+		return 0;
+
+	if (rmdir(path)) {
+		/* Keep mg->error negative, matching notify_mount_client() */
+		rc = -errno;
+		fill_error(mg, rc, "Unable to remove region %s", mg->uuid);
+		return rc;
+	}
+	return 0;
+}
+
+static void down_members(struct mountgroup *mg, int member_count,
+			 int *nodeids)
+{
+	int found, rc, i;
+	struct list_head *p, *t;
+	struct mg_member *memb;
+
+	list_for_each_safe(p, t, &mg->members) {
+		memb = list_entry(p, struct mg_member, list);
+		found = 0;
+		for (i = 0; i < member_count; i++) {
+			if (memb->nodeid == nodeids[i]) {
+				found = 1;
+				break;
+			}
+		}
+
+		if (found)
+			continue;
+
+		if (mg->start_type == GROUP_NODE_JOIN) {
+			log_error("down_members: Somehow we got a member gone (%d) during a JOIN!",
+				  memb->nodeid);
+			/* Continue anyway, it's gone */
+		}
+
+		rc = drop_member(mg, memb);
+		/*
+		 * I don't think we care that drop_member failed, even
+		 * during join.  Yes, we probably don't like that ocfs2
+		 * won't get a notification if drop_member() failed, but
+		 * there's nothing we can do.  Let's just ignore rc.
+		 */
+	}
+}
+
+/* Add every node in nodeids that is not yet a member of mg. */
+static void up_members(struct mountgroup *mg, int member_count,
+		       int *nodeids)
+{
+	int i, rc;
+
+	for (i = 0; i < member_count; i++) {
+		if (is_member(mg, nodeids[i]))
+			continue;
+
+		if (mg->start_type == GROUP_NODE_LEAVE) {
+			log_error("up_members: Somehow we got at member added (%d) during a LEAVE!",
+				  nodeids[i]);
+			/* Continue anyway */
+		}
+
+		if ((nodeids[i] == our_nodeid) &&
+		    (!mg_statep(mg, MG_JOIN_START,
+				MG_MEMBER | MG_LEAVE_START)))
+			log_error("up_members: we got ourselves up in a join event we didn't expect! Group is %s ",
+				  mg->uuid);
+
+		rc = add_member(mg, nodeids[i]);
+		if (rc && mg_joining(mg)) {
+			fill_error(mg, rc, "Unable to join group %s", mg->uuid);
+			mg->group_leave_on_finish = 1;
+		}
+	}
+}
+
+void do_stop(struct mountgroup *mg)
+{
+	/*
+	 * As far as I can tell, we don't have to do anything here.  Later
+	 * we might want to freeze ocfs2, but currently it handles its own
+	 * thang.
+	 */
+	log_group(mg, "do_stop() called");
+
+	group_stop_done(gh, mg->uuid);
+}
+
+/*
+ * The ocfs2 membership scheme makes this pretty simple.  We can just 
+ * go ahead and modify the group in o2cb.  After all members have called
+ * start_done, the mounting node's do_finish() can notify mount.ocfs2.
+ */
+void do_start(struct mountgroup *mg, int type, int member_count,
+	      int *nodeids)
+{
+	if (mg_statep(mg, MG_JOIN_SENT, MG_MEMBER | MG_LEAVE_SENT))
+		mg->state = MG_JOIN_START;
+	else if (mg_statep(mg, MG_LEAVE_SENT, MG_MEMBER | MG_JOIN_SENT))
+		mg->state = MG_LEAVE_START;
+
+	mg->start_event_nr = mg->last_start;
+	mg->start_type = type;
+
+	log_group(mg,
+		  "start %d state %d type %d member_count %d",
+		  mg->last_start, mg->state, type,
+		  member_count);
+
+	if (mg_joining(mg)) {
+		if (initialize_region(mg))
+			goto out;
+	}
+
+	down_members(mg, member_count, nodeids);
+	up_members(mg, member_count, nodeids);
+
+out:
+	group_start_done(gh, mg->uuid, mg->start_event_nr);
+	if (mg_statep(mg, MG_JOIN_START, MG_MEMBER | MG_LEAVE_START))
+		mg->state = MG_JOIN_START_DONE;
+	else if (mg_statep(mg, MG_LEAVE_START, MG_MEMBER | MG_JOIN_START))
+		mg->state = MG_LEAVE_START_DONE;
+}
+
+void do_finish(struct mountgroup *mg)
+{
+	log_group(mg, "finish called");
+
+	if (mg_statep(mg, MG_JOIN_START_DONE, MG_MEMBER)) {
+		mg->state = MG_JOINED;
+		if (!mg->error) {
+			assert(!mg->group_leave_on_finish);
+			notify_mount_client(mg);
+		} else {
+			/*
+			 * We had a problem joining the group.  We're going
+			 * to leave it, and we don't want to notify
+			 * mount.ocfs2 until we've done that.  It will
+			 * happen in do_terminate().
+			 */
+			assert(mg->group_leave_on_finish);
+		}
+	}
+
+	/*
+	 * We do this when we determine it's time to leave but we're
+	 * processing another node's join/leave events.  Here we know they
+	 * are done, so we can call leave.
+	 *
+	 * We trust that if a node is both (MG_MEMBER && ->leave_on_finish),
+	 * the notify_mount_client() above will have sent mount.ocfs2 an
+	 * error.
+	 */
+	if (mg_statep(mg, MG_MEMBER, MG_MEMBER) &&
+	    mg->group_leave_on_finish)  {
+		log_group(mg, "leaving group after delay for join to finish");
+		if (group_leave(gh, mg->uuid))
+			log_error("group_leave(%s) failed: %s",
+				  mg->uuid, strerror(errno));
+		else
+			mg->state = MG_LEAVE_SENT;
+		mg->group_leave_on_finish = 0;
+	}
+}
+
+void do_terminate(struct mountgroup *mg)
+{
+	log_group(mg, "termination of our unmount leave");
+
+	if (!mg_statep(mg, MG_LEAVE_START_DONE, MG_LEAVE_START_DONE))
+		log_error("terminate called from state %d for group %s",
+			  mg->state, mg->uuid);
+
+	mg->state = MG_DEAD;
+
+	if (mg->mount_client)
+		notify_mount_client(mg);
+
+	/*
+	 * A successful mount means that do_unmount() must have cleared
+	 * the mountpoint.  A failed mount means that do_mount_result()
+	 * cleared the mountpoint.  Either way, the list of mountpoints
+	 * had better be empty by the time we've left the group.
+	 */
+	assert(list_empty(&mg->mountpoints));
+
+	/*
+	 * Drop all members from our local region, as we don't care about
+	 * them anymore.
+	 */
+	down_members(mg, 0, NULL);
+	assert(list_empty(&mg->members));
+
+	if (drop_region(mg))
+		log_error("Error removing region %s", mg->uuid);
+
+	list_del(&mg->list);
+	free(mg);
+}

Added: branches/cman-based/ocfs2_controld/client_proto.c
===================================================================
--- branches/cman-based/ocfs2_controld/client_proto.c	2007-07-25 07:26:25 UTC (rev 1373)
+++ branches/cman-based/ocfs2_controld/client_proto.c	2007-08-08 20:45:02 UTC (rev 1374)
@@ -0,0 +1,255 @@
+/* -*- mode: c; c-basic-offset: 8; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
+ * Copyright (C) 2007 Oracle.  All rights reserved.
+ *
+ *  This copyrighted material is made available to anyone wishing to use,
+ *  modify, copy, or redistribute it subject to the terms and conditions
+ *  of the GNU General Public License v.2.
+ */
+
+#include <stdio.h>
+#include <string.h>
+#include <stdarg.h>
+#include <unistd.h>
+#include <errno.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+
+#include "ocfs2_controld.h"
+
+struct client_message {
+	char *cm_command;
+	int cm_argcount;
+	char *cm_format;
+};
+
+#define BEGIN_MESSAGES(_list) struct client_message _list[] = {
+#define END_MESSAGES(_list) }; \
+	int _list##_len = sizeof(_list) / sizeof(_list[0]);
+#define DEFINE_MESSAGE(_name, _argcount, _format) [CM_##_name] = {	\
+	.cm_command = #_name, 				\
+	.cm_argcount = _argcount,			\
+	.cm_format = #_name " " _format,		\
+},
+
+BEGIN_MESSAGES(message_list)
+DEFINE_MESSAGE(MOUNT, 5, "%s %s %s %s %s")
+DEFINE_MESSAGE(MRESULT, 4, "%s %s %d %s")
+DEFINE_MESSAGE(UNMOUNT, 3, "%s %s %s")
+DEFINE_MESSAGE(STATUS, 2, "%d %s")
+END_MESSAGES(message_list)
+
+/* No short reads allowed */
+static int full_read(int fd, void *buf, size_t count)
+{
+	size_t off = 0;
+	ssize_t rc = 0;
+
+	while (off < count) {
+		rc = read(fd, buf + off, count - off);
+		if (rc == 0)
+			return -EPIPE;
+		if (rc == -1) {
+			rc = -errno;
+			if (rc == -EINTR)
+				continue;
+			break;
+		}
+		off += rc;
+		rc = 0;
+	}
+	return rc;
+}
+
+/* No short writes allowed */
+static int full_write(int fd, void *buf, size_t count)
+{
+	size_t off = 0;
+	ssize_t rc = 0;
+
+	while (off < count) {
+		rc = write(fd, buf + off, count - off);
+		if (rc == 0)
+			return -EPIPE;
+		if (rc == -1) {
+			rc = -errno;
+			if (rc == -EINTR)
+				continue;
+			break;
+		}
+		off += rc;
+		rc = 0;
+	}
+	return rc;
+}
+
+/* Format a message and send it as one fixed-size, NUL-padded record. */
+int send_message(int fd, client_message message, ...)
+{
+	int rc;
+	va_list args;
+	char mbuf[OCFS2_CONTROLD_MAXLINE] = "";	/* don't leak stack bytes */
+
+	va_start(args, message);
+	rc = vsnprintf(mbuf, OCFS2_CONTROLD_MAXLINE,
+		       message_list[message].cm_format, args);
+	va_end(args);
+	if (rc < 0)
+		return -EINVAL;
+	if (rc >= OCFS2_CONTROLD_MAXLINE)
+		return -E2BIG;
+	return full_write(fd, mbuf, OCFS2_CONTROLD_MAXLINE);
+}
+
+/*
+ * Split the arguments of buf in place on sep, skipping the command word.
+ * argv[] is NULL-terminated and *argc (if given) gets the count; after
+ * want arguments the remainder is left unsplit in the last one.  Returns
+ * the byte following the first '\0' in buf.
+ */
+static char *get_args(char *buf, int *argc, char **argv, char sep, int want)
+{
+	char *p;
+	int i = 0;
+
+	/* Skip the first word, which is the command */
+	p = strchr(buf, sep);
+	if (!p)
+		goto out;
+	p += 1;
+	argv[0] = p;
+
+	for (i = 1; i < OCFS2_CONTROLD_MAXARGS; i++) {
+		p = strchr(p, sep);
+
+		/* End of string, or enough arguments: nothing more to split */
+		if (!p || (want == i))
+			break;
+
+		*p = '\0';
+		p += 1;
+		argv[i] = p;
+	}
+
+out:
+	if (argc)
+		*argc = i;
+
+	/* Terminate the list, the caller expects us to */
+	argv[i] = NULL;
+
+	/* p may be NULL here, so always derive the result from buf */
+	return strchr(buf, '\0') + 1;
+}
+
+int receive_message_full(int fd, client_message *message, char **argv,
+			 char **rest)
+{
+	int i, rc, len, count;
+	client_message msg;
+	char *r;
+	char mbuf[OCFS2_CONTROLD_MAXLINE];
+
+	rc = full_read(fd, mbuf, OCFS2_CONTROLD_MAXLINE);
+	if (rc)
+		goto out;
+
+	/* Safety first */
+	mbuf[OCFS2_CONTROLD_MAXLINE - 1] = '\0';
+
+
+	for (i = 0; i < message_list_len; i++) {
+		len = strlen(message_list[i].cm_command);
+		if (!strncmp(mbuf, message_list[i].cm_command, len) &&
+		    (mbuf[len] == ' '))
+			break;
+	}
+	if (i >= message_list_len) {
+		rc = -EBADMSG;
+		goto out;
+	}
+	msg = i;
+	
+	r = get_args(mbuf, &count, argv, ' ',
+		     message_list[msg].cm_argcount);
+	if (count != message_list[msg].cm_argcount) {
+		rc = -EBADMSG;
+	} else {
+		if (message)
+			*message = msg;
+		if (rest)
+			*rest = r;
+	}
+
+out:
+	return rc;
+}
+
+int receive_message(int fd, client_message *message, char **argv)
+{
+	return receive_message_full(fd, message, argv, NULL);
+}
+
+int client_listen(void)
+{
+	struct sockaddr_un addr;
+	socklen_t addrlen;
+	int rv, s;
+
+	/* we listen for new client connections on socket s */
+
+	s = socket(AF_LOCAL, SOCK_STREAM, 0);
+	if (s < 0) {
+		/* log_error("socket error %d %d", s, errno); */
+		return s;
+	}
+
+	memset(&addr, 0, sizeof(addr));
+	addr.sun_family = AF_LOCAL;
+	strcpy(&addr.sun_path[1], OCFS2_CONTROLD_SOCK_PATH);
+	addrlen = sizeof(sa_family_t) + strlen(addr.sun_path+1) + 1;
+
+	rv = bind(s, (struct sockaddr *) &addr, addrlen);
+	if (rv < 0) {
+		/* log_error("bind error %d %d", rv, errno); */
+		close(s);
+		return rv;
+	}
+
+	rv = listen(s, 5);
+	if (rv < 0) {
+		/* log_error("listen error %d %d", rv, errno); */
+		close(s);
+		return rv;
+	}
+
+	/* log_debug("listen %d", s); */
+
+	return s;
+}
+
+int client_connect(void)
+{
+	struct sockaddr_un sun;
+	socklen_t addrlen;
+	int rv, fd;
+
+	fd = socket(PF_UNIX, SOCK_STREAM, 0);
+	if (fd < 0)
+		goto out;
+
+	memset(&sun, 0, sizeof(sun));
+	sun.sun_family = AF_UNIX;
+	strcpy(&sun.sun_path[1], OCFS2_CONTROLD_SOCK_PATH);
+	addrlen = sizeof(sa_family_t) + strlen(sun.sun_path+1) + 1;
+
+	rv = connect(fd, (struct sockaddr *) &sun, addrlen);
+	if (rv < 0) {
+		close(fd);
+		fd = rv;
+	}
+ out:
+	return fd;
+}

Modified: branches/cman-based/ocfs2_controld/group.c
===================================================================
--- branches/cman-based/ocfs2_controld/group.c	2007-07-25 07:26:25 UTC (rev 1373)
+++ branches/cman-based/ocfs2_controld/group.c	2007-08-08 20:45:02 UTC (rev 1374)
@@ -22,7 +22,6 @@
  *  of the GNU General Public License v.2.
  */
 
-//#include "ocfs2_controld.h"
 #include "ocfs2_controld_internal.h"
 
 #define OCFS2_CONTROLD_GROUP_NAME "ocfs2"
@@ -44,12 +43,7 @@
 static int cb_member_count;
 static int cb_members[MAX_GROUP_MEMBERS];
 
-int do_stop(struct mountgroup *mg);
-int do_finish(struct mountgroup *mg);
-int do_terminate(struct mountgroup *mg);
-int do_start(struct mountgroup *mg, int type, int count, int *nodeids);
 
-
 static void stop_cbfn(group_handle_t h, void *private, char *name)
 {
 	cb_action = DO_STOP;
@@ -109,12 +103,13 @@
 
 static char *str_members(void)
 {
-	static char buf[MAXLINE];
+#define LINESIZE 1024
+	static char buf[LINESIZE];
 	int i, len = 0;
 
-	memset(buf, 0, MAXLINE);
+	memset(buf, 0, LINESIZE);
 
-	for (i = 0; i < cb_member_count; i++)
+	for (i = 0; (i < cb_member_count) && (len < LINESIZE); i++)
 		len += sprintf(buf+len, "%d ", cb_members[i]);
 	return buf;
 }

Modified: branches/cman-based/ocfs2_controld/main.c
===================================================================
--- branches/cman-based/ocfs2_controld/main.c	2007-07-25 07:26:25 UTC (rev 1373)
+++ branches/cman-based/ocfs2_controld/main.c	2007-08-08 20:45:02 UTC (rev 1374)
@@ -22,11 +22,11 @@
  *  of the GNU General Public License v.2.
  */
 
-//#include "ocfs2_controld.h"
+#include "ocfs2_controld.h"
 #include "ocfs2_controld_internal.h"
 
-#define OPTION_STRING			"DPhVwpl:"
-#define LOCKFILE_NAME			"/var/run/gfs_controld.pid"
+#define OPTION_STRING			"DhVw"
+#define LOCKFILE_NAME			"/var/run/ocfs2_controld.pid"
 
 #define DEFAULT_PLOCK_RATE_LIMIT 100
 
@@ -88,56 +88,7 @@
 	return 0;
 }
 
-#if 0
-static void make_args(char *buf, int *argc, char **argv, char sep)
-{
-	char *p = buf;
-	int i;
 
-	argv[0] = p;
-
-	for (i = 1; i < MAXARGS; i++) {
-		p = strchr(buf, sep);
-		if (!p)
-			break;
-		*p = '\0';
-		argv[i] = p + 1;
-		buf = p + 1;
-	}
-	*argc = i;
-}
-#endif
-
-static char *get_args(char *buf, int *argc, char **argv, char sep, int want)
-{
-	char *p = buf, *rp = NULL;
-	int i;
-
-	argv[0] = p;
-
-	for (i = 1; i < MAXARGS; i++) {
-		p = strchr(buf, sep);
-		if (!p)
-			break;
-		*p = '\0';
-
-		if (want == i) { 
-			rp = p + 1;
-			break;
-		}
-
-		argv[i] = p + 1;
-		buf = p + 1;
-	}
-	*argc = i;
-
-	/* we ended by hitting \0, return the point following that */
-	if (!rp)
-		rp = strchr(buf, '\0') + 1;
-
-	return rp;
-}
-
 static int client_add(int fd)
 {
 	int i;
@@ -186,11 +137,6 @@
 	client[ci].mg = NULL;
 }
 
-int client_send(int ci, char *buf, int len)
-{
-	return do_write(client[ci].fd, buf, len);
-}
-
 static int dump_debug(int ci)
 {
 	int len = DUMP_SIZE;
@@ -260,45 +206,63 @@
 static int process_client(int ci)
 {
 	struct mountgroup *mg;
-	char buf[MAXLINE], *argv[MAXARGS], out[MAXLINE];
-	char *cmd = NULL;
-	int argc = 0, rv, fd;
+	client_message message;
+	char out[OCFS2_CONTROLD_MAXLINE];
+	char *argv[OCFS2_CONTROLD_MAXARGS + 1];
+	int rv, fd = client[ci].fd;
 
-	memset(buf, 0, MAXLINE);
-	memset(out, 0, MAXLINE);
-	memset(argv, 0, sizeof(char *) * MAXARGS);
+	memset(out, 0, OCFS2_CONTROLD_MAXLINE);
 
-	rv = read(client[ci].fd, buf, MAXLINE);
-	if (!rv) {
+	/* receive_message ensures we have the proper number of arguments */
+	rv = receive_message(fd, &message, argv);
+	if (rv == -EPIPE) {
 		client_dead(ci);
 		return 0;
 	}
 	if (rv < 0) {
-		log_debug("client %d fd %d read error %d %d", ci,
-			   client[ci].fd, rv, errno);
+		/* XXX: Should print better errors matching our returns */
+		log_debug("client %d fd %d read error %d", ci, fd, -rv);
 		return rv;
 	}
 
-	log_debug("client %d: %s", ci, buf);
+	log_debug("client message %d: %d", ci, message);
 
-	get_args(buf, &argc, argv, ' ', 7);
-	cmd = argv[0];
-	rv = 0;
-
-	if (!strcmp(cmd, "join")) {
-		/* ci, dir (mountpoint), type (gfs/gfs2), proto (lock_dlm),
-		   table (fsname:clustername), extra (rw), dev (/dev/sda1) */
-
-		rv = do_mount(ci, argv[1], argv[2], argv[3], argv[4], argv[5],
-			      argv[6], &mg);
-		fd = client[ci].fd;
+	switch (message) {
+		case CM_MOUNT:
+		rv = do_mount(ci, fd, argv[0], argv[1], argv[2], argv[3],
+			      argv[4], &mg);
 		fcntl(fd, F_SETFL, fcntl(fd, F_GETFL, 0) | O_NONBLOCK);
 		if (!rv || rv == -EALREADY) {
 			client[ci].another_mount = rv;
 			client[ci].mg = mg;
 			mg->mount_client_fd = fd;
 		}
-		goto reply;
+		break;
+
+		case CM_MRESULT:
+		rv = do_mount_result(client[ci].mg, ci,
+				     client[ci].another_mount,
+				     argv[0], argv[1], argv[2], argv[3]);
+		break;
+
+		case CM_UNMOUNT:
+		rv = do_unmount(ci, fd, argv[0], argv[1], argv[2]);
+		if (!rv) {
+			client[ci].mg = mg;
+			mg->mount_client_fd = fd;
+		}
+		break;
+
+		case CM_STATUS:
+		log_error("Someone sent us cm_status!");
+		break;
+
+		default:
+		log_error("Invalid message received");
+		break;
+	}
+
+#if 0
 	} else if (!strcmp(cmd, "mount_result")) {
 		got_mount_result(client[ci].mg, atoi(argv[3]), ci,
 				 client[ci].another_mount);
@@ -317,58 +281,16 @@
 		rv = -EINVAL;
 		goto reply;
 	}
+#endif
 
 	return rv;
-
- reply:
-	sprintf(out, "%d", rv);
-	rv = client_send(ci, out, MAXLINE);
-	return rv;
 }
 
-static int setup_listen(void)
-{
-	struct sockaddr_un addr;
-	socklen_t addrlen;
-	int rv, s;
-
-	/* we listen for new client connections on socket s */
-
-	s = socket(AF_LOCAL, SOCK_STREAM, 0);
-	if (s < 0) {
-		log_error("socket error %d %d", s, errno);
-		return s;
-	}
-
-	memset(&addr, 0, sizeof(addr));
-	addr.sun_family = AF_LOCAL;
-	strcpy(&addr.sun_path[1], OCFS2_CONTROLD_SOCK_PATH);
-	addrlen = sizeof(sa_family_t) + strlen(addr.sun_path+1) + 1;
-
-	rv = bind(s, (struct sockaddr *) &addr, addrlen);
-	if (rv < 0) {
-		log_error("bind error %d %d", rv, errno);
-		close(s);
-		return rv;
-	}
-
-	rv = listen(s, 5);
-	if (rv < 0) {
-		log_error("listen error %d %d", rv, errno);
-		close(s);
-		return rv;
-	}
-
-	log_debug("listen %d", s);
-
-	return s;
-}
-
 static int loop(void)
 {
 	int rv, i, f, poll_timeout = -1;
 
-	rv = listen_fd = setup_listen();
+	rv = listen_fd = client_listen();
 	if (rv < 0)
 		goto out;
 	client_add(listen_fd);
@@ -587,7 +509,7 @@
 {
 	prog_name = argv[0];
 	INIT_LIST_HEAD(&mounts);
-	INIT_LIST_HEAD(&withdrawn_mounts);
+	/* INIT_LIST_HEAD(&withdrawn_mounts); */
 
 	decode_arguments(argc, argv);
 

Modified: branches/cman-based/ocfs2_controld/member_cman.c
===================================================================
--- branches/cman-based/ocfs2_controld/member_cman.c	2007-07-25 07:26:25 UTC (rev 1373)
+++ branches/cman-based/ocfs2_controld/member_cman.c	2007-08-08 20:45:02 UTC (rev 1374)
@@ -32,7 +32,7 @@
 #include <arpa/inet.h>
 #include <libcman.h>
 
-//#include "ocfs2_controld.h"
+#include "o2cb.h"
 #include "ocfs2_controld_internal.h"
 
 int			our_nodeid;
@@ -40,20 +40,108 @@
 cman_cluster_t		cluster;
 static cman_handle_t	ch;
 extern struct list_head mounts;
+static cman_node_t      old_nodes[O2NM_MAX_NODES];
+static int              old_node_count;
+static cman_node_t      cman_nodes[O2NM_MAX_NODES];
+static int              cman_node_count;
 
 
-static void cman_callback(cman_handle_t h, void *private, int reason, int arg)
+static int is_member(cman_node_t *node_list, int count, int nodeid)
 {
-	if (reason == CMAN_REASON_TRY_SHUTDOWN) {
-		if (list_empty(&mounts))
-			cman_replyto_shutdown(ch, 1);
-		else {
-			log_debug("no to cman shutdown");
-			cman_replyto_shutdown(ch, 0);
+	int i;
+
+	for (i = 0; i < count; i++) {
+		if (node_list[i].cn_nodeid == nodeid)
+			return node_list[i].cn_member;
+	}
+	return 0;
+}
+
+static int is_old_member(int nodeid)
+{
+	return is_member(old_nodes, old_node_count, nodeid);
+}
+
+static int is_cman_member(int nodeid)
+{
+	return is_member(cman_nodes, cman_node_count, nodeid);
+}
+
+static cman_node_t *find_cman_node(int nodeid)
+{
+	int i;
+
+	for (i = 0; i < cman_node_count; i++) {
+		if (cman_nodes[i].cn_nodeid == nodeid)
+			return &cman_nodes[i];
+	}
+	return NULL;
+}
+
+char *nodeid2name(int nodeid)
+{
+	cman_node_t *cn;
+
+	cn = find_cman_node(nodeid);
+	if (!cn)
+		return NULL;
+	return cn->cn_name;
+}
+
+/* keep track of the nodes */
+static void statechange(void)
+{
+	int i, rv;
+
+	old_node_count = cman_node_count;
+	memcpy(&old_nodes, &cman_nodes, sizeof(old_nodes));
+
+	cman_node_count = 0;
+	memset(&cman_nodes, 0, sizeof(cman_nodes));
+	rv = cman_get_nodes(ch, O2NM_MAX_NODES, &cman_node_count,
+			    cman_nodes);
+	if (rv < 0) {
+		log_debug("cman_get_nodes error %d %d", rv, errno);
+		return;
+	}
+
+	for (i = 0; i < old_node_count; i++) {
+		if (old_nodes[i].cn_member &&
+		    !is_cman_member(old_nodes[i].cn_nodeid)) {
+
+			log_debug("cman: node %d removed",
+				   old_nodes[i].cn_nodeid);
 		}
 	}
+
+	for (i = 0; i < cman_node_count; i++) {
+		if (cman_nodes[i].cn_member &&
+		    !is_old_member(cman_nodes[i].cn_nodeid)) {
+
+			log_debug("cman: node %d added",
+				  cman_nodes[i].cn_nodeid);
+		}
+	}
 }
 
+static void cman_callback(cman_handle_t h, void *private, int reason, int arg)
+{
+	switch (reason) {
+		case CMAN_REASON_TRY_SHUTDOWN:
+			if (list_empty(&mounts))
+				cman_replyto_shutdown(ch, 1);
+			else {
+				log_debug("no to cman shutdown");
+				cman_replyto_shutdown(ch, 0);
+			}
+			break;
+
+		case CMAN_REASON_STATECHANGE:
+			statechange();
+			break;
+	}
+}
+
 void exit_cman(void)
 {
 	log_error("cluster is down, exiting");
@@ -108,6 +196,15 @@
 	our_nodeid = node.cn_nodeid;
 
 	fd = cman_get_fd(ch);
+
+	old_node_count = 0;
+	memset(&old_nodes, 0, sizeof(old_nodes));
+	cman_node_count = 0;
+	memset(&cman_nodes, 0, sizeof(cman_nodes));
+
+	/* Fill the node list */
+	statechange();
+
 	return fd;
 
  fail_stop:

Added: branches/cman-based/ocfs2_controld/ocfs2_controld.h
===================================================================
--- branches/cman-based/ocfs2_controld/ocfs2_controld.h	2007-07-25 07:26:25 UTC (rev 1373)
+++ branches/cman-based/ocfs2_controld/ocfs2_controld.h	2007-08-08 20:45:02 UTC (rev 1374)
@@ -0,0 +1,48 @@
+/* -*- mode: c; c-basic-offset: 8; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ */
+
+/******************************************************************************
+*******************************************************************************
+**
+**  Copyright (C) 2005 Red Hat, Inc.  All rights reserved.
+**  
+**  This copyrighted material is made available to anyone wishing to use,
+**  modify, copy, or redistribute it subject to the terms and conditions
+**  of the GNU General Public License v.2.
+**
+*******************************************************************************
+******************************************************************************/
+
+/*
+ * Copyright (C) 2007 Oracle.  All rights reserved.
+ *
+ *  This copyrighted material is made available to anyone wishing to use,
+ *  modify, copy, or redistribute it subject to the terms and conditions
+ *  of the GNU General Public License v.2.
+ */
+
+#ifndef __OCFS2_CONTROLD_H
+#define __OCFS2_CONTROLD_H
+
+/* Basic communication properties */
+#define OCFS2_CONTROLD_MAXLINE		256
+#define OCFS2_CONTROLD_MAXARGS		16
+#define OCFS2_CONTROLD_SOCK_PATH	"ocfs2_controld_sock"
+
+/* Client messages */
+typedef enum {
+	CM_MOUNT,
+	CM_MRESULT,
+	CM_UNMOUNT,
+	CM_STATUS,
+} client_message;
+
+int client_listen(void);
+int client_connect(void);
+int send_message(int fd, client_message message, ...);
+int receive_message(int fd, client_message *message, char **argv);
+int receive_message_full(int fd, client_message *message, char **argv,
+			 char **rest);
+
+#endif

Modified: branches/cman-based/ocfs2_controld/ocfs2_controld_internal.h
===================================================================
--- branches/cman-based/ocfs2_controld/ocfs2_controld_internal.h	2007-07-25 07:26:25 UTC (rev 1373)
+++ branches/cman-based/ocfs2_controld/ocfs2_controld_internal.h	2007-08-08 20:45:02 UTC (rev 1374)
@@ -22,8 +22,8 @@
  *  of the GNU General Public License v.2.
  */
 
-#ifndef __OCFS2_CONTROLD_H
-#define __OCFS2_CONTROLD_H
+#ifndef __OCFS2_CONTROLD_INTERNAL_H
+#define __OCFS2_CONTROLD_INTERNAL_H
 
 #include <unistd.h>
 #include <stdio.h>
@@ -39,13 +39,13 @@
 #include <asm/types.h>
 #include <sys/socket.h>
 #include <sys/poll.h>
-#include <sys/un.h>
 #include <sys/types.h>
 #include <sys/stat.h>
 #include <sys/wait.h>
 #include <sys/errno.h>
 #include <linux/netlink.h>
 
+#include <sys/un.h>
 #include "kernel-list.h"
 #include "libgroup.h"
 
@@ -76,6 +76,9 @@
 };
 
 extern char *prog_name;
+extern char *clustername;
+extern int our_nodeid;
+extern group_handle_t gh;
 extern int daemon_debug_opt;
 extern char daemon_debug_buf[256];
 extern char dump_buf[DUMP_SIZE];
@@ -94,18 +97,11 @@
 #define log_group(g, fmt, args...) \
 do { \
 	snprintf(daemon_debug_buf, 255, "%ld %s " fmt "\n", time(NULL), \
-		 (g)->name, ##args); \
+		 (g)->uuid, ##args); \
 	if (daemon_debug_opt) fprintf(stderr, "%s", daemon_debug_buf); \
 	daemon_dump_save(); \
 } while (0)
 
-#define log_plock(g, fmt, args...) \
-do { \
-	snprintf(daemon_debug_buf, 255, "%ld %s " fmt "\n", time(NULL), \
-		 (g)->name, ##args); \
-	if (plock_debug_opt) fprintf(stderr, "%s", daemon_debug_buf); \
-} while (0)
-
 #define log_error(fmt, args...) \
 do { \
 	log_debug(fmt, ##args); \
@@ -123,7 +119,7 @@
 
 struct mountpoint {
 	struct list_head	list;
-	char			dir[PATH_MAX+1];
+	char			mountpoint[PATH_MAX+1];
 	int			client;
 };
 
@@ -131,16 +127,14 @@
 	struct list_head	list;
 	uint32_t		id;
 	struct list_head	members;
-	struct list_head	members_gone;
 	int			memb_count;
-	struct list_head	resources; /* for plocks */
 	struct list_head	mountpoints;
 
-	char			name[MAXNAME+1];
-	char			table[MAXNAME+1];
+	char			uuid[MAXNAME+1];
+	char			cluster[MAXNAME+1];
 	char			type[5];
 	char			options[MAX_OPTIONS_LEN+1];
-	char			dev[PATH_MAX+1];
+	char			device[PATH_MAX+1];
 
 	int			last_stop;
 	int			last_start;
@@ -149,6 +143,7 @@
 	int			start_event_nr;
 	int			start_type;
 
+	int			error;
 	char			error_msg[128];
 	int			mount_client;
 	int			mount_client_fd;
@@ -156,7 +151,7 @@
 	int			mount_client_delay;
 	int                     group_leave_on_finish;
 	int			remount_client;
-	int			init;
+	int			state;
 	int			got_our_options;
 	int			got_our_journals;
 	int			delay_send_journals;
@@ -168,7 +163,6 @@
 	int			readonly;
 	int			rw;
 
-	struct list_head	saved_messages;
 	void			*start2_fn;
 };
 
@@ -200,6 +194,7 @@
 struct mg_member {
 	struct list_head	list;
 	int			nodeid;
+	char			name[NAME_MAX+1];
 
 	int			spectator;
 	int			readonly;
@@ -228,21 +223,31 @@
 
 int do_read(int fd, void *buf, size_t count);
 int do_write(int fd, void *buf, size_t count);
-struct mountgroup *find_mg(char *name);
+struct mountgroup *find_mg(const char *uuid);
 struct mountgroup *find_mg_id(uint32_t id);
+void do_stop(struct mountgroup *mg);
+void do_start(struct mountgroup *mg, int type, int member_count,
+	      int *nodeids);
+void do_finish(struct mountgroup *mg);
+void do_terminate(struct mountgroup *mg);
 
 int setup_cman(void);
 int process_cman(void);
+char *nodeid2name(int nodeid);
 int setup_groupd(void);
 int process_groupd(void);
 void exit_cman(void);
 
-int do_mount(int ci, char *dir, char *type, char *proto, char *table,
-	     char *options, char *dev, struct mountgroup **mg_ret);
-int do_unmount(int ci, char *dir, int mnterr);
+int do_mount(int ci, int fd, const char *fstype, const char *uuid,
+	     const char *cluster, const char *device,
+	     const char *mountpoint, struct mountgroup **mg_ret);
+int do_mount_result(struct mountgroup *mg, int ci, int another,
+		    const char *fstype, const char *uuid,
+		    const char *errcode, const char *mountpoint);
+int do_unmount(int ci, int fd, const char *fstype, const char *uuid,
+	       const char *mountpoint);
 int do_remount(int ci, char *dir, char *mode);
 void ping_kernel_mount(char *table);
-void got_mount_result(struct mountgroup *mg, int result, int ci, int another);
 
 int client_send(int ci, char *buf, int len);