[Ocfs2-tools-commits] khackel commits r411 - branches/dlm-glue/dlmtools

svn-commits at oss.oracle.com svn-commits at oss.oracle.com
Thu Nov 18 16:08:02 CST 2004


Author: khackel
Date: 2004-11-18 16:08:00 -0600 (Thu, 18 Nov 2004)
New Revision: 411

Added:
   branches/dlm-glue/dlmtools/clusterbo.c
   branches/dlm-glue/dlmtools/compat_libfs.h
   branches/dlm-glue/dlmtools/dlm_compat.h
   branches/dlm-glue/dlmtools/dlm_warning_hack.h
   branches/dlm-glue/dlmtools/dlmcommon.h
   branches/dlm-glue/dlmtools/dlmhb.h
   branches/dlm-glue/dlmtools/dlmmod.h
   branches/dlm-glue/dlmtools/dlmnet.h
   branches/dlm-glue/dlmtools/dlmnm.h
   branches/dlm-glue/dlmtools/dlmtcp.h
   branches/dlm-glue/dlmtools/dlmutil.h
   branches/dlm-glue/dlmtools/mount.ocfs2.c
Modified:
   branches/dlm-glue/dlmtools/Makefile
Log:
first checkin of the two main dlm tools.  the headers here will live in an external svn project at some point, not here.

Modified: branches/dlm-glue/dlmtools/Makefile
===================================================================
--- branches/dlm-glue/dlmtools/Makefile	2004-11-18 22:00:07 UTC (rev 410)
+++ branches/dlm-glue/dlmtools/Makefile	2004-11-18 22:08:00 UTC (rev 411)
@@ -15,7 +15,7 @@
 
 LIBOCFS2_LIBS = -L$(TOPDIR)/libocfs2 -locfs2
 
-SBIN_PROGRAMS = listuuid.ocfs2
+SBIN_PROGRAMS = listuuid.ocfs2 mount.ocfs2 clusterbo
 
 INCLUDES = -I$(TOPDIR)/libocfs2/include
 
@@ -27,4 +27,10 @@
 listuuid.ocfs2: $(OBJS)
 	$(LINK) $(LIBOCFS2_LIBS) $(COM_ERR_LIBS)
 
+mount.ocfs2: mount.ocfs2.o
+	$(LINK) $(LIBOCFS2_LIBS) $(COM_ERR_LIBS)
+
+clusterbo: clusterbo.o
+	$(LINK) $(LIBOCFS2_LIBS) $(COM_ERR_LIBS)
+
 include $(TOPDIR)/Postamble.make

Added: branches/dlm-glue/dlmtools/clusterbo.c
===================================================================
--- branches/dlm-glue/dlmtools/clusterbo.c	2004-11-18 22:00:07 UTC (rev 410)
+++ branches/dlm-glue/dlmtools/clusterbo.c	2004-11-18 22:08:00 UTC (rev 411)
@@ -0,0 +1,379 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <asm/types.h>
+#include <sys/stat.h>
+#include <sys/wait.h>
+#include <sys/mount.h>
+#include <fcntl.h>
+#include <string.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <ctype.h>
+#include <errno.h>
+#include <sys/ioctl.h>
+/* stand-in definitions for type names the headers included below use (dlmcommon.h, for one, needs kdev_t, u32 and u64) */
+#define __u8 unsigned char 
+#define u8 unsigned char 
+#define u16 unsigned short int
+#define u32 unsigned int       
+#define u64 unsigned long long
+#define atomic_t int
+#define spinlock_t unsigned long
+typedef unsigned short kdev_t;
+/* doubly-linked list node: holds only the next/prev pointers */
+typedef struct list_head {
+        struct list_head *next, *prev;
+} list_t;
+
+
+//#include "dlm_compat.h"
+#include "dlmutil.h"
+#include "dlmcommon.h"
+#include "dlmnm.h"
+
+
+/* expand an IPv4 address lvalue into its four bytes for a "%d.%d.%d.%d" format */
+#define NIPQUAD(addr) \
+        ((unsigned char *)&(addr))[0], \
+        ((unsigned char *)&(addr))[1], \
+        ((unsigned char *)&(addr))[2], \
+        ((unsigned char *)&(addr))[3]
+
+
+#define CLUSTER_FILE   "/proc/cluster/nm/.cluster"
+#define GROUP_FILE     "/proc/cluster/nm/.group"
+#define NODE_FILE      "/proc/cluster/nm/.node"
+
+#define CONF_FILE   "/etc/cluster.conf"
+
+/* are these right ? */
+#define MIN_PORT_NUM   1024
+#define MAX_PORT_NUM   65535
+
+#define  NET_IOC_MAGIC          'O'
+#define  NET_IOC_ACTIVATE       _IOR(NET_IOC_MAGIC, 1, net_ioc)
+#define  NET_IOC_GETSTATE       _IOR(NET_IOC_MAGIC, 2, net_ioc)
+
+/* argument block for the NET_IOC_* ioctls; activate_net() prints status after NET_IOC_ACTIVATE */
+typedef struct _net_ioc
+{
+	unsigned int status;
+} net_ioc;
+
+
+
+
+int activate_cluster(void);
+int add_node(nm_node_info *newnode);
+int set_cluster_name(char *cluster_name);
+int activate_net(void);
+int load_module(char *module, char *mountpoint, char *fstype);
+
+
+nm_node_info *nodes;
+int total_nodes = 0;
+/*
+ * Load the nm/hb/tcp modules, parse CONF_FILE, register the cluster name
+ * and every configured node, then activate the cluster and the network
+ * layer.  Every failure prints a message and exits with status 1.
+ */
+int main(int argc, char **argv)
+{
+	int ret, i;
+	FILE *conf;
+	char *cluster_name = NULL;
+
+	ret = load_module("onm", "/proc/cluster/nm", "nm");
+	if (ret) {
+		fprintf(stderr, "failed to load and/or mount nm: %d\n", ret);
+		exit(1);
+	}
+	ret = load_module("ohb", "/proc/cluster/heartbeat", "hb");
+	if (ret) {
+		fprintf(stderr, "failed to load and/or mount hb: %d\n", ret);
+		exit(1);
+	}
+	ret = load_module("otcp", NULL, NULL);
+	if (ret) {
+		fprintf(stderr, "failed to load tcp: %d\n", ret);
+		exit(1);
+	}
+
+	nodes = malloc(NM_MAX_NODES * sizeof(nm_node_info));
+	if (!nodes) {
+		fprintf(stderr, "failed to malloc node array\n");
+		exit(1);
+	}
+	memset(nodes, 0, NM_MAX_NODES * sizeof(nm_node_info));
+
+	conf = fopen(CONF_FILE, "r");
+	if (!conf) {
+		fprintf(stderr, "failed to open %s: %s\n", CONF_FILE, strerror(errno));
+		exit(1);
+	}
+
+	ret = fscanf(conf, "cluster_name=%64as\n", &cluster_name);
+	if (ret != 1) {
+		fprintf(stderr, "bad file format: expected cluster_name=XXX\n");
+		exit(1);
+	}
+	printf("found cluster named %s\n", cluster_name);
+
+	/* one "node_num,node_name,ipaddr,ipport" record per iteration */
+	while (1) {
+		int node_num, port;
+		char *node_name = NULL;
+		char *ip = NULL;
+		u32 real_ip;
+
+		ret = fscanf(conf, "%d,%64a[^,],%15a[0-9.],%d\n", &node_num, &node_name, &ip, &port);
+		if (ret == 0 || ret == -1) {
+			printf("done.  found %d nodes\n", total_nodes);
+			break;
+		}
+		if (ret != 4) {
+			fprintf(stderr, "bad file format: node_num,node_name,ipaddr,ipport\n");
+			exit(1);
+		}
+
+		if (node_num < 0 || node_num >= NM_MAX_NODES) {
+			fprintf(stderr, "bad node number: got %d, range is 0 - %d\n", node_num, NM_MAX_NODES-1);
+			exit(1);
+		}
+		if (nodes[node_num].node_name[0]) {
+			fprintf(stderr, "already have a node in slot %d: orig=%s, this=%s\n",
+				node_num, nodes[node_num].node_name, node_name);
+			exit(1);
+		}
+		if (port < MIN_PORT_NUM || port > MAX_PORT_NUM) {
+			fprintf(stderr, "bad port number: got %d, range is %d - %d\n", port, MIN_PORT_NUM, MAX_PORT_NUM);
+			exit(1);
+		}
+		if (!inet_aton(ip, (struct in_addr*)&real_ip)) {
+			fprintf(stderr, "bad ipv4 address: %s\n", ip);
+			exit(1);
+		}
+
+		total_nodes++;
+		nodes[node_num].node_num = node_num;
+		/* the scanf-allocated node_name may be shorter than NM_MAX_NAME_LEN, so stop at its terminator */
+		strncpy(nodes[node_num].node_name, node_name, NM_MAX_NAME_LEN);
+		nodes[node_num].node_name[NM_MAX_NAME_LEN]=0;
+		nodes[node_num].ifaces[0].ip_port = htons(port);
+		nodes[node_num].ifaces[0].addr_u.ip_addr4 = real_ip;
+		free(ip);
+		free(node_name);
+	}
+	fclose(conf);
+
+	set_cluster_name(cluster_name);
+	free(cluster_name);
+	/* node numbers may be sparse: scan every slot, not just the first total_nodes */
+	for (i = 0; i < NM_MAX_NODES; i++) {
+		if (nodes[i].node_name[0])
+			add_node(&nodes[i]);
+	}
+	printf("done.  activating cluster now...\n");
+	activate_cluster();
+	printf("done.  nm ready!\n");
+	activate_net();
+	printf("done.  net ready!\n");
+	free(nodes);
+	return 0;
+}
+/* Send an NM_OP_NAME_CLUSTER request carrying cluster_name to CLUSTER_FILE
+ * and print the reply.  Returns 0; every failure exits with status 1. */
+int set_cluster_name(char *cluster_name)
+{
+	int fd, ret;
+	nm_op *op;
+	char *buf;
+
+	buf = calloc(1, 4096);	/* zero-filled request/reply buffer */
+	if (!buf) {
+		fprintf(stderr, "failed to malloc nm op buffer\n");
+		exit(1);
+	}
+	op = (nm_op *)buf;
+	op->magic = NM_OP_MAGIC;
+	printf("setting cluster name...\n");
+	fd = open(CLUSTER_FILE, O_RDWR);
+	if (fd == -1) {
+		printf("failed to open %s\n", CLUSTER_FILE);
+		exit(1);
+	}
+	op->opcode = NM_OP_NAME_CLUSTER;
+	strcpy(&op->arg_u.name[0], cluster_name);
+	ret = write(fd, op, sizeof(nm_op));
+	printf("write called returned %d\n", ret);
+	if (ret < 0) {
+		printf("error is: %s\n", strerror(errno));
+		exit(1);
+	}
+	memset(buf, 0, 4096);
+	ret = read(fd, buf, 4096);
+	printf("read returned %d\n", ret);
+	if (ret < 0)
+		exit(1);
+	printf("<<<<%.*s>>>>\n", ret, buf);	/* precision: at most ret bytes, a full buffer has no terminator */
+	close(fd);
+	free(buf);
+	return 0;
+}
+/* Send an NM_OP_ADD_CLUSTER_NODE request holding a copy of *newnode to
+ * CLUSTER_FILE and print the reply.  Returns 0; every failure exits with
+ * status 1. */
+int add_node(nm_node_info *newnode)
+{
+	int fd, ret;
+	nm_op *op;
+	char *buf;
+	nm_node_info *node;
+
+	buf = calloc(1, 4096);	/* zero-filled request/reply buffer */
+	if (!buf) {
+		fprintf(stderr, "failed to malloc nm op buffer\n");
+		exit(1);
+	}
+	op = (nm_op *)buf;
+	op->magic = NM_OP_MAGIC;
+	printf("adding cluster node....\n");
+	fd = open(CLUSTER_FILE, O_RDWR);
+	if (fd == -1) {
+		printf("failed to open %s\n", CLUSTER_FILE);
+		exit(1);
+	}
+	op->opcode = NM_OP_ADD_CLUSTER_NODE;
+	node = &(op->arg_u.node);
+	memcpy(node, newnode, sizeof(nm_node_info));
+	printf("passing port=%u, vers=%u, addr=%d.%d.%d.%d\n",
+	       node->ifaces[0].ip_port,
+	       node->ifaces[0].ip_version,
+	       NIPQUAD(node->ifaces[0].addr_u.ip_addr4));
+
+	ret = write(fd, op, sizeof(nm_op));
+	printf("write called returned %d\n", ret);
+	if (ret < 0) {
+		printf("error is: %s\n", strerror(errno));
+		exit(1);
+	}
+	memset(buf, 0, 4096);
+	ret = read(fd, buf, 4096);
+	printf("read returned %d\n", ret);
+	if (ret < 0)
+		exit(1);
+	printf("<<<<%.*s>>>>\n", ret, buf);	/* precision: at most ret bytes, a full buffer has no terminator */
+	close(fd);
+	free(buf);
+	return 0;
+}
+/* Send an NM_OP_CREATE_CLUSTER request to CLUSTER_FILE and print the
+ * reply.  Returns 0; every failure exits with status 1. */
+int activate_cluster(void)
+{
+	int fd, ret;
+	nm_op *op;
+	char *buf;
+
+	buf = calloc(1, 4096);	/* zero-filled request/reply buffer */
+	if (!buf) {
+		fprintf(stderr, "failed to malloc nm op buffer\n");
+		exit(1);
+	}
+	op = (nm_op *)buf;
+	op->magic = NM_OP_MAGIC;
+	printf("activating cluster....\n");
+	fd = open(CLUSTER_FILE, O_RDWR);
+	if (fd == -1) {
+		printf("failed to open %s\n", CLUSTER_FILE);
+		exit(1);
+	}
+	op->opcode = NM_OP_CREATE_CLUSTER;
+
+	ret = write(fd, op, sizeof(nm_op));
+	printf("write called returned %d\n", ret);
+	if (ret < 0) {
+		printf("error is: %s\n", strerror(errno));
+		exit(1);
+	}
+	memset(buf, 0, 4096);
+	ret = read(fd, buf, 4096);
+	printf("read returned %d\n", ret);
+	if (ret < 0)
+		exit(1);
+	printf("<<<<%.*s>>>>\n", ret, buf);	/* precision: at most ret bytes, a full buffer has no terminator */
+	close(fd);
+	free(buf);
+	return 0;
+}
+/* Issue NET_IOC_ACTIVATE on /proc/cluster/net and print the status field
+ * of the request afterwards.  Returns 0; every failure exits with status 1. */
+int activate_net(void)
+{
+	net_ioc req;
+	int netfd, rc;
+
+	memset(&req, 0, sizeof(req));
+	netfd = open("/proc/cluster/net", O_RDONLY);
+	if (netfd == -1) {
+		printf("eeek.  failed to open\n");
+		exit(1);
+	}
+	rc = ioctl(netfd, NET_IOC_ACTIVATE, &req);
+	if (rc == -1) {
+		printf("eeek.  ioctl failed\n");
+		close(netfd);
+		exit(1);
+	}
+	close(netfd);
+	printf("ioctl returned: %u\n", req.status);
+	return 0;
+}
+/* Run "/sbin/modprobe module" and, when mountpoint is non-NULL, mount an
+ * fstype filesystem there.  Returns 0 on success, otherwise a negative
+ * errno value or the negated modprobe exit status. */
+int load_module(char *module, char *mountpoint, char *fstype)
+{
+	int ret, pid, status;
+	struct stat st;
+
+	pid = fork();
+	switch (pid) {
+		case 0:
+			ret = execl("/sbin/modprobe", "/sbin/modprobe", module, (char *)NULL);
+			fprintf(stderr, "eeek! exec returned %d: %s\n", ret, strerror(errno));
+			_exit(1);	/* child: leave without flushing stdio state copied from the parent */
+		case -1:
+			ret = -errno;	/* capture before fprintf can change errno */
+			fprintf(stderr, "fork failed: %s\n", strerror(-ret));
+			return ret;
+		default:
+			if (waitpid(pid, &status, 0) == -1) {
+				ret = -errno;
+				fprintf(stderr, "waitpid failed: %s\n", strerror(-ret));
+				return ret;
+			}
+			if (!WIFEXITED(status)) {
+				fprintf(stderr, "modprobe has not exited!\n");
+				return -EINVAL;
+			}
+			if (WEXITSTATUS(status) != 0) {
+				fprintf(stderr, "modprobe returned %d!\n", WEXITSTATUS(status));
+				return -WEXITSTATUS(status);
+			}
+			break;
+	}
+	if (!mountpoint)
+		return 0;
+
+	if (stat("/proc/cluster", &st) != 0 ||
+	    stat(mountpoint, &st) != 0) {
+		fprintf(stderr, "mountpoint %s does not exist!\n", mountpoint);
+		return -EINVAL;
+	}
+
+	return mount("none", mountpoint, fstype, 0, "") ? -errno : 0;
+}

Added: branches/dlm-glue/dlmtools/compat_libfs.h
===================================================================
--- branches/dlm-glue/dlmtools/compat_libfs.h	2004-11-18 22:00:07 UTC (rev 410)
+++ branches/dlm-glue/dlmtools/compat_libfs.h	2004-11-18 22:08:00 UTC (rev 411)
@@ -0,0 +1,35 @@
+#ifndef COMPAT_LIBFS_H
+#define COMPAT_LIBFS_H
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
+#define TA_GENERIC_SB_MEMBER(sb)  ((sb)->s_fs_info)
+#else
+#define TA_GENERIC_SB_MEMBER(sb)  ((sb)->u.generic_sbp)
+#endif
+
+
+/* an argresp is stored in an allocated page and holds the 
+ * size of the argument or response, along with its content
+ */
+struct argresp {
+	ssize_t size;
+	char data[0];
+};
+
+typedef ssize_t (TA_write_op)(struct file *, char *, size_t);
+typedef struct _TA_write_ops
+{
+	int num_ops;
+	TA_write_op *write_op[0];
+} TA_write_ops;
+
+struct tree_descr 
+{ 
+	char *name; 
+	struct file_operations *ops; 
+	int mode; 
+};
+
+int simple_fill_super(struct super_block *s, int magic, struct tree_descr *files);
+struct dentry * simple_find_child(struct dentry *dentry, struct qstr *name);
+#endif  /* COMPAT_LIBFS_H */

Added: branches/dlm-glue/dlmtools/dlm_compat.h
===================================================================
--- branches/dlm-glue/dlmtools/dlm_compat.h	2004-11-18 22:00:07 UTC (rev 410)
+++ branches/dlm-glue/dlmtools/dlm_compat.h	2004-11-18 22:08:00 UTC (rev 411)
@@ -0,0 +1,120 @@
+/* -*- mode: c; c-basic-offset: 8; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
+ * dlm_compat.h
+ *
+ * Compatibility stuff for 2.4
+ *
+ * Copyright (C) 2004 Oracle.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License as published by the Free Software Foundation, version
+ * 2 of the License.
+ * 
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ *
+ * Authors: Kurt Hackel, Mark Fasheh, Sunil Mushran, Wim Coekaerts,
+ *	    Manish Singh, Neeraj Goyal, Suchit Kaura
+ */
+
+#ifndef DLM_COMPAT_H
+#define DLM_COMPAT_H
+
+#include <linux/version.h>
+#include <linux/types.h>
+#include <linux/kdev_t.h>
+#include <linux/sched.h>
+#include <linux/compiler.h>
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)
+# include <linux/locks.h>
+#else
+# include <linux/buffer_head.h>
+#endif
+
+
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)
+
+#ifdef __ia64__
+extern inline void prefetch(const void *x);
+extern inline void prefetchw(const void *x);
+#else
+static inline void prefetch(const void *x);
+static inline void prefetchw(const void *x);
+#endif
+extern inline int generic_fls(int x);
+extern inline int get_bitmask_order(unsigned int count);
+/* XXX Hack to avoid warning */
+struct mem_dqinfo;
+extern inline void mark_info_dirty(struct mem_dqinfo *info);
+
+
+
+
+#define flush_scheduled_work	flush_scheduled_tasks
+#define work_struct		tq_struct
+#define INIT_WORK(w, f, d)	INIT_TQUEUE(w, f, d)
+#define schedule_work(w)	schedule_task(w)
+
+#ifdef HAVE_NPTL
+static inline void dequeue_signal_lock(struct task_struct *task,
+				       sigset_t *blocked, siginfo_t *info)
+{
+	spin_lock_irq(&task->sighand->siglock);
+	dequeue_signal(blocked, info);
+	spin_unlock_irq(&task->sighand->siglock);
+}
+#else
+static inline void dequeue_signal_lock(struct task_struct *task,
+				       sigset_t *blocked, siginfo_t *info)
+{
+	spin_lock_irq(&task->sigmask_lock);
+	dequeue_signal(blocked, info);
+	spin_unlock_irq(&task->sigmask_lock);
+}
+#endif
+#define kstatfs statfs
+
+
+
+/*
+ * Copied right out of the 2.6.2 kernel's buffer_head.h:
+ * macro tricks to expand the set_buffer_foo(), clear_buffer_foo()
+ * and buffer_foo() functions.
+ */
+#define BUFFER_FNS(bit, name)						\
+static inline void set_buffer_##name(struct buffer_head *bh)		\
+{									\
+	set_bit(BH_##bit, &(bh)->b_state);				\
+}									\
+static inline void clear_buffer_##name(struct buffer_head *bh)		\
+{									\
+	clear_bit(BH_##bit, &(bh)->b_state);				\
+}									\
+static inline int buffer_##name(struct buffer_head *bh)			\
+{									\
+	return test_bit(BH_##bit, &(bh)->b_state);			\
+}
+
+#undef buffer_uptodate
+#undef buffer_dirty
+BUFFER_FNS(Uptodate, uptodate)
+BUFFER_FNS(Dirty, dirty)
+
+#define clear_buffer_dirty  mark_buffer_clean
+
+#endif  /* LINUX_VERSION_CODE < 2.6 */
+
+
+#endif  /* DLM_COMPAT_H */
+

Added: branches/dlm-glue/dlmtools/dlm_warning_hack.h
===================================================================
--- branches/dlm-glue/dlmtools/dlm_warning_hack.h	2004-11-18 22:00:07 UTC (rev 410)
+++ branches/dlm-glue/dlmtools/dlm_warning_hack.h	2004-11-18 22:08:00 UTC (rev 411)
@@ -0,0 +1,41 @@
+/* -*- mode: c; c-basic-offset: 8; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
+ * dlm_warning_hack.h
+ *
+ * just to get rid of stupid warnings
+ *
+ * Copyright (C) 2002, 2004 Oracle.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ * 
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ *
+ * Authors: Kurt Hackel, Mark Fasheh, Sunil Mushran, Wim Coekaerts,
+ *	    Manish Singh, Neeraj Goyal, Suchit Kaura
+ */
+
+#ifndef DLM_WARNING_HACK_H
+#define DLM_WARNING_HACK_H
+
+struct mem_dqinfo;
+struct request;
+
+extern __inline__ int generic_fls(int x);
+extern __inline__ int get_bitmask_order(unsigned int count);
+extern inline void mark_info_dirty(struct mem_dqinfo *info);
+extern inline int rq_data_dir(struct request *rq);
+	
+
+#endif /* DLM_WARNING_HACK_H */

Added: branches/dlm-glue/dlmtools/dlmcommon.h
===================================================================
--- branches/dlm-glue/dlmtools/dlmcommon.h	2004-11-18 22:00:07 UTC (rev 410)
+++ branches/dlm-glue/dlmtools/dlmcommon.h	2004-11-18 22:08:00 UTC (rev 411)
@@ -0,0 +1,53 @@
+/* -*- mode: c; c-basic-offset: 8; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
+ * dlmcommon.h
+ *
+ * Common stuff
+ *
+ * Copyright (C) 2002, 2004 Oracle.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ * 
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ *
+ * Authors: Kurt Hackel, Mark Fasheh, Sunil Mushran, Wim Coekaerts,
+ *	    Manish Singh, Neeraj Goyal, Suchit Kaura
+ */
+
+#ifndef DLMCOMMON_H
+#define DLMCOMMON_H
+
+#define DLM_ASSERT(x)       ({  if (!(x)) { printk("assert failed! %s:%d\n", __FILE__, __LINE__); BUG(); } })
+
+typedef struct _nm_ctxt nm_ctxt;
+typedef struct _dlm_ctxt dlm_ctxt;
+typedef struct _heartbeat_ctxt heartbeat_ctxt;
+
+#define CLUSTER_DISK_UUID_LEN      32      // 16 byte binary == 32 char hex string
+
+typedef struct _cluster_disk
+{
+	// uuid of disk
+	char uuid[CLUSTER_DISK_UUID_LEN+1];
+	// all the rest are for heartbeat
+	kdev_t dev;
+	u32 blocksize_bits;
+	u32 num_blocks;
+	u64 start_block;
+	util_rarray slots;
+} cluster_disk;
+
+
+#endif /* DLMCOMMON_H */

Added: branches/dlm-glue/dlmtools/dlmhb.h
===================================================================
--- branches/dlm-glue/dlmtools/dlmhb.h	2004-11-18 22:00:07 UTC (rev 410)
+++ branches/dlm-glue/dlmtools/dlmhb.h	2004-11-18 22:08:00 UTC (rev 411)
@@ -0,0 +1,130 @@
+/* -*- mode: c; c-basic-offset: 8; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
+ * dlmhb.h
+ *
+ * Function prototypes
+ *
+ * Copyright (C) 2002, 2004 Oracle.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ * 
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ *
+ * Authors: Kurt Hackel, Mark Fasheh, Sunil Mushran, Wim Coekaerts,
+ *	    Manish Singh, Neeraj Goyal, Suchit Kaura
+ */
+
+#ifndef DLMHB_H
+#define DLMHB_H
+
+
+enum {
+	HB_NODE_STATE_INIT = 0,
+	HB_NODE_STATE_DOWN,
+	HB_NODE_STATE_UP
+};
+
+struct _heartbeat_ctxt
+{
+	int dummy;
+};
+
+typedef struct _hb_disk_slot
+{
+	struct inode *inode;
+	struct buffer_head *bh;
+	struct list_head list;
+	unsigned long last_time;
+	u16 margin;
+	u16 state;
+} hb_disk_slot;
+
+
+
+#define HB_THREAD_MS                  2000   // every 2 seconds
+
+
+#define HB_OP_MAGIC      0xf00d
+enum {
+	HB_OP_START_DISK_HEARTBEAT=371,
+	HB_OP_GET_NODE_MAP
+};
+
+typedef struct _hb_op
+{
+	u16 magic;
+	u16 opcode;
+	unsigned int fd;
+	char disk_uuid[CLUSTER_DISK_UUID_LEN+1];
+	u16 group_num;
+	u32 bits;
+	u32 blocks;
+	u64 start;
+} hb_op;
+
+enum {
+	HB_TYPE_DISK = 0,
+	HB_TYPE_NET
+};
+
+
+/* callback stuff */
+
+enum {
+	HB_NODE_DOWN_CB = 0,
+	HB_NODE_UP_CB,
+	HB_NODE_RESPONDED_CB,    // this one is very chatty
+	HB_NUM_CB
+};
+
+typedef void (hb_cb_func)(void *, void *, int, void *);
+
+typedef struct _hb_callback_func
+{
+	struct list_head list;
+	hb_cb_func *func;
+	void *data;
+	int priority;
+} hb_callback_func;
+
+
+enum {
+	HB_Root = 1,
+	HB_Disk,
+	HB_WriteOpArraySize
+};
+
+typedef struct _hb_disk_heartbeat_block
+{
+	u64 time;
+} hb_disk_heartbeat_block;
+
+
+// number of initial allowed misses 
+#define HB_INITIAL_DISK_MARGIN     60
+#define HB_INITIAL_NET_MARGIN      60
+
+// number of allowed misses in steady state
+#define HB_DISK_MARGIN             30
+#define HB_NET_MARGIN              30
+
+
+int hb_unregister_callback(int type, void (*func)(void *, void *, int, void *), void *data);
+int hb_register_callback(int type, void (*func)(void *, void *, int, void *), void *data, int priority);
+int hb_fill_node_map(struct inode *group, void *map, int size);
+
+
+
+#endif /* DLMHB_H */

Added: branches/dlm-glue/dlmtools/dlmmod.h
===================================================================
--- branches/dlm-glue/dlmtools/dlmmod.h	2004-11-18 22:00:07 UTC (rev 410)
+++ branches/dlm-glue/dlmtools/dlmmod.h	2004-11-18 22:08:00 UTC (rev 411)
@@ -0,0 +1,418 @@
+/* -*- mode: c; c-basic-offset: 8; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
+ * dlmmod.h
+ *
+ * Function prototypes
+ *
+ * Copyright (C) 2002, 2004 Oracle.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ * 
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ *
+ * Authors: Kurt Hackel, Mark Fasheh, Sunil Mushran, Wim Coekaerts,
+ *	    Manish Singh, Neeraj Goyal, Suchit Kaura
+ */
+
+#ifndef DLMMOD_H
+#define DLMMOD_H
+
+#define DLM_HB_NODE_DOWN_PRI     (0xf000000)
+#define DLM_HB_NODE_UP_PRI       (0x8000000)  
+
+#define DLM_LVB_LEN  64
+#define DLM_LOCKID_NAME_MAX    32
+
+#define DLM_DOMAIN_NAME_MAX_LEN    255
+#define DLM_LOCK_RES_OWNER_UNKNOWN     NM_MAX_NODES
+#define DLM_THREAD_SHUFFLE_INTERVAL    5     // flush everything every 5 passes
+#define DLM_THREAD_MS                  200   // flush at least every 200 ms
+
+#define DLM_HASH_BITS     7
+#define DLM_HASH_SIZE     (1 << DLM_HASH_BITS)
+#define DLM_HASH_MASK     (DLM_HASH_SIZE - 1)
+
+typedef enum _dlm_ast_type {
+	DLM_AST = 0,
+	DLM_BAST,
+	DLM_ASTUNLOCK
+} dlm_ast_type;
+
+
+#define LKM_IVMODE      (-1)            /* invalid mode */
+#define LKM_NLMODE      0               /* null lock */
+#define LKM_CRMODE      1               /* concurrent read */    /* unsupported */
+#define LKM_CWMODE      2               /* concurrent write */    /* unsupported */
+#define LKM_PRMODE      3               /* protected read */
+#define LKM_PWMODE      4               /* protected write */    /* unsupported */
+#define LKM_EXMODE      5               /* exclusive */
+#define LKM_MAXMODE     5
+#define LKM_MODEMASK    0xff
+
+
+
+#define LKM_ORPHAN      0x10            /* this lock is orphanable */    /* unsupported */
+#define LKM_PARENTABLE  0x20            /* this lock was orphaned */    /* unsupported */
+#define LKM_BLOCK       0x40            /* blocking lock request */    /* unsupported */
+#define LKM_LOCAL       0x80            /* local lock request */    /* unsupported */
+#define LKM_VALBLK      0x100           /* lock value block request */
+#define LKM_NOQUEUE     0x200           /* non blocking request */    /* unsupported */
+#define LKM_CONVERT     0x400           /* conversion request */
+#define LKM_NODLCKWT    0x800           /* this lock wont deadlock */    /* unsupported */
+#define LKM_UNLOCK      0x1000          /* deallocate this lock */
+#define LKM_CANCEL      0x2000          /* cancel conversion request */
+#define LKM_DEQALL      0x4000          /* remove all locks held by proc */    /* unsupported */
+#define LKM_INVVALBLK   0x8000          /* invalidate lock value block */
+#define LKM_SYNCSTS     0x10000         /* return synchronous status if poss */    /* unsupported */
+#define LKM_TIMEOUT     0x20000         /* lock request contains timeout */    /* unsupported */
+#define LKM_SNGLDLCK    0x40000         /* request can self-deadlock */    /* unsupported */
+#define LKM_FINDLOCAL   0x80000         /* find local lock request */    /* unsupported */
+#define LKM_PROC_OWNED  0x100000        /* owned by process, not group */    /* unsupported */
+#define LKM_XID         0x200000        /* use transaction id for deadlock */    /* unsupported */
+#define LKM_XID_CONFLICT 0x400000       /* do not allow lock inheritance */    /* unsupported */
+#define LKM_FORCE       0x800000        /* force unlock flag */
+#define LKM_REVVALBLK   0x1000000       /* temporary solution: re-validate lock value block */    /* unsupported */
+
+typedef enum _dlm_status {
+	DLM_NORMAL,               /* request in progress */
+	DLM_GRANTED,              /* request granted */
+	DLM_DENIED,               /* request denied */
+	DLM_DENIED_NOLOCKS,       /* request denied, out of system resources */
+	DLM_WORKING,              /* async request in progress */
+	DLM_BLOCKED,              /* lock request blocked */
+	DLM_BLOCKED_ORPHAN,       /* lock request blocked by an orphan lock */
+	DLM_DENIED_GRACE_PERIOD,  /* topological change in progress */
+	DLM_SYSERR,               /* system error */
+	DLM_NOSUPPORT,            /* unsupported */
+	DLM_CANCELGRANT,          /* can't cancel convert: already granted */
+	DLM_IVLOCKID,             /* bad lockid */
+	DLM_SYNC,                 /* synchronous request granted */
+	DLM_BADTYPE,              /* bad resource type */
+	DLM_BADRESOURCE,          /* bad resource handle */
+	DLM_MAXHANDLES,           /* no more resource handles */
+	DLM_NOCLINFO,             /* can't contact cluster manager */
+	DLM_NOLOCKMGR,            /* can't contact lock manager */
+	DLM_NOPURGED,             /* can't contact purge daemon */
+	DLM_BADARGS,              /* bad api args */
+	DLM_VOID,                 /* no status */
+	DLM_NOTQUEUED,            /* NOQUEUE was specified and request failed */
+	DLM_IVBUFLEN,             /* invalid resource name length */
+	DLM_CVTUNGRANT,           /* attempted to convert ungranted lock */
+	DLM_BADPARAM,             /* invalid lock mode specified */
+	DLM_VALNOTVALID,          /* value block has been invalidated */
+	DLM_REJECTED,             /* request rejected, unrecognized client */
+	DLM_ABORT,                /* blocked lock request cancelled */
+	DLM_CANCEL,               /* conversion request cancelled */
+	DLM_IVRESHANDLE,          /* invalid resource handle */
+	DLM_DEADLOCK,             /* deadlock recovery refused this request */
+	DLM_DENIED_NOASTS,        /* failed to allocate AST */
+	DLM_FORWARD,              /* request must wait for primary's response */
+	DLM_TIMEOUT,              /* timeout value for lock has expired */
+	DLM_IVGROUPID,            /* invalid group specification */
+	DLM_VERS_CONFLICT,        /* version conflicts prevent request handling */
+	DLM_BAD_DEVICE_PATH,      /* Locks device does not exist or path wrong */
+	DLM_NO_DEVICE_PERMISSION, /* Client has insufficient permissions for device */
+	DLM_NO_CONTROL_DEVICE,    /* Cannot set options on opened device */
+	DLM_MAXSTATS,             /* upper limit for return code validation */
+	
+	DLM_RECOVERING            /* our lame addition to allow caller to fail a lock 
+				     request if it is being recovered */
+} dlm_status;
+
+
+
+typedef struct _dlm_recovery_ctxt
+{
+	struct list_head resources;
+	struct list_head received;   // list of dlm_reco_lock_infos received from other nodes during recovery
+	u16 new_master;
+	u16 dead_node;
+	u16 sending_node;
+	u32 next_seq;
+	util_thread_info thread;
+} dlm_recovery_ctxt;
+
+
+struct _dlm_ctxt
+{
+	struct list_head list;
+	struct list_head *resources;
+	struct list_head dirty_list;
+	spinlock_t spinlock;
+	struct rw_semaphore recovery_sem;
+	char *name;
+	char *net_buf;
+	util_thread_info thread;
+	struct inode *group;
+	u32 key;
+	u16 group_index;
+	u32 node_map[8];
+	u32 recovery_map[8];
+	dlm_recovery_ctxt reco;
+};
+
+#define DLM_LOCK_RES_UNINITED             0x00000001
+#define DLM_LOCK_RES_RECOVERING           0x00000002
+#define DLM_LOCK_RES_READY                0x00000004
+#define DLM_LOCK_RES_DIRTY                0x00000008
+#define DLM_LOCK_RES_IN_PROGRESS          0x00000010 
+
+typedef struct _dlm_lock_resource
+{
+	struct list_head list;
+	struct list_head granted;
+	struct list_head converting; 
+	struct list_head blocked;
+	struct list_head dirty;
+	struct list_head recovering; // dlm_recovery_ctxt.resources list
+	spinlock_t spinlock;
+	wait_queue_head_t wq;
+	u16 owner;              // node which owns the lock resource, or unknown
+	u16 state;
+	struct qstr lockname;
+	char lvb[DLM_LVB_LEN];
+} dlm_lock_resource;
+
+typedef void (dlm_astlockfunc_t)(void *);
+typedef void (dlm_bastlockfunc_t)(void *, int);
+typedef void (dlm_astunlockfunc_t)(void *, dlm_status);
+
+typedef struct _dlm_lock
+{
+	struct list_head list;
+	struct list_head ast_list;
+	dlm_lock_resource *lockres;
+	spinlock_t spinlock;
+
+	s8 type;
+	s8 convert_type;
+	s8 highest_blocked;
+	s8 reserved1;
+	u16 node;
+	u16 reserved2;
+
+	dlm_astlockfunc_t *ast;     // ast and bast must be callable while holding a spinlock!
+	dlm_bastlockfunc_t *bast;
+	void *astdata;
+	u64 cookie;
+} dlm_lock;
+
+
+typedef struct _dlm_lockstatus {
+	dlm_status status;
+	dlm_lock *lockid;
+	char lvb[DLM_LVB_LEN];
+} dlm_lockstatus;
+
+enum {
+	DLM_MLE_BLOCK,
+	DLM_MLE_MASTER
+};
+
+typedef struct _dlm_lock_name
+{
+	u8 len;
+	u8 name[0];   // [DLM_LOCKID_NAME_MAX]
+} dlm_lock_name;
+
+/* good god this needs to be trimmed down */
+typedef struct _dlm_master_list_entry
+{
+	struct list_head list;
+	dlm_ctxt *dlm;
+	spinlock_t spinlock;
+	wait_queue_head_t wq;
+	atomic_t woken;
+	atomic_t refcnt;
+	u32 maybe_map[8];
+	u32 vote_map[8];
+	u32 response_map[8];
+	u32 node_map[8];
+	u16 master;
+	u8 error;
+	u8 type;    // BLOCK or MASTER
+	union {
+		dlm_lock_resource *res;
+		dlm_lock_name name;
+	} u;
+} dlm_master_list_entry;
+
+void dlm_put_mle(dlm_master_list_entry *mle);
+static inline void dlm_get_mle(dlm_master_list_entry *mle)
+{
+	atomic_inc(&mle->refcnt);
+}
+
+
+#define DLM_MASTER_REQUEST_MSG  	500
+#define DLM_MASTER_REQUEST_RESP_MSG	501
+#define DLM_ASSERT_MASTER_MSG		502
+#define DLM_CREATE_LOCK_MSG		503
+#define DLM_CONVERT_LOCK_MSG		504
+#define DLM_PROXY_AST_MSG		505
+
+
+enum {
+	DLM_MASTER_RESP_NO,
+	DLM_MASTER_RESP_YES,
+	DLM_MASTER_RESP_MAYBE,
+	DLM_MASTER_RESP_ERROR
+};
+
+typedef struct _dlm_master_request
+{
+	u16 node_idx;
+	u8 namelen;
+	u8 name[NM_MAX_NAME_LEN];
+} dlm_master_request;
+
+typedef struct _dlm_master_request_resp
+{
+	u16 node_idx;
+	u8 response;
+	u8 namelen;
+	u8 name[NM_MAX_NAME_LEN];
+} dlm_master_request_resp;
+
+typedef struct _dlm_assert_master
+{
+	u16 node_idx;
+	u8 namelen;
+	u8 name[NM_MAX_NAME_LEN];
+} dlm_assert_master;
+
+
+
+
+
+void dlm_shuffle_lists(dlm_ctxt *dlm, dlm_lock_resource *res);
+void dlm_thread_run_lock_resources(dlm_ctxt *dlm);
+int dlm_thread(void *data);
+int dlm_launch_thread(dlm_ctxt *dlm);
+void dlm_complete_thread(dlm_ctxt *dlm);
+
+dlm_status dlmlock(dlm_ctxt *dlm, int mode, dlm_lockstatus *lksb, int flags, char *name, 
+		   dlm_astlockfunc_t *ast, void *data, dlm_bastlockfunc_t *bast);
+		   
+
+dlm_status do_dlmlock(dlm_ctxt *dlm, dlm_lock_resource *res, dlm_lockstatus *lksb,
+		      int options, int type, dlm_astlockfunc_t *ast, 
+		      dlm_bastlockfunc_t *bast, void *data);
+dlm_status dlmlock_local(dlm_ctxt *dlm, dlm_lock_resource *res, dlm_lock *lock, int options);
+dlm_status dlmlock_remote(dlm_ctxt *dlm, dlm_lock_resource *res, dlm_lock *lock, int options);
+
+dlm_status do_dlmconvert(dlm_ctxt *dlm, dlm_lock_resource *res, dlm_lock *lock, int options, int type);
+dlm_status dlmconvert_local(dlm_ctxt *dlm, dlm_lock_resource *res, dlm_lock *lock, int options, int type);
+dlm_status dlmconvert_remote(dlm_ctxt *dlm, dlm_lock_resource *res, dlm_lock *lock, int options, int type);
+
+dlm_status dlmunlock(dlm_ctxt *dlm, dlm_lockstatus *lksb, int flags, dlm_astunlockfunc_t *unlockast, void *data);
+
+dlm_ctxt * dlm_register_domain(char *domain, char *group_name, u32 key);
+void dlm_unregister_domain(dlm_ctxt *dlm);
+dlm_lock_resource *dlm_get_lock_resource(dlm_ctxt *dlm, struct qstr *lockname);
+int dlm_lock_owner_broadcast(dlm_ctxt *dlm, dlm_lock_resource *res);
+int dlm_refresh_lock_resource(dlm_ctxt *dlm, dlm_lock_resource *res);
+int dlm_do_ast(dlm_ctxt *dlm, dlm_lock_resource *res, dlm_lock *lock);
+int dlm_do_bast(dlm_ctxt *dlm, dlm_lock_resource *res, dlm_lock *lock, int blocked_type);
+u16 dlm_nm_this_node(dlm_ctxt *dlm);
+void dlm_kick_thread(dlm_ctxt *dlm, dlm_lock_resource *res);
+
+int dlm_nm_init(dlm_ctxt *dlm);
+int dlm_heartbeat_init(dlm_ctxt *dlm);
+
+dlm_lock_resource * dlm_lookup_lock(dlm_ctxt *dlm, struct qstr *lockname);
+dlm_ctxt * dlm_lookup_domain(char *domain);
+
+void dlm_hb_node_down_cb(void *ptr1, void *ptr2, int idx, void *data);
+void dlm_hb_node_up_cb(void *ptr1, void *ptr2, int idx, void *data);
+int dlm_hb_node_dead(dlm_ctxt *dlm, int node);
+int dlm_hb_node_up(dlm_ctxt *dlm, int node);
+int __dlm_hb_node_dead(dlm_ctxt *dlm, int node);
+int __dlm_hb_node_up(dlm_ctxt *dlm, int node);
+
+int dlm_lock_owner_broadcast(dlm_ctxt *dlm, dlm_lock_resource *res);
+dlm_lock_resource * dlm_get_lock_resource(dlm_ctxt *dlm, struct qstr *lockname);
+int dlm_master_request_handler(net_msg *msg, u32 len, void *data);
+int dlm_master_request_resp_handler(net_msg *msg, u32 len, void *data);
+int dlm_assert_master_handler(net_msg *msg, u32 len, void *data);
+int dlm_do_master_request(dlm_master_list_entry *mle, int to);
+int dlm_do_master_request_resp(dlm_ctxt *dlm, struct qstr *name, int response, int to);
+int dlm_do_assert_master(dlm_master_list_entry *mle);
+void dlm_mle_node_down(void *ptr1, void *ptr2, int idx, void *data);
+void dlm_mle_node_up(void *ptr1, void *ptr2, int idx, void *data);
+
+dlm_lock_resource * __dlm_lookup_lock(dlm_ctxt *dlm, struct qstr *lockname);
+void dlm_init_lockres(dlm_lock_resource *res, struct qstr *lockname);
+void dlm_wait_on_lockres(dlm_lock_resource *res);
+void dlm_dump_everything(void);
+void dlm_dump_dlm(dlm_ctxt *dlm);
+
+/*
+ * Report whether a lock requested in mode 'request' can coexist with a
+ * lock already held in mode 'existing'.  Returns 1 if compatible, else 0.
+ */
+static inline int dlm_lock_compatible(int existing, int request)
+{
+	/* a NO_LOCK on either side never conflicts */
+	if (existing == LKM_NLMODE || request == LKM_NLMODE)
+		return 1;
+
+	/*
+	 * Neither side is NO_LOCK from here on: an EX request conflicts with
+	 * everything, and any other request is only granted alongside PR.
+	 */
+	return (request != LKM_EXMODE && existing == LKM_PRMODE) ? 1 : 0;
+}
+
+/*
+ * Walk 'head' and return 1 if 'lock' is linked on it through its 'list'
+ * member, 0 if the walk finishes without finding it.
+ */
+static inline int dlm_lock_on_list(struct list_head *head, dlm_lock *lock)
+{
+	struct list_head *pos;
+	int found = 0;
+
+	list_for_each(pos, head) {
+		if (list_entry(pos, dlm_lock, list) == lock) {
+			found = 1;
+			break;
+		}
+	}
+	return found;
+}
+
+/*
+ * Return 1 if 'mle' belongs to 'dlm' and refers to the lock named by
+ * 'lockname', 0 otherwise.
+ *
+ * A DLM_MLE_BLOCK entry carries the name itself in u.name; any other entry
+ * points at a lock resource (u.res) whose lockname is compared, hash first.
+ * Names are length-counted byte strings, so they are compared with memcmp():
+ * strncmp() stops at an embedded NUL byte and would report two different
+ * names of the same length as equal.
+ */
+static inline int dlm_mle_equal(dlm_ctxt *dlm, dlm_master_list_entry *mle, struct qstr *lockname)
+{
+	dlm_lock_resource *res;
+
+	if (dlm != mle->dlm)
+		return 0;
+
+	if (mle->type == DLM_MLE_BLOCK) {
+		if (lockname->len != mle->u.name.len ||
+		    memcmp(lockname->name, mle->u.name.name, lockname->len) != 0)
+			return 0;
+	} else {
+		res = mle->u.res;
+		if (res->lockname.hash != lockname->hash ||
+		    res->lockname.len != lockname->len ||
+		    memcmp(res->lockname.name, lockname->name, lockname->len) != 0)
+			return 0;
+	}
+	return 1;
+}
+
+
+
+
+
+#endif /* DLMMOD_H */

Added: branches/dlm-glue/dlmtools/dlmnet.h
===================================================================
--- branches/dlm-glue/dlmtools/dlmnet.h	2004-11-18 22:00:07 UTC (rev 410)
+++ branches/dlm-glue/dlmtools/dlmnet.h	2004-11-18 22:08:00 UTC (rev 411)
@@ -0,0 +1,80 @@
+/* -*- mode: c; c-basic-offset: 8; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
+ * dlmnet.h
+ *
+ * Function prototypes
+ *
+ * Copyright (C) 2002, 2004 Oracle.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ * 
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ *
+ * Authors: Kurt Hackel, Mark Fasheh, Sunil Mushran, Wim Coekaerts,
+ *	    Manish Singh, Neeraj Goyal, Suchit Kaura
+ */
+
+#ifndef DLMNET_H
+#define DLMNET_H
+
+#include "dlmnm.h"
+
+#define NET_MSG_MAGIC    0xbc0ffa55
+
+typedef struct _net_msg
+{
+	__u32 magic;
+	__u32 data_len;
+	__u16 src_node;
+	__u16 dst_node;
+	__u32 msg_type;
+	__u32 key;
+	__u8  buf[0];
+} net_msg;
+
+typedef int (net_msg_handler_func)(net_msg *msg, u32 len, void *data);
+
+/* size in bytes of net_msg_handler.name */
+#define NET_MSG_HANDLER_MAX_NAMELEN    32
+/*
+ * One registered message handler: 'func' is called with 'data' for messages
+ * matching 'msg_type' and 'key'.  The name buffer is sized by
+ * NET_MSG_HANDLER_MAX_NAMELEN so the two cannot drift apart.
+ */
+typedef struct _net_msg_handler
+{
+	struct list_head list;
+	char name[NET_MSG_HANDLER_MAX_NAMELEN];
+	u32 msg_type;
+	u32 key;
+	net_msg_handler_func *func;
+	void *data;
+	u32 expected_len;
+} net_msg_handler;
+
+/* no clue for these yet... */
+#define NET_MIN_MSG_LEN  (sizeof(net_msg))
+#define NET_MAX_MSG_LEN  (1300)    // something based on mtu i guess
+
+
+int net_send_error(struct socket *sock, void *buffer, const size_t buflen);
+int net_dispatch_message(struct inode *inode, struct socket *sock, net_msg *hdr, net_msg_handler *hnd);
+int net_dispatch(struct inode *inode, struct socket *sock, net_msg *msg);
+int net_receive(void);
+int net_send_tcp_msg (struct inode *inode, void *data, u32 packet_len);
+int net_recv_tcp_msg (void *data, u32 *packet_len);
+int net_process_message(net_msg *msg, u32 packet_len);
+net_msg * net_package_message(u32 msg_type, u32 key, void *data, u32 len, u16 from, u16 to);
+int net_register_handler(struct inode *inode, u32 msg_type, u32 key, int flags, 
+			 u32 max_len, net_msg_handler_func *func, void *data, void *buf);
+
+
+
+
+#endif /* DLMNET_H */

Added: branches/dlm-glue/dlmtools/dlmnm.h
===================================================================
--- branches/dlm-glue/dlmtools/dlmnm.h	2004-11-18 22:00:07 UTC (rev 410)
+++ branches/dlm-glue/dlmtools/dlmnm.h	2004-11-18 22:08:00 UTC (rev 411)
@@ -0,0 +1,253 @@
+/* -*- mode: c; c-basic-offset: 8; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
+ * dlmnm.h
+ *
+ * Function prototypes
+ *
+ * Copyright (C) 2002, 2004 Oracle.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ * 
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ *
+ * Authors: Kurt Hackel, Mark Fasheh, Sunil Mushran, Wim Coekaerts,
+ *	    Manish Singh, Neeraj Goyal, Suchit Kaura
+ */
+
+#ifndef DLMNM_H
+#define DLMNM_H
+
+
+
+struct _nm_ctxt
+{
+	int dummy;
+};
+
+#define NM_MAX_IFACES            2
+#define NM_MAX_NODES             255
+#define NM_INVALID_SLOT_NUM      255
+
+/* host name, group name, cluster name all 64 bytes */
+#define NM_MAX_NAME_LEN          64    // __NEW_UTS_LEN
+
+
+#define NM_GROUP_INODE_START    200000
+#define NM_NODE_INODE_START     100000
+
+enum {
+	NM_CLUSTER_DOWN=0,
+	NM_CLUSTER_UP
+};
+
+enum {
+	NM_GROUP_NOT_READY=0,
+	NM_GROUP_READY
+};
+
+enum {
+	NM_Root = 1,
+	NM_Cluster,
+	NM_Node,
+	NM_Group,
+};
+
+
+
+
+typedef struct _nm_network_iface
+{
+	u16 ip_port;			/* for simplicity, just define exactly one port for this if */
+	u16 ip_version;
+	union {
+		u32 ip_addr4;		/* IPv4 address in NBO */
+		u32 ip_addr6[4];	/* IPv6 address in NBO */
+	} addr_u;
+} nm_network_iface;
+
+typedef struct _nm_node_info 
+{
+	u16 node_num;
+	char node_name[NM_MAX_NAME_LEN+1];
+	nm_network_iface ifaces[NM_MAX_IFACES];
+} nm_node_info;
+
+
+typedef struct _nm_cluster
+{
+	char name[NM_MAX_NAME_LEN+1];
+	int state;
+	spinlock_t bitmap_lock;
+	u32 group_bitmap[8];
+	u32 node_bitmap[8];
+} nm_cluster;
+
+
+typedef struct _nm_group_inode_private
+{
+	struct inode *inode;
+	struct list_head net_list;
+	struct list_head disk_list;
+	cluster_disk disk;
+	int state;
+	spinlock_t bitmap_lock;
+	u32 slot_bitmap[8];
+} nm_group_inode_private;
+
+#ifdef __KERNEL__
+/* TODO: move this */
+#define NET_FLAG_CREATING_SOCKET   0x00000001
+typedef struct _net_inode_private
+{
+	struct socket *sock;
+	wait_queue_t sleep;
+	spinlock_t sock_lock;
+	struct list_head handlers;
+	struct list_head list;
+	int flags;
+} net_inode_private;
+
+typedef struct _nm_node_inode_private
+{
+	struct inode *inode;
+	nm_node_info node;
+	struct list_head ip_hash;
+	net_inode_private net;
+} nm_node_inode_private;
+#endif
+
+/* transaction file nm_op stuff */
+
+#define NM_OP_MAGIC      0xbeaf
+enum {
+	NM_OP_CREATE_CLUSTER=123,
+	NM_OP_DESTROY_CLUSTER,
+	NM_OP_NAME_CLUSTER,
+	NM_OP_ADD_CLUSTER_NODE,
+	NM_OP_GET_CLUSTER_NUM_NODES,
+	NM_OP_GET_NODE_INFO,
+	NM_OP_CREATE_GROUP,
+	NM_OP_GET_GROUP_INFO,
+	NM_OP_ADD_GROUP_NODE,
+	NM_OP_GET_GLOBAL_NODE_NUM
+};
+
+typedef struct _nm_group_change
+{
+	u16 group_num;
+	u16 node_num;
+	u16 slot_num;
+	char disk_uuid[CLUSTER_DISK_UUID_LEN+1];
+	char name[NM_MAX_NAME_LEN+1];
+} nm_group_change;
+
+typedef struct _nm_op
+{
+	u16 magic;
+	u16 opcode;
+	union {
+		u16 index;
+		char name[NM_MAX_NAME_LEN+1];
+		nm_node_info node;
+		nm_group_change gc;
+	} arg_u;
+} nm_op;
+
+
+/* callback stuff */
+
+enum {
+	NM_NODE_ADD_CB = 0,
+	NM_NODE_DEL_CB,
+	NM_GROUP_ADD_CB,
+	NM_GROUP_DEL_CB,
+	NM_GROUP_NODE_ADD_CB,
+	NM_GROUP_NODE_DEL_CB,
+	NM_NUM_CB
+};
+
+typedef void (nm_cb_func)(void *, void *, u16);
+
+typedef struct _nm_callback_func
+{
+	struct list_head list;
+	nm_cb_func *func;
+	//void (*func)(void *, void *, u16);
+} nm_callback_func;
+
+
+
+
+u16 nm_this_node(struct inode *group);
+int nm_init(struct _dlm_ctxt *dlm);
+nm_cluster * nm_get_cluster(void);
+int nm_register_callback(int type, void (*func)(void *, void *, u16));
+int nm_unregister_callback(int type, void (*func)(void *, void *, u16));
+int nm_get_group_num_nodes(struct inode *group);
+int nm_get_group_max_slots(struct inode *group);
+int nm_make_group_ready(struct inode *group);
+void * nm_iterate_group_disk_slots(struct inode *group, int *idx);
+int nm_remove_node_from_group(struct inode *group, struct inode *node);
+int nm_create_group(char *buf, nm_op *data);
+int nm_add_node_to_group(char *buf, nm_op *data);
+
+#ifdef __KERNEL__
+
+
+struct inode * nm_get_group_by_num(u16 group_num);
+struct inode * nm_get_node_by_num(u16 node_num);
+struct inode * __nm_get_node_by_name(char *node_name, int dir);
+struct inode * nm_get_node_by_ip(u32 addr);
+struct inode * nm_get_group_node_by_index(struct inode *group, u16 index);
+
+/* Thin wrapper: __nm_get_node_by_name() with dir = 0. */
+static inline struct inode * nm_get_node_by_name(char *node_name)
+{
+	return __nm_get_node_by_name(node_name, 0);
+}
+/* Thin wrapper: __nm_get_node_by_name() with dir = 1. */
+static inline struct inode * nm_get_group_by_name(char *group_name)
+{
+	return __nm_get_node_by_name(group_name, 1);
+}
+
+
+/* Return the inode number of 'node' as an offset from NM_NODE_INODE_START. */
+static inline int nm_get_node_global_index(struct inode *node)
+{
+	return (node->i_ino - NM_NODE_INODE_START);
+}
+/* Return the inode number of 'group' as an offset from NM_GROUP_INODE_START. */
+static inline int nm_get_group_global_index(struct inode *group)
+{
+	return (group->i_ino - NM_GROUP_INODE_START);
+}
+#endif
+
+/*
+ * Return 1 if 'ino' lies in the node inode range
+ * [NM_NODE_INODE_START, NM_NODE_INODE_START + NM_MAX_NODES) or in the group
+ * inode range [NM_GROUP_INODE_START, NM_GROUP_INODE_START + NM_MAX_NODES),
+ * 0 otherwise.  The fixed inodes NM_Cluster..NM_Group are not accepted:
+ * these should never be referred to in kernel.
+ */
+static inline int nm_valid_ino(int ino)
+{
+	int is_node = (ino >= NM_NODE_INODE_START &&
+		       ino < NM_NODE_INODE_START + NM_MAX_NODES);
+	int is_group = (ino >= NM_GROUP_INODE_START &&
+			ino < NM_GROUP_INODE_START + NM_MAX_NODES);
+
+	return (is_node || is_group) ? 1 : 0;
+}
+
+
+	
+#endif /* DLMNM_H */

Added: branches/dlm-glue/dlmtools/dlmtcp.h
===================================================================
--- branches/dlm-glue/dlmtools/dlmtcp.h	2004-11-18 22:00:07 UTC (rev 410)
+++ branches/dlm-glue/dlmtools/dlmtcp.h	2004-11-18 22:08:00 UTC (rev 411)
@@ -0,0 +1,239 @@
+/* -*- mode: c; c-basic-offset: 8; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
+ * dlmtcp.h
+ *
+ * Function prototypes
+ *
+ * Copyright (C) 2002, 2004 Oracle.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ * 
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ *
+ * Authors: Kurt Hackel, Mark Fasheh, Sunil Mushran, Wim Coekaerts,
+ *	    Manish Singh, Neeraj Goyal, Suchit Kaura
+ */
+
+#ifndef DLMNET_H
+#define DLMNET_H
+#include <linux/socket.h>
+#ifdef __KERNEL__
+#include <net/sock.h>
+#else
+#include <sys/socket.h>
+#endif
+#include <linux/inet.h>
+#include <linux/in.h>
+
+#include "dlmnm.h"
+
+
+#ifdef __KERNEL__
+
+#define NET_DISP_THREAD_MS   5000   /* TODO */
+#define NET_RECV_THREAD_MS   5000   /* TODO */
+
+#ifdef BIG_NET_MSG
+#define NET_MSG_MAGIC           ((u32)0xbc0ffa55)
+#define NET_MSG_STATUS_MAGIC    ((u32)0xbc0ffa56)
+#define NET_MSG_NUM_MAX         ((u64)0xffffffffffffffffULL)
+typedef struct _net_msg
+{
+	__u32 magic;
+	__u32 data_len;
+	__u16 src_node;
+	__u16 dst_node;
+	__u32 msg_type;
+	__u32 key;
+	__s32 status;
+	__u64 msg_num;
+	__u8  buf[0];
+} net_msg;
+#else
+
+#define NET_MSG_MAGIC           ((u16)0xfa55)
+#define NET_MSG_STATUS_MAGIC    ((u16)0xfa56)
+#define NET_MSG_NUM_MAX         ((u32)0xffffffffUL)
+typedef struct _net_msg
+{
+	__u16 magic;
+	__u16 data_len;
+	__u16 msg_type;
+	__s16 status;
+	__u32 key;
+	__u32 msg_num;
+	__u8  buf[0];
+} net_msg;
+
+#endif
+
+typedef int (net_msg_handler_func)(net_msg *msg, u32 len, void *data);
+
+typedef struct _net_msg_handler
+{
+	struct list_head list;
+	u32 msg_type;
+	u32 key;
+	net_msg_handler_func *func;
+	void *data;
+	net_msg hdr;
+	u32 max_len;
+	void *buf;
+	spinlock_t lock;
+	atomic_t refcnt;
+	int flags;
+} net_msg_handler;
+
+typedef struct _net_status_ctxt
+{
+	struct list_head list;
+	s32 status;
+	u64 msg_num;
+	wait_queue_head_t wq;
+	atomic_t woken;
+} net_status_ctxt;
+
+void net_do_status_return(u64 msg_num, s32 status);
+
+/* no clue for these yet... */
+#define NET_MIN_MSG_LEN  (0)
+#define NET_MAX_MSG_LEN  (8192)
+	
+
+#define NET_ALREADY_CONNECTED   2
+#define NET_UNKNOWN_HOST        3
+	
+
+/* Return 1 if 'err_type' is NET_ALREADY_CONNECTED or NET_UNKNOWN_HOST, else 0. */
+static inline int net_is_valid_error_type(u32 err_type)
+{
+	switch (err_type) {
+		case NET_ALREADY_CONNECTED:
+		case NET_UNKNOWN_HOST:
+			return 1;
+		default:
+			return 0;
+	}
+}
+		       
+enum {
+	NET_HND_VAR_LEN = 0,
+	NET_HND_IN_USE,
+};
+
+#define net_handler_variable_len(h)   ((h)->flags & (1 << NET_HND_VAR_LEN))
+#define net_handler_in_use(h)         ((h)->flags & (1 << NET_HND_IN_USE))
+
+/*
+ * Return non-zero if a message of 'len' bytes is acceptable to 'handler':
+ * at most max_len bytes for a variable-length handler (NET_HND_VAR_LEN set
+ * in flags), exactly max_len bytes otherwise.  Returns 0 for a bad length.
+ */
+static inline int net_handler_msg_len_ok(net_msg_handler *handler, u32 len)
+{
+	return (net_handler_variable_len(handler) ? 
+		len <= handler->max_len : len == handler->max_len);
+}
+
+
+/*
+ * Map an ip_version value to a protocol family.
+ *
+ * NOTE: the unconditional "return PF_INET" right after the debug printk
+ * makes the switch and the trailing "return 4" unreachable, so every call
+ * currently yields PF_INET regardless of ip_version.
+ */
+static inline int net_ip_version_to_family(u16 ip_version)
+{
+	printk("ip_version passed: %u, host byteorder: %u\n", ip_version, ntohs(ip_version));
+	return PF_INET;
+	/* unreachable: intended mapping of ntohs(ip_version) 4 / 6 to a family */
+	switch (ntohs(ip_version)) {
+		case 4:
+			return PF_INET;
+		case 6:
+			return PF_INET6;
+		default:
+			BUG();
+	}
+
+	return 4;
+}
+
+
+
+/* TODO: figure this out.... */
+/*
+ * Decide whether the link should be treated as down.
+ *
+ * Returns 1 if 'sock' is non-NULL and its state is neither TCP_ESTABLISHED
+ * nor TCP_CLOSE_WAIT, or if 'err' is one of the negative errno values
+ * listed in the switch.  Returns 0 otherwise, including for any err >= 0.
+ */
+static inline int net_link_down(int err, struct socket *sock)
+{
+	/* the socket state is checked first and overrides 'err' */
+	if (sock) {
+		if (sock->sk->state != TCP_ESTABLISHED &&
+	    	    sock->sk->state != TCP_CLOSE_WAIT)
+			return 1;
+	}
+
+	if (err >= 0)
+		return 0;
+	switch (err) {
+		/* ????????????????????????? */
+		case -ERESTARTSYS:
+		case -EBADF:
+		/* When the server has died, an ICMP port unreachable 
+		 * message prompts ECONNREFUSED. */
+		case -ECONNREFUSED:
+		case -ENOTCONN:
+		case -ECONNRESET:
+		case -EPIPE:
+			return 1;
+	}
+	/* any other negative error is not taken as a dead link */
+	return 0;
+}
+
+enum {
+	NET_DRIVER_UNINITED,
+	NET_DRIVER_READY,
+};
+
+
+int net_register_handler(u32 msg_type, u32 key, int flags, 
+			 u32 max_len, net_msg_handler_func *func, void *data, void *buf);
+net_msg * net_package_message(u32 msg_type, u32 key, void *data, u32 len);
+int net_recv_tcp_msg (struct inode *inode, struct socket *sock, void *data, u32 *packet_len);
+int net_send_tcp_msg (struct inode *inode, struct socket *sock, void *data, u32 packet_len);
+int net_send_error(struct socket *sock, u32 err_type);
+int net_init_tcp_sock(struct inode *inode);
+int net_send_message(u32 msg_type, u32 key, void *data, u32 len, struct inode *inode, int *status);
+int net_broadcast_message(u32 msg_type, u32 key, void *data, u32 len, struct inode *group);
+net_msg_handler * net_lookup_handler(u32 msg_type, u32 key);
+
+#endif /* __KERNEL__ */
+
+typedef struct _net_ioc
+{
+	u32 status;
+} net_ioc;
+
+typedef struct _gsd_ioc
+{
+	int fd;
+	int namelen;
+	char name[NM_MAX_NAME_LEN+1];
+	int status;
+} gsd_ioc;
+
+#define  NET_IOC_MAGIC          'O'
+#define  NET_IOC_ACTIVATE       _IOR(NET_IOC_MAGIC, 1, net_ioc)
+#define  NET_IOC_GETSTATE       _IOR(NET_IOC_MAGIC, 2, net_ioc)
+#define  GSD_IOC_CREATE_GROUP   _IOR(NET_IOC_MAGIC, 3, gsd_ioc)
+#define  GSD_IOC_ADD_GROUP_NODE _IOR(NET_IOC_MAGIC, 4, gsd_ioc)
+
+#define GSD_MESSAGE   130
+#define GSD_ACTION_ADD_GROUP        (0x01)
+#define GSD_ACTION_ADD_GROUP_NODE   (0x02)
+typedef struct _gsd_message
+{
+	u16 from;
+	u8 action;
+	u8 namelen;
+	u8 name[NM_MAX_NAME_LEN];
+} gsd_message;
+
+
+
+	
+
+#endif /* DLMNET_H */

Added: branches/dlm-glue/dlmtools/dlmutil.h
===================================================================
--- branches/dlm-glue/dlmtools/dlmutil.h	2004-11-18 22:00:07 UTC (rev 410)
+++ branches/dlm-glue/dlmtools/dlmutil.h	2004-11-18 22:08:00 UTC (rev 411)
@@ -0,0 +1,110 @@
+/* -*- mode: c; c-basic-offset: 8; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
+ * dlmutil.h
+ *
+ * Function prototypes
+ *
+ * Copyright (C) 2002, 2004 Oracle.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ * 
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ *
+ * Authors: Kurt Hackel, Mark Fasheh, Sunil Mushran, Wim Coekaerts,
+ *	    Manish Singh, Neeraj Goyal, Suchit Kaura
+ */
+
+#ifndef DLM_UTIL_H
+#define DLM_UTIL_H
+
+#ifdef __KERNEL__
+#define SHUTDOWN_SIGS   (sigmask(SIGKILL) | sigmask(SIGHUP) | \
+			 sigmask(SIGINT) | sigmask(SIGQUIT))
+
+/* timeout structure taken from Ben's aio.c */
+typedef struct _util_timeout {
+	struct timer_list	timer;
+	int			timed_out;
+	wait_queue_head_t	wait;
+} util_timeout;
+
+void util_clear_timeout(util_timeout *to);
+void util_daemonize(char *name, int len, int shutdown_sigs);
+void util_init_timeout(util_timeout *to);
+void util_set_timeout(util_timeout *to, __u32 timeout);
+void util_show_stack(unsigned long *esp);
+void util_show_trace(unsigned long *stack);
+int util_sleep(__u32 ms);
+int __util_wait_atomic_eq(wait_queue_head_t *wq, atomic_t *var, int val, int ms);
+void util_block_sigs(sigset_t *oldsigs, unsigned long mask);
+void util_unblock_sigs(sigset_t newsig);
+
+/* exits when var == val, or on timeout */
+static inline int util_wait_atomic_eq(wait_queue_head_t *wq, atomic_t *var, int val, int timeout)
+{
+	/* fast path: nothing to wait for when the value already matches */
+	if (atomic_read(var) == val)
+		return 0;
+
+	/* otherwise hand off to the waiting helper and pass its result back */
+	return __util_wait_atomic_eq(wq, var, val, timeout);
+}
+
+#endif  /* __KERNEL__ */
+
+/* resizable array */
+typedef struct _util_rarray
+{
+	void *page;
+	u16 elements;
+	u16 max_elem;
+	u16 elem_size;
+	u16 reserved1;
+} util_rarray;
+
+#define UTIL_RARRAY_PAGE_BUF_SIZE    (PAGE_SIZE - offsetof(util_rarray_page, buf))
+#define UTIL_RARRAY_ELEM_PER_BUF(r)  ((UTIL_RARRAY_PAGE_BUF_SIZE) / (r)->elem_size)
+typedef struct _util_rarray_page
+{
+	void *next;
+	char buf[0];
+} util_rarray_page;
+
+void util_init_rarray(util_rarray *arr, u16 elem_size);
+void * util_get_new_rarray_slot(util_rarray *arr, int *index);
+int util_add_to_rarray(util_rarray *arr, void *new);
+void * util_rarray_idx_to_slot(util_rarray *arr, int idx);
+int util_resize_rarray(util_rarray *arr, int newelem);
+
+#ifdef __KERNEL__
+typedef struct _util_thread_info
+{
+	wait_queue_head_t thread_wq;
+	atomic_t woken;
+	struct task_struct *task;
+	struct completion complete;
+	int pid;
+} util_thread_info;
+
+
+/*
+ * Put 'info' into its initial state: wait queue head and completion
+ * initialised, woken = 0, no task attached, pid = -1.
+ */
+static inline void util_thread_info_init(util_thread_info *info)
+{
+	init_waitqueue_head(&info->thread_wq);
+	atomic_set(&info->woken, 0);
+	info->task = NULL;
+	info->pid = -1;
+	init_completion(&info->complete);
+}
+#endif /* __KERNEL__ */
+
+#endif /* DLM_UTIL_H */

Added: branches/dlm-glue/dlmtools/mount.ocfs2.c
===================================================================
--- branches/dlm-glue/dlmtools/mount.ocfs2.c	2004-11-18 22:00:07 UTC (rev 410)
+++ branches/dlm-glue/dlmtools/mount.ocfs2.c	2004-11-18 22:08:00 UTC (rev 411)
@@ -0,0 +1,816 @@
+/*
+ * mount.ocfs2.c
+ *
+ * Mount helper for ocfs2: joins the heartbeat group, then mounts the device
+ *
+ * Copyright (C) 2004 Oracle.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ * 
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ *
+ * Authors: Sunil Mushran
+ */
+
+
+#define _LARGEFILE64_SOURCE
+#define _GNU_SOURCE /* Because libc really doesn't want us using O_DIRECT? */
+
+#include <sys/types.h>
+#include <asm/types.h>
+
+#define u8   __u8
+#define s8   __s8
+#define u16  __u16
+#define s16  __s16
+#define u32  __u32
+#define s32  __s32
+#define u64  __u64
+#define s64  __s64
+#define atomic_t int
+#define spinlock_t unsigned long
+typedef unsigned short kdev_t;
+
+
+
+
+#include <asm/page.h>
+#include <sys/mount.h>
+#include <dirent.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <linux/fd.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <asm/bitops.h>
+
+#define  OCFS2_FLAT_INCLUDES	1
+#include <ocfs2.h>
+#include <ocfs2_fs.h>
+#include <ocfs2_disk_dlm.h>
+#include <ocfs1_fs_compat.h>
+#include <kernel-list.h>
+
+
+#include "dlmutil.h"
+#include "dlmcommon.h"
+#include "dlmnm.h"
+#include "dlmhb.h"
+#include "dlmtcp.h"
+
+
+
+#define CLUSTER_FILE   "/proc/cluster/nm/.cluster"
+#define GROUP_FILE     "/proc/cluster/nm/.group"
+#define NODE_FILE      "/proc/cluster/nm/.node"
+#define HEARTBEAT_DISK_FILE "/proc/cluster/heartbeat/.disk"
+
+
+int create_remote_group(char *group_name, __u16 node);
+int get_node_map(__u16 group_num, char *bitmap);
+int get_ocfs2_disk_hb_params(char *group_dev, __u32 *block_bits, __u32 *cluster_bits,
+			     __u64 *start_block, __u32 *num_clusters);
+int activate_group(char *group_name, char *group_dev, __u16 group_num, 
+		   __u32 block_bits, __u64 num_blocks, __u64 start_block);
+int add_to_local_group(char *uuid, __u16 group_num, __u16 node_num);
+int create_group(char *uuid, __u16 *group_num);
+int get_my_nodenum(__u16 *nodenum);
+int add_me_to_group(char *groupname, char *groupdev);
+static errcode_t ocfs2_partition_list (struct list_head *dev_list);
+static void ocfs2_partition_list_destroy (struct list_head *dev_list);
+static errcode_t ocfs2_detect_all(struct list_head *dev_list);
+static int read_options(int argc, char **argv, char **hbuuid, char **hbdev, char **device, char **mp);
+
+char *op_buf = NULL;
+
+
+
+/*
+ * ocfs2_partition_list()
+ *
+ */
+static errcode_t ocfs2_partition_list (struct list_head *dev_list)
+{
+	FILE *fp;
+	char buf[256];
+	char devname[256];
+	ocfs2_devices *entry;
+	errcode_t err = 0;
+
+	fp = fopen ("/proc/partitions", "r");
+	if (!fp)
+		return OCFS2_ET_IO;
+
+	/* a usable line is three integers followed by a name; others are skipped */
+	while (fgets (buf, sizeof(buf), fp)) {
+		if (sscanf(buf, "%*d %*d %*d %99[^ \t\n]", devname) != 1)
+			continue;
+
+		/* stop at the first allocation failure; queued entries stay on the list */
+		err = ocfs2_malloc0(sizeof(ocfs2_devices), &entry);
+		if (err)
+			break;
+
+		snprintf(entry->dev_name, sizeof(entry->dev_name), "/dev/%s", devname);
+		list_add_tail(&(entry->list), dev_list);
+	}
+
+	fclose(fp);
+	return err;
+}
+
+
+/*
+ * ocfs2_partition_list_destroy()
+ *
+ */
+/* Unlink and free every ocfs2_devices entry on 'dev_list'. */
+static void ocfs2_partition_list_destroy (struct list_head *dev_list)
+{
+	struct list_head *iter, *tmpiter;
+	ocfs2_devices *dev;
+
+	/* _safe iteration: the current entry is removed and freed in the body */
+	list_for_each_safe(iter, tmpiter, dev_list) {
+		dev = list_entry(iter, ocfs2_devices, list);
+		list_del(&dev->list);
+		free(dev);
+	}
+}
+
+
+/*
+ * ocfs2_detect_all()
+ *
+ */
+static errcode_t ocfs2_detect_all(struct list_head *dev_list)
+{
+	errcode_t ret;
+	struct list_head *pos;
+	ocfs2_devices *dev;
+	ocfs2_filesys *fs;
+
+	/* fill dev_list with one entry per name found in /proc/partitions */
+	ret = ocfs2_partition_list(dev_list);
+	if (ret) {
+		com_err("mount.ocfs2", ret, "while reading /proc/partitions");
+		return ret;
+	}
+
+	list_for_each(pos, dev_list) {
+		dev = list_entry(pos, ocfs2_devices, list);
+
+		fs = NULL;
+		ret = ocfs2_open(dev->dev_name, OCFS2_FLAG_RO, 0, 0, &fs);
+		if (ret) {
+			/* open failed: mark the entry as fs_type 0 and carry on */
+			dev->fs_type = 0;
+			ret = 0;
+			continue;
+		}
+
+		/* opened as ocfs2: record fs_type 2 and the superblock uuid */
+		dev->fs_type = 2;
+		memcpy(dev->uuid, OCFS2_RAW_SB(fs->fs_super)->s_uuid,
+		       sizeof(dev->uuid));
+
+		ocfs2_close(fs);
+	}
+	return ret;
+}
+
+
+/*
+ * read_options()
+ *
+ * Parse "-o" sub-options (hbuuid=<32 chars>, hbdev=<device>, comma
+ * separated) followed by two positional arguments: device and mountpoint.
+ *
+ * The four out parameters must point at NULL or malloc'ed strings on entry.
+ * On success returns 0 with *device and *mp set to malloc'ed copies, and
+ * *hbuuid / *hbdev set only when the matching sub-option was given.
+ * Returns 1 on too few arguments, an unknown sub-option, a malformed value
+ * or an allocation failure; anything already stored in the out parameters
+ * is left for the caller to free.
+ */
+static int read_options(int argc, char **argv, char **hbuuid, char **hbdev, char **device, char **mp)
+{
+	int ret = 0;
+	int c;
+	char *tmp, *tmp2;
+
+	if (argc < 2) {
+		ret = 1;
+		goto bail;
+	}
+
+	while(1) {
+		c = getopt(argc, argv, "o:");
+		if (c == -1)
+			break;
+
+		switch (c) {
+		case 'o':	/* options */
+			/* pessimistic until the whole sub-option list has parsed */
+			ret = 1;
+			tmp = optarg;
+			while (*tmp) {
+				if (strncasecmp(tmp, "hbuuid=", strlen("hbuuid="))==0) {
+					tmp += strlen("hbuuid=");
+					tmp2 = strchr(tmp, ',');
+					
+					/* uuid should be exactly 32 hex chars */
+					if (tmp2) {
+						if (tmp2-tmp != 32)
+							goto bail;
+					} else {
+						if (strlen(tmp) != 32)
+							goto bail;
+					}
+					/* a repeated sub-option replaces the earlier copy */
+					free(*hbuuid);
+					*hbuuid = strndup(tmp, 32);
+					if (!*hbuuid)
+						goto bail;
+				} else if (strncasecmp(tmp, "hbdev=", strlen("hbdev="))==0) {
+					tmp += strlen("hbdev=");
+					tmp2 = strchr(tmp, ',');
+					if (!tmp2)
+						tmp2 = tmp + strlen(tmp);
+					if (tmp2-tmp <= 0)
+						goto bail;
+					/* a repeated sub-option replaces the earlier copy */
+					free(*hbdev);
+					*hbdev = strndup(tmp, tmp2-tmp);
+					if (!*hbdev)
+						goto bail;
+				} else {
+					/* invalid option */
+					goto bail;
+				}
+				/* tmp2 is the ',' after this value, its NUL, or NULL */
+				tmp = tmp2;
+				if (!tmp || !*tmp)
+					break;
+				tmp++;
+			}
+			ret = 0;
+			break;
+
+		default:
+			break;
+		}
+	}
+
+	if (!ret) {
+		ret = 1;
+		/* need device and mountpoint */
+		if (optind+1 < argc && argv[optind] && argv[optind+1]) {
+			*device = strdup(argv[optind]);
+			*mp = strdup(argv[optind+1]);
+			/* only report success when both copies were allocated */
+			if (*device && *mp)
+				ret = 0;
+		}
+	}
+
+bail:
+	return ret;
+}
+
+
+/*
+ * main()
+ *
+ * Parses the command line with read_options(), scans the devices listed in
+ * /proc/partitions for ocfs2 volumes, settles on the heartbeat device (by
+ * hbuuid, by hbdev, or else the device being mounted), calls
+ * add_me_to_group() with the heartbeat uuid and device, and finally calls
+ * mount(2) with "group=<uuid>" as the mount data.
+ *
+ * Returns 0 on success, 1 on a usage, lookup or allocation failure, and
+ * otherwise the value of the call that failed.
+ */
+int main(int argc, char **argv)
+{
+	char *device = NULL, *mountpoint = NULL;
+	char *hbdev = NULL, *hbuuid = NULL;
+	errcode_t ret = 0;
+	struct list_head dev_list;
+	char uuid[40];
+	ocfs2_devices *dev;
+	struct list_head *iter;
+	char *p;
+	int i, found = 0;
+	unsigned long flags = 0;
+	char *args = NULL;
+
+	INIT_LIST_HEAD(&dev_list);
+
+	op_buf = malloc(PAGE_SIZE);
+	if (!op_buf) {
+		ret = 1;
+		goto bail;
+	}
+
+	ret = read_options (argc, argv, &hbuuid, &hbdev, &device, &mountpoint);
+	if (ret)
+		goto bail;
+	
+	ret = 1;
+	if (!device || !mountpoint)
+		goto bail;
+
+	/* provide nothing or one, not both */
+	if (hbuuid && hbdev)
+		goto bail;
+
+	ret = ocfs2_detect_all(&dev_list);
+	if (ret)
+		goto bail;
+
+	/* every failure until the heartbeat device is settled exits with 1 */
+	ret = 1;
+	found = 0;
+	/* sort out which device is to be used for heartbeat */
+	if (hbuuid) {
+		list_for_each(iter, &dev_list) {
+			dev = list_entry(iter, ocfs2_devices, list);
+			if (dev->fs_type != 2)
+				continue;
+
+			/* render the 16-byte uuid as 32 upper-case hex digits */
+			memset(uuid, 0, sizeof(uuid));
+			for (i = 0, p = uuid; i < 16; i++, p += 2)
+				sprintf(p, "%02X", dev->uuid[i]);
+			if (strncasecmp(uuid, hbuuid, 32)==0) {
+				/* found it */
+				/* hbuuid holds exactly 32 chars plus NUL, so this fits */
+				strncpy(hbuuid, uuid, 32);  // normalized uuid
+				hbdev = strdup(dev->dev_name);
+				if (!hbdev)
+					goto bail;
+				found = 1;
+				break;
+			}
+		}
+	} else {
+		char *look_for = (hbdev ? hbdev : device);
+		
+		/* broken for now.  have to give exact dev name in /proc/partitions */
+		list_for_each(iter, &dev_list) {
+			dev = list_entry(iter, ocfs2_devices, list);
+			if (dev->fs_type != 2)
+				continue;
+
+			if (strcasecmp(dev->dev_name, look_for)==0) {
+				/* found it */
+				if (!hbdev) {
+					hbdev = strdup(dev->dev_name);
+					if (!hbdev)
+						goto bail;
+				} else {
+					/* equal under strcasecmp, hence equal length: fits in place */
+					strcpy(hbdev, dev->dev_name);
+				}
+				hbuuid = malloc(33);
+				if (!hbuuid)
+					goto bail;
+				memset(hbuuid, 0, 33);
+				for (i = 0, p = hbuuid; i < 16; i++, p += 2)
+					sprintf(p, "%02X", dev->uuid[i]);
+				found = 1;
+				break;
+			}
+		}
+	}
+	if (!found) {
+		ret = 1;
+		goto bail;
+	}
+
+	/* all three should now have valid values */
+	printf("device=%s hbuuid=%s hbdev=%s\n", device, hbuuid, hbdev);
+
+	ret = add_me_to_group(hbuuid, hbdev);
+	if (ret < 0)
+		goto bail;
+
+	args = malloc(strlen(hbuuid) + strlen("group=") + 1);
+	if (!args) {
+		/* do not leave the success code from add_me_to_group() in ret */
+		ret = 1;
+		goto bail;
+	}
+	sprintf(args, "group=%s", hbuuid);
+	flags = 0;
+	ret = mount(device, mountpoint, "ocfs2", flags, args);
+
+bail:
+	/* free(NULL) is a no-op, so no guards are needed */
+	free(args);
+	free(device);
+	free(mountpoint);
+	free(hbdev);
+	free(hbuuid);
+
+	ocfs2_partition_list_destroy (&dev_list);
+
+	free(op_buf);
+
+	return ret;
+}
+
+
+
+/*
+ * this will try to add the group (and the node to the group)
+ * for every mount.  luckily, there are many shortcut paths
+ * along the way, so checking for -EEXIST will save time.
+ *
+ * groupname is the group (heartbeat uuid) name and groupdev the device
+ * used for disk heartbeat.  After the local node has joined, every other
+ * node present in the heartbeat node map is asked to create the group
+ * too; the map is re-read until no node shows up that has not already
+ * been contacted.
+ *
+ * Returns 0 on success or a negative value from the first step that
+ * failed (-EEXIST from the create/add steps is not treated as failure).
+ */
+int add_me_to_group(char *groupname, char *groupdev)
+{
+	int ret;
+	__u16 my_nodenum, groupnum;
+	/* nodes to contact during the current pass */
+	__u32 pre_nodemap[] = {0, 0, 0, 0, 0, 0, 0, 0};
+	/* node map as most recently read back */
+	__u32 post_nodemap[] = {0, 0, 0, 0, 0, 0, 0, 0};
+	/* every node handled in any earlier pass */
+	__u32 seen_nodemap[] = {0, 0, 0, 0, 0, 0, 0, 0};
+	const int map_words = sizeof(pre_nodemap) / sizeof(pre_nodemap[0]);
+	int start, next, i;
+	__u32 block_bits, cluster_bits, num_clusters;
+	__u64 start_block, num_blocks;
+
+	/* either create the group or find that it already exists */
+	ret = get_my_nodenum(&my_nodenum);
+	if (ret < 0)
+		return ret;
+
+	ret = get_ocfs2_disk_hb_params(groupdev, &block_bits, &cluster_bits,
+				       &start_block, &num_clusters);
+	if (ret < 0)
+		return ret;
+
+	/* convert the heartbeat area size from clusters to blocks */
+	num_blocks = num_clusters << cluster_bits;
+	num_blocks >>= block_bits;
+
+	ret = create_group(groupname, &groupnum);
+	if (ret != -EEXIST && ret != 0)
+		return ret;
+
+	ret = activate_group(groupname, groupdev, groupnum, block_bits, num_blocks, start_block);
+	if (ret < 0)
+		return ret;
+
+	ret = add_to_local_group(groupname, groupnum, my_nodenum);
+	if (ret != -EEXIST && ret != 0)
+		return ret;
+
+	/* at this point my node is heartbeating, so any other nodes
+	 * joining right now must communicate with me */
+
+	/* TODO: how much to sleep? */
+	sleep(1);
+	ret = get_node_map(groupnum, (char *)pre_nodemap);
+	if (ret < 0)
+		return ret;
+
+	for (;;) {
+		/* go create this group and add this node on every other node I see */
+		start = 0;
+		while (1) {
+			next = find_next_bit((unsigned long *)pre_nodemap, NM_MAX_NODES, start);
+			if (next >= NM_MAX_NODES) {
+				break;
+			}
+			if (next != my_nodenum) {
+				ret = create_remote_group(groupname, next);
+				if (ret != 0 && ret != -EEXIST) {
+					break;
+				}
+			}
+			start = next + 1;
+		}
+		if (ret != 0 && ret != -EEXIST)
+			return ret;
+
+		/* remember who has been dealt with.  The previous code
+		 * replaced pre_nodemap with only the new nodes and then
+		 * compared that against the full map, so old nodes looked
+		 * "new" again and the loop never terminated once any node
+		 * had joined in the meantime. */
+		for (i = 0; i < map_words; i++)
+			seen_nodemap[i] |= pre_nodemap[i];
+
+		/* grab the nodemap again and look for changes */
+		memset(post_nodemap, 0, sizeof(post_nodemap));
+		ret = get_node_map(groupnum, (char *)post_nodemap);
+		if (ret < 0)
+			return ret;
+
+		/* only nodes that have never been contacted remain */
+		for (i = 0; i < map_words; i++)
+			pre_nodemap[i] = post_nodemap[i] & ~seen_nodemap[i];
+
+		/* keep going while there are still nodes to contact */
+		if (find_next_bit((unsigned long *)pre_nodemap, NM_MAX_NODES, 0) >= NM_MAX_NODES)
+			break;
+	}
+
+	/* nothing new showed up.  we are DONE! */
+	return 0;
+}
+
+/*
+ * get_my_nodenum()
+ *
+ * Writes an NM_OP_GET_GLOBAL_NODE_NUM request to CLUSTER_FILE and parses
+ * the "<status>: <num>" reply.
+ *
+ * *nodenum is set to 255 up front and only overwritten when the reply
+ * carries status 0 and a number in the range 0..255.
+ *
+ * Returns 0 on success, -errno if CLUSTER_FILE cannot be opened and
+ * -EINVAL if the request cannot be written or the reply is unusable.
+ */
+int get_my_nodenum(__u16 *nodenum)
+{
+	FILE *file;
+	int ret = -EINVAL;
+	int retval=-EINVAL, num;
+	nm_op *op = (nm_op *)op_buf;
+
+	memset(op_buf, 0, PAGE_SIZE);
+	op->magic = NM_OP_MAGIC;
+	op->opcode = NM_OP_GET_GLOBAL_NODE_NUM;
+
+	*nodenum = 255;
+
+	file = fopen(CLUSTER_FILE, "r+");
+	if (!file)
+		return -errno;
+	/* on an update ("r+") stream, output must be flushed before the
+	 * stream may be read from */
+	if (fwrite((char *)op, sizeof(nm_op), 1, file) != 1 ||
+	    fflush(file) != 0)
+		goto done;
+	if (fscanf(file, "%d: %d", &retval, &num) != 2 ||
+	    retval != 0 || num < 0 || num > 255) {
+		ret = -EINVAL;
+		goto done;
+	}
+	*nodenum = num;
+	ret = 0;
+done:
+	fclose(file);
+	return ret;
+}
+
+/*
+ * create_group()
+ *
+ * Creates the node-manager group named uuid, using uuid as both the
+ * group name and the disk uuid.
+ *
+ * If /proc/cluster/nm/<uuid> already exists, its inode number minus
+ * NM_GROUP_INODE_START is stored in *group_num and -EEXIST is returned
+ * without talking to CLUSTER_FILE.  Otherwise an NM_OP_CREATE_GROUP
+ * request is written to CLUSTER_FILE and the "<status>: group <num>"
+ * reply is parsed; *group_num is filled in when the status is 0 or
+ * -EEXIST and the number is a valid slot, else it is left as
+ * NM_INVALID_SLOT_NUM.
+ *
+ * Returns the status from the reply, -EINVAL if uuid is not
+ * CLUSTER_DISK_UUID_LEN characters long or the exchange fails, or
+ * -errno if CLUSTER_FILE cannot be opened.
+ */
+int create_group(char *uuid, __u16 *group_num)
+{
+	FILE *file;
+	int ret = -EINVAL, retval;
+	int groupnum = NM_INVALID_SLOT_NUM;
+	nm_op *op = (nm_op *)op_buf;
+	struct stat st;
+	char fname[100];
+
+	/* the length check also bounds the sprintf/strcpy calls below */
+	if (strlen(uuid) != CLUSTER_DISK_UUID_LEN)
+		return -EINVAL;
+
+	sprintf(fname, "/proc/cluster/nm/%s", uuid);
+	if (stat(fname, &st) == 0) {
+		*group_num = st.st_ino - NM_GROUP_INODE_START;
+		return -EEXIST;
+	}
+
+	*group_num = NM_INVALID_SLOT_NUM;
+
+	memset(op_buf, 0, PAGE_SIZE);
+	op->magic = NM_OP_MAGIC;
+	op->opcode = NM_OP_CREATE_GROUP;
+
+	op->arg_u.gc.group_num = NM_INVALID_SLOT_NUM;
+	strcpy(op->arg_u.gc.name, uuid);
+	strcpy(op->arg_u.gc.disk_uuid, uuid);
+
+	file = fopen(CLUSTER_FILE, "r+");
+	if (!file)
+		return -errno;
+
+	/* on an update ("r+") stream, output must be flushed before the
+	 * stream may be read from */
+	if (fwrite((char *)op, sizeof(nm_op), 1, file) != 1 ||
+	    fflush(file) != 0)
+		goto done;
+
+	if (fscanf(file, "%d: group %d", &retval, &groupnum) != 2) {
+		ret = -EINVAL;
+		goto done;
+	}
+	ret = retval;
+	if ((ret == 0 || ret == -EEXIST) &&
+	    groupnum >= 0 && groupnum < NM_INVALID_SLOT_NUM)
+		*group_num = groupnum;
+
+done:
+	fclose(file);
+	return ret;
+}
+
+
+/*
+ * add_to_local_group()
+ *
+ * Adds node node_num to group group_num (named uuid) on this machine.
+ *
+ * If /proc/cluster/nm/<uuid> can be opened and contains an entry whose
+ * inode number minus NM_NODE_INODE_START equals node_num, -EEXIST is
+ * returned straight away.  Otherwise an NM_OP_ADD_GROUP_NODE request
+ * (node_num doubles as the slot number) is written to GROUP_FILE and
+ * the status from the "<status>: node" reply is returned.
+ *
+ * Returns -EINVAL if uuid is not CLUSTER_DISK_UUID_LEN characters long
+ * or the exchange fails, and -errno if GROUP_FILE cannot be opened.
+ */
+int add_to_local_group(char *uuid, __u16 group_num, __u16 node_num)
+{
+	FILE *file;
+	int ret = -EINVAL, retval;
+	nm_op *op = (nm_op *)op_buf;
+	char fname[100];
+	DIR *dir;
+	struct dirent *de;
+
+	/* the length check also bounds the sprintf below */
+	if (strlen(uuid) != CLUSTER_DISK_UUID_LEN)
+		return -EINVAL;
+
+	sprintf(fname, "/proc/cluster/nm/%s", uuid);
+	dir = opendir(fname);
+	if (dir) {
+		while ((de = readdir(dir)) != NULL) {
+			if (de->d_ino - NM_NODE_INODE_START == node_num) {
+				closedir(dir);
+				return -EEXIST;
+			}
+		}
+		closedir(dir);
+	}
+
+	memset(op_buf, 0, PAGE_SIZE);
+	op->magic = NM_OP_MAGIC;
+	op->opcode = NM_OP_ADD_GROUP_NODE;
+	op->arg_u.gc.group_num = group_num;
+	op->arg_u.gc.node_num = node_num;
+	op->arg_u.gc.slot_num = node_num;
+
+	file = fopen(GROUP_FILE, "r+");
+	if (!file)
+		return -errno;
+
+	/* on an update ("r+") stream, output must be flushed before the
+	 * stream may be read from */
+	if (fwrite((char *)op, sizeof(nm_op), 1, file) != 1 ||
+	    fflush(file) != 0)
+		goto done;
+
+	if (fscanf(file, "%d: node", &retval) != 1) {
+		ret = -EINVAL;
+		goto done;
+	}
+	ret = retval;
+
+done:
+	fclose(file);
+	return ret;
+}
+
+/*
+ * activate_group()
+ *
+ * Asks the heartbeat module to start disk heartbeat for group group_num:
+ * opens group_dev read/write, passes the descriptor together with the
+ * uuid, block size bits, block count and start block in an
+ * HB_OP_START_DISK_HEARTBEAT request written to HEARTBEAT_DISK_FILE, and
+ * waits for a "<status>: " reply.
+ *
+ * Returns 0 once a reply has been parsed, -errno if group_dev or
+ * HEARTBEAT_DISK_FILE cannot be opened, and -EINVAL if the request
+ * cannot be written or no reply can be parsed.
+ *
+ * NOTE(review): the status carried in the reply is parsed but not
+ * propagated, as before; confirm whether a non-zero status (for example
+ * heartbeat already running for this group) should fail the caller.
+ */
+int activate_group(char *group_name, char *group_dev, __u16 group_num,
+		   __u32 block_bits, __u64 num_blocks, __u64 start_block)
+{
+	int dev_fd = -1;
+	int ret = -EINVAL, retval;
+	FILE *file;
+	hb_op *op;
+
+	printf("starting disk heartbeat...\n");
+
+	memset(op_buf, 0, PAGE_SIZE);
+	op = (hb_op *)op_buf;
+	op->magic = HB_OP_MAGIC;
+	op->opcode = HB_OP_START_DISK_HEARTBEAT;
+	op->group_num = group_num;
+	strcpy(op->disk_uuid, group_name);
+	op->bits = block_bits;
+	op->blocks = num_blocks;
+	op->start = start_block;
+
+	dev_fd = open(group_dev, O_RDWR);
+	if (dev_fd == -1)
+		return -errno;
+	op->fd = dev_fd;
+
+	file = fopen(HEARTBEAT_DISK_FILE, "r+");
+	if (!file) {
+		/* save errno first: close() may overwrite it */
+		ret = -errno;
+		close(dev_fd);
+		return ret;
+	}
+
+	/* on an update ("r+") stream, output must be flushed before the
+	 * stream may be read from */
+	if (fwrite((char *)op, sizeof(hb_op), 1, file) != 1 ||
+	    fflush(file) != 0)
+		goto done;
+
+	if (fscanf(file, "%d: ", &retval) != 1) {
+		ret = -EINVAL;
+		goto done;
+	}
+	ret = 0;
+done:
+	/* hb will keep its own ref */
+	if (dev_fd != -1)
+		close(dev_fd);
+
+	fclose(file);
+	/* report write/parse failures instead of always claiming success */
+	return ret;
+}
+
+
+/*
+ * get_ocfs2_disk_hb_params()
+ *
+ * Opens the ocfs2 filesystem on group_dev read-only, looks up the
+ * DLM_SYSTEM_INODE entry in the system directory and reads its inode.
+ * The inode must have tree depth 0 and exactly one extent record; that
+ * record's starting block and cluster count are returned together with
+ * the block and cluster size bits taken from the superblock.
+ *
+ * Returns 0 on success and -EINVAL on any failure, in which case the
+ * output parameters are left untouched.
+ */
+int get_ocfs2_disk_hb_params(char *group_dev, __u32 *block_bits, __u32 *cluster_bits,
+			     __u64 *start_block, __u32 *num_clusters)
+{
+	ocfs2_filesys *fs = NULL;
+	ocfs2_dinode *inode;
+	ocfs2_extent_rec *extent;
+	char *block = NULL;
+	uint64_t inode_blkno;
+	errcode_t err = 0;
+	int status = -EINVAL;
+
+	err = ocfs2_open(group_dev, OCFS2_FLAG_RO, 0, 0, &fs);
+	if (err)
+		return status;
+
+	err = ocfs2_lookup(fs, fs->fs_sysdir_blkno,
+			   ocfs2_system_inode_names[DLM_SYSTEM_INODE],
+			   strlen(ocfs2_system_inode_names[DLM_SYSTEM_INODE]),
+			   NULL, &inode_blkno);
+	if (err)
+		goto leave;
+
+	err = ocfs2_malloc_block(fs->fs_io, &block);
+	if (err)
+		goto leave;
+
+	err = ocfs2_read_inode(fs, inode_blkno, block);
+	if (err)
+		goto leave;
+
+	inode = (ocfs2_dinode *)block;
+
+	/* only a flat extent list holding a single record is accepted */
+	if (!di_is_single_extent_placeholder) {
+	}
+	if (!inode->id2.i_list.l_tree_depth &&
+	    inode->id2.i_list.l_next_free_rec == 1) {
+		extent = &(inode->id2.i_list.l_recs[0]);
+
+		*block_bits = OCFS2_RAW_SB(fs->fs_super)->s_blocksize_bits;
+		*cluster_bits = OCFS2_RAW_SB(fs->fs_super)->s_clustersize_bits;
+		*start_block = extent->e_blkno;
+		*num_clusters = extent->e_clusters;
+		status = 0;
+	}
+
+leave:
+	if (block)
+		ocfs2_free(&block);
+	if (fs)
+		ocfs2_close(fs);
+	return status;
+}
+
+/*
+ * get_node_map()
+ *
+ * Writes an HB_OP_GET_NODE_MAP request for group_num to
+ * HEARTBEAT_DISK_FILE, parses the "<status>: " prefix of the reply and
+ * then reads (NM_MAX_NODES+7)/8 bytes of node bitmap into bitmap, which
+ * must be at least that large.
+ *
+ * Returns 0 on success, the status from the reply if it is non-zero,
+ * -errno if HEARTBEAT_DISK_FILE cannot be opened, and -EINVAL if the
+ * request cannot be written or the reply is short or unparsable.
+ */
+int get_node_map(__u16 group_num, char *bitmap)
+{
+	FILE *file = NULL;
+	hb_op *op;
+	int ret = -EINVAL;
+	int retval;
+
+	printf("getting node map...\n");
+
+	memset(op_buf, 0, PAGE_SIZE);
+	op = (hb_op *)op_buf;
+	op->magic = HB_OP_MAGIC;
+	op->opcode = HB_OP_GET_NODE_MAP;
+	op->group_num = group_num;
+
+	file = fopen(HEARTBEAT_DISK_FILE, "r+");
+	if (!file)
+		return -errno;
+
+	/* on an update ("r+") stream, output must be flushed before the
+	 * stream may be read from */
+	if (fwrite((char *)op, sizeof(hb_op), 1, file) != 1 ||
+	    fflush(file) != 0)
+		goto done;
+
+	/* NOTE(review): the trailing blank in the format skips any run of
+	 * white space, so leading bitmap bytes that happen to be white-space
+	 * values (0x09-0x0d, 0x20) would be swallowed here; confirm the
+	 * exact reply layout before tightening this. */
+	if (fscanf(file, "%d: ", &retval) != 1) {
+		ret = -EINVAL;
+		goto done;
+	}
+	if (retval != 0) {
+		ret = retval;
+		goto done;
+	}
+	if (fread(bitmap, 1, (NM_MAX_NODES+7)/8, file) < (NM_MAX_NODES+7)/8) {
+		ret = -EINVAL;
+		goto done;
+	}
+	ret = 0;
+done:
+	fclose(file);
+	return ret;
+}
+
+/*
+ * create_remote_group()
+ *
+ * Asks node "node" to create the group group_name and then to add this
+ * node to it.  The target is identified by an open descriptor on
+ * /proc/cluster/nm/<group_name>/<node>, and both requests are issued as
+ * ioctls on /proc/cluster/net.
+ *
+ * Returns the status of the last ioctl issued (the second request is
+ * skipped when the first reports anything other than 0 or -EEXIST), or
+ * -errno if an open() or ioctl() call itself fails.
+ */
+int create_remote_group(char *group_name, __u16 node)
+{
+	/* issued in this order: create the group, then join it */
+	unsigned long requests[2] = {
+		GSD_IOC_CREATE_GROUP,
+		GSD_IOC_ADD_GROUP_NODE
+	};
+	char node_path[100];
+	gsd_ioc ioc;
+	int net_fd = -1, node_fd = -1;
+	int ret, step;
+
+	sprintf(node_path, "/proc/cluster/nm/%s/%03u", group_name, node);
+
+	/* open a file descriptor to the node we want to talk to */
+	node_fd = open(node_path, O_RDONLY);
+	if (node_fd == -1) {
+		ret = -errno;
+		goto leave;
+	}
+
+	/* TODO: move this over to a transaction file on the inode, eliminate the ioctl */
+	net_fd = open("/proc/cluster/net", O_RDONLY);
+	if (net_fd == -1) {
+		ret = -errno;
+		goto leave;
+	}
+
+	for (step = 0; step < 2; step++) {
+		/* each request starts from a freshly zeroed argument block */
+		memset(&ioc, 0, sizeof(gsd_ioc));
+		ioc.fd = node_fd;
+		ioc.namelen = strlen(group_name);
+		memcpy(ioc.name, group_name, ioc.namelen);
+
+		if (ioctl(net_fd, requests[step], &ioc) < 0) {
+			ret = -errno;
+			break;
+		}
+		ret = ioc.status;
+
+		/* an existing group is fine; anything else ends the sequence */
+		if (ret != 0 && ret != -EEXIST)
+			break;
+	}
+
+leave:
+	if (net_fd != -1)
+		close(net_fd);
+	if (node_fd != -1)
+		close(node_fd);
+	return ret;
+}



More information about the Ocfs2-tools-commits mailing list