[Ocfs2-tools-commits] khackel commits r411 -
branches/dlm-glue/dlmtools
svn-commits at oss.oracle.com
svn-commits at oss.oracle.com
Thu Nov 18 16:08:02 CST 2004
Author: khackel
Date: 2004-11-18 16:08:00 -0600 (Thu, 18 Nov 2004)
New Revision: 411
Added:
branches/dlm-glue/dlmtools/clusterbo.c
branches/dlm-glue/dlmtools/compat_libfs.h
branches/dlm-glue/dlmtools/dlm_compat.h
branches/dlm-glue/dlmtools/dlm_warning_hack.h
branches/dlm-glue/dlmtools/dlmcommon.h
branches/dlm-glue/dlmtools/dlmhb.h
branches/dlm-glue/dlmtools/dlmmod.h
branches/dlm-glue/dlmtools/dlmnet.h
branches/dlm-glue/dlmtools/dlmnm.h
branches/dlm-glue/dlmtools/dlmtcp.h
branches/dlm-glue/dlmtools/dlmutil.h
branches/dlm-glue/dlmtools/mount.ocfs2.c
Modified:
branches/dlm-glue/dlmtools/Makefile
Log:
first checkin of the two main dlm tools. the headers here will live in an external svn project at some point, not here.
Modified: branches/dlm-glue/dlmtools/Makefile
===================================================================
--- branches/dlm-glue/dlmtools/Makefile 2004-11-18 22:00:07 UTC (rev 410)
+++ branches/dlm-glue/dlmtools/Makefile 2004-11-18 22:08:00 UTC (rev 411)
@@ -15,7 +15,7 @@
LIBOCFS2_LIBS = -L$(TOPDIR)/libocfs2 -locfs2
-SBIN_PROGRAMS = listuuid.ocfs2
+SBIN_PROGRAMS = listuuid.ocfs2 mount.ocfs2 clusterbo
INCLUDES = -I$(TOPDIR)/libocfs2/include
@@ -27,4 +27,10 @@
listuuid.ocfs2: $(OBJS)
$(LINK) $(LIBOCFS2_LIBS) $(COM_ERR_LIBS)
+mount.ocfs2: mount.ocfs2.o
+ $(LINK) $(LIBOCFS2_LIBS) $(COM_ERR_LIBS)
+
+clusterbo: clusterbo.o
+ $(LINK) $(LIBOCFS2_LIBS) $(COM_ERR_LIBS)
+
include $(TOPDIR)/Postamble.make
Added: branches/dlm-glue/dlmtools/clusterbo.c
===================================================================
--- branches/dlm-glue/dlmtools/clusterbo.c 2004-11-18 22:00:07 UTC (rev 410)
+++ branches/dlm-glue/dlmtools/clusterbo.c 2004-11-18 22:08:00 UTC (rev 411)
@@ -0,0 +1,379 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <asm/types.h>
+#include <sys/stat.h>
+#include <sys/wait.h>
+#include <sys/mount.h>
+#include <fcntl.h>
+#include <string.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <ctype.h>
+#include <errno.h>
+#include <sys/ioctl.h>
+
+#define __u8 unsigned char	/* NOTE(review): <asm/types.h> above normally provides __u8 already - confirm this is needed */
+#define u8 unsigned char	/* kernel-style type names for userspace; dlmcommon.h below uses kdev_t/u32/u64 */
+#define u16 unsigned short int
+#define u32 unsigned int
+#define u64 unsigned long long
+#define atomic_t int		/* plain stand-in: nothing atomic about it in userspace */
+#define spinlock_t unsigned long	/* placeholder so struct declarations using it still parse */
+typedef unsigned short kdev_t;
+
+typedef struct list_head {	/* minimal doubly linked list node (next/prev only) */
+ struct list_head *next, *prev;
+} list_t;
+
+
+//#include "dlm_compat.h"
+#include "dlmutil.h"
+#include "dlmcommon.h"
+#include "dlmnm.h"
+
+
+
+#define NIPQUAD(addr) /* the four bytes of addr, in memory order, as printf arguments */ \
+	((unsigned char *)&(addr))[0], \
+	((unsigned char *)&(addr))[1], \
+	((unsigned char *)&(addr))[2], \
+	((unsigned char *)&(addr))[3]
+
+
+#define CLUSTER_FILE "/proc/cluster/nm/.cluster"
+#define GROUP_FILE "/proc/cluster/nm/.group"
+#define NODE_FILE "/proc/cluster/nm/.node"
+
+#define CONF_FILE "/etc/cluster.conf"
+
+/* are these right ? */
+#define MIN_PORT_NUM 1024
+#define MAX_PORT_NUM 65535
+
+#define NET_IOC_MAGIC 'O'
+#define NET_IOC_ACTIVATE _IOR(NET_IOC_MAGIC, 1, net_ioc)
+#define NET_IOC_GETSTATE _IOR(NET_IOC_MAGIC, 2, net_ioc)
+
+
+typedef struct _net_ioc
+{
+ unsigned int status;
+} net_ioc;
+
+
+
+
+int activate_cluster(void);
+int add_node(nm_node_info *newnode);
+int set_cluster_name(char *cluster_name);
+int activate_net(void);
+int load_module(char *module, char *mountpoint, char *fstype);
+
+
+nm_node_info *nodes;
+int total_nodes = 0;
+
+/*
+ * Load the nm/hb/tcp modules, read the cluster name and node list from
+ * CONF_FILE, register them, then activate nm and net.  Exits 1 on failure.
+ */
+int main(int argc, char **argv)
+{
+	int ret, i;
+	FILE *conf;
+	char *cluster_name = NULL;
+
+	ret = load_module("onm", "/proc/cluster/nm", "nm");
+	if (ret) {
+		fprintf(stderr, "failed to load and/or mount nm: %d\n", ret);
+		exit(1);
+	}
+	ret = load_module("ohb", "/proc/cluster/heartbeat", "hb");
+	if (ret) {
+		fprintf(stderr, "failed to load and/or mount hb: %d\n", ret);
+		exit(1);
+	}
+	ret = load_module("otcp", NULL, NULL);
+	if (ret) {
+		fprintf(stderr, "failed to load tcp: %d\n", ret);
+		exit(1);
+	}
+
+	nodes = malloc(NM_MAX_NODES * sizeof(nm_node_info));
+	if (!nodes) {
+		fprintf(stderr, "failed to malloc node array\n");
+		exit(1);
+	}
+	memset(nodes, 0, NM_MAX_NODES * sizeof(nm_node_info));
+
+	conf = fopen(CONF_FILE, "r");
+	if (!conf) {
+		fprintf(stderr, "failed to open %s: %s\n", CONF_FILE, strerror(errno));
+		exit(1);
+	}
+
+	/* %m (POSIX.1-2008) mallocs the string; the old GNU %a is a float conversion in C99 */
+	ret = fscanf(conf, "cluster_name=%64ms\n", &cluster_name);
+	if (ret != 1) {
+		fprintf(stderr, "bad file format: expected cluster_name=XXX\n");
+		exit(1);
+	}
+	printf("found cluster named %s\n", cluster_name);
+
+	while (1) {
+		int node_num, port;
+		char *node_name = NULL;
+		char *ip = NULL;
+		u32 real_ip;
+
+		ret = fscanf(conf, "%d,%64m[^,],%15m[0-9.],%d\n", &node_num, &node_name, &ip, &port);
+		if (ret == 0 || ret == -1) {
+			printf("done. found %d nodes\n", total_nodes);
+			break;
+		}
+		if (ret != 4) {
+			fprintf(stderr, "bad file format: node_num,node_name,ipaddr,ipport\n");
+			exit(1);
+		}
+
+		if (node_num < 0 || node_num >= NM_MAX_NODES) {
+			fprintf(stderr, "bad node number: got %d, range is 0 - %d\n", node_num, NM_MAX_NODES-1);
+			exit(1);
+		}
+		if (nodes[node_num].node_name[0]) {
+			fprintf(stderr, "already have a node in slot %d: orig=%s, this=%s\n",
+				node_num, nodes[node_num].node_name, node_name);
+			exit(1);
+		}
+		if (port < MIN_PORT_NUM || port > MAX_PORT_NUM) {
+			fprintf(stderr, "bad port number: got %d, range is %d - %d\n", port, MIN_PORT_NUM, MAX_PORT_NUM);
+			exit(1);
+		}
+		if (!inet_aton(ip, (struct in_addr*)&real_ip)) {
+			fprintf(stderr, "bad ipv4 address: %s\n", ip);
+			exit(1);
+		}
+
+		total_nodes++;
+		nodes[node_num].node_num = node_num;
+		/* node_name is only as long as the parsed string; a fixed-size memcpy would read past it */
+		strncpy(nodes[node_num].node_name, node_name, NM_MAX_NAME_LEN);
+		nodes[node_num].node_name[NM_MAX_NAME_LEN]=0;
+		nodes[node_num].ifaces[0].ip_port = htons(port);
+		nodes[node_num].ifaces[0].addr_u.ip_addr4 = real_ip;
+		free(ip);
+		free(node_name);
+	}
+	fclose(conf);
+
+	set_cluster_name(cluster_name);
+	free(cluster_name);
+	/* node numbers may be sparse, so walk every slot instead of just the first total_nodes */
+	for (i = 0; i < NM_MAX_NODES; i++) {
+		if (nodes[i].node_name[0])
+			add_node(&nodes[i]);
+	}
+	printf("done. activating cluster now...\n");
+	activate_cluster();
+	printf("done. nm ready!\n");
+	activate_net();
+	printf("done. net ready!\n");
+	free(nodes);
+	return 0;
+}
+
+/* Send NM_OP_NAME_CLUSTER carrying cluster_name to CLUSTER_FILE and echo the reply; exits on error. */
+int set_cluster_name(char *cluster_name)
+{
+	int fd, ret;
+	nm_op *op;
+	char *buf = calloc(1, 4096);
+
+	if (!buf) {
+		fprintf(stderr, "failed to malloc op buffer\n");
+		exit(1);
+	}
+	op = (nm_op *)buf;
+	op->magic = NM_OP_MAGIC;
+	printf("setting cluster name...\n");
+	fd = open(CLUSTER_FILE, O_RDWR);
+	if (fd == -1) {
+		printf("failed to open %s\n", CLUSTER_FILE);
+		exit(1);
+	}
+	op->opcode = NM_OP_NAME_CLUSTER;
+	/* buf is zeroed, so copying at most size-1 bytes keeps the name terminated */
+	strncpy(op->arg_u.name, cluster_name, sizeof(op->arg_u.name) - 1);
+	ret = write(fd, op, sizeof(nm_op));
+	printf("write called returned %d\n", ret);
+	if (ret < 0) {
+		printf("error is: %s\n", strerror(errno));
+		exit(1);
+	}
+	memset(buf, 0, 4096);
+	ret = read(fd, buf, 4096);
+	printf("read returned %d\n", ret);
+	if (ret < 0)
+		exit(1);
+	printf("<<<<%.*s>>>>\n", ret, buf);	/* precision (not width) caps output at the ret bytes read */
+	close(fd);
+	free(buf);
+	return 0;
+}
+
+/* Send NM_OP_ADD_CLUSTER_NODE with a copy of *newnode to CLUSTER_FILE and echo the reply; exits on error. */
+int add_node(nm_node_info *newnode)
+{
+	int fd, ret;
+	nm_op *op;
+	nm_node_info *node;
+	char *buf = calloc(1, 4096);
+
+	if (!buf) {
+		fprintf(stderr, "failed to malloc op buffer\n");
+		exit(1);
+	}
+	op = (nm_op *)buf;
+	op->magic = NM_OP_MAGIC;
+
+	printf("adding cluster node....\n");
+	fd = open(CLUSTER_FILE, O_RDWR);
+	if (fd == -1) {
+		printf("failed to open %s\n", CLUSTER_FILE);
+		exit(1);
+	}
+	op->opcode = NM_OP_ADD_CLUSTER_NODE;
+	node = &(op->arg_u.node);
+	memcpy(node, newnode, sizeof(nm_node_info));
+	printf("passing port=%u, vers=%u, addr=%d.%d.%d.%d\n",
+	       node->ifaces[0].ip_port,
+	       node->ifaces[0].ip_version,
+	       NIPQUAD(node->ifaces[0].addr_u.ip_addr4));
+
+	ret = write(fd, op, sizeof(nm_op));
+	printf("write called returned %d\n", ret);
+	if (ret < 0) {
+		printf("error is: %s\n", strerror(errno));
+		exit(1);
+	}
+	memset(buf, 0, 4096);
+	ret = read(fd, buf, 4096);
+	printf("read returned %d\n", ret);
+	if (ret < 0)
+		exit(1);
+	printf("<<<<%.*s>>>>\n", ret, buf);	/* precision (not width) caps output at the ret bytes read */
+	close(fd);
+
+	free(buf);
+	return 0;
+}
+
+/* Send NM_OP_CREATE_CLUSTER to CLUSTER_FILE and echo the reply; exits on error. */
+int activate_cluster(void)
+{
+	int fd, ret;
+	nm_op *op;
+	char *buf = calloc(1, 4096);
+
+	if (!buf) {
+		fprintf(stderr, "failed to malloc op buffer\n");
+		exit(1);
+	}
+	op = (nm_op *)buf;
+	op->magic = NM_OP_MAGIC;
+
+	printf("activating cluster....\n");
+	fd = open(CLUSTER_FILE, O_RDWR);
+	if (fd == -1) {
+		printf("failed to open %s\n", CLUSTER_FILE);
+		exit(1);
+	}
+	op->opcode = NM_OP_CREATE_CLUSTER;
+
+	ret = write(fd, op, sizeof(nm_op));
+	printf("write called returned %d\n", ret);
+	if (ret < 0) {
+		printf("error is: %s\n", strerror(errno));
+		exit(1);
+	}
+	memset(buf, 0, 4096);
+	ret = read(fd, buf, 4096);
+	printf("read returned %d\n", ret);
+	if (ret < 0)
+		exit(1);
+	printf("<<<<%.*s>>>>\n", ret, buf);	/* precision (not width) caps output at the ret bytes read */
+	close(fd);
+	free(buf);
+	return 0;
+}
+
+
+/* Issue NET_IOC_ACTIVATE on /proc/cluster/net and print the returned status; exits on error. */
+int activate_net(void)
+{
+	net_ioc state;
+	int netfd, rc;
+
+	memset(&state, 0, sizeof(state));
+	netfd = open("/proc/cluster/net", O_RDONLY);
+	if (netfd == -1) {
+		printf("eeek. failed to open\n");
+		exit(1);
+	}
+	rc = ioctl(netfd, NET_IOC_ACTIVATE, &state);
+	close(netfd);
+	if (rc == -1) {
+		printf("eeek. ioctl failed\n");
+		exit(1);
+	}
+	printf("ioctl returned: %u\n", state.status);
+	return 0;
+}
+
+
+/* modprobe `module`; if mountpoint is non-NULL, also mount fstype there. Returns 0 on success, non-zero on failure. */
+int load_module(char *module, char *mountpoint, char *fstype)
+{
+	pid_t pid;
+	int ret, status, err;
+	struct stat st;
+
+	pid = fork();
+	switch (pid) {
+	case 0:
+		ret = execl("/sbin/modprobe", "/sbin/modprobe", module, (char *)NULL);
+		fprintf(stderr, "eeek! exec returned %d: %s\n", ret, strerror(errno));
+		_exit(1);	/* child must not run atexit handlers or flush stdio copied from the parent */
+	case -1:
+		err = errno;	/* fprintf may clobber errno */
+		fprintf(stderr, "fork failed: %s\n", strerror(err));
+		return -err;
+	default:
+		if (waitpid(pid, &status, 0) == -1) {
+			err = errno;
+			fprintf(stderr, "waitpid failed: %s\n", strerror(err));
+			return -err;
+		}
+		if (!WIFEXITED(status)) {
+			fprintf(stderr, "modprobe has not exited!\n");
+			return -EINVAL;
+		}
+		if (WEXITSTATUS(status) != 0) {
+			fprintf(stderr, "modprobe returned %d!\n", WEXITSTATUS(status));
+			return -WEXITSTATUS(status);
+		}
+		break;
+	}
+	if (!mountpoint)
+		return 0;
+
+	if (stat("/proc/cluster", &st) != 0 ||
+	    stat(mountpoint, &st) != 0) {
+		fprintf(stderr, "mountpoint %s does not exist!\n", mountpoint);
+		return -EINVAL;
+	}
+	return mount("none", mountpoint, fstype, 0, "");
+}
Added: branches/dlm-glue/dlmtools/compat_libfs.h
===================================================================
--- branches/dlm-glue/dlmtools/compat_libfs.h 2004-11-18 22:00:07 UTC (rev 410)
+++ branches/dlm-glue/dlmtools/compat_libfs.h 2004-11-18 22:08:00 UTC (rev 411)
@@ -0,0 +1,35 @@
+#ifndef COMPAT_LIBFS_H
+#define COMPAT_LIBFS_H
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
+#define TA_GENERIC_SB_MEMBER(sb) ((sb)->s_fs_info)
+#else
+#define TA_GENERIC_SB_MEMBER(sb) ((sb)->u.generic_sbp)
+#endif
+
+
+/* an argresp is stored in an allocated page and holds the
+ * size of the argument or response, along with its content
+ */
+struct argresp {
+ ssize_t size;
+ char data[0];
+};
+
+typedef ssize_t (TA_write_op)(struct file *, char *, size_t);
+typedef struct _TA_write_ops
+{
+ int num_ops;
+ TA_write_op *write_op[0];
+} TA_write_ops;
+
+struct tree_descr
+{
+ char *name;
+ struct file_operations *ops;
+ int mode;
+};
+
+int simple_fill_super(struct super_block *s, int magic, struct tree_descr *files);
+struct dentry * simple_find_child(struct dentry *dentry, struct qstr *name);
+#endif /* COMPAT_LIBFS_H */
Added: branches/dlm-glue/dlmtools/dlm_compat.h
===================================================================
--- branches/dlm-glue/dlmtools/dlm_compat.h 2004-11-18 22:00:07 UTC (rev 410)
+++ branches/dlm-glue/dlmtools/dlm_compat.h 2004-11-18 22:08:00 UTC (rev 411)
@@ -0,0 +1,120 @@
+/* -*- mode: c; c-basic-offset: 8; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
+ * dlm_compat.h
+ *
+ * Compatibility stuff for 2.4
+ *
+ * Copyright (C) 2004 Oracle. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License as published by the Free Software Foundation, version
+ * 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ *
+ * Authors: Kurt Hackel, Mark Fasheh, Sunil Mushran, Wim Coekaerts,
+ * Manish Singh, Neeraj Goyal, Suchit Kaura
+ */
+
+#ifndef DLM_COMPAT_H
+#define DLM_COMPAT_H
+
+#include <linux/version.h>
+#include <linux/types.h>
+#include <linux/kdev_t.h>
+#include <linux/sched.h>
+#include <linux/compiler.h>
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)
+# include <linux/locks.h>
+#else
+# include <linux/buffer_head.h>
+#endif
+
+
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)
+
+#ifdef __ia64__
+extern inline void prefetch(const void *x);
+extern inline void prefetchw(const void *x);
+#else
+static inline void prefetch(const void *x);
+static inline void prefetchw(const void *x);
+#endif
+extern inline int generic_fls(int x);
+extern inline int get_bitmask_order(unsigned int count);
+/* XXX Hack to avoid warning */
+struct mem_dqinfo;
+extern inline void mark_info_dirty(struct mem_dqinfo *info);
+
+
+
+
+#define flush_scheduled_work flush_scheduled_tasks
+#define work_struct tq_struct
+#define INIT_WORK(w, f, d) INIT_TQUEUE(w, f, d)
+#define schedule_work(w) schedule_task(w)
+
+#ifdef HAVE_NPTL
+static inline void dequeue_signal_lock(struct task_struct *task,
+ sigset_t *blocked, siginfo_t *info)
+{
+ spin_lock_irq(&task->sighand->siglock);
+ dequeue_signal(blocked, info);
+ spin_unlock_irq(&task->sighand->siglock);
+}
+#else
+static inline void dequeue_signal_lock(struct task_struct *task,
+ sigset_t *blocked, siginfo_t *info)
+{
+ spin_lock_irq(&task->sigmask_lock);
+ dequeue_signal(blocked, info);
+ spin_unlock_irq(&task->sigmask_lock);
+}
+#endif
+#define kstatfs statfs
+
+
+
+/*
+ * Copied right out of the 2.6.2 kernel's buffer_head.h:
+ * macro tricks to expand the set_buffer_foo(), clear_buffer_foo()
+ * and buffer_foo() functions.
+ */
+#define BUFFER_FNS(bit, name) \
+static inline void set_buffer_##name(struct buffer_head *bh) \
+{ \
+ set_bit(BH_##bit, &(bh)->b_state); \
+} \
+static inline void clear_buffer_##name(struct buffer_head *bh) \
+{ \
+ clear_bit(BH_##bit, &(bh)->b_state); \
+} \
+static inline int buffer_##name(struct buffer_head *bh) \
+{ \
+ return test_bit(BH_##bit, &(bh)->b_state); \
+}
+
+#undef buffer_uptodate
+#undef buffer_dirty
+BUFFER_FNS(Uptodate, uptodate)
+BUFFER_FNS(Dirty, dirty)
+
+#define clear_buffer_dirty mark_buffer_clean
+
+#endif /* LINUX_VERSION_CODE < 2.6 */
+
+
+#endif /* DLM_COMPAT_H */
+
Added: branches/dlm-glue/dlmtools/dlm_warning_hack.h
===================================================================
--- branches/dlm-glue/dlmtools/dlm_warning_hack.h 2004-11-18 22:00:07 UTC (rev 410)
+++ branches/dlm-glue/dlmtools/dlm_warning_hack.h 2004-11-18 22:08:00 UTC (rev 411)
@@ -0,0 +1,41 @@
+/* -*- mode: c; c-basic-offset: 8; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
+ * dlm_warning_hack.h
+ *
+ * just to get rid of stupid warnings
+ *
+ * Copyright (C) 2002, 2004 Oracle. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ *
+ * Authors: Kurt Hackel, Mark Fasheh, Sunil Mushran, Wim Coekaerts,
+ * Manish Singh, Neeraj Goyal, Suchit Kaura
+ */
+
+#ifndef DLM_WARNING_HACK_H
+#define DLM_WARNING_HACK_H
+
+struct mem_dqinfo;
+struct request;
+
+extern __inline__ int generic_fls(int x);
+extern __inline__ int get_bitmask_order(unsigned int count);
+extern inline void mark_info_dirty(struct mem_dqinfo *info);
+extern inline int rq_data_dir(struct request *rq);
+
+
+#endif /* DLM_WARNING_HACK_H */
Added: branches/dlm-glue/dlmtools/dlmcommon.h
===================================================================
--- branches/dlm-glue/dlmtools/dlmcommon.h 2004-11-18 22:00:07 UTC (rev 410)
+++ branches/dlm-glue/dlmtools/dlmcommon.h 2004-11-18 22:08:00 UTC (rev 411)
@@ -0,0 +1,53 @@
+/* -*- mode: c; c-basic-offset: 8; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
+ * dlmcommon.h
+ *
+ * Common stuff
+ *
+ * Copyright (C) 2002, 2004 Oracle. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ *
+ * Authors: Kurt Hackel, Mark Fasheh, Sunil Mushran, Wim Coekaerts,
+ * Manish Singh, Neeraj Goyal, Suchit Kaura
+ */
+
+#ifndef DLMCOMMON_H
+#define DLMCOMMON_H
+
+#define DLM_ASSERT(x) ({ if (!(x)) { printk("assert failed! %s:%d\n", __FILE__, __LINE__); BUG(); } })
+
+typedef struct _nm_ctxt nm_ctxt;
+typedef struct _dlm_ctxt dlm_ctxt;
+typedef struct _heartbeat_ctxt heartbeat_ctxt;
+
+#define CLUSTER_DISK_UUID_LEN 32 // 16 byte binary == 32 char hex string
+
+typedef struct _cluster_disk
+{
+ // uuid of disk
+ char uuid[CLUSTER_DISK_UUID_LEN+1];
+ // all the rest are for heartbeat
+ kdev_t dev;
+ u32 blocksize_bits;
+ u32 num_blocks;
+ u64 start_block;
+ util_rarray slots;
+} cluster_disk;
+
+
+#endif /* DLMCOMMON_H */
Added: branches/dlm-glue/dlmtools/dlmhb.h
===================================================================
--- branches/dlm-glue/dlmtools/dlmhb.h 2004-11-18 22:00:07 UTC (rev 410)
+++ branches/dlm-glue/dlmtools/dlmhb.h 2004-11-18 22:08:00 UTC (rev 411)
@@ -0,0 +1,130 @@
+/* -*- mode: c; c-basic-offset: 8; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
+ * dlmhb.h
+ *
+ * Function prototypes
+ *
+ * Copyright (C) 2002, 2004 Oracle. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ *
+ * Authors: Kurt Hackel, Mark Fasheh, Sunil Mushran, Wim Coekaerts,
+ * Manish Singh, Neeraj Goyal, Suchit Kaura
+ */
+
+#ifndef DLMHB_H
+#define DLMHB_H
+
+
+enum {
+ HB_NODE_STATE_INIT = 0,
+ HB_NODE_STATE_DOWN,
+ HB_NODE_STATE_UP
+};
+
+struct _heartbeat_ctxt
+{
+ int dummy;
+};
+
+typedef struct _hb_disk_slot
+{
+ struct inode *inode;
+ struct buffer_head *bh;
+ struct list_head list;
+ unsigned long last_time;
+ u16 margin;
+ u16 state;
+} hb_disk_slot;
+
+
+
+#define HB_THREAD_MS 2000 // every 2 seconds
+
+
+#define HB_OP_MAGIC 0xf00d
+enum {
+ HB_OP_START_DISK_HEARTBEAT=371,
+ HB_OP_GET_NODE_MAP
+};
+
+typedef struct _hb_op
+{
+ u16 magic;
+ u16 opcode;
+ unsigned int fd;
+ char disk_uuid[CLUSTER_DISK_UUID_LEN+1];
+ u16 group_num;
+ u32 bits;
+ u32 blocks;
+ u64 start;
+} hb_op;
+
+enum {
+ HB_TYPE_DISK = 0,
+ HB_TYPE_NET
+};
+
+
+/* callback stuff */
+
+enum {
+ HB_NODE_DOWN_CB = 0,
+ HB_NODE_UP_CB,
+ HB_NODE_RESPONDED_CB, // this one is very chatty
+ HB_NUM_CB
+};
+
+typedef void (hb_cb_func)(void *, void *, int, void *);
+
+typedef struct _hb_callback_func
+{
+ struct list_head list;
+ hb_cb_func *func;
+ void *data;
+ int priority;
+} hb_callback_func;
+
+
+enum {
+ HB_Root = 1,
+ HB_Disk,
+ HB_WriteOpArraySize
+};
+
+typedef struct _hb_disk_heartbeat_block
+{
+ u64 time;
+} hb_disk_heartbeat_block;
+
+
+// number of initial allowed misses
+#define HB_INITIAL_DISK_MARGIN 60
+#define HB_INITIAL_NET_MARGIN 60
+
+// number of allowed misses in steady state
+#define HB_DISK_MARGIN 30
+#define HB_NET_MARGIN 30
+
+
+int hb_unregister_callback(int type, void (*func)(void *, void *, int, void *), void *data);
+int hb_register_callback(int type, void (*func)(void *, void *, int, void *), void *data, int priority);
+int hb_fill_node_map(struct inode *group, void *map, int size);
+
+
+
+#endif /* DLMHB_H */
Added: branches/dlm-glue/dlmtools/dlmmod.h
===================================================================
--- branches/dlm-glue/dlmtools/dlmmod.h 2004-11-18 22:00:07 UTC (rev 410)
+++ branches/dlm-glue/dlmtools/dlmmod.h 2004-11-18 22:08:00 UTC (rev 411)
@@ -0,0 +1,418 @@
+/* -*- mode: c; c-basic-offset: 8; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
+ * dlmmod.h
+ *
+ * Function prototypes
+ *
+ * Copyright (C) 2002, 2004 Oracle. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ *
+ * Authors: Kurt Hackel, Mark Fasheh, Sunil Mushran, Wim Coekaerts,
+ * Manish Singh, Neeraj Goyal, Suchit Kaura
+ */
+
+#ifndef DLMMOD_H
+#define DLMMOD_H
+
+#define DLM_HB_NODE_DOWN_PRI (0xf000000)
+#define DLM_HB_NODE_UP_PRI (0x8000000)
+
+#define DLM_LVB_LEN 64
+#define DLM_LOCKID_NAME_MAX 32
+
+#define DLM_DOMAIN_NAME_MAX_LEN 255
+#define DLM_LOCK_RES_OWNER_UNKNOWN NM_MAX_NODES
+#define DLM_THREAD_SHUFFLE_INTERVAL 5 // flush everything every 5 passes
+#define DLM_THREAD_MS 200 // flush at least every 200 ms
+
+#define DLM_HASH_BITS 7
+#define DLM_HASH_SIZE (1 << DLM_HASH_BITS)
+#define DLM_HASH_MASK (DLM_HASH_SIZE - 1)
+
+typedef enum _dlm_ast_type {
+ DLM_AST = 0,
+ DLM_BAST,
+ DLM_ASTUNLOCK
+} dlm_ast_type;
+
+
+#define LKM_IVMODE (-1) /* invalid mode */
+#define LKM_NLMODE 0 /* null lock */
+#define LKM_CRMODE 1 /* concurrent read */ /* unsupported */
+#define LKM_CWMODE 2 /* concurrent write */ /* unsupported */
+#define LKM_PRMODE 3 /* protected read */
+#define LKM_PWMODE 4 /* protected write */ /* unsupported */
+#define LKM_EXMODE 5 /* exclusive */
+#define LKM_MAXMODE 5
+#define LKM_MODEMASK 0xff
+
+
+
+#define LKM_ORPHAN 0x10 /* this lock is orphanable */ /* unsupported */
+#define LKM_PARENTABLE 0x20 /* this lock was orphaned */ /* unsupported */
+#define LKM_BLOCK 0x40 /* blocking lock request */ /* unsupported */
+#define LKM_LOCAL 0x80 /* local lock request */ /* unsupported */
+#define LKM_VALBLK 0x100 /* lock value block request */
+#define LKM_NOQUEUE 0x200 /* non blocking request */ /* unsupported */
+#define LKM_CONVERT 0x400 /* conversion request */
+#define LKM_NODLCKWT 0x800 /* this lock wont deadlock */ /* unsupported */
+#define LKM_UNLOCK 0x1000 /* deallocate this lock */
+#define LKM_CANCEL 0x2000 /* cancel conversion request */
+#define LKM_DEQALL 0x4000 /* remove all locks held by proc */ /* unsupported */
+#define LKM_INVVALBLK 0x8000 /* invalidate lock value block */
+#define LKM_SYNCSTS 0x10000 /* return synchronous status if poss */ /* unsupported */
+#define LKM_TIMEOUT 0x20000 /* lock request contains timeout */ /* unsupported */
+#define LKM_SNGLDLCK 0x40000 /* request can self-deadlock */ /* unsupported */
+#define LKM_FINDLOCAL 0x80000 /* find local lock request */ /* unsupported */
+#define LKM_PROC_OWNED 0x100000 /* owned by process, not group */ /* unsupported */
+#define LKM_XID 0x200000 /* use transaction id for deadlock */ /* unsupported */
+#define LKM_XID_CONFLICT 0x400000 /* do not allow lock inheritance */ /* unsupported */
+#define LKM_FORCE 0x800000 /* force unlock flag */
+#define LKM_REVVALBLK 0x1000000 /* temporary solution: re-validate lock value block */ /* unsupported */
+
+typedef enum _dlm_status {
+ DLM_NORMAL, /* request in progress */
+ DLM_GRANTED, /* request granted */
+ DLM_DENIED, /* request denied */
+ DLM_DENIED_NOLOCKS, /* request denied, out of system resources */
+ DLM_WORKING, /* async request in progress */
+ DLM_BLOCKED, /* lock request blocked */
+ DLM_BLOCKED_ORPHAN, /* lock request blocked by a orphan lock*/
+ DLM_DENIED_GRACE_PERIOD, /* topological change in progress */
+ DLM_SYSERR, /* system error */
+ DLM_NOSUPPORT, /* unsupported */
+ DLM_CANCELGRANT, /* can't cancel convert: already granted */
+ DLM_IVLOCKID, /* bad lockid */
+ DLM_SYNC, /* synchronous request granted */
+ DLM_BADTYPE, /* bad resource type */
+ DLM_BADRESOURCE, /* bad resource handle */
+ DLM_MAXHANDLES, /* no more resource handles */
+ DLM_NOCLINFO, /* can't contact cluster manager */
+ DLM_NOLOCKMGR, /* can't contact lock manager */
+ DLM_NOPURGED, /* can't contact purge daemon */
+ DLM_BADARGS, /* bad api args */
+ DLM_VOID, /* no status */
+ DLM_NOTQUEUED, /* NOQUEUE was specified and request failed */
+ DLM_IVBUFLEN, /* invalid resource name length */
+ DLM_CVTUNGRANT, /* attempted to convert ungranted lock */
+ DLM_BADPARAM, /* invalid lock mode specified */
+ DLM_VALNOTVALID, /* value block has been invalidated */
+ DLM_REJECTED, /* request rejected, unrecognized client */
+ DLM_ABORT, /* blocked lock request cancelled */
+ DLM_CANCEL, /* conversion request cancelled */
+ DLM_IVRESHANDLE, /* invalid resource handle */
+ DLM_DEADLOCK, /* deadlock recovery refused this request */
+ DLM_DENIED_NOASTS, /* failed to allocate AST */
+ DLM_FORWARD, /* request must wait for primary's response */
+ DLM_TIMEOUT, /* timeout value for lock has expired */
+ DLM_IVGROUPID, /* invalid group specification */
+ DLM_VERS_CONFLICT, /* version conflicts prevent request handling */
+ DLM_BAD_DEVICE_PATH, /* Locks device does not exist or path wrong */
+ DLM_NO_DEVICE_PERMISSION, /* Client has insufficient pers for device */
+ DLM_NO_CONTROL_DEVICE, /* Cannot set options on opened device */
+ DLM_MAXSTATS, /* upper limit for return code validation */
+
+ DLM_RECOVERING /* our lame addition to allow caller to fail a lock
+ request if it is being recovered */
+} dlm_status;
+
+
+
+typedef struct _dlm_recovery_ctxt
+{
+ struct list_head resources;
+ struct list_head received; // list of dlm_reco_lock_infos received from other nodes during recovery
+ u16 new_master;
+ u16 dead_node;
+ u16 sending_node;
+ u32 next_seq;
+ util_thread_info thread;
+} dlm_recovery_ctxt;
+
+
+struct _dlm_ctxt
+{
+ struct list_head list;
+ struct list_head *resources;
+ struct list_head dirty_list;
+ spinlock_t spinlock;
+ struct rw_semaphore recovery_sem;
+ char *name;
+ char *net_buf;
+ util_thread_info thread;
+ struct inode *group;
+ u32 key;
+ u16 group_index;
+ u32 node_map[8];
+ u32 recovery_map[8];
+ dlm_recovery_ctxt reco;
+};
+
+#define DLM_LOCK_RES_UNINITED 0x00000001
+#define DLM_LOCK_RES_RECOVERING 0x00000002
+#define DLM_LOCK_RES_READY 0x00000004
+#define DLM_LOCK_RES_DIRTY 0x00000008
+#define DLM_LOCK_RES_IN_PROGRESS 0x00000010
+
+typedef struct _dlm_lock_resource
+{
+ struct list_head list;
+ struct list_head granted;
+ struct list_head converting;
+ struct list_head blocked;
+ struct list_head dirty;
+ struct list_head recovering; // dlm_recovery_ctxt.resources list
+ spinlock_t spinlock;
+ wait_queue_head_t wq;
+ u16 owner; // node which owns the lock resource, or unknown
+ u16 state;
+ struct qstr lockname;
+ char lvb[DLM_LVB_LEN];
+} dlm_lock_resource;
+
+typedef void (dlm_astlockfunc_t)(void *);
+typedef void (dlm_bastlockfunc_t)(void *, int);
+typedef void (dlm_astunlockfunc_t)(void *, dlm_status);
+
+typedef struct _dlm_lock
+{
+ struct list_head list;
+ struct list_head ast_list;
+ dlm_lock_resource *lockres;
+ spinlock_t spinlock;
+
+ s8 type;
+ s8 convert_type;
+ s8 highest_blocked;
+ s8 reserved1;
+ u16 node;
+ u16 reserved2;
+
+ dlm_astlockfunc_t *ast; // ast and bast must be callable while holding a spinlock!
+ dlm_bastlockfunc_t *bast;
+ void *astdata;
+ u64 cookie;
+} dlm_lock;
+
+
+typedef struct _dlm_lockstatus {
+ dlm_status status;
+ dlm_lock *lockid;
+ char lvb[DLM_LVB_LEN];
+} dlm_lockstatus;
+
+enum {
+ DLM_MLE_BLOCK,
+ DLM_MLE_MASTER
+};
+
+typedef struct _dlm_lock_name
+{
+ u8 len;
+ u8 name[0]; // [DLM_LOCKID_NAME_MAX]
+} dlm_lock_name;
+
+/* good god this needs to be trimmed down */
+typedef struct _dlm_master_list_entry
+{
+ struct list_head list;
+ dlm_ctxt *dlm;
+ spinlock_t spinlock;
+ wait_queue_head_t wq;
+ atomic_t woken;
+ atomic_t refcnt;
+ u32 maybe_map[8];
+ u32 vote_map[8];
+ u32 response_map[8];
+ u32 node_map[8];
+ u16 master;
+ u8 error;
+ u8 type; // BLOCK or MASTER
+ union {
+ dlm_lock_resource *res;
+ dlm_lock_name name;
+ } u;
+} dlm_master_list_entry;
+
+void dlm_put_mle(dlm_master_list_entry *mle);
+static inline void dlm_get_mle(dlm_master_list_entry *mle)
+{
+ atomic_inc(&mle->refcnt);
+}
+
+
+#define DLM_MASTER_REQUEST_MSG 500
+#define DLM_MASTER_REQUEST_RESP_MSG 501
+#define DLM_ASSERT_MASTER_MSG 502
+#define DLM_CREATE_LOCK_MSG 503
+#define DLM_CONVERT_LOCK_MSG 504
+#define DLM_PROXY_AST_MSG 505
+
+
+enum {
+ DLM_MASTER_RESP_NO,
+ DLM_MASTER_RESP_YES,
+ DLM_MASTER_RESP_MAYBE,
+ DLM_MASTER_RESP_ERROR
+};
+
+typedef struct _dlm_master_request
+{
+ u16 node_idx;
+ u8 namelen;
+ u8 name[NM_MAX_NAME_LEN];
+} dlm_master_request;
+
+typedef struct _dlm_master_request_resp
+{
+ u16 node_idx;
+ u8 response;
+ u8 namelen;
+ u8 name[NM_MAX_NAME_LEN];
+} dlm_master_request_resp;
+
+typedef struct _dlm_assert_master
+{
+ u16 node_idx;
+ u8 namelen;
+ u8 name[NM_MAX_NAME_LEN];
+} dlm_assert_master;
+
+
+
+
+
+void dlm_shuffle_lists(dlm_ctxt *dlm, dlm_lock_resource *res);
+void dlm_thread_run_lock_resources(dlm_ctxt *dlm);
+int dlm_thread(void *data);
+int dlm_launch_thread(dlm_ctxt *dlm);
+void dlm_complete_thread(dlm_ctxt *dlm);
+
+dlm_status dlmlock(dlm_ctxt *dlm, int mode, dlm_lockstatus *lksb, int flags, char *name,
+ dlm_astlockfunc_t *ast, void *data, dlm_bastlockfunc_t *bast);
+
+
+dlm_status do_dlmlock(dlm_ctxt *dlm, dlm_lock_resource *res, dlm_lockstatus *lksb,
+ int options, int type, dlm_astlockfunc_t *ast,
+ dlm_bastlockfunc_t *bast, void *data);
+dlm_status dlmlock_local(dlm_ctxt *dlm, dlm_lock_resource *res, dlm_lock *lock, int options);
+dlm_status dlmlock_remote(dlm_ctxt *dlm, dlm_lock_resource *res, dlm_lock *lock, int options);
+
+dlm_status do_dlmconvert(dlm_ctxt *dlm, dlm_lock_resource *res, dlm_lock *lock, int options, int type);
+dlm_status dlmconvert_local(dlm_ctxt *dlm, dlm_lock_resource *res, dlm_lock *lock, int options, int type);
+dlm_status dlmconvert_remote(dlm_ctxt *dlm, dlm_lock_resource *res, dlm_lock *lock, int options, int type);
+
+dlm_status dlmunlock(dlm_ctxt *dlm, dlm_lockstatus *lksb, int flags, dlm_astunlockfunc_t *unlockast, void *data);
+
+dlm_ctxt * dlm_register_domain(char *domain, char *group_name, u32 key);
+void dlm_unregister_domain(dlm_ctxt *dlm);
+dlm_lock_resource *dlm_get_lock_resource(dlm_ctxt *dlm, struct qstr *lockname);
+int dlm_lock_owner_broadcast(dlm_ctxt *dlm, dlm_lock_resource *res);
+int dlm_refresh_lock_resource(dlm_ctxt *dlm, dlm_lock_resource *res);
+int dlm_do_ast(dlm_ctxt *dlm, dlm_lock_resource *res, dlm_lock *lock);
+int dlm_do_bast(dlm_ctxt *dlm, dlm_lock_resource *res, dlm_lock *lock, int blocked_type);
+u16 dlm_nm_this_node(dlm_ctxt *dlm);
+void dlm_kick_thread(dlm_ctxt *dlm, dlm_lock_resource *res);
+
+int dlm_nm_init(dlm_ctxt *dlm);
+int dlm_heartbeat_init(dlm_ctxt *dlm);
+
+dlm_lock_resource * dlm_lookup_lock(dlm_ctxt *dlm, struct qstr *lockname);
+dlm_ctxt * dlm_lookup_domain(char *domain);
+
+void dlm_hb_node_down_cb(void *ptr1, void *ptr2, int idx, void *data);
+void dlm_hb_node_up_cb(void *ptr1, void *ptr2, int idx, void *data);
+int dlm_hb_node_dead(dlm_ctxt *dlm, int node);
+int dlm_hb_node_up(dlm_ctxt *dlm, int node);
+int __dlm_hb_node_dead(dlm_ctxt *dlm, int node);
+int __dlm_hb_node_up(dlm_ctxt *dlm, int node);
+
+int dlm_lock_owner_broadcast(dlm_ctxt *dlm, dlm_lock_resource *res);
+dlm_lock_resource * dlm_get_lock_resource(dlm_ctxt *dlm, struct qstr *lockname);
+int dlm_master_request_handler(net_msg *msg, u32 len, void *data);
+int dlm_master_request_resp_handler(net_msg *msg, u32 len, void *data);
+int dlm_assert_master_handler(net_msg *msg, u32 len, void *data);
+int dlm_do_master_request(dlm_master_list_entry *mle, int to);
+int dlm_do_master_request_resp(dlm_ctxt *dlm, struct qstr *name, int response, int to);
+int dlm_do_assert_master(dlm_master_list_entry *mle);
+void dlm_mle_node_down(void *ptr1, void *ptr2, int idx, void *data);
+void dlm_mle_node_up(void *ptr1, void *ptr2, int idx, void *data);
+
+dlm_lock_resource * __dlm_lookup_lock(dlm_ctxt *dlm, struct qstr *lockname);
+void dlm_init_lockres(dlm_lock_resource *res, struct qstr *lockname);
+void dlm_wait_on_lockres(dlm_lock_resource *res);
+void dlm_dump_everything(void);
+void dlm_dump_dlm(dlm_ctxt *dlm);
+
+/* Returns 1 when a lock of mode `request` can coexist with a held lock of mode `existing`, else 0. */
+static inline int dlm_lock_compatible(int existing, int request)
+{
+	/* a null lock on either side never conflicts */
+	if (existing == LKM_NLMODE || request == LKM_NLMODE)
+		return 1;
+
+	/* an exclusive request conflicts with any remaining holder */
+	if (request == LKM_EXMODE)
+		return 0;
+
+	/*
+	 * any other request is granted only alongside a protected-read
+	 * holder (the original comment assumes the request is PR here)
+	 */
+	return existing == LKM_PRMODE;
+}
+
+static inline int dlm_lock_on_list(struct list_head *head, dlm_lock *lock)
+{
+ struct list_head *iter;
+ dlm_lock *tmplock;
+
+ list_for_each(iter, head) {
+ tmplock = list_entry(iter, dlm_lock, list);
+ if (tmplock == lock)
+ return 1;
+ }
+ return 0;
+}
+
+static inline int dlm_mle_equal(dlm_ctxt *dlm, dlm_master_list_entry *mle, struct qstr *lockname)
+{
+ dlm_lock_resource *res;
+
+ if (dlm != mle->dlm)
+ return 0;
+
+ if (mle->type == DLM_MLE_BLOCK) {
+ if (lockname->len != mle->u.name.len ||
+ strncmp(lockname->name, mle->u.name.name, lockname->len)!=0)
+ return 0;
+ } else {
+ res = mle->u.res;
+ if (res->lockname.hash != lockname->hash ||
+ res->lockname.len != lockname->len ||
+ strncmp(res->lockname.name, lockname->name, lockname->len)!=0)
+ return 0;
+ }
+ return 1;
+}
+
+
+
+
+
+#endif /* DLMMOD_H */
Added: branches/dlm-glue/dlmtools/dlmnet.h
===================================================================
--- branches/dlm-glue/dlmtools/dlmnet.h 2004-11-18 22:00:07 UTC (rev 410)
+++ branches/dlm-glue/dlmtools/dlmnet.h 2004-11-18 22:08:00 UTC (rev 411)
@@ -0,0 +1,80 @@
+/* -*- mode: c; c-basic-offset: 8; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
+ * dlmnet.h
+ *
+ * Function prototypes
+ *
+ * Copyright (C) 2002, 2004 Oracle. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ *
+ * Authors: Kurt Hackel, Mark Fasheh, Sunil Mushran, Wim Coekaerts,
+ * Manish Singh, Neeraj Goyal, Suchit Kaura
+ */
+
+#ifndef DLMNET_H
+#define DLMNET_H
+
+#include "dlmnm.h"
+
+#define NET_MSG_MAGIC 0xbc0ffa55
+
+typedef struct _net_msg
+{
+ __u32 magic;
+ __u32 data_len;
+ __u16 src_node;
+ __u16 dst_node;
+ __u32 msg_type;
+ __u32 key;
+ __u8 buf[0];
+} net_msg;
+
+typedef int (net_msg_handler_func)(net_msg *msg, u32 len, void *data);
+
+/* size in bytes of the net_msg_handler.name buffer */
+#define NET_MSG_HANDLER_MAX_NAMELEN 32
+
+/*
+ * One message handler record.  'list' links the record into a list and
+ * 'func' is the net_msg_handler_func callback.
+ * NOTE(review): msg_type/key presumably select which messages the
+ * callback receives and 'data' is presumably passed back to it -- confirm
+ * against the dispatch code, which is not part of this header.
+ */
+typedef struct _net_msg_handler
+{
+	struct list_head list;
+	char name[NET_MSG_HANDLER_MAX_NAMELEN];	/* sized by the macro rather than a bare 32 */
+	u32 msg_type;
+	u32 key;
+	net_msg_handler_func *func;
+	void *data;
+	u32 expected_len;
+} net_msg_handler;
+
+/* no clue for these yet... */
+#define NET_MIN_MSG_LEN (sizeof(net_msg))
+#define NET_MAX_MSG_LEN (1300) // something based on mtu i guess
+
+
+int net_send_error(struct socket *sock, void *buffer, const size_t buflen);
+int net_dispatch_message(struct inode *inode, struct socket *sock, net_msg *hdr, net_msg_handler *hnd);
+int net_dispatch(struct inode *inode, struct socket *sock, net_msg *msg);
+int net_receive(void);
+int net_send_tcp_msg (struct inode *inode, void *data, u32 packet_len);
+int net_recv_tcp_msg (void *data, u32 *packet_len);
+int net_process_message(net_msg *msg, u32 packet_len);
+net_msg * net_package_message(u32 msg_type, u32 key, void *data, u32 len, u16 from, u16 to);
+int net_register_handler(struct inode *inode, u32 msg_type, u32 key, int flags,
+ u32 max_len, net_msg_handler_func *func, void *data, void *buf);
+
+
+
+
+#endif /* DLMNET_H */
Added: branches/dlm-glue/dlmtools/dlmnm.h
===================================================================
--- branches/dlm-glue/dlmtools/dlmnm.h 2004-11-18 22:00:07 UTC (rev 410)
+++ branches/dlm-glue/dlmtools/dlmnm.h 2004-11-18 22:08:00 UTC (rev 411)
@@ -0,0 +1,253 @@
+/* -*- mode: c; c-basic-offset: 8; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
+ * dlmnm.h
+ *
+ * Function prototypes
+ *
+ * Copyright (C) 2002, 2004 Oracle. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ *
+ * Authors: Kurt Hackel, Mark Fasheh, Sunil Mushran, Wim Coekaerts,
+ * Manish Singh, Neeraj Goyal, Suchit Kaura
+ */
+
+#ifndef DLMNM_H
+#define DLMNM_H
+
+
+
+struct _nm_ctxt
+{
+ int dummy;
+};
+
+#define NM_MAX_IFACES 2
+#define NM_MAX_NODES 255
+#define NM_INVALID_SLOT_NUM 255
+
+/* host name, group name, cluster name all 64 bytes */
+#define NM_MAX_NAME_LEN 64 // __NEW_UTS_LEN
+
+
+#define NM_GROUP_INODE_START 200000
+#define NM_NODE_INODE_START 100000
+
+enum {
+ NM_CLUSTER_DOWN=0,
+ NM_CLUSTER_UP
+};
+
+enum {
+ NM_GROUP_NOT_READY=0,
+ NM_GROUP_READY
+};
+
+enum {
+ NM_Root = 1,
+ NM_Cluster,
+ NM_Node,
+ NM_Group,
+};
+
+
+
+
+typedef struct _nm_network_iface
+{
+ u16 ip_port; /* for simplicity, just define exactly one port for this if */
+ u16 ip_version;
+ union {
+ u32 ip_addr4; /* IPv4 address in NBO */
+ u32 ip_addr6[4]; /* IPv6 address in NBO */
+ } addr_u;
+} nm_network_iface;
+
+typedef struct _nm_node_info
+{
+ u16 node_num;
+ char node_name[NM_MAX_NAME_LEN+1];
+ nm_network_iface ifaces[NM_MAX_IFACES];
+} nm_node_info;
+
+
+typedef struct _nm_cluster
+{
+ char name[NM_MAX_NAME_LEN+1];
+ int state;
+ spinlock_t bitmap_lock;
+ u32 group_bitmap[8];
+ u32 node_bitmap[8];
+} nm_cluster;
+
+
+typedef struct _nm_group_inode_private
+{
+ struct inode *inode;
+ struct list_head net_list;
+ struct list_head disk_list;
+ cluster_disk disk;
+ int state;
+ spinlock_t bitmap_lock;
+ u32 slot_bitmap[8];
+} nm_group_inode_private;
+
+#ifdef __KERNEL__
+/* TODO: move this */
+#define NET_FLAG_CREATING_SOCKET 0x00000001
+typedef struct _net_inode_private
+{
+ struct socket *sock;
+ wait_queue_t sleep;
+ spinlock_t sock_lock;
+ struct list_head handlers;
+ struct list_head list;
+ int flags;
+} net_inode_private;
+
+typedef struct _nm_node_inode_private
+{
+ struct inode *inode;
+ nm_node_info node;
+ struct list_head ip_hash;
+ net_inode_private net;
+} nm_node_inode_private;
+#endif
+
+/* transaction file nm_op stuff */
+
+#define NM_OP_MAGIC 0xbeaf
+enum {
+ NM_OP_CREATE_CLUSTER=123,
+ NM_OP_DESTROY_CLUSTER,
+ NM_OP_NAME_CLUSTER,
+ NM_OP_ADD_CLUSTER_NODE,
+ NM_OP_GET_CLUSTER_NUM_NODES,
+ NM_OP_GET_NODE_INFO,
+ NM_OP_CREATE_GROUP,
+ NM_OP_GET_GROUP_INFO,
+ NM_OP_ADD_GROUP_NODE,
+ NM_OP_GET_GLOBAL_NODE_NUM
+};
+
+typedef struct _nm_group_change
+{
+ u16 group_num;
+ u16 node_num;
+ u16 slot_num;
+ char disk_uuid[CLUSTER_DISK_UUID_LEN+1];
+ char name[NM_MAX_NAME_LEN+1];
+} nm_group_change;
+
+typedef struct _nm_op /* request record; mount.ocfs2.c fwrite()s one of these to the nm /proc files */
+{
+ u16 magic; /* callers set this to NM_OP_MAGIC */
+ u16 opcode; /* one of the NM_OP_* values above */
+ union {
+ u16 index;
+ char name[NM_MAX_NAME_LEN+1];
+ nm_node_info node;
+ nm_group_change gc; /* filled in for NM_OP_CREATE_GROUP and NM_OP_ADD_GROUP_NODE */
+ } arg_u;
+} nm_op;
+
+
+/* callback stuff */
+
+enum {
+ NM_NODE_ADD_CB = 0,
+ NM_NODE_DEL_CB,
+ NM_GROUP_ADD_CB,
+ NM_GROUP_DEL_CB,
+ NM_GROUP_NODE_ADD_CB,
+ NM_GROUP_NODE_DEL_CB,
+ NM_NUM_CB
+};
+
+typedef void (nm_cb_func)(void *, void *, u16);
+
+typedef struct _nm_callback_func
+{
+ struct list_head list;
+ nm_cb_func *func;
+ //void (*func)(void *, void *, u16);
+} nm_callback_func;
+
+
+
+
+u16 nm_this_node(struct inode *group);
+int nm_init(struct _dlm_ctxt *dlm);
+nm_cluster * nm_get_cluster(void);
+int nm_register_callback(int type, void (*func)(void *, void *, u16));
+int nm_unregister_callback(int type, void (*func)(void *, void *, u16));
+int nm_get_group_num_nodes(struct inode *group);
+int nm_get_group_max_slots(struct inode *group);
+int nm_make_group_ready(struct inode *group);
+void * nm_iterate_group_disk_slots(struct inode *group, int *idx);
+int nm_remove_node_from_group(struct inode *group, struct inode *node);
+int nm_create_group(char *buf, nm_op *data);
+int nm_add_node_to_group(char *buf, nm_op *data);
+
+#ifdef __KERNEL__
+
+
+struct inode * nm_get_group_by_num(u16 group_num);
+struct inode * nm_get_node_by_num(u16 node_num);
+struct inode * __nm_get_node_by_name(char *node_name, int dir);
+struct inode * nm_get_node_by_ip(u32 addr);
+struct inode * nm_get_group_node_by_index(struct inode *group, u16 index);
+
+/* Name lookup via __nm_get_node_by_name() with dir == 0. */
+static inline struct inode * nm_get_node_by_name(char *node_name)
+{
+	struct inode *found = __nm_get_node_by_name(node_name, 0);
+
+	return found;
+}
+/* Name lookup via __nm_get_node_by_name() with dir == 1. */
+static inline struct inode * nm_get_group_by_name(char *group_name)
+{
+	struct inode *found = __nm_get_node_by_name(group_name, 1);
+
+	return found;
+}
+
+
+/* Node index = inode number minus NM_NODE_INODE_START. */
+static inline int nm_get_node_global_index(struct inode *node)
+{
+	return node->i_ino - NM_NODE_INODE_START;
+}
+/* Group index = inode number minus NM_GROUP_INODE_START. */
+static inline int nm_get_group_global_index(struct inode *group)
+{
+	return group->i_ino - NM_GROUP_INODE_START;
+}
+#endif
+
+/*
+ * Return 1 if 'ino' lies in the node inode range
+ * [NM_NODE_INODE_START, NM_NODE_INODE_START + NM_MAX_NODES) or in the
+ * group inode range
+ * [NM_GROUP_INODE_START, NM_GROUP_INODE_START + NM_MAX_NODES), else 0.
+ *
+ * The small fixed inode numbers NM_Cluster..NM_Group are deliberately
+ * not accepted: they should never be referred to in kernel.
+ */
+static inline int nm_valid_ino(int ino)
+{
+	int in_node_range = ino >= NM_NODE_INODE_START &&
+			    ino < NM_NODE_INODE_START + NM_MAX_NODES;
+	int in_group_range = ino >= NM_GROUP_INODE_START &&
+			     ino < NM_GROUP_INODE_START + NM_MAX_NODES;
+
+	return in_node_range || in_group_range;
+}
+
+
+
+#endif /* DLMNM_H */
Added: branches/dlm-glue/dlmtools/dlmtcp.h
===================================================================
--- branches/dlm-glue/dlmtools/dlmtcp.h 2004-11-18 22:00:07 UTC (rev 410)
+++ branches/dlm-glue/dlmtools/dlmtcp.h 2004-11-18 22:08:00 UTC (rev 411)
@@ -0,0 +1,239 @@
+/* -*- mode: c; c-basic-offset: 8; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
+ * dlmtcp.h
+ *
+ * Function prototypes
+ *
+ * Copyright (C) 2002, 2004 Oracle. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ *
+ * Authors: Kurt Hackel, Mark Fasheh, Sunil Mushran, Wim Coekaerts,
+ * Manish Singh, Neeraj Goyal, Suchit Kaura
+ */
+
+#ifndef DLMNET_H /* NOTE(review): dlmnet.h uses this same guard macro, so
+ * whichever of the two headers is included second is skipped
+ * entirely -- confirm this is intended. */
+#define DLMNET_H
+#include <linux/socket.h>
+#ifdef __KERNEL__
+#include <net/sock.h>
+#else
+#include <sys/socket.h>
+#endif
+#include <linux/inet.h>
+#include <linux/in.h>
+
+#include "dlmnm.h"
+
+
+#ifdef __KERNEL__
+
+#define NET_DISP_THREAD_MS 5000 /* TODO */
+#define NET_RECV_THREAD_MS 5000 /* TODO */
+
+#ifdef BIG_NET_MSG
+#define NET_MSG_MAGIC ((u32)0xbc0ffa55)
+#define NET_MSG_STATUS_MAGIC ((u32)0xbc0ffa56)
+#define NET_MSG_NUM_MAX ((u64)0xffffffffffffffffULL)
+typedef struct _net_msg
+{
+ __u32 magic;
+ __u32 data_len;
+ __u16 src_node;
+ __u16 dst_node;
+ __u32 msg_type;
+ __u32 key;
+ __s32 status;
+ __u64 msg_num;
+ __u8 buf[0];
+} net_msg;
+#else
+
+#define NET_MSG_MAGIC ((u16)0xfa55)
+#define NET_MSG_STATUS_MAGIC ((u16)0xfa56)
+#define NET_MSG_NUM_MAX ((u32)0xffffffffUL)
+typedef struct _net_msg
+{
+ __u16 magic;
+ __u16 data_len;
+ __u16 msg_type;
+ __s16 status;
+ __u32 key;
+ __u32 msg_num;
+ __u8 buf[0];
+} net_msg;
+
+#endif
+
+typedef int (net_msg_handler_func)(net_msg *msg, u32 len, void *data);
+
+typedef struct _net_msg_handler
+{
+ struct list_head list;
+ u32 msg_type;
+ u32 key;
+ net_msg_handler_func *func;
+ void *data;
+ net_msg hdr;
+ u32 max_len;
+ void *buf;
+ spinlock_t lock;
+ atomic_t refcnt;
+ int flags;
+} net_msg_handler;
+
+typedef struct _net_status_ctxt
+{
+ struct list_head list;
+ s32 status;
+ u64 msg_num;
+ wait_queue_head_t wq;
+ atomic_t woken;
+} net_status_ctxt;
+
+void net_do_status_return(u64 msg_num, s32 status);
+
+/* no clue for these yet... */
+#define NET_MIN_MSG_LEN (0)
+#define NET_MAX_MSG_LEN (8192)
+
+
+#define NET_ALREADY_CONNECTED 2
+#define NET_UNKNOWN_HOST 3
+
+
+static inline int net_is_valid_error_type(u32 err_type)
+{
+ if (err_type == NET_ALREADY_CONNECTED ||
+ err_type == NET_UNKNOWN_HOST)
+ return 1;
+ return 0;
+}
+
+enum {
+ NET_HND_VAR_LEN = 0,
+ NET_HND_IN_USE,
+};
+
+#define net_handler_variable_len(h) ((h)->flags & (1 << NET_HND_VAR_LEN))
+#define net_handler_in_use(h) ((h)->flags & (1 << NET_HND_IN_USE))
+
+static inline int net_handler_msg_len_ok(net_msg_handler *handler, u32 len)
+{ /* NOTE(review): as written this returns nonzero when len is NOT
+ * acceptable (len > max_len for variable-length handlers, len != max_len
+ * otherwise), which reads as the inverse of the function name --
+ * confirm against the callers before relying on it. */
+ return (net_handler_variable_len(handler) ?
+ len > handler->max_len : len != handler->max_len);
+}
+
+
+static inline int net_ip_version_to_family(u16 ip_version)
+{ /* Logs ip_version (raw and after ntohs) and then returns PF_INET
+ * unconditionally.  The switch below and the trailing "return 4" are
+ * unreachable because of that early return.
+ * NOTE(review): the early return looks like a temporary override of the
+ * 4 -> PF_INET / 6 -> PF_INET6 mapping -- confirm before removing. */
+ printk("ip_version passed: %u, host byteorder: %u\n", ip_version, ntohs(ip_version));
+ return PF_INET;
+ switch (ntohs(ip_version)) {
+ case 4:
+ return PF_INET;
+ case 6:
+ return PF_INET6;
+ default:
+ BUG();
+ }
+
+ return 4;
+}
+
+
+
+/*
+ * TODO: figure this out....
+ *
+ * Decide whether the link should be treated as down.  Returns 1 when a
+ * socket is supplied and its state is neither TCP_ESTABLISHED nor
+ * TCP_CLOSE_WAIT, or when 'err' is one of the negative errno values
+ * listed below; returns 0 otherwise (including every err >= 0).
+ */
+static inline int net_link_down(int err, struct socket *sock)
+{
+	if (sock &&
+	    sock->sk->state != TCP_ESTABLISHED &&
+	    sock->sk->state != TCP_CLOSE_WAIT)
+		return 1;
+
+	if (err >= 0)
+		return 0;
+
+	/*
+	 * ?????????????????????????
+	 * When the server has died, an ICMP port unreachable
+	 * message prompts ECONNREFUSED.
+	 */
+	return err == -ERESTARTSYS ||
+	       err == -EBADF ||
+	       err == -ECONNREFUSED ||
+	       err == -ENOTCONN ||
+	       err == -ECONNRESET ||
+	       err == -EPIPE;
+}
+
+enum {
+ NET_DRIVER_UNINITED,
+ NET_DRIVER_READY,
+};
+
+
+int net_register_handler(u32 msg_type, u32 key, int flags,
+ u32 max_len, net_msg_handler_func *func, void *data, void *buf);
+net_msg * net_package_message(u32 msg_type, u32 key, void *data, u32 len);
+int net_recv_tcp_msg (struct inode *inode, struct socket *sock, void *data, u32 *packet_len);
+int net_send_tcp_msg (struct inode *inode, struct socket *sock, void *data, u32 packet_len);
+int net_send_error(struct socket *sock, u32 err_type);
+int net_init_tcp_sock(struct inode *inode);
+int net_send_message(u32 msg_type, u32 key, void *data, u32 len, struct inode *inode, int *status);
+int net_broadcast_message(u32 msg_type, u32 key, void *data, u32 len, struct inode *group);
+net_msg_handler * net_lookup_handler(u32 msg_type, u32 key);
+
+#endif /* __KERNEL__ */
+
+typedef struct _net_ioc
+{
+ u32 status;
+} net_ioc;
+
+typedef struct _gsd_ioc
+{
+ int fd;
+ int namelen;
+ char name[NM_MAX_NAME_LEN+1];
+ int status;
+} gsd_ioc;
+
+#define NET_IOC_MAGIC 'O'
+#define NET_IOC_ACTIVATE _IOR(NET_IOC_MAGIC, 1, net_ioc)
+#define NET_IOC_GETSTATE _IOR(NET_IOC_MAGIC, 2, net_ioc)
+#define GSD_IOC_CREATE_GROUP _IOR(NET_IOC_MAGIC, 3, gsd_ioc)
+#define GSD_IOC_ADD_GROUP_NODE _IOR(NET_IOC_MAGIC, 4, gsd_ioc)
+
+#define GSD_MESSAGE 130
+#define GSD_ACTION_ADD_GROUP (0x01)
+#define GSD_ACTION_ADD_GROUP_NODE (0x02)
+typedef struct _gsd_message
+{
+ u16 from;
+ u8 action;
+ u8 namelen;
+ u8 name[NM_MAX_NAME_LEN];
+} gsd_message;
+
+
+
+
+
+#endif /* DLMNET_H */
Added: branches/dlm-glue/dlmtools/dlmutil.h
===================================================================
--- branches/dlm-glue/dlmtools/dlmutil.h 2004-11-18 22:00:07 UTC (rev 410)
+++ branches/dlm-glue/dlmtools/dlmutil.h 2004-11-18 22:08:00 UTC (rev 411)
@@ -0,0 +1,110 @@
+/* -*- mode: c; c-basic-offset: 8; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
+ * dlmutil.h
+ *
+ * Function prototypes
+ *
+ * Copyright (C) 2002, 2004 Oracle. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ *
+ * Authors: Kurt Hackel, Mark Fasheh, Sunil Mushran, Wim Coekaerts,
+ * Manish Singh, Neeraj Goyal, Suchit Kaura
+ */
+
+#ifndef DLM_UTIL_H
+#define DLM_UTIL_H
+
+#ifdef __KERNEL__
+#define SHUTDOWN_SIGS (sigmask(SIGKILL) | sigmask(SIGHUP) | \
+ sigmask(SIGINT) | sigmask(SIGQUIT))
+
+/* timeout structure taken from Ben's aio.c */
+typedef struct _util_timeout {
+ struct timer_list timer;
+ int timed_out;
+ wait_queue_head_t wait;
+} util_timeout;
+
+void util_clear_timeout(util_timeout *to);
+void util_daemonize(char *name, int len, int shutdown_sigs);
+void util_init_timeout(util_timeout *to);
+void util_set_timeout(util_timeout *to, __u32 timeout);
+void util_show_stack(unsigned long *esp);
+void util_show_trace(unsigned long *stack);
+int util_sleep(__u32 ms);
+int __util_wait_atomic_eq(wait_queue_head_t *wq, atomic_t *var, int val, int ms);
+void util_block_sigs(sigset_t *oldsigs, unsigned long mask);
+void util_unblock_sigs(sigset_t newsig);
+
+/*
+ * exits when var == val, or on timeout
+ *
+ * Returns 0 immediately if *var already equals val; otherwise returns
+ * whatever __util_wait_atomic_eq() returns.
+ */
+static inline int util_wait_atomic_eq(wait_queue_head_t *wq, atomic_t *var, int val, int timeout)
+{
+	if (atomic_read(var) == val)
+		return 0;
+
+	return __util_wait_atomic_eq(wq, var, val, timeout);
+}
+
+#endif /* __KERNEL__ */
+
+/* resizable array */
+typedef struct _util_rarray
+{
+ void *page;
+ u16 elements;
+ u16 max_elem;
+ u16 elem_size;
+ u16 reserved1;
+} util_rarray;
+
+#define UTIL_RARRAY_PAGE_BUF_SIZE (PAGE_SIZE - offsetof(util_rarray_page, buf))
+#define UTIL_RARRAY_ELEM_PER_BUF(r) ((UTIL_RARRAY_PAGE_BUF_SIZE) / (r)->elem_size)
+typedef struct _util_rarray_page
+{
+ void *next;
+ char buf[0];
+} util_rarray_page;
+
+void util_init_rarray(util_rarray *arr, u16 elem_size);
+void * util_get_new_rarray_slot(util_rarray *arr, int *index);
+int util_add_to_rarray(util_rarray *arr, void *new);
+void * util_rarray_idx_to_slot(util_rarray *arr, int idx);
+int util_resize_rarray(util_rarray *arr, int newelem);
+
+#ifdef __KERNEL__
+typedef struct _util_thread_info
+{
+ wait_queue_head_t thread_wq;
+ atomic_t woken;
+ struct task_struct *task;
+ struct completion complete;
+ int pid;
+} util_thread_info;
+
+
+/*
+ * Put a util_thread_info into its initial state: no task, pid of -1,
+ * 'woken' cleared, and the wait queue and completion initialised.
+ */
+static inline void util_thread_info_init(util_thread_info *info)
+{
+	info->task = NULL;
+	info->pid = -1;
+	atomic_set(&info->woken, 0);
+	init_waitqueue_head(&info->thread_wq);
+	init_completion(&info->complete);
+}
+#endif /* __KERNEL__ */
+
+#endif /* DLM_UTIL_H */
Added: branches/dlm-glue/dlmtools/mount.ocfs2.c
===================================================================
--- branches/dlm-glue/dlmtools/mount.ocfs2.c 2004-11-18 22:00:07 UTC (rev 410)
+++ branches/dlm-glue/dlmtools/mount.ocfs2.c 2004-11-18 22:08:00 UTC (rev 411)
@@ -0,0 +1,816 @@
+/*
+ * mount.ocfs2.c
+ *
+ * Joins the heartbeat group for an ocfs2 device and then mounts it
+ *
+ * Copyright (C) 2004 Oracle. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ *
+ * Authors: Sunil Mushran
+ */
+
+
+#define _LARGEFILE64_SOURCE
+#define _GNU_SOURCE /* Because libc really doesn't want us using O_DIRECT? */
+
+#include <sys/types.h>
+#include <asm/types.h>
+
+#define u8 __u8
+#define s8 __s8
+#define u16 __u16
+#define s16 __s16
+#define u32 __u32
+#define s32 __s32
+#define u64 __u64
+#define s64 __s64
+#define atomic_t int
+#define spinlock_t unsigned long
+typedef unsigned short kdev_t;
+
+
+
+
+#include <asm/page.h>
+#include <sys/mount.h>
+#include <dirent.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <linux/fd.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <asm/bitops.h>
+
+#define OCFS2_FLAT_INCLUDES 1
+#include <ocfs2.h>
+#include <ocfs2_fs.h>
+#include <ocfs2_disk_dlm.h>
+#include <ocfs1_fs_compat.h>
+#include <kernel-list.h>
+
+
+#include "dlmutil.h"
+#include "dlmcommon.h"
+#include "dlmnm.h"
+#include "dlmhb.h"
+#include "dlmtcp.h"
+
+
+
+#define CLUSTER_FILE "/proc/cluster/nm/.cluster"
+#define GROUP_FILE "/proc/cluster/nm/.group"
+#define NODE_FILE "/proc/cluster/nm/.node"
+#define HEARTBEAT_DISK_FILE "/proc/cluster/heartbeat/.disk"
+
+
+int create_remote_group(char *group_name, __u16 node);
+int get_node_map(__u16 group_num, char *bitmap);
+int get_ocfs2_disk_hb_params(char *group_dev, __u32 *block_bits, __u32 *cluster_bits,
+ __u64 *start_block, __u32 *num_clusters);
+int activate_group(char *group_name, char *group_dev, __u16 group_num,
+ __u32 block_bits, __u64 num_blocks, __u64 start_block);
+int add_to_local_group(char *uuid, __u16 group_num, __u16 node_num);
+int create_group(char *uuid, __u16 *group_num);
+int get_my_nodenum(__u16 *nodenum);
+int add_me_to_group(char *groupname, char *groupdev);
+static errcode_t ocfs2_partition_list (struct list_head *dev_list);
+static void ocfs2_partition_list_destroy (struct list_head *dev_list);
+static errcode_t ocfs2_detect_all(struct list_head *dev_list);
+static int read_options(int argc, char **argv, char **hbuuid, char **hbdev, char **device, char **mp);
+
+char *op_buf = NULL;
+
+
+
+/*
+ * ocfs2_partition_list()
+ *
+ * Read /proc/partitions and append one zeroed ocfs2_devices entry, with
+ * dev_name set to "/dev/<name>", to dev_list for every line that parses.
+ * Lines that do not match the expected layout are skipped.
+ *
+ * Returns 0 on success, OCFS2_ET_IO if /proc/partitions cannot be
+ * opened, or the ocfs2_malloc0() error.  Entries added before a failure
+ * stay on dev_list.
+ */
+static errcode_t ocfs2_partition_list (struct list_head *dev_list)
+{
+	errcode_t err = 0;
+	FILE *fp;
+	char buf[256];
+	char devname[256];
+	ocfs2_devices *entry;
+
+	fp = fopen ("/proc/partitions", "r");
+	if (!fp)
+		return OCFS2_ET_IO;
+
+	while (fgets (buf, sizeof(buf), fp)) {
+		if (sscanf(buf, "%*d %*d %*d %99[^ \t\n]", devname) == 1) {
+			err = ocfs2_malloc0(sizeof(ocfs2_devices), &entry);
+			if (err)
+				break;
+
+			snprintf(entry->dev_name, sizeof(entry->dev_name), "/dev/%s", devname);
+			list_add_tail(&entry->list, dev_list);
+		}
+	}
+
+	fclose(fp);
+	return err;
+}
+
+
+/*
+ * ocfs2_partition_list_destroy()
+ *
+ * Unlink and free every ocfs2_devices entry on dev_list.
+ */
+static void ocfs2_partition_list_destroy (struct list_head *dev_list)
+{
+	struct list_head *pos, *next;
+
+	list_for_each_safe(pos, next, dev_list) {
+		ocfs2_devices *entry = list_entry(pos, ocfs2_devices, list);
+
+		list_del(pos);
+		free(entry);
+	}
+}
+
+
+/*
+ * ocfs2_detect_all()
+ *
+ * Fill dev_list from /proc/partitions, then try to open every entry
+ * read-only as ocfs2.  Entries that open get fs_type 2 and a copy of the
+ * superblock uuid; entries that do not get fs_type 0.
+ *
+ * Returns the ocfs2_partition_list() error (after reporting it through
+ * com_err) or 0.  A device that fails to open is not an error.
+ */
+static errcode_t ocfs2_detect_all(struct list_head *dev_list)
+{
+	errcode_t ret;
+	struct list_head *pos;
+	ocfs2_devices *entry;
+	ocfs2_filesys *fs;
+
+	ret = ocfs2_partition_list(dev_list);
+	if (ret) {
+		com_err("mount.ocfs2", ret, "while reading /proc/partitions");
+		return ret;
+	}
+
+	list_for_each(pos, dev_list) {
+		entry = list_entry(pos, ocfs2_devices, list);
+
+		fs = NULL;
+		if (ocfs2_open(entry->dev_name, OCFS2_FLAG_RO, 0, 0, &fs)) {
+			/* not an ocfs2 volume, or unreadable */
+			entry->fs_type = 0;
+			continue;
+		}
+
+		entry->fs_type = 2;
+		memcpy(entry->uuid, OCFS2_RAW_SB(fs->fs_super)->s_uuid,
+		       sizeof(entry->uuid));
+
+		ocfs2_close(fs);
+	}
+
+	return 0;
+}
+
+
+/*
+ * read_options()
+ *
+ * Parse "[-o hbuuid=<32 hex chars>|hbdev=<path>[,...]] <device> <mountpoint>".
+ *
+ * On entry *hbuuid and *hbdev must be NULL or heap pointers: when an
+ * option is repeated the earlier copy is freed and replaced.  Values
+ * stored through the four output pointers are heap allocated and owned
+ * by the caller, also when the function fails part way through.
+ *
+ * Switches other than -o are ignored.  Returns 0 on success and 1 on
+ * any error: too few arguments, unknown or malformed -o sub-option,
+ * missing device/mountpoint, or allocation failure.
+ */
+static int read_options(int argc, char **argv, char **hbuuid, char **hbdev, char **device, char **mp)
+{
+	static const char uuid_key[] = "hbuuid=";
+	static const char dev_key[] = "hbdev=";
+	char *opt, *end, *copy;
+	int c;
+
+	if (argc < 2)
+		return 1;
+
+	while ((c = getopt(argc, argv, "o:")) != -1) {
+		if (c != 'o')
+			continue;
+
+		opt = optarg;
+		while (*opt) {
+			if (strncasecmp(opt, uuid_key, strlen(uuid_key)) == 0) {
+				opt += strlen(uuid_key);
+				end = strchr(opt, ',');
+
+				/* uuid should be exactly 32 hex chars */
+				if ((end ? (size_t)(end - opt) : strlen(opt)) != 32)
+					return 1;
+
+				copy = strndup(opt, 32);
+				if (!copy)
+					return 1;
+				free(*hbuuid);	/* drop an earlier hbuuid= */
+				*hbuuid = copy;
+			} else if (strncasecmp(opt, dev_key, strlen(dev_key)) == 0) {
+				opt += strlen(dev_key);
+				end = strchr(opt, ',');
+				if (!end)
+					end = opt + strlen(opt);
+				if (end == opt)
+					return 1;	/* empty device name */
+
+				copy = strndup(opt, end - opt);
+				if (!copy)
+					return 1;
+				free(*hbdev);	/* drop an earlier hbdev= */
+				*hbdev = copy;
+			} else {
+				/* invalid option */
+				return 1;
+			}
+
+			/* end is NULL or at the terminator after the last sub-option */
+			if (!end || !*end)
+				break;
+			opt = end + 1;
+		}
+	}
+
+	/* need device and mountpoint */
+	if (optind + 1 >= argc || !argv[optind] || !argv[optind + 1])
+		return 1;
+
+	*device = strdup(argv[optind]);
+	*mp = strdup(argv[optind + 1]);
+	if (!*device || !*mp)
+		return 1;
+
+	return 0;
+}
+
+
+/*
+ * main()
+ *
+ * 1. parse the command line (read_options),
+ * 2. scan /proc/partitions for ocfs2 volumes (ocfs2_detect_all),
+ * 3. pick the heartbeat device: by uuid when hbuuid= was given, else by
+ *    the exact device name given in hbdev= or, failing that, the device
+ *    being mounted,
+ * 4. join the heartbeat group (add_me_to_group),
+ * 5. mount the device as "ocfs2" with "group=<uuid>" as mount data.
+ *
+ * Returns 0 on success and non-zero on any failure, including
+ * allocation failures.
+ */
+int main(int argc, char **argv)
+{
+	char *device = NULL, *mountpoint = NULL;
+	char *hbdev = NULL, *hbuuid = NULL;
+	errcode_t ret = 0;
+	struct list_head dev_list;
+	char uuid[40];
+	ocfs2_devices *dev;
+	struct list_head *iter;
+	char *p;
+	int i, found = 0;
+	unsigned long flags = 0;
+	char *args = NULL;
+
+	INIT_LIST_HEAD(&dev_list);
+
+	op_buf = malloc(PAGE_SIZE);
+	if (!op_buf) {
+		ret = 1;
+		goto bail;
+	}
+
+	ret = read_options (argc, argv, &hbuuid, &hbdev, &device, &mountpoint);
+	if (ret)
+		goto bail;
+
+	ret = 1;
+	if (!device)
+		goto bail;
+
+	/* provide nothing or one, not both */
+	if (hbuuid && hbdev)
+		goto bail;
+
+	ret = ocfs2_detect_all(&dev_list);
+	if (ret)
+		goto bail;
+
+	/* from here on every failure must set ret itself: it is 0 now */
+	found = 0;
+	/* sort out which device is to be used for heartbeat */
+	if (hbuuid) {
+		list_for_each(iter, &dev_list) {
+			dev = list_entry(iter, ocfs2_devices, list);
+			if (dev->fs_type != 2)
+				continue;
+
+			memset(uuid, 0, sizeof(uuid));
+			for (i = 0, p = uuid; i < 16; i++, p += 2)
+				sprintf(p, "%02X", dev->uuid[i]);
+			if (strncasecmp(uuid, hbuuid, 32)==0) {
+				/* found it: store the normalized (upper case) uuid */
+				strncpy(hbuuid, uuid, 32);
+				hbdev = strdup(dev->dev_name);
+				if (!hbdev) {
+					ret = 1;
+					goto bail;
+				}
+				found = 1;
+				break;
+			}
+		}
+	} else {
+		char *look_for = (hbdev ? hbdev : device);
+
+		/* broken for now. have to give exact dev name in /proc/partitions */
+		list_for_each(iter, &dev_list) {
+			dev = list_entry(iter, ocfs2_devices, list);
+			if (dev->fs_type != 2)
+				continue;
+
+			if (strcasecmp(dev->dev_name, look_for)==0) {
+				/* found it */
+				if (!hbdev)
+					hbdev = strdup(dev->dev_name);
+				else
+					/* same length: the names compared equal ignoring case */
+					strcpy(hbdev, dev->dev_name);
+				hbuuid = malloc(33);
+				if (!hbdev || !hbuuid) {
+					ret = 1;
+					goto bail;
+				}
+				memset(hbuuid, 0, 33);
+				for (i = 0, p = hbuuid; i < 16; i++, p += 2)
+					sprintf(p, "%02X", dev->uuid[i]);
+				found = 1;
+				break;
+			}
+		}
+	}
+	if (!found) {
+		ret = 1;
+		goto bail;
+	}
+
+	/* all three should now have valid values */
+	printf("device=%s hbuuid=%s hbdev=%s\n", device, hbuuid, hbdev);
+
+	ret = add_me_to_group(hbuuid, hbdev);
+	if (ret < 0)
+		goto bail;
+
+	args = malloc(strlen(hbuuid) + strlen("group=") + 1);
+	if (!args) {
+		/* do not report success when the mount was never attempted */
+		ret = 1;
+		goto bail;
+	}
+	sprintf(args, "group=%s", hbuuid);
+	flags = 0;
+	ret = mount(device, mountpoint, "ocfs2", flags, args);
+
+bail:
+	free(args);
+	free(device);
+	free(mountpoint);
+	free(hbdev);
+	free(hbuuid);
+
+	ocfs2_partition_list_destroy (&dev_list);
+
+	free(op_buf);
+
+	return ret;
+}
+
+
+
+/*
+ * this will try to add the group (and the node to the group)
+ * for every mount. luckily, there are many shortcut paths
+ * along the way, so checking for -EEXIST will save time.
+ *
+ * Steps: look up this node's number, read the heartbeat area geometry
+ * from groupdev, create the group locally (or find it), start disk
+ * heartbeat, add this node to the local group, then ask every other
+ * node in the group's node map to create the group as well.  The node
+ * map is re-read after each round and any node that has not been
+ * contacted yet gets its own round.
+ *
+ * Returns 0 on success or the first error from a helper; -EEXIST from
+ * the create/add steps counts as success.
+ */
+int add_me_to_group(char *groupname, char *groupdev)
+{
+	int ret;
+	__u16 my_nodenum, groupnum;
+	__u32 pending[8] = {0, 0, 0, 0, 0, 0, 0, 0};	/* nodes to contact this round */
+	__u32 contacted[8] = {0, 0, 0, 0, 0, 0, 0, 0};	/* nodes handled in any round */
+	__u32 current[8] = {0, 0, 0, 0, 0, 0, 0, 0};	/* freshly read node map */
+	int start, next, i;
+	__u32 block_bits, cluster_bits, num_clusters;
+	__u64 start_block, num_blocks;
+
+	ret = get_my_nodenum(&my_nodenum);
+	if (ret < 0)
+		return ret;
+
+	ret = get_ocfs2_disk_hb_params(groupdev, &block_bits, &cluster_bits,
+				       &start_block, &num_clusters);
+	if (ret < 0)
+		return ret;
+
+	/* widen before shifting: the byte count can exceed 32 bits */
+	num_blocks = (__u64)num_clusters << cluster_bits;
+	num_blocks >>= block_bits;
+
+	/* either create the group or find that it already exists */
+	ret = create_group(groupname, &groupnum);
+	if (ret != -EEXIST && ret != 0)
+		return ret;
+
+	ret = activate_group(groupname, groupdev, groupnum, block_bits, num_blocks, start_block);
+	if (ret < 0)
+		return ret;
+
+	ret = add_to_local_group(groupname, groupnum, my_nodenum);
+	if (ret != -EEXIST && ret != 0)
+		return ret;
+
+	/* at this point my node is heartbeating, so any other nodes
+	 * joining right now must communicate with me */
+
+	/* TODO: how much to sleep? */
+	sleep(1);
+	ret = get_node_map(groupnum, (char *)pending);
+	if (ret < 0)
+		return ret;
+
+	do {
+		/* go create this group and add this node on every other node I see */
+		start = 0;
+		while (1) {
+			next = find_next_bit((unsigned long *)pending, NM_MAX_NODES, start);
+			if (next >= NM_MAX_NODES)
+				break;
+			if (next != my_nodenum) {
+				ret = create_remote_group(groupname, next);
+				if (ret != 0 && ret != -EEXIST)
+					break;
+			}
+			start = next + 1;
+		}
+		if (ret != 0 && ret != -EEXIST)
+			return ret;
+
+		/*
+		 * Remember everybody handled so far.  Comparing only against
+		 * the previous round would make old and new nodes take turns
+		 * looking "new", and the loop would never finish.
+		 */
+		for (i = 0; i < 8; i++)
+			contacted[i] |= pending[i];
+
+		/* grab the nodemap again and look for changes */
+		memset(current, 0, sizeof(current));
+		ret = get_node_map(groupnum, (char *)current);
+		if (ret < 0)
+			return ret;
+
+		for (i = 0; i < 8; i++)
+			pending[i] = current[i] & ~contacted[i];
+
+		/* keep going while there are still nodes to contact */
+	} while (find_next_bit((unsigned long *)pending, NM_MAX_NODES, 0) < NM_MAX_NODES);
+
+	return 0;
+}
+
+/*
+ * Ask the node manager for this node's global node number by writing an
+ * NM_OP_GET_GLOBAL_NODE_NUM request to CLUSTER_FILE and parsing the
+ * "<status>: <num>" reply.
+ *
+ * On success stores the number in *nodenum and returns 0.  On failure
+ * *nodenum is left at 255 and the return value is -errno when
+ * CLUSTER_FILE cannot be opened, or -EINVAL when the request cannot be
+ * written or the reply is malformed, reports a non-zero status, or
+ * carries a number outside 0..255.
+ */
+int get_my_nodenum(__u16 *nodenum)
+{
+	FILE *file;
+	int ret = -EINVAL;
+	int retval = -EINVAL, num;
+	nm_op *op = (nm_op *)op_buf;
+
+	memset(op_buf, 0, PAGE_SIZE);
+	op->magic = NM_OP_MAGIC;
+	op->opcode = NM_OP_GET_GLOBAL_NODE_NUM;
+
+	*nodenum = 255;
+
+	file = fopen(CLUSTER_FILE, "r+");
+	if (!file)
+		return -errno;
+
+	/*
+	 * fwrite() only fills the stdio buffer.  Flush so the request really
+	 * reaches the file, and so a rejected write is noticed, before the
+	 * stream is switched over to reading the reply.
+	 */
+	if (fwrite((char *)op, sizeof(nm_op), 1, file) != 1 ||
+	    fflush(file) != 0)
+		goto done;
+
+	if (fscanf(file, "%d: %d", &retval, &num) != 2 ||
+	    retval != 0 || num < 0 || num > 255)
+		goto done;
+
+	*nodenum = num;
+	ret = 0;
+done:
+	fclose(file);
+	return ret;
+}
+
+/*
+ * Create the node manager group named after 'uuid', or find it.
+ *
+ * If /proc/cluster/nm/<uuid> already exists, *group_num is derived from
+ * its inode number and -EEXIST is returned without talking to the node
+ * manager.  Otherwise an NM_OP_CREATE_GROUP request is written to
+ * CLUSTER_FILE and the "<status>: group <num>" reply is parsed.
+ *
+ * Returns the status from the reply (0 or negative), -EINVAL for a bad
+ * uuid length, a failed write or a malformed reply, or -errno when
+ * CLUSTER_FILE cannot be opened.  *group_num is set to the reported
+ * group only for status 0 / -EEXIST with a number in range; otherwise
+ * it is NM_INVALID_SLOT_NUM.
+ */
+int create_group(char *uuid, __u16 *group_num)
+{
+	FILE *file;
+	int ret = -EINVAL, retval;
+	int groupnum = NM_INVALID_SLOT_NUM;
+	nm_op *op = (nm_op *)op_buf;
+	struct stat st;
+	char fname[100];
+
+	/* also guarantees the copies into fname and the op fields below fit */
+	if (strlen(uuid) != CLUSTER_DISK_UUID_LEN)
+		return -EINVAL;
+
+	snprintf(fname, sizeof(fname), "/proc/cluster/nm/%s", uuid);
+	if (stat(fname, &st) == 0) {
+		*group_num = st.st_ino - NM_GROUP_INODE_START;
+		return -EEXIST;
+	}
+
+	*group_num = NM_INVALID_SLOT_NUM;
+
+	memset(op_buf, 0, PAGE_SIZE);
+	op->magic = NM_OP_MAGIC;
+	op->opcode = NM_OP_CREATE_GROUP;
+
+	op->arg_u.gc.group_num = NM_INVALID_SLOT_NUM;
+	strcpy(op->arg_u.gc.name, uuid);
+	strcpy(op->arg_u.gc.disk_uuid, uuid);
+
+	file = fopen(CLUSTER_FILE, "r+");
+	if (!file)
+		return -errno;
+
+	/* flush so the request is really written before the reply is read */
+	if (fwrite((char *)op, sizeof(nm_op), 1, file) != 1 ||
+	    fflush(file) != 0)
+		goto done;
+
+	if (fscanf(file, "%d: group %d", &retval, &groupnum) != 2)
+		goto done;
+
+	ret = retval;
+	if ((ret == 0 || ret == -EEXIST) &&
+	    groupnum >= 0 && groupnum < NM_INVALID_SLOT_NUM)
+		*group_num = groupnum;
+
+done:
+	fclose(file);
+	return ret;
+}
+
+
+/*
+ * Add node 'node_num' to group 'group_num' on this machine, using the
+ * node number as the slot number as well.
+ *
+ * If the directory /proc/cluster/nm/<uuid> already holds an entry whose
+ * inode number maps to node_num, returns -EEXIST without talking to the
+ * node manager.  Otherwise an NM_OP_ADD_GROUP_NODE request is written to
+ * GROUP_FILE and the status from the "<status>: node" reply is returned.
+ *
+ * Also returns -EINVAL for a bad uuid length, a failed write or a
+ * malformed reply, and -errno when GROUP_FILE cannot be opened.
+ */
+int add_to_local_group(char *uuid, __u16 group_num, __u16 node_num)
+{
+	FILE *file;
+	int ret = -EINVAL, retval;
+	nm_op *op = (nm_op *)op_buf;
+	char fname[100];
+	DIR *dir;
+	struct dirent *de;
+
+	if (strlen(uuid) != CLUSTER_DISK_UUID_LEN)
+		return -EINVAL;
+
+	snprintf(fname, sizeof(fname), "/proc/cluster/nm/%s", uuid);
+	dir = opendir(fname);
+	if (dir) {
+		while ((de = readdir(dir)) != NULL) {
+			if (de->d_ino - NM_NODE_INODE_START == node_num) {
+				closedir(dir);
+				return -EEXIST;
+			}
+		}
+		closedir(dir);
+	}
+
+	memset(op_buf, 0, PAGE_SIZE);
+	op->magic = NM_OP_MAGIC;
+	op->opcode = NM_OP_ADD_GROUP_NODE;
+	op->arg_u.gc.group_num = group_num;
+	op->arg_u.gc.node_num = node_num;
+	op->arg_u.gc.slot_num = node_num;
+
+	file = fopen(GROUP_FILE, "r+");
+	if (!file)
+		return -errno;
+
+	/* flush so the request is really written before the reply is read */
+	if (fwrite((char *)op, sizeof(nm_op), 1, file) != 1 ||
+	    fflush(file) != 0)
+		goto done;
+
+	if (fscanf(file, "%d: node", &retval) != 1)
+		goto done;
+
+	ret = retval;
+
+done:
+	fclose(file);
+	return ret;
+}
+
+/*
+ * activate_group() - send a "start disk heartbeat" request for a group.
+ *
+ * @group_name:  copied into the request's disk_uuid field.
+ * @group_dev:   path of the device; opened O_RDWR and its descriptor is
+ *               passed in the request.
+ * @group_num:   group number placed in the request.
+ * @block_bits:  stored in the request's "bits" field.
+ * @num_blocks:  stored in the request's "blocks" field.
+ * @start_block: stored in the request's "start" field.
+ *
+ * Returns -errno if @group_dev or HEARTBEAT_DISK_FILE cannot be opened,
+ * -EINVAL if the request cannot be written or the reply cannot be parsed,
+ * and otherwise the status number parsed from the reply.
+ */
+int activate_group(char *group_name, char *group_dev, __u16 group_num,
+		   __u32 block_bits, __u64 num_blocks, __u64 start_block)
+{
+	int dev_fd;
+	int ret = -EINVAL, retval;
+	FILE *file;
+	hb_op *op;
+
+	printf("starting disk heartbeat...\n");
+
+	memset(op_buf, 0, PAGE_SIZE);
+	op = (hb_op *)op_buf;
+	op->magic = HB_OP_MAGIC;
+	op->opcode = HB_OP_START_DISK_HEARTBEAT;
+	op->group_num = group_num;
+	/* NOTE(review): unbounded copy; confirm callers validate the length. */
+	strcpy(op->disk_uuid, group_name);
+	op->bits = block_bits;
+	op->blocks = num_blocks;
+	op->start = start_block;
+
+	dev_fd = open(group_dev, O_RDWR);
+	if (dev_fd == -1)
+		return -errno;
+	op->fd = dev_fd;
+
+	file = fopen(HEARTBEAT_DISK_FILE, "r+");
+	if (!file) {
+		/* Capture errno before close() can overwrite it. */
+		ret = -errno;
+		goto out_close_dev;
+	}
+
+	/*
+	 * Flush before reading: required on an update-mode stream, and it
+	 * guarantees the request is written while dev_fd is still open.
+	 */
+	if (fwrite(op, sizeof(hb_op), 1, file) != 1 || fflush(file) != 0)
+		goto out_close_file;
+
+	if (fscanf(file, "%d: ", &retval) != 1)
+		goto out_close_file;
+
+	/* Report the status from the reply instead of unconditional success. */
+	ret = retval;
+
+out_close_file:
+	fclose(file);
+out_close_dev:
+	/* hb will keep its own ref */
+	close(dev_fd);
+	return ret;
+}
+
+
+/*
+ * get_ocfs2_disk_hb_params() - read heartbeat geometry from an OCFS2 device.
+ *
+ * Opens @group_dev read-only, looks up the system inode named
+ * ocfs2_system_inode_names[DLM_SYSTEM_INODE] in the system directory and
+ * reads it.  The inode is accepted only when its extent list has
+ * l_tree_depth of zero and l_next_free_rec of exactly 1; in that case the
+ * outputs are filled from the superblock and from the first extent record:
+ *
+ * @block_bits:   s_blocksize_bits of the superblock.
+ * @cluster_bits: s_clustersize_bits of the superblock.
+ * @start_block:  e_blkno of the first extent record.
+ * @num_clusters: e_clusters of the first extent record.
+ *
+ * Returns 0 on success and -EINVAL on any failure; the outputs are left
+ * untouched on failure.
+ */
+int get_ocfs2_disk_hb_params(char *group_dev, __u32 *block_bits, __u32 *cluster_bits,
+			     __u64 *start_block, __u32 *num_clusters)
+{
+	ocfs2_filesys *fs = NULL;
+	ocfs2_dinode *inode;
+	ocfs2_extent_rec *extent;
+	char *inode_buf = NULL;
+	uint64_t sys_blkno;
+	errcode_t err;
+	int status = -EINVAL;
+
+	err = ocfs2_open(group_dev, OCFS2_FLAG_RO, 0, 0, &fs);
+	if (err)
+		return status;
+
+	/* Each step runs only if every previous one succeeded. */
+	err = ocfs2_lookup(fs, fs->fs_sysdir_blkno,
+			   ocfs2_system_inode_names[DLM_SYSTEM_INODE],
+			   strlen(ocfs2_system_inode_names[DLM_SYSTEM_INODE]),
+			   NULL, &sys_blkno);
+	if (!err)
+		err = ocfs2_malloc_block(fs->fs_io, &inode_buf);
+	if (!err)
+		err = ocfs2_read_inode(fs, sys_blkno, inode_buf);
+	if (err)
+		goto out;
+
+	inode = (ocfs2_dinode *)inode_buf;
+	if (inode->id2.i_list.l_tree_depth == 0 &&
+	    inode->id2.i_list.l_next_free_rec == 1) {
+		extent = &inode->id2.i_list.l_recs[0];
+
+		*block_bits = OCFS2_RAW_SB(fs->fs_super)->s_blocksize_bits;
+		*cluster_bits = OCFS2_RAW_SB(fs->fs_super)->s_clustersize_bits;
+		*start_block = extent->e_blkno;
+		*num_clusters = extent->e_clusters;
+		status = 0;
+	}
+
+out:
+	if (inode_buf)
+		ocfs2_free(&inode_buf);
+	if (fs)
+		ocfs2_close(fs);
+	return status;
+}
+
+/*
+ * get_node_map() - fetch the heartbeat node bitmap for a group.
+ *
+ * @group_num: group number placed in the request.
+ * @bitmap:    output buffer; must hold at least (NM_MAX_NODES+7)/8 bytes.
+ *
+ * Returns 0 on success with @bitmap filled in, -errno if
+ * HEARTBEAT_DISK_FILE cannot be opened, the non-zero status number from the
+ * reply if there is one, and -EINVAL if the request cannot be written, the
+ * reply cannot be parsed, or the bitmap is short.
+ */
+int get_node_map(__u16 group_num, char *bitmap)
+{
+	const size_t map_bytes = (NM_MAX_NODES + 7) / 8;
+	FILE *file = NULL;
+	hb_op *op;
+	int ret = -EINVAL;
+	int retval;
+
+	printf("getting node map...\n");
+
+	memset(op_buf, 0, PAGE_SIZE);
+	op = (hb_op *)op_buf;
+	op->magic = HB_OP_MAGIC;
+	op->opcode = HB_OP_GET_NODE_MAP;
+	op->group_num = group_num;
+
+	file = fopen(HEARTBEAT_DISK_FILE, "r+");
+	if (!file)
+		return -errno;
+
+	/*
+	 * On an update-mode stream, output must be flushed before the stream
+	 * is read; the flush also makes sure the request has actually been
+	 * written before we wait for the reply.
+	 */
+	if (fwrite(op, sizeof(hb_op), 1, file) != 1 || fflush(file) != 0)
+		goto done;
+
+	/*
+	 * NOTE(review): the trailing blank in the format skips *all*
+	 * whitespace, so leading bitmap bytes that happen to be whitespace
+	 * values (e.g. 0x20, 0x0a) would be swallowed here -- confirm against
+	 * the layout of the reply.
+	 */
+	if (fscanf(file, "%d: ", &retval) != 1) {
+		ret = -EINVAL;
+		goto done;
+	}
+	if (retval != 0) {
+		ret = retval;
+		goto done;
+	}
+	if (fread(bitmap, 1, map_bytes, file) != map_bytes) {
+		ret = -EINVAL;
+		goto done;
+	}
+	ret = 0;
+done:
+	fclose(file);
+	return ret;
+}
+
+/*
+ * create_remote_group() - create a group on a remote node and add this
+ * node to it.
+ *
+ * @group_name: name of the group; sent in both ioctl requests.
+ * @node:       node number; selects /proc/cluster/nm/<group_name>/<node>.
+ *
+ * Issues GSD_IOC_CREATE_GROUP on /proc/cluster/net and, if that reports 0
+ * or -EEXIST, follows with GSD_IOC_ADD_GROUP_NODE.
+ *
+ * Returns -EINVAL if @group_name does not fit in the ioctl name field,
+ * -ENAMETOOLONG if the proc path does not fit in the local buffer, -errno
+ * if an open() or ioctl() fails, and otherwise the status field of the last
+ * ioctl issued.
+ */
+int create_remote_group(char *group_name, __u16 node)
+{
+	int ret, fd = -1, remote_node = -1;
+	int len;
+	size_t namelen;
+	gsd_ioc ioc;
+	char fname[100];
+
+	/* Refuse names that would overflow the fixed-size ioctl name field. */
+	namelen = strlen(group_name);
+	if (namelen > sizeof(ioc.name))
+		return -EINVAL;
+
+	/* Bounded formatting: group_name is not length-checked by callers here. */
+	len = snprintf(fname, sizeof(fname), "/proc/cluster/nm/%s/%03u",
+		       group_name, (unsigned int)node);
+	if (len < 0 || (size_t)len >= sizeof(fname))
+		return -ENAMETOOLONG;
+
+	/* open a file descriptor to the node we want to talk to */
+	remote_node = open(fname, O_RDONLY);
+	if (remote_node == -1) {
+		ret = -errno;
+		goto leave;
+	}
+
+	/* TODO: move this over to a transaction file on the inode, eliminate the ioctl */
+	fd = open("/proc/cluster/net", O_RDONLY);
+	if (fd == -1) {
+		ret = -errno;
+		goto leave;
+	}
+
+	/* call an ioctl to create the group over there */
+	memset(&ioc, 0, sizeof(gsd_ioc));
+	ioc.fd = remote_node;
+	ioc.namelen = namelen;
+	memcpy(ioc.name, group_name, namelen);
+	if (ioctl(fd, GSD_IOC_CREATE_GROUP, &ioc) < 0) {
+		ret = -errno;
+		goto leave;
+	}
+	ret = ioc.status;
+
+	/* An already existing group is fine; anything else is fatal. */
+	if (ret != 0 && ret != -EEXIST)
+		goto leave;
+
+	/* call an ioctl to add this node to the group over there */
+	memset(&ioc, 0, sizeof(gsd_ioc));
+	ioc.fd = remote_node;
+	ioc.namelen = namelen;
+	memcpy(ioc.name, group_name, namelen);
+	if (ioctl(fd, GSD_IOC_ADD_GROUP_NODE, &ioc) < 0) {
+		ret = -errno;
+		goto leave;
+	}
+	ret = ioc.status;
+
+leave:
+	if (fd != -1)
+		close(fd);
+	if (remote_node != -1)
+		close(remote_node);
+	return ret;
+}
More information about the Ocfs2-tools-commits
mailing list