[Ocfs2-commits] zab commits r1979 - in branches/usysfsify/fs/ocfs2:
. cluster dlm
svn-commits at oss.oracle.com
svn-commits at oss.oracle.com
Tue Mar 15 17:35:34 CST 2005
Author: zab
Date: 2005-03-15 17:35:32 -0600 (Tue, 15 Mar 2005)
New Revision: 1979
Modified:
branches/usysfsify/fs/ocfs2/cluster/Makefile
branches/usysfsify/fs/ocfs2/cluster/clcommon.h
branches/usysfsify/fs/ocfs2/cluster/heartbeat.c
branches/usysfsify/fs/ocfs2/cluster/heartbeat.h
branches/usysfsify/fs/ocfs2/cluster/nodemanager.c
branches/usysfsify/fs/ocfs2/cluster/nodemanager.h
branches/usysfsify/fs/ocfs2/cluster/ocfs2_nodemanager.h
branches/usysfsify/fs/ocfs2/cluster/tcp.c
branches/usysfsify/fs/ocfs2/cluster/tcp.h
branches/usysfsify/fs/ocfs2/dlm/dlmmod.c
branches/usysfsify/fs/ocfs2/dlm/dlmmod.h
branches/usysfsify/fs/ocfs2/dlmglue.c
branches/usysfsify/fs/ocfs2/heartbeat.c
branches/usysfsify/fs/ocfs2/ocfs.h
branches/usysfsify/fs/ocfs2/super.c
branches/usysfsify/fs/ocfs2/vote.c
Log:
o this is the current state of the work-in-progress to use usysfs for
managing state in ocfs2{,/cluster}. This doesn't even build and there
is quite a bit left to transition. This is just so joel and I can keep
in sync-ish.
Modified: branches/usysfsify/fs/ocfs2/cluster/Makefile
===================================================================
--- branches/usysfsify/fs/ocfs2/cluster/Makefile 2005-03-15 22:35:08 UTC (rev 1978)
+++ branches/usysfsify/fs/ocfs2/cluster/Makefile 2005-03-15 23:35:32 UTC (rev 1979)
@@ -16,6 +16,11 @@
EXTRA_CFLAGS += -DMISSING_SOCK_CREATE_LITE
endif
+# this is going to get exciting if usysfs is merged
+EXTRA_CFLAGS += -I$(OUR_TOPDIR)/fs/usysfs/
+# XXX should be folded into the kapi stuff
+EXTRA_CFLAGS += -I$(OUR_TOPDIR)/fs/usysfs/compatinclude
+
ifeq ($(KERNELRELEASE),)
ifeq ($(KERNEL_26),)
COMPAT_LIBFS := compat_libfs.o
@@ -24,15 +29,10 @@
INSTALL_MOD_DIR := fs/ocfs2
-obj-m := ocfs2_heartbeat.o ocfs2_nodemanager.o ocfs2_tcp.o
+obj-m := ocfs2_nodemanager.o
-ocfs2_nodemanager-objs := nodemanager.o util.o transaction_file.o $(COMPAT_LIBFS)
+ocfs2_nodemanager-objs := nodemanager.o heartbeat.o tcp.o $(COMPAT_LIBFS)
-ocfs2_heartbeat-objs := heartbeat.o util.o transaction_file.o $(COMPAT_LIBFS)
-
-ocfs2_tcp-objs := gsd.o tcp.o util.o $(COMPAT_LIBFS)
-
-
ifeq ($(KERNELRELEASE),)
#
# Called from a regular "make".
@@ -44,7 +44,6 @@
heartbeat.c \
nodemanager.c \
tcp.c \
- transaction_file.c \
util.c
HEADERS = \
@@ -58,7 +57,6 @@
ocfs2_nodemanager.h \
ocfs2_tcp.h \
tcp.h \
- transaction_file.h \
util.h
DIST_FILES = $(SOURCES) $(HEADERS)
Modified: branches/usysfsify/fs/ocfs2/cluster/clcommon.h
===================================================================
--- branches/usysfsify/fs/ocfs2/cluster/clcommon.h 2005-03-15 22:35:08 UTC (rev 1978)
+++ branches/usysfsify/fs/ocfs2/cluster/clcommon.h 2005-03-15 23:35:32 UTC (rev 1979)
@@ -28,7 +28,6 @@
#ifndef CLUSTER_CLCOMMON_H
#define CLUSTER_CLCOMMON_H
-typedef struct _nm_ctxt nm_ctxt;
typedef struct _heartbeat_ctxt heartbeat_ctxt;
#define CLUSTER_DISK_UUID_LEN 32 // 16 byte binary == 32 char hex string
@@ -38,7 +37,6 @@
// uuid of disk
char uuid[CLUSTER_DISK_UUID_LEN+1];
// all the rest are for heartbeat
- dev_t dev;
u32 blocksize_bits;
u32 num_blocks;
u64 start_block;
Modified: branches/usysfsify/fs/ocfs2/cluster/heartbeat.c
===================================================================
--- branches/usysfsify/fs/ocfs2/cluster/heartbeat.c 2005-03-15 22:35:08 UTC (rev 1978)
+++ branches/usysfsify/fs/ocfs2/cluster/heartbeat.c 2005-03-15 23:35:32 UTC (rev 1979)
@@ -81,17 +81,14 @@
static void hb_teardown(void);
-static void hb_nm_group_node_add_cb(void *ptr1, void *ptr2, u8 idx);
-static void hb_nm_group_node_del_cb(void *ptr1, void *ptr2, u8 idx);
static void hb_nm_node_add_cb(void *ptr1, void *ptr2, u8 idx);
static void hb_nm_group_add_cb(void *ptr1, void *ptr2, u8 idx);
static int hb_init_disk_hb_group(struct inode *group, dev_t dev, u32 bits,
u32 blocks, u64 start);
static ssize_t write_disk(struct file *file, char *buf, size_t size);
-static void hb_do_callbacks(int type, void *ptr1, void *ptr2, int idx);
+static void hb_do_callbacks(int type, struct nm_group *group,
+ struct nm_node *node, int idx);
static void hb_end_buffer_io_sync(struct buffer_head *bh, int uptodate);
-static int hb_do_node_down(struct inode *group, struct inode *node, int idx);
-static int hb_do_node_up(struct inode *group, struct inode *node, int idx);
static int hb_do_disk_heartbeat(void *page);
static int hb_thread(void *data);
static void hb_complete_thread(void);
@@ -119,8 +116,27 @@
#define hbprintk0(x)
#endif
+#define HB_THREAD_MS 2000 // every 2 seconds
+struct hb_disk_slot
+{
+ struct inode *inode;
+ struct buffer_head *bh;
+ struct list_head list;
+ unsigned long last_time;
+ u16 margin;
+ u16 state;
+};
+struct hb_region {
+ struct kobj hr_kobj;
+ u64 hr_block_bytes;
+ u64 hr_start_block;
+ u64 hr_blocks;
+ dev_t hr_dev;
+ struct hb_disk_slot *hr_slots;
+};
+
static void hb_end_buffer_io_sync(struct buffer_head *bh, int uptodate)
{
if (uptodate)
@@ -132,24 +148,17 @@
unlock_buffer(bh);
}
-
-
-static int hb_do_node_down(struct inode *group, struct inode *node, int idx)
+static int hb_do_node_down(struct nm_node *node, int idx)
{
- //int ret;
- hbprintk("hb_do_node_down: group=%lu, node=%lu\n", group->i_ino,
- node->i_ino);
- hbprintk("NOT removing node from group\n");
- //ret = nm_remove_node_from_group(group, node);
- hb_do_callbacks(HB_NODE_DOWN_CB, group, node, idx);
+ hbprintk("hb_do_node_down: node=%u\n", node->nd_num);
+ hb_do_callbacks(HB_NODE_DOWN_CB, node, idx);
return 0;
}
-static int hb_do_node_up(struct inode *group, struct inode *node, int idx)
+static int hb_do_node_up(struct nm_node *node, int idx)
{
- hbprintk("hb_do_node_up: group=%lu, node=%lu\n", group->i_ino,
- node->i_ino);
- hb_do_callbacks(HB_NODE_UP_CB, group, node, idx);
+ hbprintk("hb_do_node_up: node=%u\n", node->nd_num);
+ hb_do_callbacks(HB_NODE_UP_CB, node, idx);
return 0;
}
@@ -227,10 +236,23 @@
return bh;
}
+/*
+ * heartbeats/block_regions/foo/
+ * block_bytes
+ * start_block
+ * num_blocks
+ * active_nodes/
+ *
+ * for now require that we have the local node defined
+ *
+ * heartbeat bits need to pin other node bits, I guess
+ */
+
static int hb_do_disk_heartbeat(void *page)
{
nm_group_inode_private *priv;
- struct inode *group, *node;
+ struct nm_group *group;
+ struct nm_node *node;
struct list_head *iter;
struct buffer_head *bh;
hb_disk_slot *slot;
@@ -462,6 +484,7 @@
}
+#if 0
static ssize_t write_disk(struct file *file, char *buf, size_t size)
{
hb_op *data;
@@ -553,67 +576,6 @@
return ret;
}
-
-extern struct file_operations transaction_ops;
-
-/*----------------------------------------------------------------------------*/
-/*
- * populating the filesystem.
- */
-static int hb_fill_super(struct super_block * sb, void * data, int silent)
-{
- int ret;
- struct TA_write_ops *ops;
- static struct tree_descr hb_files[] = {
- [HB_Disk] = {".disk", &transaction_ops, S_IWUSR},
- /* last one */ {""}
- };
-
- ops = kmalloc(sizeof(struct TA_write_ops) +
- (sizeof(ops->write_op[0])),
- GFP_KERNEL);
- if (!ops)
- return -ENOMEM;
-
- ops->num_ops = HB_WriteOpArraySize;
- ops->write_op[HB_Disk] = write_disk;
-
- hbprintk("calling simple_fill_super...\n");
- ret = simple_fill_super(sb, 0x5551212f, hb_files);
- if (ret >= 0)
- TA_GENERIC_SB_MEMBER(sb) = ops;
- else
- kfree(ops);
- return ret;
-}
-
-/* blindly copied from ocfs2 */
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
-static struct super_block *hb_get_sb(struct file_system_type *fs_type,
- int flags,
- const char *dev_name,
- void *data)
-{
- return get_sb_single(fs_type, flags, data, hb_fill_super);
-}
-
-static struct file_system_type hb_fs_type = {
- .owner = THIS_MODULE,
- .name = "hb",
- .get_sb = hb_get_sb,
- .kill_sb = kill_anon_super,
-};
-#else
-static struct super_block *hb_read_super(struct super_block *sb,
- void *data,
- int silent)
-{
- hbprintk("welcome to hb_read_super!!!\n");
- return (hb_fill_super(sb, data, silent) < 0) ? NULL : sb;
-}
-static DECLARE_FSTYPE (hb_fs_type, "hb", hb_read_super, FS_SINGLE|FS_LITTER);
-#endif
-
/* TODO: make callbacks all return int */
static void hb_nm_group_node_add_cb(void *ptr1, void *ptr2, u8 idx)
{
@@ -715,99 +677,55 @@
hbprintk("hb_nm_group_node_del_cb done: %d\n", ret);
return;
}
+#endif
-static void hb_nm_node_add_cb(void *ptr1, void *ptr2, u8 idx)
+#if 0
+static void hb_teardown(void)
{
- //struct inode *node = ptr1;
+ /* XXX are there thread/cb dependencies? */
+ hb_complete_thread();
+ nm_unregister_callback(NM_GROUP_NODE_DEL_CB, hb_nm_group_node_del_cb);
+ nm_unregister_callback(NM_GROUP_NODE_ADD_CB, hb_nm_group_node_add_cb);
+ nm_unregister_callback(NM_NODE_ADD_CB, hb_nm_node_add_cb);
+ nm_unregister_callback(NM_GROUP_ADD_CB, hb_nm_group_add_cb);
}
-static void hb_nm_group_add_cb(void *ptr1, void *ptr2, u8 idx)
-{
- struct inode *group = ptr1;
- nm_group_inode_private *priv;
-
- hbprintk("hb_nm_group_add_cb: group=%lu, idx=%u\n",
- group->i_ino, idx);
-
- priv = group->u.generic_ip;
- if (!priv) {
- hbprintk("eek! bad group inode!\n");
- return;
- }
-
- spin_lock(&hb_lock);
- list_add_tail(&priv->net_list, &hb_net_groups);
- if (priv->disk.uuid[0]) {
- hbprintk("adding priv=%p inode=%p to disk group list\n",
- priv, group);
- list_add_tail(&priv->disk_list, &hb_disk_groups);
- }
- spin_unlock(&hb_lock);
-}
-
static int __init init_hb(void)
{
- int retval=-1, i;
- hbprintk("loading heartbeat module: nodename is %s\n", nm_nodename);
+ int ret, i;
- if (proc_mkdir("cluster/heartbeat", 0)) {
- // ???
- }
-
- //hb_net_timestamps = __get_free_page(GFP_KERNEL);
- //if (!hb_net_timestamps)
- // goto done;
-
- for (i=HB_NODE_DOWN_CB; i<HB_NUM_CB; i++) {
+ for (i = HB_NODE_DOWN_CB; i < HB_NUM_CB; i++) {
INIT_LIST_HEAD(&hb_callbacks[i].list);
init_MUTEX(&hb_callbacks[i].sem);
}
- if (nm_register_callback(NM_GROUP_NODE_DEL_CB, hb_nm_group_node_del_cb))
+ ret = nm_register_callback(NM_GROUP_NODE_DEL_CB,
+ hb_nm_group_node_del_cb);
+ if (ret)
goto done;
- if (nm_register_callback(NM_GROUP_NODE_ADD_CB, hb_nm_group_node_add_cb))
+ ret = nm_register_callback(NM_GROUP_NODE_ADD_CB,
+ hb_nm_group_node_add_cb);
+ if (ret)
goto done;
- if (nm_register_callback(NM_NODE_ADD_CB, hb_nm_node_add_cb))
+ ret = nm_register_callback(NM_NODE_ADD_CB, hb_nm_node_add_cb);
+ if (ret)
goto done;
- if (nm_register_callback(NM_GROUP_ADD_CB, hb_nm_group_add_cb))
+ ret = nm_register_callback(NM_GROUP_ADD_CB, hb_nm_group_add_cb);
+ if (ret)
goto done;
- if (hb_launch_thread() < 0)
+ ret = hb_launch_thread();
+ if (ret)
goto done;
- retval = register_filesystem(&hb_fs_type);
done:
- if (retval)
+ if (ret)
hb_teardown();
- return retval;
+ return ret;
}
+#endif
-static void __exit exit_hb(void)
-{
- hb_complete_thread();
- hb_teardown();
- unregister_filesystem(&hb_fs_type);
- hbprintk("unloading heartbeat module\n");
-}
-
-static void hb_teardown(void)
-{
- nm_unregister_callback(NM_GROUP_NODE_DEL_CB, hb_nm_group_node_del_cb);
- nm_unregister_callback(NM_GROUP_NODE_ADD_CB, hb_nm_group_node_add_cb);
- nm_unregister_callback(NM_NODE_ADD_CB, hb_nm_node_add_cb);
- nm_unregister_callback(NM_GROUP_ADD_CB, hb_nm_group_add_cb);
- remove_proc_entry("cluster/heartbeat", NULL);
-
- /* XXX make sure that we're not being called from any more active
- * nm callbacks, then teardown hb_callbacks */
- //if (hb_net_timestamps)
- // kfree(hb_net_timestamps);
-}
-
-MODULE_LICENSE("GPL");
-module_init(init_hb)
-module_exit(exit_hb)
-
+#if 0
/*
* hb_fill_node_bytemap()
* 255 bytes... each byte set to 0 (not mounted) or 1 (mounted)
@@ -887,7 +805,226 @@
return 0;
}
EXPORT_SYMBOL(hb_fill_node_map);
+#endif
+/*
+ * heartbeat usysfs bits. The heartbeat set is a default set under
+ * the cluster set in nodemanager.c.
+ */
+
+static struct hb_region *to_hb_region(struct kobject *kobj)
+{
+ return kobj ? container_of(kobj, struct hb_region, hr_kobj) : NULL;
+}
+
+static void hb_region_release(struct kobject *kobj)
+{
+ struct hr_region *reg = to_hr_region(kobj);
+ printk("releasing reg %p\n", reg);
+}
+
+static ssize_t hb_region_block_bytes_read(struct hb_region *reg, char *page)
+{
+ return sprintf(page, "%d\n", node->nd_block_bytes);
+}
+
+static ssize_t hb_region_block_bytes_write(struct hb_region *reg,
+ const char *page,
+ size_t count)
+{
+ struct nm_cluster *cluster = to_nm_cluster(node->nd_kobj.parent);
+ unsigned long long tmp;
+ char *p = (char *)page;
+
+ tmp = simple_strtoull(p, &p, 0);
+ if (!p || (*p && (*p != '\n')))
+ return -EINVAL;
+
+ /* XXX compare blocks against dev later on commit? */
+ reg->hr_block_bytes = tmp;
+
+ return count;
+}
+static ssize_t hb_region_start_block_read(struct hb_region *reg, char *page)
+{
+ return sprintf(page, "%d\n", node->nd_start_block);
+}
+
+static ssize_t hb_region_start_block_write(struct hb_region *reg,
+ const char *page,
+ size_t count)
+{
+ struct nm_cluster *cluster = to_nm_cluster(node->nd_kobj.parent);
+ unsigned long long tmp;
+ char *p = (char *)page;
+
+ tmp = simple_strtoull(p, &p, 0);
+ if (!p || (*p && (*p != '\n')))
+ return -EINVAL;
+
+ /* XXX compare blocks against dev later on commit? */
+ reg->hr_start_block = tmp;
+
+ return count;
+}
+static ssize_t hb_region_blocks_read(struct hb_region *reg, char *page)
+{
+ return sprintf(page, "%d\n", node->nd_blocks);
+}
+
+static ssize_t hb_region_blocks_write(struct hb_region *reg,
+ const char *page,
+ size_t count)
+{
+ struct nm_cluster *cluster = to_nm_cluster(node->nd_kobj.parent);
+ unsigned long long tmp;
+ char *p = (char *)page;
+
+ tmp = simple_strtoull(p, &p, 0);
+ if (!p || (*p && (*p != '\n')))
+ return -EINVAL;
+
+ /* XXX compare blocks against dev later on commit? */
+ reg->hr_blocks = tmp;
+
+ return count;
+}
+struct hb_region_attribute {
+ struct attribute attr;
+ ssize_t (*show)(struct hb_region *, char *);
+ ssize_t (*store)(struct hb_region *, const char *, size_t);
+};
+
+static struct hb_region_attribute hb_region_attr_block_bytes = {
+ .attr = { .name = "block_bytes", .mode = S_IRUGO | S_IWUSR },
+ .show = hb_region_block_bytes_read,
+ .store = hb_region_block_bytes_write,
+};
+static struct hb_region_attribute hb_region_attr_start_block = {
+ .attr = { .name = "start_block", .mode = S_IRUGO | S_IWUSR },
+ .show = hb_region_start_block_read,
+ .store = hb_region_start_block_write,
+};
+static struct hb_region_attribute hb_region_attr_blocks = {
+ .attr = { .name = "blocks", .mode = S_IRUGO | S_IWUSR },
+ .show = hb_region_blocks_read,
+ .store = hb_region_blocks_write,
+};
+
+static struct attribute *hb_region_default_attrs[] = {
+ &hb_region_block_bytes.attr,
+ &hb_region_start_block.attr,
+ &hb_region_blocks.attr,
+// &hb_region_dev.attr, /* link to a real device? hmm. */
+ NULL,
+};
+
+static ssize_t hb_region_attr_show(struct kobject *kobj,
+ struct attribute *attr,
+ char *page)
+{
+ struct hb_region *reg = to_hb_region(kobj);
+ struct hb_region_attribute *nm_nodde_attr =
+ container_of(attr, struct hb_region_attribute, attr);
+ ssize_t ret = 0;
+
+ if (hb_region_attr->show)
+ ret = hb_region_attr->show(node, page);
+ return ret;
+}
+
+static ssize_t hb_region_store(struct kobject *kobj,
+ struct attribute *attr,
+ const char *page, size_t count)
+{
+ struct hb_region *reg = to_hb_region(kobj);
+ struct hb_region_attribute *hb_region_attr =
+ container_of(attr, struct hb_region_attribute, attr);
+ ssize_t ret = -EINVAL;
+
+ if (hb_region_attr->store)
+ ret = hb_region_attr->store(node, page, count);
+ return ret;
+}
+
+struct sysfs_ops hb_region_sysfs_ops = {
+ .show = &hb_region_show,
+ .store = &hb_region_store,
+};
+
+static struct ukobj_type hb_region_type = {
+ .ktype = {
+ .release = hb_region_release,
+ .sysfs_ops = &hb_region_sysfs_ops,
+ .default_attrs = hb_region_default_addrs,
+ },
+ .owner = THIS_MODULE,
+};
+
+/* heartbeat set */
+
+};
+
+static struct hb_heartbeat_set *to_hb_heartbeat_set(struct kset *kset)
+{
+ return kset ? container_of(kset, struct hb_heartbeat_set, hs_kset) : NULL;
+}
+
+static struct kobject *hb_heartbeat_set_make_object(struct kset *kset,
+ const char *name)
+{
+ struct hb_region *reg = NULL;
+ struct hb_cluster *cluster = to_hb_cluster(kset->kobj.parent);
+ struct kobj *ret = NULL;
+
+ printk("trying to make a heartbeat object under cluster %p\n", cluster);
+
+ reg = kcalloc(1, sizeof(struct hb_region), GFP_KERNEL);
+ if (reg == NULL)
+ goto out; /* ENOMEM */
+
+ strcpy(reg->hr_kobj.name, name);
+ reg->kobj.k_name = reg->kobj.name;
+ reg->kobj.ktype = &hb_region_type.ktype;
+ kobject_init(&reg->hr_kobj);
+ ret = &reg->hr_kobj;
+
+out:
+ if (ret == NULL)
+ kfree(reg);
+
+ return ret;
+}
+
+static void hb_heartbeat_set_drop_object(struct kset *kset, struct kobject *kobj)
+{
+ struct hb_heartbeat_set *hs = to_hb_heartbeat_set(kset);
+
+ printk("dropping hb_heartbeat_set %p\n", hs);
+ kobject_put(kobj);
+}
+
+static struct ukobj_type hb_heartbeat_set_type = {
+ .ktype = {
+ .sysfs_ops = NULL, /* no attributes */
+ },
+ .make_object = hb_heartbeat_set_make_object,
+ .drop_object = hb_heartbeat_set_drop_object,
+ .owner = THIS_MODULE,
+};
+
+struct hb_heartbeat_set hb_heartbeat_set = {
+ .hs_kset = {
+ .kobj = {
+ .name = "heartbeat",
+ .ktype = &hb_heartbeat_set_type.ktype,
+ },
+ },
+};
+
+
+/* hb callback registration and issueing */
+
static struct hb_callback *hbcall_from_type(int type)
{
if (type < HB_NODE_DOWN_CB || type >= HB_NUM_CB)
Modified: branches/usysfsify/fs/ocfs2/cluster/heartbeat.h
===================================================================
--- branches/usysfsify/fs/ocfs2/cluster/heartbeat.h 2005-03-15 22:35:08 UTC (rev 1978)
+++ branches/usysfsify/fs/ocfs2/cluster/heartbeat.h 2005-03-15 23:35:32 UTC (rev 1979)
@@ -36,32 +36,6 @@
HB_NODE_STATE_UP
};
-struct _heartbeat_ctxt
-{
- int dummy;
-};
-
-typedef struct _hb_disk_slot
-{
- struct inode *inode;
- struct buffer_head *bh;
- struct list_head list;
- unsigned long last_time;
- u16 margin;
- u16 state;
-} hb_disk_slot;
-
-
-
-#define HB_THREAD_MS 2000 // every 2 seconds
-
-
-enum {
- HB_TYPE_DISK = 0,
- HB_TYPE_NET
-};
-
-
/* callback stuff */
enum {
@@ -71,7 +45,8 @@
HB_NUM_CB
};
-typedef void (hb_cb_func)(struct inode *, struct inode *, int, void *);
+struct nm_node;
+typedef void (hb_cb_func)(struct nm_node *, int, void *);
struct hb_callback_func {
struct list_head hc_item;
@@ -96,6 +71,11 @@
#define HB_DISK_MARGIN 30
#define HB_NET_MARGIN 30
+struct hb_heartbeat_set {
+ struct kset hs_kset;
+ /* some stuff? */
+};
+extern struct hb_heartbeat_set hb_heartbeat_set;
void hb_setup_callback(struct hb_callback_func *hc, int type, hb_cb_func *func,
void *data, int priority);
Modified: branches/usysfsify/fs/ocfs2/cluster/nodemanager.c
===================================================================
--- branches/usysfsify/fs/ocfs2/cluster/nodemanager.c 2005-03-15 22:35:08 UTC (rev 1978)
+++ branches/usysfsify/fs/ocfs2/cluster/nodemanager.c 2005-03-15 23:35:32 UTC (rev 1979)
@@ -1,12 +1,8 @@
/* -*- mode: c; c-basic-offset: 8; -*-
* vim: noexpandtab sw=8 ts=8 sts=0:
*
- * nodemanager.c
+ * Copyright (C) 2004, 2005 Oracle. All rights reserved.
*
- * totally lame static node management placeholder
- *
- * Copyright (C) 2004 Oracle. All rights reserved.
- *
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License as published by the Free Software Foundation; either
@@ -21,11 +17,8 @@
* License along with this program; if not, write to the
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
* Boston, MA 021110-1307, USA.
- *
- * Authors: Kurt Hackel
*/
-
#include "cl_compat.h"
#include "util.h"
#include "clcommon.h"
@@ -64,6 +57,7 @@
#include <linux/pagemap.h>
#include <linux/hash.h>
#include <linux/bitops.h>
+#include <linux/kobject.h>
#include <asm/uaccess.h>
@@ -73,116 +67,21 @@
#include "compat_libfs.h"
#include "transaction_file.h"
+#include "usysfs.h"
#ifndef __user
#define __user
#endif
-
-/*
- * This nm module is similar to nfsd/nfsctl.c in that it uses
- * transaction files (in /proc/cluster/nm) to communicate with
- * the kernel module instead of ioctls or other means.
- *
- * Files involved:
- * /proc/cluster/nm/cluster - used to create/destroy cluster, adds
- * nodes/groups to the cluster, queries info
- * about the cluster
- * /proc/cluster/nm/group - adds/removes nodes from a group, queries
- * info about a group
- * /proc/cluster/nm/node - changes info for a node, queries info about
- * a node
- *
- * This nm implementation basically allows this node to live in exactly one
- * cluster. All "clustered" nodes that are known to this node should be
- * added to the cluster, and all nodes should see the same list of nodes in
- * the same order at all times. The "slot" number given to a node in this
- * global cluster list is fixed and never changes. Groups can be dynamically
- * created within a cluster (TODO: currently static only) and be made up of
- * one or more nodes (listed at most once) in the global list. A node may exist
- * in many groups. Also, a group may have an optional disk UUID which is simply
- * stored for later use by the heartbeat service. (The heartbeat service will
- * do disk heartbeating only for those groups with valid UUIDs.)
- *
- * USAGE:
- * For our purposes, the nm service can be autoloaded by an fstab entry or
- * manually through mount (mount -t nm none /proc/cluster/nm). Once that is
- * done, an init script (or single executable on an initrd) should be run to
- * create the static cluster info, possibly from a file like /etc/nm.conf or
- * similar. We should probably create a "dlm" or "everyone" group (with NO disk
- * heartbeating) so that the dlm service can be used with the network only.
- * This group should contain all known nodes. After this is done, the net, hb
- * and dlm modules can come up. The nm service is now ready for use, since
- * groups don't need to be created till later.
- *
- * A group services daemon can be written (by someone!? ;-) to run at this
- * point. Since the "dlm" group has everything it needs for full dlmming (since
- * it uses only network), the dlm itself can be used to arbitrate for group
- * creation, and additions/deletions from groups. Callbacks should be
- * registered with nm by other services that care on each of these events. For
- * instance, heartbeat should register a callback with nm for group creation,
- * and addition and deletion from a group so that it can make any necessary
- * changes to its heartbeating (primarily so that it can begin/end disk
- * heartbeat for any group/node that needs it).
- *
- * NOTE NOTE NOTE !!!!:
- * This is intended to be a quickie implementation. (translation: lame) I do
- * not want to step on anyone's toes who may have implemented something wayyy
- * better. If something out there "wins", we will plug into that instead. If
- * nothing really takes off, we at least have a (lame) reference to work off of.
- * However, since this implementation exists solely to make ocfs2 work, and one
- * of the major advantages of ocfs version 1 was ease of setup, we don't want
- * to move to something substantially more complicated than this (one conf
- * file).
- *
- */
-
-
-
-/* globals */
-nm_cluster cluster;
-struct super_block *single_sb;
-char *nm_nodename;
-EXPORT_SYMBOL(nm_nodename);
-static spinlock_t nm_lock = SPIN_LOCK_UNLOCKED;
static DECLARE_MUTEX(nm_cb_sem);
struct list_head nm_callbacks[NM_NUM_CB];
-
-static void nm_teardown(void);
static int nm_create_cluster(char *buf);
-static void nm_init_cluster(nm_cluster *cluster);
int nm_create_node(char *buf, nm_op *data);
int nm_name_cluster(char *buf, nm_op *data);
-int nm_destroy_cluster(char *buf);
-int nm_get_cluster_num_nodes(char *buf);
-int nm_get_cluster_num_groups(char *buf);
int nm_get_node_info(char *buf, nm_op *data);
-int nm_get_group_info(char *buf, nm_op *data);
-nm_cluster *nm_get_cluster(void);
-struct inode *nm_get_node_by_name(char *node_name);
static void nm_do_callbacks(int type, void *ptr1, void *ptr2, u8 idx);
-/* support for adding files, dirs, hardlinks in /proc/cluster/nm/... */
-extern struct file_operations simple_dir_operations;
-extern struct inode_operations simple_dir_inode_operations;
-
-static inline int nm_find_next_slot(void *bitmap, int max, int request);
-static struct dentry * nm_add_file(struct super_block *s, struct dentry *parent, struct tree_descr *file, int ino);
-static struct dentry * nm_add_link(struct super_block *s, struct dentry *parent, struct tree_descr *file, int ino);
-
-static ssize_t write_node(struct file *file, char *buf, size_t size);
-static ssize_t write_group(struct file *file, char *buf, size_t size);
-static ssize_t write_cluster(struct file *file, char *buf, size_t size);
-
-static u8 nm_get_group_index(struct inode *group, struct inode *inode, struct dentry **child);
-
-#define NM_HASH_BITS 7
-#define NM_HASH_SIZE (1 << NM_HASH_BITS)
-#define NM_HASH_MASK (NM_HASH_SIZE - 1)
-
-
-
#define nmprintk(x, arg...) printk("(nm:%d) " x, current->pid, ##arg)
#define nmprintk0(x) printk("(nm:%d) " x, current->pid)
#if 0
@@ -190,1171 +89,558 @@
#define nmprintk0(x)
#endif
-static struct list_head *nm_ip_hash = NULL;
-static spinlock_t nm_ip_hash_lock;
-
-static int nm_init_ip_hash(void);
-static void nm_destroy_ip_hash(void);
-
-
-static void nm_destroy_ip_hash(void)
+static int nm_find_next_slot(spinlock_t *lock, void *bitmap, int max,
+ int request)
{
- int i;
- if (!nm_ip_hash)
- return;
- for (i=0; i<NM_HASH_SIZE; i++) {
- /* TODO: cleanup */
- }
- free_page((unsigned long)nm_ip_hash);
-}
+ int start = 0, slot_num, ret = -1;
-static int nm_init_ip_hash(void)
-{
- int i;
-
- if ((PAGE_SIZE / sizeof(struct list_head)) < NM_HASH_SIZE) {
- nmprintk("eek! hash size too big for this arch!\n");
- BUG();
- }
-
- nm_ip_hash = (struct list_head *) __get_free_page(GFP_KERNEL);
- if (!nm_ip_hash)
- return -ENOMEM;
- for (i=0; i<NM_HASH_SIZE; i++)
- INIT_LIST_HEAD(&nm_ip_hash[i]);
- spin_lock_init(&nm_ip_hash_lock);
- return 0;
-}
-
-
-
-
-
-static inline int nm_find_next_slot(void *bitmap, int max, int request)
-{
- int start = 0, slot_num;
if (request != NM_INVALID_SLOT_NUM)
start = request;
+
+ spin_lock(lock);
+
slot_num = find_next_zero_bit (bitmap, max, start);
if (slot_num >= max)
- return -1;
+ goto out;
if (request != NM_INVALID_SLOT_NUM && slot_num != request)
- return -1;
- set_bit(slot_num, bitmap);
- return slot_num;
-}
-
-
-
-
-static struct dentry * nm_add_file(struct super_block *s, struct dentry *parent,
- struct tree_descr *file, int ino)
-{
- struct qstr name;
- struct dentry *dentry = ERR_PTR(-EINVAL);
- struct inode *inode;
-
- if (!file->name)
goto out;
- name.name = file->name;
- name.len = strlen(name.name);
- nmprintk("adding file %.*s\n", name.len, name.name);
- name.hash = full_name_hash(name.name, name.len);
- dentry = d_alloc(parent, &name);
- if (!dentry) {
- dentry = ERR_PTR(-EINVAL);
- goto out;
- }
- inode = new_inode(s);
- if (!inode) {
- dput(dentry);
- dentry = ERR_PTR(-EINVAL);
- goto out;
- }
- inode->i_mode = file->mode;
- inode->i_uid = inode->i_gid = 0;
- inode->i_blksize = PAGE_CACHE_SIZE;
- inode->i_blocks = 0;
- inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
- if (file->mode & S_IFDIR) {
- inode->i_op = &simple_dir_inode_operations;
- inode->i_fop = &simple_dir_operations;
- } else {
- inode->i_fop = file->ops;
- }
- inode->i_ino = ino;
- insert_inode_hash(inode);
- d_add(dentry, inode);
+ set_bit(slot_num, bitmap);
+ ret = slot_num;
+
out:
- return dentry;
+ spin_unlock(lock);
+ return ret;
}
-
-static struct dentry * nm_add_link(struct super_block *s, struct dentry *parent,
- struct tree_descr *file, int ino)
+/* XXX return nm_node, see why people are calling it in the first place.. */
+struct nm_node * nm_get_node_by_num(u8 node_num)
{
- struct qstr name;
- struct dentry *dentry = ERR_PTR(-EINVAL);
- struct inode *inode;
-
- if (!file->name)
- goto out;
- name.name = file->name;
- name.len = strlen(name.name);
- nmprintk("adding link %.*s\n", name.len, name.name);
- name.hash = full_name_hash(name.name, name.len);
- dentry = d_alloc(parent, &name);
- if (!dentry) {
- nmprintk("failed to d_alloc\n");
- dentry = ERR_PTR(-EINVAL);
- goto out;
- }
- inode = iget(s, ino);
- if (!inode) {
- nmprintk("failed to iget\n");
- dput(dentry);
- dentry = ERR_PTR(-EINVAL);
- goto out;
- }
- if (!inode->u.generic_ip) {
- nmprintk("bad inode: %d\n", ino);
- iput(inode);
- dput(dentry);
- dentry = ERR_PTR(-EINVAL);
- goto out;
- }
- inode->i_nlink++;
- d_add(dentry, inode);
-
-out:
- return dentry;
+ BUG();
+ return NULL;
}
+EXPORT_SYMBOL(nm_get_node_by_num);
-
-
-
-
-/* cluster, node and group transaction files.
- * here's where the actual work of nm takes place. */
-
-static int nm_create_cluster(char *buf)
+/* tcp calls this when a node connects so that it can deref the inode
+ * to find the inode_private->net state.. hmm. it probably just wants
+ * to look it up so that it can verify that it's a node we like and
+ * allocate its own state? hmm, that's a pickle. */
+struct nm_node * nm_get_node_by_ip(u32 addr)
{
- int ret = -EINVAL;
-
- nmprintk("create cluster...\n");
-
- spin_lock(&nm_lock);
- if (cluster.state == NM_CLUSTER_UP) {
- ret = sprintf(buf, "%d: cluster already up\n", -EINVAL);
- } else {
- cluster.state = NM_CLUSTER_UP;
- ret = sprintf(buf, "0: cluster state: UP");
- }
- spin_unlock(&nm_lock);
- return ret;
+ BUG();
+ return NULL;
}
+EXPORT_SYMBOL(nm_get_node_by_ip);
-
-
-int nm_create_group(char *buf, nm_op *data)
+#if 0
+/* this tree is only used by get_group_by_name */
+static struct nm_group * nm_group_tree_lookup(nm_cluster *cluster,
+ const char *group_name,
+ struct rb_node ***ret_p,
+ struct rb_node **ret_parent)
{
- struct tree_descr desc;
- struct dentry *dentry = NULL;
- struct inode *inode = NULL;
- int ino, group_num;
- int ret = -EINVAL;
- nm_group_inode_private *g = NULL;
+ struct rb_node **p = &cluster->group_name_tree.rb_node;
+ struct rb_node *parent = NULL;
+ struct nm_group *group, *ret = NULL;
+ int cmp;
- nmprintk("create group...\n");
+ while (*p) {
+ parent = *p;
+ group = rb_entry(parent, struct nm_group, gr_name_rb_node);
+ cmp = strcmp(group_name, group->gr_name);
- data->arg_u.gc.name[NM_MAX_NAME_LEN] = '\0';
- inode = nm_get_group_by_name(data->arg_u.gc.name);
- if (inode) {
- ret = sprintf(buf, "%d: group %u (%s) already exists", -EEXIST,
- nm_get_group_global_index(inode),
- data->arg_u.gc.name);
- iput(inode);
- return ret;
- }
+ if (cmp < 0)
+ p = &(*p)->rb_left;
+ else if (cmp > 0) {
+ p = &(*p)->rb_right;
+ else {
+ ret = group;
+ break;
+ }
+ }
+
+ if (ret_p != NULL)
+ *ret_p = p;
+ if (ret_parent != NULL)
+ *ret_parent = parent;
- group_num = data->arg_u.gc.group_num;
- if (group_num > NM_INVALID_SLOT_NUM)
- goto leave;
+ if (ret)
+ /* get ref */;
- spin_lock(&cluster.bitmap_lock);
- group_num = nm_find_next_slot(&(cluster.group_bitmap[0]), 255,
- group_num);
- spin_unlock(&cluster.bitmap_lock);
-
- if (group_num < 0) {
- nmprintk("out of group slots!\n");
- goto leave;
- }
-
- ino = group_num + NM_GROUP_INODE_START;
-
- desc.name = data->arg_u.gc.name;
- desc.ops = NULL;
- desc.mode = S_IFDIR | 0755;
- dentry = nm_add_file(single_sb, single_sb->s_root, &desc, ino);
- if (IS_ERR(dentry))
- goto leave;
- inode = igrab(dentry->d_inode);
- if (!inode) {
- nmprintk("igrab failed!\n");
- goto leave;
- }
-
- g = kmalloc(sizeof(nm_group_inode_private), GFP_KERNEL);
- if (!g)
- goto leave;
-
- memset(g, 0, sizeof(nm_group_inode_private));
- memcpy(g->disk.uuid, data->arg_u.gc.disk_uuid, CLUSTER_DISK_UUID_LEN);
- spin_lock_init(&g->bitmap_lock);
- if (g->disk.uuid[0])
- g->state = NM_GROUP_NOT_READY;
- else
- g->state = NM_GROUP_READY;
- g->inode = inode;
- inode->u.generic_ip = g;
-
- ret = sprintf(buf, "0: group %u (%s) added, uuid: %s", group_num,
- data->arg_u.gc.name, g->disk.uuid);
- nm_do_callbacks(NM_GROUP_ADD_CB, inode, NULL, group_num);
-
-leave:
- if (ret < 0) {
- if (inode) {
- if (inode->u.generic_ip)
- kfree(inode->u.generic_ip);
- iput(inode);
- }
- if (dentry)
- dput(dentry);
- }
- return ret;
+ return ret;
}
-EXPORT_SYMBOL(nm_create_group);
-int nm_create_node(char *buf, nm_op *data)
+struct nm_group * nm_get_group_by_name(nm_cluster *cluster,
+ const char *group_name)
{
- struct tree_descr desc;
- struct dentry *dentry = NULL;
- struct inode *inode = NULL;
- int ino, node_num, bucket;
- int ret = -EINVAL;
- nm_node_inode_private *n = NULL;
- struct page *page = NULL;
+ struct nm_group *group;
- nmprintk("add cluster node ...\n");
+ read_lock(&cluster->rbtree_lock);
+ group = nm_group_tree_lookup(cluster, group_name, NULL, NULL);
+ read_unlock(&cluster->rbtree_lock);
- data->arg_u.node.node_name[NM_MAX_NAME_LEN] = '\0';
- inode = nm_get_node_by_name(data->arg_u.node.node_name);
- if (inode) {
- ret = sprintf(buf, "%d: node %u (%s) already exists", -EEXIST,
- nm_get_node_global_index(inode),
- data->arg_u.node.node_name);
- iput(inode);
- return ret;
- }
+ return group;
+}
+EXPORT_SYMBOL(nm_get_group_by_name);
+#endif
- node_num = data->arg_u.node.node_num;
- if (node_num > NM_INVALID_SLOT_NUM) {
- nmprintk("bad node_num: %d\n", node_num);
- goto leave;
- }
-
- spin_lock(&cluster.bitmap_lock);
- node_num = nm_find_next_slot(&(cluster.node_bitmap[0]), 255, node_num);
- spin_unlock(&cluster.bitmap_lock);
-
- if (node_num < 0) {
- nmprintk("out of node slots!\n");
- goto leave;
- }
-
- ino = node_num + NM_NODE_INODE_START;
-
- desc.name = data->arg_u.node.node_name;
- desc.ops = NULL;
- desc.mode = S_IFREG | S_IWUSR;
- dentry = nm_add_file(single_sb, single_sb->s_root, &desc, ino);
- if (IS_ERR(dentry)) {
- nmprintk("bad dentry\n");
- goto leave;
- }
- inode = igrab(dentry->d_inode);
- if (!inode) {
- nmprintk("igrab failed!\n");
- goto leave;
- }
-
- n = kmalloc(sizeof(nm_node_inode_private), GFP_KERNEL);
- if (!n) {
- nmprintk("could not kmalloc\n");
- goto leave;
- }
- memcpy(&n->node, &data->arg_u.node, sizeof(nm_node_info));
- INIT_LIST_HEAD(&n->ip_hash);
-
- spin_lock_init(&n->net.sock_lock);
- n->net.sock = NULL;
- n->net.sock_refs = 0;
- n->net.sock_pending = 0;
- n->net.defer_release = 0;
- INIT_LIST_HEAD(&n->net.pending_waiters);
- init_waitqueue_head(&n->net.waitq);
- INIT_LIST_HEAD(&n->net.handlers);
- INIT_LIST_HEAD(&n->net.active_item);
- n->net.page = NULL;
- n->net.page_off = 0;
-
- page = alloc_page(GFP_KERNEL);
- if (page == NULL) {
- nmprintk("page allocation failed\n");
- goto leave;
- }
- n->net.page = page;
-
- /* hash on first ip address */
- spin_lock(&nm_ip_hash_lock);
- bucket = hash_long(n->node.ifaces[0].addr_u.ip_addr4, NM_HASH_BITS);
- list_add_tail(&n->ip_hash, &nm_ip_hash[bucket]);
- spin_unlock(&nm_ip_hash_lock);
- nmprintk("hashed ip %d.%d.%d.%d to bucket %d\n",
- NIPQUAD(n->node.ifaces[0].addr_u.ip_addr4), bucket);
- n->inode = inode;
- inode->u.generic_ip = n;
-
- ret = sprintf(buf, "0: node %u (%s) added", node_num,
- n->node.node_name);
- nm_do_callbacks(NM_NODE_ADD_CB, inode, NULL, node_num);
-
-leave:
- if (ret < 0) {
- if (page)
- __free_page(page);
- if (inode) {
- if (inode->u.generic_ip)
- kfree(inode->u.generic_ip);
- iput(inode);
- }
- if (dentry)
- dput(dentry);
- }
- return ret;
+u8 nm_this_node(void)
+{
+ /* sigh, in our one cluster one node one ip.. */
+ BUG();
+ return 0;
}
+EXPORT_SYMBOL(nm_this_node);
-int nm_make_group_ready(struct inode *group)
+int nm_register_callback(unsigned int type, void (*func)(void *, void *, u8))
{
- nm_group_inode_private *g = group->u.generic_ip;
- if (!g)
+ nm_callback_func *f;
+
+ if (type >= NM_NUM_CB)
return -EINVAL;
- g->state = NM_GROUP_READY;
+ f = kmalloc(sizeof(nm_callback_func), GFP_KERNEL);
+ if (f == NULL)
+ return -ENOMEM;
+ memset(f, 0, sizeof(nm_callback_func));
+ f->func = func;
+ down(&nm_cb_sem);
+ list_add_tail(&f->list, &nm_callbacks[type]);
+ up(&nm_cb_sem);
return 0;
}
-EXPORT_SYMBOL(nm_make_group_ready);
+EXPORT_SYMBOL(nm_register_callback);
-int nm_add_node_to_group(char *buf, nm_op *data)
+int nm_unregister_callback(unsigned int type, void (*func)(void *, void *, u8))
{
- struct tree_descr desc;
- struct inode *inode = NULL;
- struct dentry *dentry = NULL, *child = NULL;
- nm_group_inode_private *g = NULL;
- int group_num, slot_num;
+ struct list_head *iter, *tmpiter;
int ret = -EINVAL;
- u8 ino;
- char tmpname[6];
+ nm_callback_func *f;
- nmprintk("add node to group...\n");
+ if (type >= NM_NUM_CB)
+ return ret;
- group_num = data->arg_u.gc.group_num;
- ino = data->arg_u.gc.node_num;
- slot_num = data->arg_u.gc.slot_num;
-
- /* request a certain slot, or NM_INVALID_SLOT_NUM for any slot */
- if (slot_num > NM_INVALID_SLOT_NUM)
- goto leave;
-
- if (ino >= NM_INVALID_SLOT_NUM || group_num >= NM_INVALID_SLOT_NUM)
- goto leave;
-
- inode = nm_get_group_by_num(group_num);
- if (!inode)
- goto leave;
- if (list_empty(&inode->i_dentry))
- goto leave;
- dentry = dget(list_entry(inode->i_dentry.next, struct dentry, d_alias));
- if (!dentry)
- goto leave;
- g = inode->u.generic_ip;
- if (!g)
- goto leave;
-
- if (g->state == NM_GROUP_NOT_READY) {
- ret = sprintf(buf, "%d: group disk has not been discovered. "
- "cannot add nodes.", -EROFS);
- goto leave;
+ down(&nm_cb_sem);
+ list_for_each_safe(iter, tmpiter, &nm_callbacks[type]) {
+ f = list_entry (iter, nm_callback_func, list);
+ if (f->func == func) {
+ list_del(&f->list);
+ kfree(f);
+ ret = 0;
+ break;
+ }
}
-
- spin_lock(&g->bitmap_lock);
- slot_num = nm_find_next_slot(&(g->slot_bitmap[0]), 255, slot_num);
- spin_unlock(&g->bitmap_lock);
- if (slot_num < 0)
- goto leave;
-
- /* create hardlink to ino with name "slot_num" */
- sprintf(tmpname, "%03u", slot_num);
- desc.name = &(tmpname[0]);
- desc.ops = NULL;
- desc.mode = 0;
- child = nm_add_link(single_sb, dentry, &desc,
- NM_NODE_INODE_START+ino);
- if (IS_ERR(child)) {
- nmprintk("error adding link for %s\n", tmpname);
- child = NULL;
- goto leave;
- }
-
- ret = sprintf(buf, "0: node %u added to group: %.*s",
- ino, dentry->d_name.len, dentry->d_name.name);
-
- if (!igrab(child->d_inode))
- goto leave;
- nm_do_callbacks(NM_GROUP_NODE_ADD_CB, inode, child->d_inode, slot_num);
- iput(child->d_inode);
-
-leave:
- if (dentry)
- dput(dentry);
- if (child)
- dput(child);
- if (inode)
- iput(inode);
+ up(&nm_cb_sem);
return ret;
}
-EXPORT_SYMBOL(nm_add_node_to_group);
+EXPORT_SYMBOL(nm_unregister_callback);
-int nm_remove_node_from_group(struct inode *group, struct inode *node)
+static void nm_do_callbacks(int type, void *ptr1, void *ptr2, u8 idx)
{
- struct dentry *child = NULL;
- nm_group_inode_private *g = NULL;
- int slot_num;
- int ret = -EINVAL;
-
- nmprintk("remove node from group...\n");
-
- slot_num = nm_get_group_index(group, node, &child);
-
- if (slot_num == NM_MAX_NODES || !child)
- goto leave;
-
- g = group->u.generic_ip;
- if (!g)
- goto leave;
-
- nmprintk("killing the dentry now!!\n");
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)
- down(&group->i_zombie);
- node->i_nlink--;
- d_delete(child);
- up(&group->i_zombie);
-#else
- down(&group->i_sem);
- node->i_nlink--;
- d_delete(child);
- up(&group->i_sem);
-#endif
- nmprintk("done killing the dentry!!\n");
-
-
- if (!igrab(node))
- goto leave;
- nm_do_callbacks(NM_GROUP_NODE_DEL_CB, group, node, slot_num);
- iput(node);
+ struct list_head *iter;
+ nm_callback_func *f;
- spin_lock(&g->bitmap_lock);
- clear_bit(slot_num, (void *)(&g->slot_bitmap[0]));
- spin_unlock(&g->bitmap_lock);
-
- ret = 0;
-
-leave:
- if (child)
- dput(child);
- return ret;
+ down(&nm_cb_sem);
+ list_for_each(iter, &nm_callbacks[type]) {
+ f = list_entry (iter, nm_callback_func, list);
+ (f->func) (ptr1, ptr2, idx);
+ }
+ up(&nm_cb_sem);
}
+/* node usysfs bits */
-
-int nm_name_cluster(char *buf, nm_op *data)
+static struct nm_cluster *to_nm_cluster(struct kobject *kobj)
{
- int ret = -EINVAL;
-
- nmprintk("name cluster...\n");
- spin_lock(&nm_lock);
- if (cluster.state == NM_CLUSTER_UP) {
- ret = sprintf(buf, "%d: cluster name could not be set. "
- "cluster already up.", -EINVAL);
- goto leave;
- }
- memset(cluster.name, 0, NM_MAX_NAME_LEN+1);
- memcpy(cluster.name, data->arg_u.name, NM_MAX_NAME_LEN);
- ret = sprintf(buf, "0: cluster name set: %s", cluster.name);
-leave:
- spin_unlock(&nm_lock);
- return ret;
+ return kobj ? container_of(kobj, struct nm_cluster, cl_kobj) : NULL;
}
-int nm_destroy_cluster(char *buf)
+static struct nm_node *to_nm_node(struct kobject *kobj)
{
- int ret;
- nmprintk("destroy cluster...\n");
-
- /* TODO */
- spin_lock(&nm_lock);
- nm_init_cluster(&cluster);
- ret = sprintf(buf, "0: rudely destroyed cluster!!!");
- spin_unlock(&nm_lock);
- return ret;
+ return kobj ? container_of(kobj, struct nm_node, nd_kobj) : NULL;
}
-
-int nm_get_cluster_num_nodes(char *buf)
+static void nm_node_release(struct kobject *kobj)
{
- int num_nodes=0, i;
-
- nmprintk("get cluster num nodes...\n");
-
- spin_lock(&cluster.bitmap_lock);
- for (i=0; i<ARRAY_SIZE(cluster.node_bitmap); i++)
- num_nodes += hweight_long(cluster.node_bitmap[i]);
- spin_unlock(&cluster.bitmap_lock);
-
- return sprintf(buf, "0: %d", num_nodes);
+ struct nm_node *node = to_nm_node(kobj);
+ printk("releasing node %p\n", node);
}
-int nm_get_cluster_num_groups(char *buf)
+static ssize_t nm_node_num_read(struct nm_node *node, char *page)
{
- int num_groups=0, i;
-
- nmprintk("get cluster num groups...\n");
-
- spin_lock(&cluster.bitmap_lock);
- for (i=0; i<ARRAY_SIZE(cluster.group_bitmap); i++)
- num_groups += hweight_long(cluster.group_bitmap[i]);
- spin_unlock(&cluster.bitmap_lock);
-
- return sprintf(buf, "0: %d", num_groups);
+ return sprintf(page, "%d\n", node->nd_num);
}
-int nm_get_group_num_nodes(struct inode *group)
+static ssize_t nm_node_num_write(struct nm_node *node, const char *page,
+ size_t count)
{
- int num_nodes=0, i;
- nm_group_inode_private *g;
-
- nmprintk("get group num nodes...\n");
-
- g = group->u.generic_ip;
- if (!g)
+ struct nm_cluster *cluster = to_nm_cluster(node->nd_kobj.parent);
+ unsigned long tmp;
+ char *p = (char *)page;
+ int node_num;
+
+ tmp = simple_strtoul(p, &p, 0);
+ if (!p || (*p && (*p != '\n')))
return -EINVAL;
- spin_lock(&g->bitmap_lock);
- for (i=0; i<ARRAY_SIZE(g->slot_bitmap); i++)
- num_nodes += hweight_long(g->slot_bitmap[i]);
- spin_unlock(&g->bitmap_lock);
+ if (tmp >= NM_MAX_NODES)
+ return -ERANGE;
- return num_nodes;
-}
+ node_num = nm_find_next_slot(&cluster->cl_bitmap_lock,
+ &(cluster->cl_node_bitmap[0]), 255, tmp);
+ if (node_num < 0)
+ return node_num;
-void * nm_iterate_group_disk_slots(struct inode *group, int *idx)
-{
- nm_group_inode_private *priv;
- int next;
+ node->nd_num = tmp;
- if (*idx >= 255)
- return NULL;
- priv = group->u.generic_ip;
- if (!priv)
- return NULL;
- next = find_next_bit(priv->slot_bitmap, 255, *idx);
- if (next >= 255)
- return NULL;
- *idx = next;
- return util_rarray_idx_to_slot(&priv->disk.slots, next);
+ return count;
}
-EXPORT_SYMBOL(nm_iterate_group_disk_slots);
-
-int nm_get_node_info(char *buf, nm_op *data)
+static ssize_t nm_node_ipv4_port_read(struct nm_node *node, char *page)
{
- int ret, tmpret, i;
- nm_node_inode_private *priv;
- nm_network_iface *n;
- struct inode *inode = NULL;
- struct dentry *dentry;
- u8 node_num;
- u16 vers;
-
- ret = -EINVAL;
- node_num = data->arg_u.index;
- inode = nm_get_node_by_num(node_num);
- if (inode) {
- dentry = list_entry(inode->i_dentry.next, struct dentry,
- d_alias);
- priv = inode->u.generic_ip;
- ret = sprintf(buf, "0: global_index=%u\n"
- "name=%.*s\n",
- priv->node.node_num, dentry->d_name.len,
- dentry->d_name.name);
- buf += ret;
- for (i=0; i<NM_MAX_IFACES; i++) {
- n = &priv->node.ifaces[i];
- vers = ntohs(n->ip_version);
- nmprintk("ip_version=%u, vers=%u\n",
- n->ip_version, vers);
- if (vers!=4 && vers!=6)
- continue;
- /* TODO: how to print ipv6? */
- tmpret = sprintf(buf, "iface%d.port=%u\n"
- "iface%d.version=%d\n"
- "iface%d.addr=%d.%d.%d.%d\n",
- i, ntohs(n->ip_port), i, vers, i,
- NIPQUAD(n->addr_u.ip_addr4));
- buf += tmpret;
- ret += tmpret;
- }
- iput(inode);
- }
- return ret;
+ return sprintf(page, "%u\n", ntohs(node->nd_ipv4_port));
}
-int nm_get_group_info(char *buf, nm_op *data)
+static ssize_t nm_node_ipv4_port_write(struct nm_node *node, const char *page,
+ size_t count)
{
- int ret, tmpret;
- nm_group_inode_private *g = NULL;
- struct inode *inode = NULL;
- u8 group_num;
- struct dentry *dentry, *child;
+ unsigned long tmp;
+ char *p = (char *)page;
- ret = -EINVAL;
- group_num = data->arg_u.index;
- inode = nm_get_group_by_num(group_num);
- if (inode) {
- g = inode->u.generic_ip;
- dentry = list_entry(inode->i_dentry.next, struct dentry,
- d_alias);
- ret = sprintf(buf, "0: group_num=%u\n"
- "name=%.*s\n"
- "disk_uuid=%s\n",
- group_num, dentry->d_name.len,
- dentry->d_name.name, g->disk.uuid);
- buf += ret;
+ tmp = simple_strtoul(p, &p, 0);
+ if (!p || (*p && (*p != '\n')))
+ return -EINVAL;
- spin_lock(&dcache_lock);
- list_for_each_entry(child, &dentry->d_subdirs, d_child) {
- tmpret = sprintf(buf, "%.*s\n", child->d_name.len,
- child->d_name.name);
- buf += tmpret;
- ret += tmpret;
- }
- spin_unlock(&dcache_lock);
- iput(inode);
- }
- return ret;
-}
-
-
-
-static ssize_t write_cluster(struct file *file, char *buf, size_t size)
-{
- nm_op *data;
- int ret;
- u8 me;
-
- nmprintk("write_cluster\n");
-
- if (size < sizeof(*data))
- return -EINVAL;
- data = (nm_op *) buf;
- if (data->magic != NM_OP_MAGIC)
+ if (tmp == 0)
return -EINVAL;
+ if (tmp >= (u16)-1)
+ return -ERANGE;
- switch (data->opcode) {
- case NM_OP_CREATE_CLUSTER:
- ret = nm_create_cluster(buf);
- break;
- case NM_OP_CREATE_GROUP:
- ret = nm_create_group(buf, data);
- break;
- case NM_OP_NAME_CLUSTER:
- ret = nm_name_cluster(buf, data);
- break;
- case NM_OP_DESTROY_CLUSTER:
- ret = nm_destroy_cluster(buf);
- break;
- case NM_OP_ADD_CLUSTER_NODE:
- ret = nm_create_node(buf, data);
- break;
- case NM_OP_GET_CLUSTER_NUM_NODES:
- ret = nm_get_cluster_num_nodes(buf);
- break;
- case NM_OP_GET_GLOBAL_NODE_NUM:
- ret = 0;
- me = nm_this_node(NULL);
- if (me >= NM_MAX_NODES)
- ret = -EINVAL;
- ret = sprintf(buf, "%d: %u", ret, me);
- break;
- default:
- ret = sprintf(buf, "%d: bad opcode: %u", -EINVAL,
- data->opcode);
- break;
- }
- nmprintk("leaving!\n");
- return ret;
+ node->nd_ipv4_port = htons(tmp);
+
+ return count;
}
-static ssize_t write_node(struct file *file, char *buf, size_t size)
+static ssize_t nm_node_ipv4_address_read(struct nm_node *node, char *page)
{
- nm_op *data;
- int ret;
-
- nmprintk("write_node\n");
-
- if (size < sizeof(*data))
- return -EINVAL;
- data = (nm_op *) buf;
- if (data->magic != NM_OP_MAGIC)
- return -EINVAL;
-
- switch (data->opcode) {
- case NM_OP_GET_NODE_INFO:
- ret = nm_get_node_info(buf, data);
- break;
- default:
- ret = sprintf(buf, "%d: bad opcode: %u", -EINVAL,
- data->opcode);
- break;
- }
- nmprintk("leaving!\n");
- return ret;
+ return sprintf(page, "%u.%u.%u.%u\n", NIPQUAD(node->nd_ipv4_address));
}
-static ssize_t write_group(struct file *file, char *buf, size_t size)
+/* XXX this is acting as commit until commit really lands.. all this will be
+ * hoisted into the commit method */
+static ssize_t nm_node_ipv4_address_write(struct nm_node *node,
+ const char *page,
+ size_t count)
{
- nm_op *data;
int ret;
-
- nmprintk("write_group\n");
+ union {
+ u32 ipv4_addr; /* network order */
+ unsigned char bytes[4];
+ } u;
+ unsigned int octets[4]; /* dotted-quad fields as parsed, before the range check */
+ int i;
- if (size < sizeof(*data))
- return -EINVAL;
- data = (nm_op *) buf;
- if (data->magic != NM_OP_MAGIC)
+ /* parse decimal octets: "%c" read one raw character per field, so
+ * "10.0.0.1" was rejected and "1.2.3.4" stored the ASCII codes of
+ * the digits instead of the values 1, 2, 3, 4 */
+ ret = sscanf(page, "%3u.%3u.%3u.%3u", &octets[0], &octets[1], &octets[2],
+ &octets[3]);
+ if (ret != 4)
 return -EINVAL;
+ for (i = 0; i < 4; i++) {
+ if (octets[i] > 255)
+ return -ERANGE;
+ /* bytes[0] is the first octet in memory, i.e. network order */
+ u.bytes[i] = octets[i];
+ }
- nmprintk("opcode is %u, add_group is %u\n", data->opcode,
- NM_OP_ADD_GROUP_NODE);
- switch (data->opcode) {
- case NM_OP_GET_GROUP_INFO:
- ret = nm_get_group_info(buf, data);
- break;
+ memcpy(&node->nd_ipv4_address, &u.ipv4_addr, sizeof(u.ipv4_addr));
- case NM_OP_ADD_GROUP_NODE:
- ret = nm_add_node_to_group(buf, data);
- break;
+#if 0
+ /* XXX kick off to a thread? */
+ /* XXX bubble inode arg down to callbacks? :/ */
+ nm_do_callbacks(NM_NODE_ADD_CB, node);
+#endif
- default:
- ret = sprintf(buf, "%d: bad opcode: %u",
- -EINVAL, data->opcode);
- break;
- }
- nmprintk("leaving!\n");
- return ret;
+ return count;
}
+struct nm_node_attribute {
+ struct attribute attr;
+ ssize_t (*show)(struct nm_node *, char *);
+ ssize_t (*store)(struct nm_node *, const char *, size_t);
+};
+static struct nm_node_attribute nm_node_attr_num = {
+ .attr = { .name = "num", .mode = S_IRUGO | S_IWUSR },
+ .show = nm_node_num_read,
+ .store = nm_node_num_write,
+};
+static struct nm_node_attribute nm_node_attr_ipv4_port = {
+ .attr = { .name = "ipv4_port", .mode = S_IRUGO | S_IWUSR },
+ .show = nm_node_ipv4_port_read,
+ .store = nm_node_ipv4_port_write,
+};
+static struct nm_node_attribute nm_node_attr_ipv4_address = {
+ .attr = { .name = "ipv4_address", .mode = S_IRUGO | S_IWUSR },
+ .show = nm_node_ipv4_address_read,
+ .store = nm_node_ipv4_address_write,
+};
+static struct attribute *nm_node_default_attrs[] = {
+ &nm_node_attr_num.attr,
+ &nm_node_attr_ipv4_port.attr,
+ &nm_node_attr_ipv4_address.attr,
+ NULL,
+};
-struct inode * nm_get_group_by_num(u8 group_num)
+static ssize_t nm_node_show(struct kobject *kobj,
+ struct attribute *attr,
+ char *page)
{
- struct inode *inode = iget(single_sb, group_num + NM_GROUP_INODE_START);
- if (!inode)
- return NULL;
- if (!inode->u.generic_ip) {
- iput(inode);
- return NULL;
- }
- return inode;
-}
-EXPORT_SYMBOL(nm_get_group_by_num);
+ struct nm_node *node = to_nm_node(kobj);
+ struct nm_node_attribute *nm_node_attr =
+ container_of(attr, struct nm_node_attribute, attr);
+ ssize_t ret = 0;
-struct inode * nm_get_node_by_num(u8 node_num)
-{
- struct inode *inode = iget(single_sb, node_num + NM_NODE_INODE_START);
- if (!inode)
- return NULL;
- if (!inode->u.generic_ip) {
- iput(inode);
- return NULL;
- }
- return inode;
+ if (nm_node_attr->show)
+ ret = nm_node_attr->show(node, page);
+ return ret;
}
-EXPORT_SYMBOL(nm_get_node_by_num);
-/* ipv4 only for now... */
-struct inode * nm_get_node_by_ip(u32 addr)
+static ssize_t nm_node_store(struct kobject *kobj,
+ struct attribute *attr,
+ const char *page, size_t count)
{
- int bucket;
- struct list_head *iter;
- nm_node_inode_private *priv;
- struct inode *ret = NULL;
-
- bucket = hash_long(addr, NM_HASH_BITS);
+ struct nm_node *node = to_nm_node(kobj);
+ struct nm_node_attribute *nm_node_attr =
+ container_of(attr, struct nm_node_attribute, attr);
+ ssize_t ret = -EINVAL;
- spin_lock(&nm_ip_hash_lock);
- list_for_each(iter, &nm_ip_hash[bucket]) {
- priv = list_entry(iter, nm_node_inode_private, ip_hash);
- if (priv->node.ifaces[0].addr_u.ip_addr4 == addr) {
- ret = igrab(priv->inode);
- break;
- }
-
- }
- spin_unlock(&nm_ip_hash_lock);
+ if (nm_node_attr->store)
+ ret = nm_node_attr->store(node, page, count);
return ret;
}
-EXPORT_SYMBOL(nm_get_node_by_ip);
-nm_cluster * nm_get_cluster(void)
-{
- return &cluster;
-}
+struct sysfs_ops nm_node_sysfs_ops = {
+ .show = &nm_node_show,
+ .store = &nm_node_store,
+};
-struct inode * nm_get_group_node_by_index(struct inode *group, u8 index)
-{
- struct dentry *dentry = NULL, *parent;
- struct inode *inode = NULL;
- char tmpname[6];
+static struct ukobj_type nm_node_type = {
+ .ktype = {
+ .release = nm_node_release,
+ .sysfs_ops = &nm_node_sysfs_ops,
+ .default_attrs = nm_node_default_attrs,
+ },
+ .owner = THIS_MODULE,
+};
- if (list_empty(&group->i_dentry))
- return NULL;
- parent = dget(list_entry(group->i_dentry.next, struct dentry, d_alias));
- if (!parent)
- return NULL;
-
- sprintf(tmpname, "%03u", index);
- dentry = lookup_one_len(tmpname, parent, strlen(tmpname));
- if (!IS_ERR(dentry)) {
- inode = dentry->d_inode;
- if (inode) {
- inode = igrab(inode);
- if (!inode->u.generic_ip || !S_ISREG (inode->i_mode)) {
- nmprintk("bad inode!\n");
- iput(inode);
- inode = NULL;
- }
- }
- if (!inode)
- dput(dentry);
- }
- dput(parent);
- return inode;
+/* node set */
+
+struct nm_node_set {
+ struct kset ns_kset;
+ /* some stuff? */
+};
+
+static struct nm_node_set *to_nm_node_set(struct kset *kset)
+{
+ return kset ? container_of(kset, struct nm_node_set, ns_kset) : NULL;
}
-EXPORT_SYMBOL(nm_get_group_node_by_index);
-struct inode * __nm_get_node_by_name(const char *node_name, int dir)
+
+static struct kobject *nm_node_set_make_object(struct kset *kset,
+ const char *name)
{
- struct dentry *dentry = NULL;
- struct inode *inode = NULL;
+ struct nm_node *node = NULL;
+ struct nm_cluster *cluster = to_nm_cluster(kset->kobj.parent);
+ struct kobject *ret = NULL;
+ net_inode_private *nip;
+ struct page *page;
- NM_ASSERT(node_name);
- NM_ASSERT(single_sb);
- NM_ASSERT(single_sb->s_root);
+ printk("trying to make a node object under cluster %p\n", cluster);
- dentry = lookup_one_len(node_name, single_sb->s_root,
- strlen(node_name));
- if (!IS_ERR(dentry)) {
- inode = dentry->d_inode;
- if (inode) {
- inode = igrab(inode);
- if (!inode->u.generic_ip ||
- (dir && !S_ISDIR (inode->i_mode)) ||
- (!dir && !S_ISREG (inode->i_mode))) {
- nmprintk("bad inode!\n");
- iput(inode);
- inode = NULL;
- }
- }
- }
- return inode;
-}
-EXPORT_SYMBOL(__nm_get_node_by_name);
+ if (strlen(name) > NM_MAX_NAME_LEN)
+ goto out; /* ENAMETOOLONG */
+ node = kcalloc(1, sizeof(struct nm_node), GFP_KERNEL);
+ if (node == NULL)
+ goto out; /* ENOMEM */
-/*
- * if group is NULL: return the global index for this node
- * if group is non NULL: return the index within the group of this node
- *
- * NOTE: currently getting the group index is slow
- * will need to change this somehow
- */
-u8 nm_this_node(struct inode *group)
-{
- struct inode *inode = NULL;
- struct dentry *child = NULL;
- u8 node_num = NM_MAX_NODES;
+ strcpy(node->nd_name, name); /* use kobj.name instead? */
- inode = nm_get_node_by_name(nm_nodename);
- if (inode && inode->u.generic_ip) {
- if (group)
- node_num = nm_get_group_index(group, inode, &child);
- else
- node_num = nm_get_node_global_index(inode);
+ /* this should be somewhere else */
+ nip = &node->nd_net_inode_private;
+ spin_lock_init(&nip->sock_lock);
+ nip->sock = NULL;
+ nip->sock_refs = 0;
+ nip->sock_pending = 0;
+ nip->defer_release = 0;
+ INIT_LIST_HEAD(&nip->pending_waiters);
+ init_waitqueue_head(&nip->waitq);
+ INIT_LIST_HEAD(&nip->handlers);
+ INIT_LIST_HEAD(&nip->active_item);
+ nip->page = NULL;
+ nip->page_off = 0;
+ page = alloc_page(GFP_KERNEL);
+ if (page == NULL) {
+ nmprintk("page allocation failed\n");
+ goto out; /* ENOMEM */
}
- iput(inode);
- dput(child);
- //nmprintk("for group=%p, this node is %u\n", group, node_num);
- return node_num;
-}
-EXPORT_SYMBOL(nm_this_node);
+ nip->page = page;
-/* slow */
-static u8 nm_get_group_index(struct inode *group, struct inode *inode,
- struct dentry **child)
-{
- struct dentry *tmp = NULL, *parent = NULL;
- u8 slot_num = NM_MAX_NODES;
- struct list_head *iter;
- char tmpname[6];
- char *err;
+ kobject_set_name(&node->nd_kobj, name);
+ node->nd_kobj.ktype = &nm_node_type.ktype;
+ kobject_init(&node->nd_kobj);
- *child = NULL;
- parent = NULL;
- if (list_empty(&group->i_dentry))
- goto leave;
- parent = dget(list_entry(group->i_dentry.next, struct dentry, d_alias));
- if (!parent)
- goto leave;
-
- spin_lock(&dcache_lock);
- list_for_each(iter, &parent->d_subdirs) {
- tmp = list_entry(iter, struct dentry, d_child);
- if (tmp->d_inode == inode)
- break;
- tmp = NULL;
- }
- if (tmp)
- dget_locked(tmp);
- spin_unlock(&dcache_lock);
+ ret = &node->nd_kobj;
- if (!tmp || tmp->d_name.len > 3)
- goto leave;
- strncpy(tmpname, tmp->d_name.name, tmp->d_name.len);
- tmpname[tmp->d_name.len] = '\0';
- err=NULL;
- slot_num = simple_strtoul(tmpname, &err, 10);
-
- if (*err != '\0')
- slot_num = NM_MAX_NODES; // error
- else
- *child = dget(tmp); // done, get extra ref for child
-
-leave:
- dput(parent);
- dput(tmp);
+out:
+ if (ret == NULL)
+ kfree(node);
- return slot_num;
+ return ret;
}
-int nm_register_callback(int type, void (*func)(void *, void *, u8))
+static void nm_node_set_drop_object(struct kset *kset, struct kobject *kobj)
{
- nm_callback_func *f;
+ struct nm_node_set *ns = to_nm_node_set(kset);
- if (type < NM_NODE_ADD_CB || type > NM_GROUP_NODE_DEL_CB)
- return -EINVAL;
- f = kmalloc(sizeof(nm_callback_func), GFP_KERNEL);
- if (f == NULL)
- return -ENOMEM;
- memset(f, 0, sizeof(nm_callback_func));
- f->func = func;
- down(&nm_cb_sem);
- list_add_tail(&f->list, &nm_callbacks[type]);
- up(&nm_cb_sem);
- return 0;
+ printk("dropping nm_node_set %p\n", ns);
+ kobject_put(kobj);
}
-EXPORT_SYMBOL(nm_register_callback);
-int nm_unregister_callback(int type, void (*func)(void *, void *, u8))
-{
- struct list_head *iter, *tmpiter;
- int ret = -EINVAL;
- nm_callback_func *f;
+static struct ukobj_type nm_node_set_type = {
+ .ktype = {
+ .sysfs_ops = NULL, /* no attributes */
+ },
+ .make_object = nm_node_set_make_object,
+ .drop_object = nm_node_set_drop_object,
+ .owner = THIS_MODULE,
+};
- if (type < NM_NODE_ADD_CB || type > NM_GROUP_NODE_DEL_CB)
- return ret;
+static struct nm_node_set nm_node_set = { /* kset that holds a cluster's node objects */
+ .ns_kset = {
+ .kobj = {
+ .name = "node", /* was "cluster", the same name nm_cluster_set registers; NOTE(review): confirm the intended directory name */
+ .ktype = &nm_node_set_type.ktype,
+ },
+ },
+};
- down(&nm_cb_sem);
- list_for_each_safe(iter, tmpiter, &nm_callbacks[type]) {
- f = list_entry (iter, nm_callback_func, list);
- if (f->func == func) {
- list_del(&f->list);
- kfree(f);
- ret = 0;
- break;
- }
- }
- up(&nm_cb_sem);
- return ret;
-}
-EXPORT_SYMBOL(nm_unregister_callback);
+/* cluster */
-static void nm_do_callbacks(int type, void *ptr1, void *ptr2, u8 idx)
+
+static void nm_cluster_release(struct kobject *kobj)
{
- struct list_head *iter;
- nm_callback_func *f;
-
- down(&nm_cb_sem);
- list_for_each(iter, &nm_callbacks[type]) {
- f = list_entry (iter, nm_callback_func, list);
- (f->func) (ptr1, ptr2, idx);
- }
- up(&nm_cb_sem);
+ struct nm_cluster *cluster = to_nm_cluster(kobj);
+ printk("releasing cluster %p\n", cluster);
}
+static struct ukobj_type nm_cluster_type = {
+ .ktype = {
+ .release = nm_cluster_release,
+ .sysfs_ops = NULL, /* no attributes */
+ },
+ .owner = THIS_MODULE,
+};
-static void nm_teardown(void)
-{
- remove_proc_entry("cluster/nm", NULL);
- remove_proc_entry("cluster", NULL);
-}
+/* cluster set */
-static void nm_init_cluster(nm_cluster *cluster)
+struct nm_cluster_set {
+ struct kset cs_kset;
+ /* some stuff? */
+};
+
+static struct nm_cluster_set *to_nm_cluster_set(struct kset *kset)
{
- int i;
- memset(cluster, 0, sizeof(nm_cluster));
- cluster->state = NM_CLUSTER_DOWN;
- spin_lock_init(&cluster->bitmap_lock);
-
- for (i=NM_NODE_ADD_CB; i<=NM_GROUP_NODE_DEL_CB; i++)
- INIT_LIST_HEAD(&nm_callbacks[i]);
+ return kset ? container_of(kset, struct nm_cluster_set, cs_kset) : NULL;
}
+static struct kset *nm_cluster_default_sets[] = { /* NULL-terminated; referenced by nm_cluster_set_type.default_sets */
+ &nm_node_set.ns_kset, /* the array holds struct kset *, so pass the embedded kset rather than the wrapper struct */
+ &nm_heartbeat_set, /* NOTE(review): init_nm() initializes hb_heartbeat_set.hs_kset -- confirm this name and type */
+ NULL,
+};
+static struct kobject *nm_cluster_set_make_object(struct kset *kset,
+ const char *name)
+{
+ struct nm_cluster *cluster;
+ printk("trying to make a cluster object\n");
+ cluster = kcalloc(1, sizeof(struct nm_cluster), GFP_KERNEL);
+ if (!cluster)
+ return NULL;
-/*----------------------------------------------------------------------------*/
-/*
- * populating the filesystem.
- */
-static int nm_fill_super(struct super_block * sb, void * data, int silent)
-{
- int ret, sz;
- struct TA_write_ops *ops;
- static struct tree_descr nm_files[] = {
- [NM_Cluster] = {".cluster", &transaction_ops, S_IWUSR},
- [NM_Node] = {".node", &transaction_ops, S_IWUSR},
- [NM_Group] = {".group", &transaction_ops, S_IWUSR},
- /* last one */ {""}
- };
-
- sz = sizeof(nm_files) / sizeof(struct tree_descr);
- ops = kmalloc(sizeof(struct TA_write_ops) +
- (sz * sizeof(ops->write_op[0])),
- GFP_KERNEL);
- if (!ops)
- return -ENOMEM;
+ spin_lock_init(&cluster->cl_bitmap_lock); /* struct nm_cluster names the field cl_bitmap_lock */
- ops->num_ops = sz;
- ops->write_op[NM_Cluster] = write_cluster;
- ops->write_op[NM_Node] = write_node;
- ops->write_op[NM_Group] = write_group;
+ kobject_set_name(&cluster->cl_kobj, name);
+ cluster->cl_kobj.ktype = &nm_cluster_type.ktype;
+ kobject_init(&cluster->cl_kobj);
- single_sb = NULL;
- nmprintk("calling simple_fill_super...\n");
- ret = simple_fill_super(sb, 0x98675309, nm_files);
- if (ret >= 0) {
- TA_GENERIC_SB_MEMBER(sb) = ops;
- single_sb = sb;
- } else {
- kfree(ops);
- }
- return ret;
+ return &cluster->cl_kobj; /* the embedded kobject is cl_kobj; struct nm_cluster has no member named kobj */
}
-/* blindly copied from ocfs2 */
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
-static struct super_block *nm_get_sb(struct file_system_type *fs_type,
- int flags,
- const char *dev_name,
- void *data)
+static void nm_cluster_set_drop_object(struct kset *kset, struct kobject *kobj)
{
- return get_sb_single(fs_type, flags, data, nm_fill_super);
+ struct nm_cluster_set *cs = to_nm_cluster_set(kset);
+
+ printk("dropping nm_cluster_set %p\n", cs);
+ kobject_put(kobj);
}
-static struct file_system_type nm_fs_type = {
- .owner = THIS_MODULE,
- .name = "nm",
- .get_sb = nm_get_sb,
- .kill_sb = kill_anon_super,
+static struct ukobj_type nm_cluster_set_type = {
+ .ktype = {
+ .sysfs_ops = NULL, /* no attributes */
+ },
+ .make_object = nm_cluster_set_make_object,
+ .drop_object = nm_cluster_set_drop_object,
+ .default_sets = nm_cluster_default_sets,
+ .owner = THIS_MODULE,
};
-#else
-static struct super_block *nm_read_super(struct super_block *sb,
- void *data,
- int silent)
+
+static struct nm_cluster_set nm_cluster_set = {
+ .cs_kset = {
+ .kobj = {
+ .name = "cluster",
+ .ktype = &nm_cluster_set_type.ktype,
+ },
+ },
+};
+
+static void __exit exit_nm(void)
{
- nmprintk("welcome to nm_read_super!!!\n");
- return (nm_fill_super(sb, data, silent) < 0) ? NULL : sb;
+ nmprintk("unloading nm module\n");
+ usysfs_unregister_subsystem(&nm_cluster_set.cs_kset);
+ nm_destroy_ip_hash();
}
-static DECLARE_FSTYPE (nm_fs_type, "nm", nm_read_super, FS_SINGLE|FS_LITTER);
-#endif
+static void kset_init(struct kset *k)
+{
+ kobject_init(&k->kobj);
+ INIT_LIST_HEAD(&k->list);
+}
static int __init init_nm(void)
{
- int retval;
- nm_nodename = kmalloc(strlen(system_utsname.nodename) + 1, GFP_KERNEL);
- if (nm_nodename==NULL) {
- nmprintk("could not allocate a few bytes for nodename!\n");
- return -ENOMEM;
+ int ret;
+ unsigned i;
+
+ for (i = 0; i < ARRAY_SIZE(nm_callbacks); i++) /* strict bound: index ARRAY_SIZE is one past the last list head */
+ INIT_LIST_HEAD(&nm_callbacks[i]);
+
+ kset_init(&nm_cluster_set.cs_kset);
+ kset_init(&nm_node_set.ns_kset);
+ kset_init(&hb_heartbeat_set.hs_kset);
+ ret = usysfs_register_subsystem(&nm_cluster_set.cs_kset);
+ if (ret) {
+ printk(KERN_ERR "nodemanager: Registration returned %d\n", ret);
+ goto out;
}
- strcpy(nm_nodename, system_utsname.nodename);
- nmprintk("loading nm module: nodename is %s\n", nm_nodename);
- if (nm_init_ip_hash() < 0) {
- nmprintk("failed to allocate node IP hash\n");
- return -ENOMEM;
+ ret = nm_init_ip_hash();
+ if (ret) {
+ printk(KERN_ERR "nodemanager: IP hash initialization "
+ "returned %d\n", ret);
+ goto out;
}
- nm_init_cluster(&cluster);
+out:
+ if (ret)
+ exit_nm();
- if (proc_mkdir("cluster", 0)) {
- if (proc_mkdir("cluster/nm", 0)) {
- }
- }
- nmprintk("calling register_filesystem\n");
- retval = register_filesystem(&nm_fs_type);
- nmprintk("done calling register_filesystem: ret=%d\n", retval);
- if (retval)
- nm_teardown();
- return retval;
+ return ret;
}
-static void __exit exit_nm(void)
-{
- nm_teardown();
- unregister_filesystem(&nm_fs_type);
- nm_destroy_ip_hash();
- kfree(nm_nodename);
- nmprintk("unloading nm module\n");
-}
-
-
-
-
MODULE_LICENSE("GPL");
module_init(init_nm)
module_exit(exit_nm)
Modified: branches/usysfsify/fs/ocfs2/cluster/nodemanager.h
===================================================================
--- branches/usysfsify/fs/ocfs2/cluster/nodemanager.h 2005-03-15 22:35:08 UTC (rev 1978)
+++ branches/usysfsify/fs/ocfs2/cluster/nodemanager.h 2005-03-15 23:35:32 UTC (rev 1979)
@@ -32,51 +32,13 @@
#include "ocfs2_nodemanager.h"
-
-
-struct _nm_ctxt
-{
- int dummy;
+struct nm_cluster {
+ struct kobject cl_kobj;
+ char cl_name[NM_MAX_NAME_LEN+1];
+ spinlock_t cl_bitmap_lock;
+ unsigned long cl_node_bitmap[BITS_TO_LONGS(NM_MAX_NODES)];
};
-enum {
- NM_CLUSTER_DOWN=0,
- NM_CLUSTER_UP
-};
-
-enum {
- NM_GROUP_NOT_READY=0,
- NM_GROUP_READY
-};
-
-enum {
- NM_Root = 1,
- NM_Cluster,
- NM_Node,
- NM_Group,
-};
-
-typedef struct _nm_cluster
-{
- char name[NM_MAX_NAME_LEN+1];
- int state;
- spinlock_t bitmap_lock;
- unsigned long group_bitmap[BITS_TO_LONGS(NM_MAX_NODES)];
- unsigned long node_bitmap[BITS_TO_LONGS(NM_MAX_NODES)];
-} nm_cluster;
-
-
-typedef struct _nm_group_inode_private
-{
- struct inode *inode;
- struct list_head net_list;
- struct list_head disk_list;
- cluster_disk disk;
- int state;
- spinlock_t bitmap_lock;
- unsigned long slot_bitmap[BITS_TO_LONGS(NM_MAX_NODES)];
-} nm_group_inode_private;
-
/* TODO: move this */
struct sock;
typedef struct _net_inode_private
@@ -102,24 +64,28 @@
void (*orig_data_ready)(struct sock *sk, int bytes);
} net_inode_private;
-typedef struct _nm_node_inode_private
-{
- struct inode *inode;
- nm_node_info node;
- struct list_head ip_hash;
- net_inode_private net;
-} nm_node_inode_private;
+/* XXX this is now different from the userspace stuff, but that's ok,
+ * because they're not used by an ioctl interface anymore. userspace
+ * should move to something more natural. */
+struct nm_node {
+ struct kobject nd_kobj;
+ char nd_name[NM_MAX_NAME_LEN+1]; /* replace? */
+ __u8 nd_num;
+ /* only one address per node, as attributes, for now. both
+ * in network order */
+ __u32 nd_ipv4_address;
+ __u16 nd_ipv4_port;
+ /* we're making simple assertions that a node can only have one network
+ * identity and report at one place in a heartbeat */
+ net_inode_private nd_net_inode_private;
+};
/* callback stuff */
enum {
NM_NODE_ADD_CB = 0,
NM_NODE_DEL_CB,
- NM_GROUP_ADD_CB,
- NM_GROUP_DEL_CB,
- NM_GROUP_NODE_ADD_CB,
- NM_GROUP_NODE_DEL_CB,
NM_NUM_CB
};
@@ -129,67 +95,15 @@
{
struct list_head list;
nm_cb_func *func;
- //void (*func)(void *, void *, u8);
} nm_callback_func;
+u8 nm_this_node(void);
+int nm_register_callback(unsigned int type, void (*func)(void *, void *, u8));
+int nm_unregister_callback(unsigned int type, void (*func)(void *, void *, u8));
+struct nm_node * nm_get_node_by_num(u8 node_num);
+struct nm_node * nm_get_node_by_ip(u32 addr);
+void nm_node_put(struct nm_node *node);
-u8 nm_this_node(struct inode *group);
-nm_cluster * nm_get_cluster(void);
-int nm_register_callback(int type, void (*func)(void *, void *, u8));
-int nm_unregister_callback(int type, void (*func)(void *, void *, u8));
-int nm_get_group_num_nodes(struct inode *group);
-int nm_make_group_ready(struct inode *group);
-void * nm_iterate_group_disk_slots(struct inode *group, int *idx);
-int nm_remove_node_from_group(struct inode *group, struct inode *node);
-int nm_create_group(char *buf, nm_op *data);
-int nm_add_node_to_group(char *buf, nm_op *data);
-
-extern char *nm_nodename;
-
-
-struct inode * nm_get_group_by_num(u8 group_num);
-struct inode * nm_get_node_by_num(u8 node_num);
-struct inode * __nm_get_node_by_name(const char *node_name, int dir);
-struct inode * nm_get_node_by_ip(u32 addr);
-struct inode * nm_get_group_node_by_index(struct inode *group, u8 index);
-
-static inline struct inode * nm_get_node_by_name(char *node_name)
-{
- return __nm_get_node_by_name(node_name, 0);
-}
-static inline struct inode * nm_get_group_by_name(const char *group_name)
-{
- return __nm_get_node_by_name(group_name, 1);
-}
-
-
-static inline int nm_get_node_global_index(struct inode *node)
-{
- return (node->i_ino - NM_NODE_INODE_START);
-}
-static inline int nm_get_group_global_index(struct inode *group)
-{
- return (group->i_ino - NM_GROUP_INODE_START);
-}
-
-static inline int nm_valid_ino(int ino)
-{
-#if 0
- // these should never be referred to in kernel
- if (ino >= NM_Cluster && ino <= NM_Group)
- return 1;
-#endif
- if (ino >= NM_NODE_INODE_START &&
- ino < NM_NODE_INODE_START + NM_MAX_NODES)
- return 1;
- if (ino >= NM_GROUP_INODE_START &&
- ino < NM_GROUP_INODE_START + NM_MAX_NODES)
- return 1;
- return 0;
-}
-
-
-
#endif /* CLUSTER_NODEMANAGER_H */
Modified: branches/usysfsify/fs/ocfs2/cluster/ocfs2_nodemanager.h
===================================================================
--- branches/usysfsify/fs/ocfs2/cluster/ocfs2_nodemanager.h 2005-03-15 22:35:08 UTC (rev 1978)
+++ branches/usysfsify/fs/ocfs2/cluster/ocfs2_nodemanager.h 2005-03-15 23:35:32 UTC (rev 1979)
@@ -43,29 +43,6 @@
#define NM_GROUP_INODE_START 200000
#define NM_NODE_INODE_START 100000
-
-
-typedef struct _nm_network_iface
-{
- __u16 ip_port; /* for simplicity, just define exactly one port for this if */
- __u16 ip_version;
- union {
- __u32 ip_addr4; /* IPv4 address in NBO */
- __u32 ip_addr6[4]; /* IPv6 address in NBO */
- } addr_u;
-} nm_network_iface;
-
-typedef struct _nm_node_info
-{
- __u8 node_num;
- __u8 pad1;
- __u16 pad2;
- __u32 pad3;
- char node_name[NM_MAX_NAME_LEN+1];
- char pad4[63];
- nm_network_iface ifaces[NM_MAX_IFACES];
-} nm_node_info;
-
/* transaction file nm_op stuff */
#define NM_OP_MAGIC 0xbeaf
@@ -101,7 +78,7 @@
union {
__u8 index;
char name[NM_MAX_NAME_LEN+1];
- nm_node_info node;
+// nm_node_info node;
nm_group_change gc;
} arg_u;
} nm_op;
Modified: branches/usysfsify/fs/ocfs2/cluster/tcp.c
===================================================================
--- branches/usysfsify/fs/ocfs2/cluster/tcp.c 2005-03-15 22:35:08 UTC (rev 1978)
+++ branches/usysfsify/fs/ocfs2/cluster/tcp.c 2005-03-15 23:35:32 UTC (rev 1979)
@@ -175,7 +175,6 @@
/////////////////////
static void net_shutdown(void);
static int net_startup(void);
-static int __init net_driver_entry (void);
static int net_init_driver(void);
static void __exit net_driver_exit (void);
static int net_add_handler(net_msg_handler *nmh);
@@ -187,8 +186,7 @@
static int net_process_message(struct socket *sock, net_msg *hdr);
static int net_ioctl (struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg);
-static int net_sock_addref_or_connect(struct inode *inode,
- struct socket **sock_ret);
+static int net_sock_addref_or_connect(u8 node_num, struct socket **sock_ret);
static void net_sock_decref(struct inode *inode, int error);
//////////////////////
@@ -218,12 +216,6 @@
u32 net_driver_state = NET_DRIVER_UNINITED;
u32 net_num_dispatched = 0;
-
-/*
- * net_driver_entry()
- *
- * Driver entry point. Called on insmod.
- */
static int __init net_driver_entry (void)
{
struct proc_dir_entry *de;
@@ -613,7 +605,7 @@
}
int net_send_message_iov(u32 msg_type, u32 key, struct iovec *caller_iov,
- size_t caller_iovlen, struct inode *inode,
+ size_t caller_iovlen, u8 target_node,
int *status)
{
int ret;
@@ -627,11 +619,6 @@
BUG_ON(current == net_recv_task);
- if (!inode || !inode->u.generic_ip) {
- netprintk0("bad inode, cannot send message\n");
- ret = -EINVAL;
- goto out;
- }
if (caller_iovlen == 0) {
netprintk0("bad iovec array length\n");
ret = -EINVAL;
@@ -647,7 +634,7 @@
goto out;
}
- ret = net_sock_addref_or_connect(inode, &sock);
+ ret = net_sock_addref_or_connect(target_node, &sock);
if (ret)
goto out;
@@ -754,13 +741,14 @@
* - status will not be set on return code != 0
*/
int net_send_message(u32 msg_type, u32 key, void *data, u32 len,
- struct inode *inode, int *status)
+ u8 target_node, int *status)
{
struct iovec iov = {
.iov_base = data,
.iov_len = len,
};
- return net_send_message_iov(msg_type, key, &iov, 1, inode, status);
+ return net_send_message_iov(msg_type, key, &iov, 1,
+ target_node, status);
}
EXPORT_SYMBOL(net_send_message);
@@ -1436,24 +1424,23 @@
return empty;
}
-static int net_sock_addref_or_connect(struct inode *inode,
- struct socket **sock_ret)
+static int net_sock_addref_or_connect(u8 target_node, struct socket **sock_ret)
{
nm_node_inode_private *priv;
- nm_node_info *node;
+ struct nm_node *node = NULL;
net_inode_private *net = NULL;
struct socket *sock = NULL;
int ret = 0, wait = 0, set_pending = 0;
struct waiting_for_sock wfs;
- priv = inode->u.generic_ip;
- if (!priv) {
- netprintk("bad inode %p\n", inode);
+ /* XXX think about passing refs around.. */
+ node = get_node_by_num(target_node);
+ if (node == NULL) {
+ netprintk("node %u unknown\n", target_node);
ret = -EINVAL;
goto out;
}
- net = &priv->net;
- node = &priv->node;
+ net = &node->nd_net_inode_private;
spin_lock_bh(&net->sock_lock);
if (net->sock && !net->sock_pending) {
@@ -1513,6 +1500,8 @@
}
if (sock)
*sock_ret = sock;
+ if (node)
+ nm_node_put(node);
BUG_ON(ret == 0 && sock == NULL);
netprintk("addref for net %p gave %d\n", net, ret);
@@ -1624,7 +1613,3 @@
BUG_ON(sock == NULL);
return sock;
}
-
-MODULE_LICENSE("GPL");
-module_init (net_driver_entry);
-module_exit (net_driver_exit);
Modified: branches/usysfsify/fs/ocfs2/cluster/tcp.h
===================================================================
--- branches/usysfsify/fs/ocfs2/cluster/tcp.h 2005-03-15 22:35:08 UTC (rev 1978)
+++ branches/usysfsify/fs/ocfs2/cluster/tcp.h 2005-03-15 23:35:32 UTC (rev 1979)
@@ -213,7 +213,8 @@
int net_register_handler(u32 msg_type, u32 key, int flags,
u32 max_len, net_msg_handler_func *func, void *data);
int net_init_tcp_sock(struct inode *inode);
-int net_send_message(u32 msg_type, u32 key, void *data, u32 len, struct inode *inode, int *status);
+int net_send_message(u32 msg_type, u32 key, void *data, u32 len,
+ u8 target_node, int *status);
int net_send_message_iov(u32 msg_type, u32 key, struct iovec *iov,
- size_t iovlen, struct inode *inode, int *status);
+ size_t iovlen, u8 target_node, int *status);
int net_broadcast_message(u32 msg_type, u32 key, void *data, u32 len, struct inode *group);
Modified: branches/usysfsify/fs/ocfs2/dlm/dlmmod.c
===================================================================
--- branches/usysfsify/fs/ocfs2/dlm/dlmmod.c 2005-03-15 22:35:08 UTC (rev 1978)
+++ branches/usysfsify/fs/ocfs2/dlm/dlmmod.c 2005-03-15 23:35:32 UTC (rev 1979)
@@ -1285,17 +1285,11 @@
}
static dlm_ctxt *dlm_alloc_ctxt(const char *domain,
- struct inode *group,
u32 key)
{
int i;
dlm_ctxt *dlm = NULL;
- /* if for some reason we can't get a reference on the group
- * inode (required) then don't even try the rest. */
- if (!igrab(group))
- goto leave;
-
dlm = kmalloc(sizeof(dlm_ctxt), GFP_KERNEL);
if (!dlm) {
dlmprintk0("could not allocate dlm_ctxt\n");
@@ -1341,10 +1335,6 @@
INIT_LIST_HEAD(&dlm->mle_hb_events);
init_rwsem(&dlm->recovery_sem);
- /* this eats the reference we got above. */
- dlm->group = group;
- dlm->group_index = nm_this_node(group);
-
dlm->joining_node = DLM_LOCK_RES_OWNER_UNKNOWN;
dlm->reco.new_master = NM_INVALID_SLOT_NUM;
@@ -1369,26 +1359,17 @@
* dlm_register_domain: one-time setup per "domain"
*/
dlm_ctxt * dlm_register_domain(const char *domain,
- const char *group_name,
u32 key)
{
int ret;
dlm_ctxt *dlm = NULL;
dlm_ctxt *new_ctxt = NULL;
- struct inode *group = NULL;
if (strlen(domain) > NM_MAX_NAME_LEN) {
dlmprintk0("domain name length too long\n");
goto leave;
}
- group = nm_get_group_by_name(group_name);
- if (!group) {
- dlmprintk("no nm group %s for domain %s!\n",
- group_name, domain);
- goto leave;
- }
-
dlmprintk("register called for domain \"%s\"\n", domain);
retry:
@@ -1421,7 +1402,7 @@
if (!new_ctxt) {
spin_unlock(&dlm_domain_lock);
- new_ctxt = dlm_alloc_ctxt(domain, group, key);
+ new_ctxt = dlm_alloc_ctxt(domain, key);
if (new_ctxt)
goto retry;
goto leave;
@@ -1446,9 +1427,6 @@
if (new_ctxt)
dlm_free_ctxt_mem(new_ctxt);
- if (group)
- iput(group);
-
return dlm;
}
EXPORT_SYMBOL(dlm_register_domain);
Modified: branches/usysfsify/fs/ocfs2/dlm/dlmmod.h
===================================================================
--- branches/usysfsify/fs/ocfs2/dlm/dlmmod.h 2005-03-15 22:35:08 UTC (rev 1978)
+++ branches/usysfsify/fs/ocfs2/dlm/dlmmod.h 2005-03-15 23:35:32 UTC (rev 1979)
@@ -212,9 +212,7 @@
spinlock_t spinlock;
struct rw_semaphore recovery_sem;
char *name;
- struct inode *group;
u32 key;
- u8 group_index;
u8 joining_node;
unsigned long live_nodes_map[BITS_TO_LONGS(NM_MAX_NODES)];
unsigned long domain_map[BITS_TO_LONGS(NM_MAX_NODES)];
@@ -691,7 +689,6 @@
}
dlm_ctxt * dlm_register_domain(const char *domain,
- const char *group_name,
u32 key);
void dlm_unregister_domain(dlm_ctxt *dlm);
void dlm_get(dlm_ctxt *dlm);
@@ -755,8 +752,8 @@
int dlm_nm_init(dlm_ctxt *dlm);
int dlm_heartbeat_init(dlm_ctxt *dlm);
-void dlm_hb_node_down_cb(struct inode *group, struct inode *node, int idx, void *data);
-void dlm_hb_node_up_cb(struct inode *group, struct inode *node, int idx, void *data);
+void dlm_hb_node_down_cb(struct nm_node *node, int idx, void *data);
+void dlm_hb_node_up_cb(struct nm_node *node, int idx, void *data);
int dlm_hb_node_dead(dlm_ctxt *dlm, int node);
int __dlm_hb_node_dead(dlm_ctxt *dlm, int node);
@@ -778,9 +775,9 @@
void dlm_init_lock(dlm_lock *newlock, int type, u8 node, u64 cookie);
void dlm_mle_node_down(dlm_ctxt *dlm, dlm_master_list_entry *mle,
- struct inode *group, struct inode *node, int idx);
+ struct nm_node *node, int idx);
void dlm_mle_node_up(dlm_ctxt *dlm, dlm_master_list_entry *mle,
- struct inode *group, struct inode *node, int idx);
+ struct nm_node *node, int idx);
int dlm_do_assert_master(dlm_ctxt *dlm, const char *lockname,
unsigned int namelen, void *nodemap);
Modified: branches/usysfsify/fs/ocfs2/dlmglue.c
===================================================================
--- branches/usysfsify/fs/ocfs2/dlmglue.c 2005-03-15 22:35:08 UTC (rev 1978)
+++ branches/usysfsify/fs/ocfs2/dlmglue.c 2005-03-15 23:35:32 UTC (rev 1979)
@@ -1617,10 +1617,10 @@
/* used by the dlm code to make message headers unique, each
* node in this domain must agree on this. */
- dlm_key = crc32(0, osb->group_name, strlen(osb->group_name));
+ dlm_key = crc32(0, osb->uuid, strlen(osb->uuid));
- /* for now, group_name == domain */
- dlm = dlm_register_domain(osb->group_name, osb->group_name, dlm_key);
+ /* for now, uuid == domain */
+ dlm = dlm_register_domain(osb->uuid, dlm_key);
if (!dlm) {
/* This is a best guess on return value... */
status = -ENOMEM;
Modified: branches/usysfsify/fs/ocfs2/heartbeat.c
===================================================================
--- branches/usysfsify/fs/ocfs2/heartbeat.c 2005-03-15 22:35:08 UTC (rev 1978)
+++ branches/usysfsify/fs/ocfs2/heartbeat.c 2005-03-15 23:35:32 UTC (rev 1979)
@@ -34,6 +34,7 @@
#include <cluster/util.h>
#include <cluster/clcommon.h>
#include <cluster/heartbeat.h>
+#include <cluster/nodemanager.h>
#include <dlm/dlmcommon.h>
#include "ocfs_log.h"
@@ -53,6 +54,7 @@
#define OCFS2_HB_NODE_DOWN_PRI (0x0000002)
#define OCFS2_HB_NODE_UP_PRI OCFS2_HB_NODE_DOWN_PRI
+#if 0
static void ocfs2_hb_node_down_cb(struct inode *group,
struct inode *node,
int node_num,
@@ -61,6 +63,7 @@
struct inode *node,
int node_num,
void *data);
+#endif
static inline void __ocfs_node_map_set_bit(ocfs_node_map *map,
int bit);
@@ -79,16 +82,12 @@
ocfs_node_map_init(&osb->umount_map);
}
-static void ocfs2_hb_node_down_cb(struct inode *group,
- struct inode *node,
+static void ocfs2_hb_node_down_cb(struct nm_node *node,
int node_num,
void *data)
{
ocfs_super *osb = data;
- if (osb->group_inode != group)
- return;
-
OCFS_ASSERT(osb->node_num != node_num);
printk("ocfs2: node down event for %d\n", node_num);
@@ -104,16 +103,12 @@
ocfs_recovery_thread(osb, node_num);
}
-static void ocfs2_hb_node_up_cb(struct inode *group,
- struct inode *node,
+static void ocfs2_hb_node_up_cb(struct nm_node *node,
int node_num,
void *data)
{
ocfs_super *osb = data;
- if (osb->group_inode != group)
- return;
-
OCFS_ASSERT(osb->node_num != node_num);
printk("ocfs2: node up event for %d\n", node_num);
Modified: branches/usysfsify/fs/ocfs2/ocfs.h
===================================================================
--- branches/usysfsify/fs/ocfs2/ocfs.h 2005-03-15 22:35:08 UTC (rev 1978)
+++ branches/usysfsify/fs/ocfs2/ocfs.h 2005-03-15 23:35:32 UTC (rev 1979)
@@ -373,8 +373,6 @@
ocfs_alloc_stats alloc_stats;
char dev_str[20]; /* "major,minor" of the device */
- char *group_name;
- struct inode *group_inode;
dlm_ctxt *dlm;
ocfs2_lock_res super_lockres;
Modified: branches/usysfsify/fs/ocfs2/super.c
===================================================================
--- branches/usysfsify/fs/ocfs2/super.c 2005-03-15 22:35:08 UTC (rev 1978)
+++ branches/usysfsify/fs/ocfs2/super.c 2005-03-15 23:35:32 UTC (rev 1979)
@@ -129,12 +129,12 @@
#endif /* Linux 2.4 stuff */
-static int ocfs_parse_options (char *options, u32 * uid, u32 * gid, int * reclaim_id, char **group_name);
+static int ocfs_parse_options (char *options, u32 * uid, u32 * gid, int * reclaim_id);
static int __init ocfs_driver_entry (void);
static void __exit ocfs_driver_exit (void);
static void ocfs_put_super (struct super_block *sb);
static int ocfs_mount_volume (struct super_block *sb, int reclaim_id,
- char **group_name, struct inode *root);
+ struct inode *root);
static void ocfs_dismount_volume(struct super_block *sb);
static int ocfs_initialize_mem_lists (void);
static void ocfs_free_mem_lists (void);
@@ -151,7 +151,7 @@
static int ocfs_init_global_system_inodes(ocfs_super *osb);
static int ocfs_init_local_system_inodes(ocfs_super *osb);
static int ocfs_release_system_inodes(ocfs_super *osb);
-static int ocfs2_fill_local_node_info(ocfs_super *osb, char **group_name);
+static int ocfs2_fill_local_node_info(ocfs_super *osb);
static int ocfs2_complete_mount_recovery(ocfs_super *osb);
static int ocfs_check_volume(ocfs_super * osb);
static int ocfs_verify_volume(ocfs2_dinode *di, struct buffer_head *bh,
@@ -316,12 +316,11 @@
u32 uid = current->fsuid;
u32 gid = current->fsgid;
int reclaim_id;
- char *group_name = NULL;
ocfs_super *osb = NULL;
LOG_ENTRY_ARGS ("%p, %p, %i", sb, data, silent);
- if (ocfs_parse_options (data, &uid, &gid, &reclaim_id, &group_name) != 0) {
+ if (ocfs_parse_options (data, &uid, &gid, &reclaim_id) != 0) {
status = -EINVAL;
LOG_ERROR_STR ("ocfs_read_super: bad mount option");
goto read_super_error;
@@ -334,7 +333,7 @@
/* this is needed to support O_LARGE_FILE */
sb->s_maxbytes = OCFS_LINUX_MAX_FILE_SIZE;
- status = ocfs_mount_volume (sb, reclaim_id, &group_name, NULL);
+ status = ocfs_mount_volume (sb, reclaim_id, NULL);
if (status < 0)
goto read_super_error;
@@ -376,9 +375,6 @@
LOG_EXIT_STATUS(status);
}
- if (group_name)
- kfree(group_name);
-
LOG_EXIT_STATUS(status);
return status;
@@ -391,9 +387,6 @@
if (inode)
iput (inode);
- if (group_name)
- kfree(group_name);
-
LOG_EXIT_STATUS(status);
return status;
}
@@ -430,12 +423,11 @@
*
* e.g., gid=9999,uid=9999,[no]cache,reclaimid
*/
-static int ocfs_parse_options (char *options, u32 * uid, u32 * gid, int * reclaim_id, char **group_name)
+static int ocfs_parse_options (char *options, u32 * uid, u32 * gid, int * reclaim_id)
{
char *c;
char *value;
int ret = 1;
- int size;
LOG_ENTRY ();
@@ -476,24 +468,6 @@
}
} else if (!strcmp (c, "reclaimid")) {
*reclaim_id = 1;
- } else if (!strcmp(c, "group")) {
- if (!value || !*value) {
- LOG_ERROR_STR
- ("group option requires an argument");
- goto bail;
- }
- LOG_TRACE_ARGS("group name passed = %s\n", value);
-
- size = strlen(value) + 1;
- *group_name = kmalloc(size, GFP_KERNEL);
- if (!(*group_name)) {
- LOG_ERROR_STATUS(-ENOMEM);
- goto bail;
- }
- memset(*group_name, 0, size);
- printk("ocfs2: group name passed = %s, size = %d\n",
- value, size);
- strcpy(*group_name, value);
} else {
LOG_ERROR_ARGS ("Invalid mount option: %s", c);
goto bail;
@@ -844,44 +818,24 @@
return 0;
}
-static int ocfs2_fill_local_node_info(ocfs_super *osb, char **group_name)
+/* ocfs2 1.0 only allows one cluster and node identity per kernel image. */
+static int ocfs2_fill_local_node_info(ocfs_super *osb)
{
- int status, i;
- struct inode *group = NULL;
- char *p;
+ int status;
- if (group_name) {
- osb->group_name = *group_name;
- *group_name = NULL;
- } else {
- osb->group_name = kmalloc(NM_MAX_NAME_LEN + 1, GFP_KERNEL);
- if (!osb->group_name) {
- status = -ENOMEM;
- LOG_ERROR_STATUS(status);
- goto bail;
- }
- memset(osb->group_name, 0, NM_MAX_NAME_LEN + 1);
- for (i = 0, p = osb->uuid; i < MAX_VOL_ID_LENGTH; i++, p += 2)
- sprintf(p, "%02X", osb->uuid[i]);
- }
-
- group = nm_get_group_by_name(osb->group_name);
- if (!group) {
- printk("ocfs2: could not join group \"%s\"\n",
- osb->group_name);
- status = -EINVAL;
+ /* XXX hold a ref on the node while mounted? easy enough, if
+ * desirable. */
+ osb->node_num = nm_this_node();
+ if (osb->node_num == NM_MAX_NODES) {
+ printk("ocfs2: could not find this host's node number\n");
+ status = -ENOENT;
goto bail;
}
- osb->group_inode = group;
- osb->node_num = nm_this_node(group);
+ printk("ocfs2: I am node %d\n", osb->node_num);
- printk("ocfs2: I am node %d, a member of group %s\n", osb->node_num,
- osb->group_name);
-
status = 0;
bail:
-
return status;
}
@@ -890,7 +844,7 @@
*
*/
static int ocfs_mount_volume (struct super_block *sb, int reclaim_id,
- char **group_name, struct inode *root)
+ struct inode *root)
{
int status, sector_size;
int unlock_super = 0;
@@ -930,7 +884,7 @@
goto leave;
}
- status = ocfs2_fill_local_node_info(osb, group_name);
+ status = ocfs2_fill_local_node_info(osb);
if (status < 0) {
LOG_ERROR_STATUS (status);
goto leave;
@@ -1500,17 +1454,12 @@
if (osb->slot_info)
ocfs2_free_slot_info(osb->slot_info);
- if (osb->group_inode)
- iput(osb->group_inode);
-
/* FIXME
* This belongs in journal shutdown, but because we have to
* allocate osb->journal at the start of ocfs_initalize_osb(),
* we free it here.
*/
kfree(osb->journal);
- if (osb->group_name)
- kfree(osb->group_name);
if (osb->local_alloc_copy)
kfree(osb->local_alloc_copy);
memset (osb, 0, sizeof (ocfs_super));
Modified: branches/usysfsify/fs/ocfs2/vote.c
===================================================================
--- branches/usysfsify/fs/ocfs2/vote.c 2005-03-15 22:35:08 UTC (rev 1978)
+++ branches/usysfsify/fs/ocfs2/vote.c 2005-03-15 23:35:32 UTC (rev 1979)
@@ -223,7 +223,6 @@
u64 blkno;
enum ocfs2_vote_request request;
struct inode *inode = NULL;
- struct inode *remote_node;
ocfs2_msg_hdr *hdr = &msg->v_hdr;
ocfs2_response_msg response;
@@ -304,21 +303,15 @@
response.r_hdr.h_node_num = htonl(osb->node_num);
response.r_response = htonl(vote_response);
- remote_node = nm_get_node_by_num(node_num);
- if (!remote_node) {
- LOG_ERROR_ARGS("Couldn't get inode for node %u!\n", node_num);
- } else {
- net_status = net_send_message(OCFS2_MESSAGE_TYPE_RESPONSE,
- osb->net_key,
- &response,
- sizeof(ocfs2_response_msg),
- remote_node,
- NULL);
- if (net_status < 0)
- LOG_ERROR_ARGS("message to node %u fails with error "
- "%d!\n", node_num, net_status);
- iput(remote_node);
- }
+ net_status = net_send_message(OCFS2_MESSAGE_TYPE_RESPONSE,
+ osb->net_key,
+ &response,
+ sizeof(ocfs2_response_msg),
+ node_num,
+ NULL);
+ if (net_status < 0)
+ LOG_ERROR_ARGS("message to node %u fails with error "
+ "%d!\n", node_num, net_status);
if (inode)
iput(inode);
@@ -510,7 +503,6 @@
{
int status, i, remote_err;
ocfs2_net_wait_ctxt *w = NULL;
- struct inode *remote_node;
int dequeued = 0;
LOG_ENTRY();
@@ -534,21 +526,13 @@
i);
ocfs_node_map_set_bit(osb, &w->n_node_map, i);
- remote_node = nm_get_node_by_num(i);
- if (!remote_node) {
- status = -EINVAL;
- LOG_ERROR_STATUS(status);
- goto bail;
- }
-
remote_err = 0;
status = net_send_message(OCFS2_MESSAGE_TYPE_VOTE,
osb->net_key,
request,
sizeof(*request),
- remote_node,
+ i,
&remote_err);
- iput(remote_node);
if (status == -ETIMEDOUT) {
LOG_TRACE_ARGS("remote node %d timed out!\n",
i);
More information about the Ocfs2-commits
mailing list