[Ocfs2-commits] zab commits r1979 - in branches/usysfsify/fs/ocfs2: . cluster dlm

svn-commits at oss.oracle.com svn-commits at oss.oracle.com
Tue Mar 15 17:35:34 CST 2005


Author: zab
Date: 2005-03-15 17:35:32 -0600 (Tue, 15 Mar 2005)
New Revision: 1979

Modified:
   branches/usysfsify/fs/ocfs2/cluster/Makefile
   branches/usysfsify/fs/ocfs2/cluster/clcommon.h
   branches/usysfsify/fs/ocfs2/cluster/heartbeat.c
   branches/usysfsify/fs/ocfs2/cluster/heartbeat.h
   branches/usysfsify/fs/ocfs2/cluster/nodemanager.c
   branches/usysfsify/fs/ocfs2/cluster/nodemanager.h
   branches/usysfsify/fs/ocfs2/cluster/ocfs2_nodemanager.h
   branches/usysfsify/fs/ocfs2/cluster/tcp.c
   branches/usysfsify/fs/ocfs2/cluster/tcp.h
   branches/usysfsify/fs/ocfs2/dlm/dlmmod.c
   branches/usysfsify/fs/ocfs2/dlm/dlmmod.h
   branches/usysfsify/fs/ocfs2/dlmglue.c
   branches/usysfsify/fs/ocfs2/heartbeat.c
   branches/usysfsify/fs/ocfs2/ocfs.h
   branches/usysfsify/fs/ocfs2/super.c
   branches/usysfsify/fs/ocfs2/vote.c
Log:
o this is the current state of the work-in-progress to use usysfs for
  managing state in ocfs2{,/cluster}.  This doesn't even build and there
  is quite a bit left to transition.  This is just so joel and I can keep
  in sync-ish.


Modified: branches/usysfsify/fs/ocfs2/cluster/Makefile
===================================================================
--- branches/usysfsify/fs/ocfs2/cluster/Makefile	2005-03-15 22:35:08 UTC (rev 1978)
+++ branches/usysfsify/fs/ocfs2/cluster/Makefile	2005-03-15 23:35:32 UTC (rev 1979)
@@ -16,6 +16,11 @@
 EXTRA_CFLAGS += -DMISSING_SOCK_CREATE_LITE
 endif
 
+# this is going to get exciting if usysfs is merged
+EXTRA_CFLAGS += -I$(OUR_TOPDIR)/fs/usysfs/
+# XXX should be folded into the kapi stuff
+EXTRA_CFLAGS += -I$(OUR_TOPDIR)/fs/usysfs/compatinclude
+
 ifeq ($(KERNELRELEASE),)
 ifeq ($(KERNEL_26),)
 COMPAT_LIBFS := compat_libfs.o
@@ -24,15 +29,10 @@
 
 INSTALL_MOD_DIR := fs/ocfs2
 
-obj-m := ocfs2_heartbeat.o ocfs2_nodemanager.o ocfs2_tcp.o
+obj-m := ocfs2_nodemanager.o
 
-ocfs2_nodemanager-objs := nodemanager.o util.o transaction_file.o $(COMPAT_LIBFS)
+ocfs2_nodemanager-objs := nodemanager.o heartbeat.o tcp.o $(COMPAT_LIBFS)
 
-ocfs2_heartbeat-objs := heartbeat.o util.o transaction_file.o $(COMPAT_LIBFS)
-
-ocfs2_tcp-objs := gsd.o tcp.o util.o $(COMPAT_LIBFS)
-
-
 ifeq ($(KERNELRELEASE),)
 #
 # Called from a regular "make".
@@ -44,7 +44,6 @@
 	heartbeat.c		\
 	nodemanager.c		\
 	tcp.c			\
-	transaction_file.c	\
 	util.c
 
 HEADERS = 			\
@@ -58,7 +57,6 @@
 	ocfs2_nodemanager.h	\
 	ocfs2_tcp.h		\
 	tcp.h			\
-	transaction_file.h	\
 	util.h
 
 DIST_FILES = $(SOURCES) $(HEADERS)

Modified: branches/usysfsify/fs/ocfs2/cluster/clcommon.h
===================================================================
--- branches/usysfsify/fs/ocfs2/cluster/clcommon.h	2005-03-15 22:35:08 UTC (rev 1978)
+++ branches/usysfsify/fs/ocfs2/cluster/clcommon.h	2005-03-15 23:35:32 UTC (rev 1979)
@@ -28,7 +28,6 @@
 #ifndef CLUSTER_CLCOMMON_H
 #define CLUSTER_CLCOMMON_H
 
-typedef struct _nm_ctxt nm_ctxt;
 typedef struct _heartbeat_ctxt heartbeat_ctxt;
 
 #define CLUSTER_DISK_UUID_LEN      32      // 16 byte binary == 32 char hex string
@@ -38,7 +37,6 @@
 	// uuid of disk
 	char uuid[CLUSTER_DISK_UUID_LEN+1];
 	// all the rest are for heartbeat
-	dev_t dev;
 	u32 blocksize_bits;
 	u32 num_blocks;
 	u64 start_block;

Modified: branches/usysfsify/fs/ocfs2/cluster/heartbeat.c
===================================================================
--- branches/usysfsify/fs/ocfs2/cluster/heartbeat.c	2005-03-15 22:35:08 UTC (rev 1978)
+++ branches/usysfsify/fs/ocfs2/cluster/heartbeat.c	2005-03-15 23:35:32 UTC (rev 1979)
@@ -81,17 +81,14 @@
 
 
 static void hb_teardown(void);
-static void hb_nm_group_node_add_cb(void *ptr1, void *ptr2, u8 idx);
-static void hb_nm_group_node_del_cb(void *ptr1, void *ptr2, u8 idx);
 static void hb_nm_node_add_cb(void *ptr1, void *ptr2, u8 idx);
 static void hb_nm_group_add_cb(void *ptr1, void *ptr2, u8 idx);
 static int hb_init_disk_hb_group(struct inode *group, dev_t dev, u32 bits, 
 				 u32 blocks, u64 start);
 static ssize_t write_disk(struct file *file, char *buf, size_t size);
-static void hb_do_callbacks(int type, void *ptr1, void *ptr2, int idx);
+static void hb_do_callbacks(int type, struct nm_group *group,
+			    struct nm_node *node, int idx);
 static void hb_end_buffer_io_sync(struct buffer_head *bh, int uptodate);
-static int hb_do_node_down(struct inode *group, struct inode *node, int idx);
-static int hb_do_node_up(struct inode *group, struct inode *node, int idx);
 static int hb_do_disk_heartbeat(void *page);
 static int hb_thread(void *data);
 static void hb_complete_thread(void);
@@ -119,8 +116,27 @@
 #define hbprintk0(x)           
 #endif
 
+#define HB_THREAD_MS                  2000   // every 2 seconds
 
+struct hb_disk_slot
+{
+	struct inode *inode;
+	struct buffer_head *bh;
+	struct list_head list;
+	unsigned long last_time;
+	u16 margin;
+	u16 state;
+};
 
+struct hb_region {
+	struct kobj		hr_kobj;
+	u64			hr_block_bytes;
+	u64			hr_start_block;
+	u64			hr_blocks;
+	dev_t			hr_dev;
+	struct hb_disk_slot	*hr_slots;
+};
+
 static void hb_end_buffer_io_sync(struct buffer_head *bh, int uptodate)
 {
 	if (uptodate)
@@ -132,24 +148,17 @@
 	unlock_buffer(bh);
 }
 
-
-
-static int hb_do_node_down(struct inode *group, struct inode *node, int idx)
+static int hb_do_node_down(struct nm_node *node, int idx)
 {
-	//int ret;
-	hbprintk("hb_do_node_down: group=%lu, node=%lu\n", group->i_ino, 
-		 node->i_ino);
-	hbprintk("NOT removing node from group\n");
-	//ret = nm_remove_node_from_group(group, node);
-	hb_do_callbacks(HB_NODE_DOWN_CB, group, node, idx);
+	hbprintk("hb_do_node_down:  node=%u\n", node->nd_num);
+	hb_do_callbacks(HB_NODE_DOWN_CB, node, idx);
 	return 0;
 }
 
-static int hb_do_node_up(struct inode *group, struct inode *node, int idx)
+static int hb_do_node_up(struct nm_node *node, int idx)
 {
-	hbprintk("hb_do_node_up: group=%lu, node=%lu\n", group->i_ino, 
-		 node->i_ino);
-	hb_do_callbacks(HB_NODE_UP_CB, group, node, idx);
+	hbprintk("hb_do_node_up: node=%u\n", node->nd_num);
+	hb_do_callbacks(HB_NODE_UP_CB, node, idx);
 	return 0;
 }
 
@@ -227,10 +236,23 @@
 	return bh;
 }
 
+/* 
+ * heartbeats/block_regions/foo/
+ * 				block_bytes
+ * 				start_block
+ * 				num_blocks
+ * 				active_nodes/
+ *
+ * for now require that we have the local node defined
+ *
+ * heartbeat bits need to pin other node bits, I guess
+ */
+
 static int hb_do_disk_heartbeat(void *page)
 {
 	nm_group_inode_private *priv;
-	struct inode *group, *node;
+	struct nm_group *group;
+	struct nm_node *node;
 	struct list_head *iter;
 	struct buffer_head *bh;
 	hb_disk_slot *slot;
@@ -462,6 +484,7 @@
 }
 	
 
+#if 0
 static ssize_t write_disk(struct file *file, char *buf, size_t size)
 {
 	hb_op *data;
@@ -553,67 +576,6 @@
 	return ret;
 }
 
-
-extern struct file_operations transaction_ops;
-
-/*----------------------------------------------------------------------------*/
-/*
- *	populating the filesystem.
- */
-static int hb_fill_super(struct super_block * sb, void * data, int silent)
-{
-	int ret;
-	struct TA_write_ops *ops;
-	static struct tree_descr hb_files[] = {
-		[HB_Disk] = {".disk", &transaction_ops, S_IWUSR},
-		/* last one */ {""}
-	};
-	
-	ops = kmalloc(sizeof(struct TA_write_ops) +
-		      (sizeof(ops->write_op[0])), 
-		      GFP_KERNEL);
-	if (!ops)
-		return -ENOMEM;
-
-	ops->num_ops = HB_WriteOpArraySize;
-	ops->write_op[HB_Disk] = write_disk;
-
-	hbprintk("calling simple_fill_super...\n");
-	ret = simple_fill_super(sb, 0x5551212f, hb_files);
-	if (ret >= 0)
-		TA_GENERIC_SB_MEMBER(sb) = ops;
-	else 
-		kfree(ops);
-	return ret;
-}
-
-/* blindly copied from ocfs2 */
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
-static struct super_block *hb_get_sb(struct file_system_type *fs_type,
-				     int flags,
-				     const char *dev_name,
-				     void *data)
-{
-	return get_sb_single(fs_type, flags, data, hb_fill_super);
-}
-
-static struct file_system_type hb_fs_type = {
-	.owner          = THIS_MODULE,
-	.name           = "hb",
-	.get_sb         = hb_get_sb,
-	.kill_sb        = kill_anon_super,
-};
-#else
-static struct super_block *hb_read_super(struct super_block *sb,
-					 void *data,
-					 int silent)
-{
-	hbprintk("welcome to hb_read_super!!!\n");
-	return (hb_fill_super(sb, data, silent) < 0) ? NULL : sb;
-}
-static DECLARE_FSTYPE (hb_fs_type, "hb", hb_read_super, FS_SINGLE|FS_LITTER);
-#endif
-
 /* TODO: make callbacks all return int */
 static void hb_nm_group_node_add_cb(void *ptr1, void *ptr2, u8 idx)
 {
@@ -715,99 +677,55 @@
 	hbprintk("hb_nm_group_node_del_cb done: %d\n", ret);
 	return;
 }
+#endif
 
-static void hb_nm_node_add_cb(void *ptr1, void *ptr2, u8 idx)
+#if 0
+static void hb_teardown(void)
 {
-	//struct inode *node = ptr1;
+	/* XXX are there thread/cb dependencies? */
+	hb_complete_thread();
+	nm_unregister_callback(NM_GROUP_NODE_DEL_CB, hb_nm_group_node_del_cb);
+	nm_unregister_callback(NM_GROUP_NODE_ADD_CB, hb_nm_group_node_add_cb);
+	nm_unregister_callback(NM_NODE_ADD_CB, hb_nm_node_add_cb);
+	nm_unregister_callback(NM_GROUP_ADD_CB, hb_nm_group_add_cb);
 }
 
-static void hb_nm_group_add_cb(void *ptr1, void *ptr2, u8 idx)
-{
-	struct inode *group = ptr1;
-	nm_group_inode_private *priv;
-
-	hbprintk("hb_nm_group_add_cb: group=%lu, idx=%u\n",
-	       group->i_ino, idx);
-	
-	priv = group->u.generic_ip;
-	if (!priv) {
-		hbprintk("eek! bad group inode!\n");
-		return;
-	}
-
-	spin_lock(&hb_lock);
-	list_add_tail(&priv->net_list, &hb_net_groups);
-	if (priv->disk.uuid[0]) {
-		hbprintk("adding priv=%p inode=%p to disk group list\n", 
-			 priv, group);
-		list_add_tail(&priv->disk_list, &hb_disk_groups);
-	}
-	spin_unlock(&hb_lock);
-}
-
 static int __init init_hb(void)
 {
-	int retval=-1, i;
-	hbprintk("loading heartbeat module: nodename is %s\n", nm_nodename);
+	int ret, i;
 
-	if (proc_mkdir("cluster/heartbeat", 0)) {
-		// ???
-	}
-
-	//hb_net_timestamps = __get_free_page(GFP_KERNEL);
-	//if (!hb_net_timestamps)
-	//	goto done;
-
-	for (i=HB_NODE_DOWN_CB; i<HB_NUM_CB; i++) {
+	for (i = HB_NODE_DOWN_CB; i < HB_NUM_CB; i++) {
 		INIT_LIST_HEAD(&hb_callbacks[i].list);
 		init_MUTEX(&hb_callbacks[i].sem);
 	}
 
-	if (nm_register_callback(NM_GROUP_NODE_DEL_CB, hb_nm_group_node_del_cb))
+	ret = nm_register_callback(NM_GROUP_NODE_DEL_CB,
+	      hb_nm_group_node_del_cb);
+	if (ret)
 		goto done;
-	if (nm_register_callback(NM_GROUP_NODE_ADD_CB, hb_nm_group_node_add_cb))
+	ret = nm_register_callback(NM_GROUP_NODE_ADD_CB,
+				   hb_nm_group_node_add_cb);
+	if (ret)
 		goto done;
-	if (nm_register_callback(NM_NODE_ADD_CB, hb_nm_node_add_cb))
+	ret = nm_register_callback(NM_NODE_ADD_CB, hb_nm_node_add_cb);
+	if (ret)
 		goto done;
-	if (nm_register_callback(NM_GROUP_ADD_CB, hb_nm_group_add_cb))
+	ret = nm_register_callback(NM_GROUP_ADD_CB, hb_nm_group_add_cb);
+	if (ret)
 		goto done;
  
-	if (hb_launch_thread() < 0)
+	ret = hb_launch_thread();
+	if (ret)
 		goto done;
  
-	retval = register_filesystem(&hb_fs_type);
 done:
-	if (retval)
+	if (ret)
 		hb_teardown();
-	return retval;
+	return ret;
 }
+#endif
 
-static void __exit exit_hb(void)
-{
-	hb_complete_thread();
-	hb_teardown();
-	unregister_filesystem(&hb_fs_type);
-	hbprintk("unloading heartbeat module\n");
-}
-
-static void hb_teardown(void)
-{
-	nm_unregister_callback(NM_GROUP_NODE_DEL_CB, hb_nm_group_node_del_cb);
-	nm_unregister_callback(NM_GROUP_NODE_ADD_CB, hb_nm_group_node_add_cb);
-	nm_unregister_callback(NM_NODE_ADD_CB, hb_nm_node_add_cb);
-	nm_unregister_callback(NM_GROUP_ADD_CB, hb_nm_group_add_cb);
-	remove_proc_entry("cluster/heartbeat", NULL);
-
-	/* XXX make sure that we're not being called from any more active
-	 * nm callbacks, then teardown hb_callbacks */
-	//if (hb_net_timestamps)
-	//	kfree(hb_net_timestamps);
-}
-
-MODULE_LICENSE("GPL");
-module_init(init_hb)
-module_exit(exit_hb)
-
+#if 0
 /*
  * hb_fill_node_bytemap()
  * 255 bytes... each byte set to 0 (not mounted) or 1 (mounted)
@@ -887,7 +805,226 @@
 	return 0;
 }
 EXPORT_SYMBOL(hb_fill_node_map);
+#endif
 
+/* 
+ * heartbeat usysfs bits.  The heartbeat set is a default set under
+ * the cluster set in nodemanager.c.
+ */
+
+static struct hb_region *to_hb_region(struct kobject *kobj)
+{
+	return kobj ? container_of(kobj, struct hb_region, hr_kobj) : NULL;
+}
+
+static void hb_region_release(struct kobject *kobj)
+{
+	struct hr_region *reg = to_hr_region(kobj);
+	printk("releasing reg %p\n", reg);
+}
+
+static ssize_t hb_region_block_bytes_read(struct hb_region *reg, char *page)
+{
+	return sprintf(page, "%d\n", node->nd_block_bytes);
+}
+
+static ssize_t hb_region_block_bytes_write(struct hb_region *reg,
+					   const char *page,
+					   size_t count)
+{
+	struct nm_cluster *cluster = to_nm_cluster(node->nd_kobj.parent);
+	unsigned long long tmp;
+	char *p = (char *)page;
+
+	tmp = simple_strtoull(p, &p, 0);
+	if (!p || (*p && (*p != '\n')))
+		return -EINVAL;
+
+	/* XXX compare blocks against dev later on commit? */
+	reg->hr_block_bytes = tmp;
+
+	return count;
+}
+static ssize_t hb_region_start_block_read(struct hb_region *reg, char *page)
+{
+	return sprintf(page, "%d\n", node->nd_start_block);
+}
+
+static ssize_t hb_region_start_block_write(struct hb_region *reg,
+					   const char *page,
+					   size_t count)
+{
+	struct nm_cluster *cluster = to_nm_cluster(node->nd_kobj.parent);
+	unsigned long long tmp;
+	char *p = (char *)page;
+
+	tmp = simple_strtoull(p, &p, 0);
+	if (!p || (*p && (*p != '\n')))
+		return -EINVAL;
+
+	/* XXX compare blocks against dev later on commit? */
+	reg->hr_start_block = tmp;
+
+	return count;
+}
+static ssize_t hb_region_blocks_read(struct hb_region *reg, char *page)
+{
+	return sprintf(page, "%d\n", node->nd_blocks);
+}
+
+static ssize_t hb_region_blocks_write(struct hb_region *reg,
+					   const char *page,
+					   size_t count)
+{
+	struct nm_cluster *cluster = to_nm_cluster(node->nd_kobj.parent);
+	unsigned long long tmp;
+	char *p = (char *)page;
+
+	tmp = simple_strtoull(p, &p, 0);
+	if (!p || (*p && (*p != '\n')))
+		return -EINVAL;
+
+	/* XXX compare blocks against dev later on commit? */
+	reg->hr_blocks = tmp;
+
+	return count;
+}
+struct hb_region_attribute {
+	struct attribute attr;
+	ssize_t (*show)(struct hb_region *, char *);
+	ssize_t (*store)(struct hb_region *, const char *, size_t);
+};
+
+static struct hb_region_attribute hb_region_attr_block_bytes = {
+	.attr	= { .name = "block_bytes", .mode = S_IRUGO | S_IWUSR },
+	.show	= hb_region_block_bytes_read,
+	.store	= hb_region_block_bytes_write,
+};
+static struct hb_region_attribute hb_region_attr_start_block = {
+	.attr	= { .name = "start_block", .mode = S_IRUGO | S_IWUSR },
+	.show	= hb_region_start_block_read,
+	.store	= hb_region_start_block_write,
+};
+static struct hb_region_attribute hb_region_attr_blocks = {
+	.attr	= { .name = "blocks", .mode = S_IRUGO | S_IWUSR },
+	.show	= hb_region_blocks_read,
+	.store	= hb_region_blocks_write,
+};
+
+static struct attribute *hb_region_default_attrs[] = {
+	&hb_region_block_bytes.attr,
+	&hb_region_start_block.attr,
+	&hb_region_blocks.attr,
+//	&hb_region_dev.attr,  /* link to a real device?  hmm. */
+	NULL,
+};
+
+static ssize_t hb_region_attr_show(struct kobject *kobj,
+			     struct attribute *attr,
+			     char *page)
+{
+	struct hb_region *reg = to_hb_region(kobj);
+	struct hb_region_attribute *nm_nodde_attr =
+		container_of(attr, struct hb_region_attribute, attr);
+	ssize_t ret = 0;
+
+	if (hb_region_attr->show)
+		ret = hb_region_attr->show(node, page);
+	return ret;
+}
+
+static ssize_t hb_region_store(struct kobject *kobj,
+			      struct attribute *attr,
+			      const char *page, size_t count)
+{
+	struct hb_region *reg = to_hb_region(kobj);
+	struct hb_region_attribute *hb_region_attr =
+		container_of(attr, struct hb_region_attribute, attr);
+	ssize_t ret = -EINVAL;
+
+	if (hb_region_attr->store)
+		ret = hb_region_attr->store(node, page, count);
+	return ret;
+}
+
+struct sysfs_ops hb_region_sysfs_ops = {
+	.show	= &hb_region_show,
+	.store	= &hb_region_store,
+};
+
+static struct ukobj_type hb_region_type = {
+	.ktype	= {
+		.release	= hb_region_release,
+		.sysfs_ops	= &hb_region_sysfs_ops,
+		.default_attrs	= hb_region_default_addrs,
+	},
+	.owner	= THIS_MODULE,
+};
+
+/* heartbeat set */
+
+};
+
+static struct hb_heartbeat_set *to_hb_heartbeat_set(struct kset *kset)
+{
+	return kset ? container_of(kset, struct hb_heartbeat_set, hs_kset) : NULL;
+}
+
+static struct kobject *hb_heartbeat_set_make_object(struct kset *kset,
+					      const char *name)
+{
+	struct hb_region *reg = NULL;
+	struct hb_cluster *cluster = to_hb_cluster(kset->kobj.parent);
+	struct kobj *ret = NULL;
+
+	printk("trying to make a heartbeat object under cluster %p\n", cluster);
+
+	reg = kcalloc(1, sizeof(struct hb_region), GFP_KERNEL);
+	if (reg == NULL)
+		goto out; /* ENOMEM */
+
+	strcpy(reg->hr_kobj.name, name);
+	reg->kobj.k_name = reg->kobj.name;
+	reg->kobj.ktype = &hb_region_type.ktype;
+	kobject_init(&reg->hr_kobj);
+	ret = &reg->hr_kobj;
+
+out:
+	if (ret == NULL)
+		kfree(reg);
+
+	return ret;
+}
+
+static void hb_heartbeat_set_drop_object(struct kset *kset, struct kobject *kobj)
+{
+	struct hb_heartbeat_set *hs = to_hb_heartbeat_set(kset);
+
+	printk("dropping hb_heartbeat_set %p\n", hs);
+	kobject_put(kobj);
+}
+
+static struct ukobj_type hb_heartbeat_set_type = {
+	.ktype = {
+		.sysfs_ops	= NULL, /* no attributes */
+	},
+	.make_object	= hb_heartbeat_set_make_object,
+	.drop_object	= hb_heartbeat_set_drop_object,
+	.owner		= THIS_MODULE,
+};
+
+struct hb_heartbeat_set hb_heartbeat_set = {
+	.hs_kset = {
+		.kobj = {
+			.name = "heartbeat",
+			.ktype = &hb_heartbeat_set_type.ktype,
+		},
+	},
+};
+
+
+/* hb callback registration and issuing */
+
 static struct hb_callback *hbcall_from_type(int type)
 {
 	if (type < HB_NODE_DOWN_CB || type >= HB_NUM_CB)

Modified: branches/usysfsify/fs/ocfs2/cluster/heartbeat.h
===================================================================
--- branches/usysfsify/fs/ocfs2/cluster/heartbeat.h	2005-03-15 22:35:08 UTC (rev 1978)
+++ branches/usysfsify/fs/ocfs2/cluster/heartbeat.h	2005-03-15 23:35:32 UTC (rev 1979)
@@ -36,32 +36,6 @@
 	HB_NODE_STATE_UP
 };
 
-struct _heartbeat_ctxt
-{
-	int dummy;
-};
-
-typedef struct _hb_disk_slot
-{
-	struct inode *inode;
-	struct buffer_head *bh;
-	struct list_head list;
-	unsigned long last_time;
-	u16 margin;
-	u16 state;
-} hb_disk_slot;
-
-
-
-#define HB_THREAD_MS                  2000   // every 2 seconds
-
-
-enum {
-	HB_TYPE_DISK = 0,
-	HB_TYPE_NET
-};
-
-
 /* callback stuff */
 
 enum {
@@ -71,7 +45,8 @@
 	HB_NUM_CB
 };
 
-typedef void (hb_cb_func)(struct inode *, struct inode *, int, void *);
+struct nm_node;
+typedef void (hb_cb_func)(struct nm_node *, int, void *);
 
 struct hb_callback_func {
 	struct list_head	hc_item;
@@ -96,6 +71,11 @@
 #define HB_DISK_MARGIN             30
 #define HB_NET_MARGIN              30
 
+struct hb_heartbeat_set {
+	struct kset hs_kset;
+	/* some stuff? */
+};
+extern struct hb_heartbeat_set hb_heartbeat_set;
 
 void hb_setup_callback(struct hb_callback_func *hc, int type, hb_cb_func *func,
 		      void *data, int priority);

Modified: branches/usysfsify/fs/ocfs2/cluster/nodemanager.c
===================================================================
--- branches/usysfsify/fs/ocfs2/cluster/nodemanager.c	2005-03-15 22:35:08 UTC (rev 1978)
+++ branches/usysfsify/fs/ocfs2/cluster/nodemanager.c	2005-03-15 23:35:32 UTC (rev 1979)
@@ -1,12 +1,8 @@
 /* -*- mode: c; c-basic-offset: 8; -*-
  * vim: noexpandtab sw=8 ts=8 sts=0:
  *
- * nodemanager.c
+ * Copyright (C) 2004, 2005 Oracle.  All rights reserved.
  *
- * totally lame static node management placeholder
- *
- * Copyright (C) 2004 Oracle.  All rights reserved.
- *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public
  * License as published by the Free Software Foundation; either
@@ -21,11 +17,8 @@
  * License along with this program; if not, write to the
  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  * Boston, MA 021110-1307, USA.
- *
- * Authors: Kurt Hackel
  */
 
-
 #include "cl_compat.h"
 #include "util.h"
 #include "clcommon.h"
@@ -64,6 +57,7 @@
 #include <linux/pagemap.h>
 #include <linux/hash.h>
 #include <linux/bitops.h>
+#include <linux/kobject.h>
 
 #include <asm/uaccess.h>
 
@@ -73,116 +67,21 @@
 
 #include "compat_libfs.h"
 #include "transaction_file.h"
+#include "usysfs.h"
 
 #ifndef __user
 #define __user
 #endif
 
-
-/*
- * This nm module is similar to nfsd/nfsctl.c in that it uses
- * transaction files (in /proc/cluster/nm) to communicate with
- * the kernel module instead of ioctls or other means.
- *
- * Files involved:
- *  /proc/cluster/nm/cluster - used to create/destroy cluster, adds 
- *                             nodes/groups to the cluster, queries info
- *                             about the cluster
- *  /proc/cluster/nm/group   - adds/removes nodes from a group, queries
- *                             info about a group
- *  /proc/cluster/nm/node    - changes info for a node, queries info about
- *                             a node
- *
- * This nm implementation basically allows this node to live in exactly one 
- * cluster.  All "clustered" nodes that are known to this node should be
- * added to the cluster, and all nodes should see the same list of nodes in
- * the same order at all times.  The "slot" number given to a node in this 
- * global cluster list is fixed and never changes.  Groups can be dynamically
- * created within a cluster (TODO: currently static only) and be made up of 
- * one or more nodes (listed at most once) in the global list.  A node may exist
- * in many groups.  Also, a group may have an optional disk UUID which is simply
- * stored for later use by the heartbeat service.  (The heartbeat service will
- * do disk heartbeating only for those groups with valid UUIDs.)  
- *
- * USAGE:
- * For our purposes, the nm service can be autoloaded by an fstab entry or 
- * manually through mount (mount -t nm none /proc/cluster/nm).  Once that is 
- * done, an init script (or single executable on an initrd) should be run to 
- * create the static cluster info, possibly from a file like /etc/nm.conf or 
- * similar.  We should probably create a "dlm" or "everyone" group (with NO disk
- * heartbeating) so that the dlm service can be used with the network only.  
- * This group should contain all known nodes.  After this is done, the net, hb 
- * and dlm modules can come up. The nm service is now ready for use, since 
- * groups don't need to be created till later.
- * 
- * A group services daemon can be written (by someone!? ;-) to run at this 
- * point. Since the "dlm" group has everything it needs for full dlmming (since
- * it uses only network), the dlm itself can be used to arbitrate for group 
- * creation, and additions/deletions from groups.  Callbacks should be 
- * registered with nm by other services that care on each of these events.  For
- * instance, heartbeat should register a callback with nm for group creation, 
- * and addition and deletion from a group so that it can make any necessary 
- * changes to its heartbeating (primarily so that it can begin/end disk 
- * heartbeat for any group/node that needs it).
- *   
- * NOTE NOTE NOTE !!!!:
- * This is intended to be a quickie implementation.  (translation: lame)  I do
- * not want to step on anyone's toes who may have implemented something wayyy 
- * better.  If something out there "wins", we will plug into that instead.  If 
- * nothing really takes off, we at least have a (lame) reference to work off of.
- * However, since this implementation exists solely to make ocfs2 work, and one
- * of the major advantages of ocfs version 1 was ease of setup, we don't want 
- * to move to something substantially more complicated than this (one conf 
- * file).
- *
- */ 
-
-
-
-/* globals */
-nm_cluster cluster;
-struct super_block *single_sb;
-char *nm_nodename;
-EXPORT_SYMBOL(nm_nodename);
-static spinlock_t nm_lock = SPIN_LOCK_UNLOCKED;
 static DECLARE_MUTEX(nm_cb_sem);
 struct list_head nm_callbacks[NM_NUM_CB];
 
-
-static void nm_teardown(void);
 static int nm_create_cluster(char *buf);
-static void nm_init_cluster(nm_cluster *cluster);
 int nm_create_node(char *buf, nm_op *data);
 int nm_name_cluster(char *buf, nm_op *data);
-int nm_destroy_cluster(char *buf);
-int nm_get_cluster_num_nodes(char *buf);
-int nm_get_cluster_num_groups(char *buf);
 int nm_get_node_info(char *buf, nm_op *data);
-int nm_get_group_info(char *buf, nm_op *data);
-nm_cluster *nm_get_cluster(void);
-struct inode *nm_get_node_by_name(char *node_name);
 static void nm_do_callbacks(int type, void *ptr1, void *ptr2, u8 idx);
 
-/* support for adding files, dirs, hardlinks in /proc/cluster/nm/... */
-extern struct file_operations simple_dir_operations;
-extern struct inode_operations simple_dir_inode_operations;
-
-static inline int nm_find_next_slot(void *bitmap, int max, int request);
-static struct dentry * nm_add_file(struct super_block *s, struct dentry *parent, struct tree_descr *file, int ino);
-static struct dentry * nm_add_link(struct super_block *s, struct dentry *parent, struct tree_descr *file, int ino);
-
-static ssize_t write_node(struct file *file, char *buf, size_t size);
-static ssize_t write_group(struct file *file, char *buf, size_t size);
-static ssize_t write_cluster(struct file *file, char *buf, size_t size);
-
-static u8 nm_get_group_index(struct inode *group, struct inode *inode, struct dentry **child);
-
-#define NM_HASH_BITS     7
-#define NM_HASH_SIZE     (1 << NM_HASH_BITS)
-#define NM_HASH_MASK     (NM_HASH_SIZE - 1)
-
-
-
 #define nmprintk(x, arg...)    printk("(nm:%d) " x, current->pid, ##arg)
 #define nmprintk0(x)           printk("(nm:%d) " x, current->pid)
 #if 0
@@ -190,1171 +89,558 @@
 #define nmprintk0(x)
 #endif
 
-static struct list_head *nm_ip_hash = NULL;
-static spinlock_t nm_ip_hash_lock;
-
-static int nm_init_ip_hash(void);
-static void nm_destroy_ip_hash(void);
-
-
-static void nm_destroy_ip_hash(void)
+static int nm_find_next_slot(spinlock_t *lock, void *bitmap, int max,
+			     int request)
 {
-	int i;
-	if (!nm_ip_hash)
-		return;
-	for (i=0; i<NM_HASH_SIZE; i++) {
-		/* TODO: cleanup */
-	}
-	free_page((unsigned long)nm_ip_hash);
-}
+	int start = 0, slot_num, ret = -1;
 
-static int nm_init_ip_hash(void)
-{
-	int i;
-	
-	if ((PAGE_SIZE / sizeof(struct list_head)) < NM_HASH_SIZE) {
-		nmprintk("eek!  hash size too big for this arch!\n");
-		BUG();
-	}
-
-	nm_ip_hash = (struct list_head *) __get_free_page(GFP_KERNEL);
-	if (!nm_ip_hash)
-		return -ENOMEM;
-	for (i=0; i<NM_HASH_SIZE; i++)
-		INIT_LIST_HEAD(&nm_ip_hash[i]);
-	spin_lock_init(&nm_ip_hash_lock);
-	return 0;
-}
-
-
-
-
-
-static inline int nm_find_next_slot(void *bitmap, int max, int request)
-{
-	int start = 0, slot_num;
 	if (request != NM_INVALID_SLOT_NUM)
 		start = request;
+
+	spin_lock(lock);
+
 	slot_num = find_next_zero_bit (bitmap, max, start);
 	if (slot_num >= max)
-		return -1;
+		goto out;
 	if (request != NM_INVALID_SLOT_NUM && slot_num != request)
-		return -1;
-	set_bit(slot_num, bitmap);
-	return slot_num;
-}
-
-
-
-
-static struct dentry * nm_add_file(struct super_block *s, struct dentry *parent,
-				   struct tree_descr *file, int ino)
-{
-	struct qstr name;
-	struct dentry *dentry = ERR_PTR(-EINVAL);
-	struct inode *inode;
-
-	if (!file->name)
 		goto out;
-	name.name = file->name;
-	name.len = strlen(name.name);
-	nmprintk("adding file %.*s\n", name.len, name.name);
-	name.hash = full_name_hash(name.name, name.len);
-	dentry = d_alloc(parent, &name);
-	if (!dentry) {
-		dentry = ERR_PTR(-EINVAL);
-		goto out;
-	}
-	inode = new_inode(s);
-	if (!inode) {
-		dput(dentry);
-		dentry = ERR_PTR(-EINVAL);
-		goto out;
-	}
-	inode->i_mode = file->mode;
-	inode->i_uid = inode->i_gid = 0;
-	inode->i_blksize = PAGE_CACHE_SIZE;
-	inode->i_blocks = 0;
-	inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
-	if (file->mode & S_IFDIR) {
-		inode->i_op = &simple_dir_inode_operations;
-		inode->i_fop = &simple_dir_operations;
-	} else {
-		inode->i_fop = file->ops;
-	}		    
-	inode->i_ino = ino;
-	insert_inode_hash(inode);
-	d_add(dentry, inode);
 
+	set_bit(slot_num, bitmap);
+	ret = slot_num;
+
 out:
-	return dentry;
+	spin_unlock(lock);
+	return ret;
 }
 
-
-static struct dentry * nm_add_link(struct super_block *s, struct dentry *parent,
-				   struct tree_descr *file, int ino)
+/* XXX return nm_node, see why people are calling it in the first place.. */
+struct nm_node * nm_get_node_by_num(u8 node_num)
 {
-	struct qstr name;
-	struct dentry *dentry = ERR_PTR(-EINVAL);
-	struct inode *inode;
-
-	if (!file->name)
-		goto out;
-	name.name = file->name;
-	name.len = strlen(name.name);
-	nmprintk("adding link %.*s\n", name.len, name.name);
-	name.hash = full_name_hash(name.name, name.len);
-	dentry = d_alloc(parent, &name);
-	if (!dentry) {
-		nmprintk("failed to d_alloc\n");
-		dentry = ERR_PTR(-EINVAL);
-		goto out;
-	}
-	inode = iget(s, ino);
-	if (!inode) {
-		nmprintk("failed to iget\n");
-		dput(dentry);
-		dentry = ERR_PTR(-EINVAL);
-		goto out;
-	}
-	if (!inode->u.generic_ip) {
-		nmprintk("bad inode: %d\n", ino);
-		iput(inode);
-		dput(dentry);
-		dentry = ERR_PTR(-EINVAL);
-		goto out;
-	}
-	inode->i_nlink++;
-	d_add(dentry, inode);
-
-out:
-	return dentry;
+	BUG();
+	return NULL;
 }
+EXPORT_SYMBOL(nm_get_node_by_num);
 
-
-
-
-
-/* cluster, node and group transaction files. 
- * here's where the actual work of nm takes place. */
-
-static int nm_create_cluster(char *buf)
+/* tcp calls this when a node connects so that it can deref the inode
+ * to find the inode_private->net state.. hmm.  it probably just wants
+ * to look it up so that it can verify that it's a node we like and
+ * allocate its own state?  hmm, that's a pickle. */
+struct nm_node * nm_get_node_by_ip(u32 addr)
 {
-	int ret = -EINVAL;
-	
-	nmprintk("create cluster...\n");
-	
-	spin_lock(&nm_lock);
-	if (cluster.state == NM_CLUSTER_UP) {
-		ret = sprintf(buf, "%d: cluster already up\n", -EINVAL);
-	} else {
-		cluster.state = NM_CLUSTER_UP;
-		ret = sprintf(buf, "0: cluster state: UP");
-	}
-	spin_unlock(&nm_lock);
-	return ret;
+	BUG();
+	return NULL;
 }
+EXPORT_SYMBOL(nm_get_node_by_ip);
 
-
-
-int nm_create_group(char *buf, nm_op *data)
+#if 0
+/* this tree is only used by get_group_by_name */
+static struct nm_group * nm_group_tree_lookup(nm_cluster *cluster,
+					      const char *group_name,
+					      struct rb_node ***ret_p,
+					      struct rb_node **ret_parent)
 {
-	struct tree_descr desc;
-	struct dentry *dentry = NULL;
-	struct inode *inode = NULL;
-	int ino, group_num;
-	int ret = -EINVAL;
-	nm_group_inode_private *g = NULL;
+        struct rb_node **p = &cluster->group_name_tree.rb_node;
+        struct rb_node *parent = NULL;
+	struct nm_group *group, *ret = NULL;
+	int cmp;
 
-	nmprintk("create group...\n");
+        while (*p) {
+                parent = *p;
+                group = rb_entry(parent, struct nm_group, gr_name_rb_node);
+		cmp = strcmp(group_name, group->gr_name);
 
-	data->arg_u.gc.name[NM_MAX_NAME_LEN] = '\0';
-	inode = nm_get_group_by_name(data->arg_u.gc.name);
-	if (inode) {
-		ret = sprintf(buf, "%d: group %u (%s) already exists", -EEXIST, 
-			      nm_get_group_global_index(inode), 
-			      data->arg_u.gc.name);
-		iput(inode);
-		return ret;
-	}
+                if (cmp < 0)
+                        p = &(*p)->rb_left;
+                else if (cmp > 0)
+                        p = &(*p)->rb_right;
+                else {
+			ret = group;
+                        break;
+		}
+        }
+        /* report where the search stopped: the link slot and its parent */
+        if (ret_p != NULL)
+                *ret_p = p;
+        if (ret_parent != NULL)
+                *ret_parent = parent;
 
-	group_num = data->arg_u.gc.group_num;
-	if (group_num > NM_INVALID_SLOT_NUM)
-		goto leave;
+	if (ret)
+		/* get ref */;
 
-	spin_lock(&cluster.bitmap_lock);
-	group_num = nm_find_next_slot(&(cluster.group_bitmap[0]), 255, 
-				      group_num);
-	spin_unlock(&cluster.bitmap_lock);
-
-	if (group_num < 0) {
-		nmprintk("out of group slots!\n");
-		goto leave;
-	}
-
-	ino = group_num + NM_GROUP_INODE_START;
-
-	desc.name = data->arg_u.gc.name;
-	desc.ops = NULL;
-	desc.mode = S_IFDIR | 0755;
-	dentry = nm_add_file(single_sb, single_sb->s_root, &desc, ino);
-	if (IS_ERR(dentry))
-		goto leave;
-	inode = igrab(dentry->d_inode);
-	if (!inode) {
-		nmprintk("igrab failed!\n");
-		goto leave;
-	}
-		
-	g = kmalloc(sizeof(nm_group_inode_private), GFP_KERNEL);
-	if (!g) 
-		goto leave;
-
-	memset(g, 0, sizeof(nm_group_inode_private));
-	memcpy(g->disk.uuid, data->arg_u.gc.disk_uuid, CLUSTER_DISK_UUID_LEN);
-	spin_lock_init(&g->bitmap_lock);
-	if (g->disk.uuid[0])
-		g->state = NM_GROUP_NOT_READY;
-	else
-		g->state = NM_GROUP_READY;
-	g->inode = inode;
-	inode->u.generic_ip = g;
-
-	ret = sprintf(buf, "0: group %u (%s) added, uuid: %s", group_num,
-		      data->arg_u.gc.name, g->disk.uuid);
-	nm_do_callbacks(NM_GROUP_ADD_CB, inode, NULL, group_num);
-
-leave:
-	if (ret < 0) {
-		if (inode) {
-			if (inode->u.generic_ip)
-				kfree(inode->u.generic_ip);
-			iput(inode);
-		}
-		if (dentry)
-			dput(dentry);
-	}
-	return ret;
+        return ret;
 }
-EXPORT_SYMBOL(nm_create_group);
 
-int nm_create_node(char *buf, nm_op *data)
+struct nm_group * nm_get_group_by_name(nm_cluster *cluster,
+				       const char *group_name)
 {
-	struct tree_descr desc;
-	struct dentry *dentry = NULL;
-	struct inode *inode = NULL;
-	int ino, node_num, bucket;
-	int ret = -EINVAL;
-	nm_node_inode_private *n = NULL;
-	struct page *page = NULL;
+	struct nm_group *group;
 
-	nmprintk("add cluster node ...\n");
+	read_lock(&cluster->rbtree_lock);
+	group = nm_group_tree_lookup(cluster, group_name, NULL, NULL);
+	read_unlock(&cluster->rbtree_lock);
 
-	data->arg_u.node.node_name[NM_MAX_NAME_LEN] = '\0';
-	inode = nm_get_node_by_name(data->arg_u.node.node_name);
-	if (inode) {
-		ret = sprintf(buf, "%d: node %u (%s) already exists", -EEXIST, 
-			      nm_get_node_global_index(inode), 
-			      data->arg_u.node.node_name);
-		iput(inode);
-		return ret;
-	}
+	return group;
+}
+EXPORT_SYMBOL(nm_get_group_by_name);
+#endif
 
-	node_num = data->arg_u.node.node_num;
-	if (node_num > NM_INVALID_SLOT_NUM) {
-		nmprintk("bad node_num: %d\n", node_num);
-		goto leave;
-	}
-
-	spin_lock(&cluster.bitmap_lock);
-	node_num = nm_find_next_slot(&(cluster.node_bitmap[0]), 255, node_num);
-	spin_unlock(&cluster.bitmap_lock);
-
-	if (node_num < 0) {
-		nmprintk("out of node slots!\n");
-		goto leave;
-	}
-
-	ino = node_num + NM_NODE_INODE_START;
-
-	desc.name = data->arg_u.node.node_name;
-	desc.ops = NULL;
-	desc.mode = S_IFREG | S_IWUSR;
-	dentry = nm_add_file(single_sb, single_sb->s_root, &desc, ino);
-	if (IS_ERR(dentry)) {
-		nmprintk("bad dentry\n");
-		goto leave;
-	}
-	inode = igrab(dentry->d_inode);
-	if (!inode) {
-		nmprintk("igrab failed!\n");
-		goto leave;
-	}
-		
-	n = kmalloc(sizeof(nm_node_inode_private), GFP_KERNEL);
-	if (!n) {
-		nmprintk("could not kmalloc\n");
-		goto leave;
-	}
-	memcpy(&n->node, &data->arg_u.node, sizeof(nm_node_info));
-	INIT_LIST_HEAD(&n->ip_hash);
-
-	spin_lock_init(&n->net.sock_lock);
-	n->net.sock = NULL;
-	n->net.sock_refs = 0;
-	n->net.sock_pending = 0;
-	n->net.defer_release = 0;
-	INIT_LIST_HEAD(&n->net.pending_waiters);
-	init_waitqueue_head(&n->net.waitq);
-	INIT_LIST_HEAD(&n->net.handlers);
-	INIT_LIST_HEAD(&n->net.active_item);
-	n->net.page = NULL;
-	n->net.page_off = 0;
-
-	page = alloc_page(GFP_KERNEL);
-	if (page == NULL) {
-		nmprintk("page allocation failed\n");
-		goto leave;
-	}
-	n->net.page = page;
-
-	/* hash on first ip address */
-	spin_lock(&nm_ip_hash_lock);
-	bucket = hash_long(n->node.ifaces[0].addr_u.ip_addr4, NM_HASH_BITS);
-	list_add_tail(&n->ip_hash, &nm_ip_hash[bucket]);
-	spin_unlock(&nm_ip_hash_lock);
-	nmprintk("hashed ip %d.%d.%d.%d to bucket %d\n", 
-		 NIPQUAD(n->node.ifaces[0].addr_u.ip_addr4), bucket);
-	n->inode = inode;
-	inode->u.generic_ip = n;
-
-	ret = sprintf(buf, "0: node %u (%s) added", node_num, 
-		      n->node.node_name);
-	nm_do_callbacks(NM_NODE_ADD_CB, inode, NULL, node_num);
-
-leave:
-	if (ret < 0) {
-		if (page)
-			__free_page(page);
-		if (inode) {
-			if (inode->u.generic_ip)
-				kfree(inode->u.generic_ip);
-			iput(inode);
-		}
-		if (dentry)
-			dput(dentry);
-	}
-	return ret;
+u8 nm_this_node(void)
+{
+	/* sigh, in our one cluster one node one ip.. */
+	BUG();
+	return 0;
 }
+EXPORT_SYMBOL(nm_this_node);
 
-int nm_make_group_ready(struct inode *group)
+int nm_register_callback(unsigned int type, void (*func)(void *, void *, u8))
 {
-	nm_group_inode_private *g = group->u.generic_ip;
-	if (!g)
+	nm_callback_func *f;
+
+	if (type >= NM_NUM_CB)
 		return -EINVAL;
-	g->state = NM_GROUP_READY;
+	f = kmalloc(sizeof(nm_callback_func), GFP_KERNEL);
+	if (f == NULL)
+		return -ENOMEM;
+	memset(f, 0, sizeof(nm_callback_func));
+	f->func = func;
+	down(&nm_cb_sem);
+	list_add_tail(&f->list, &nm_callbacks[type]);
+	up(&nm_cb_sem);
 	return 0;
 }
-EXPORT_SYMBOL(nm_make_group_ready);
+EXPORT_SYMBOL(nm_register_callback);
 
-int nm_add_node_to_group(char *buf, nm_op *data)
+int nm_unregister_callback(unsigned int type, void (*func)(void *, void *, u8))
 {
-	struct tree_descr desc;
-	struct inode *inode = NULL;
-	struct dentry *dentry = NULL, *child = NULL;
-	nm_group_inode_private *g = NULL;
-	int group_num, slot_num;
+	struct list_head *iter, *tmpiter;
 	int ret = -EINVAL;
-	u8 ino;
-	char tmpname[6];
+	nm_callback_func *f;
 
-	nmprintk("add node to group...\n");
+	if (type >= NM_NUM_CB)
+		return ret;
 
-	group_num = data->arg_u.gc.group_num;
-	ino = data->arg_u.gc.node_num;
-	slot_num = data->arg_u.gc.slot_num;
-
-	/* request a certain slot, or NM_INVALID_SLOT_NUM for any slot */
-	if (slot_num > NM_INVALID_SLOT_NUM)
-		goto leave;
-	
-	if (ino >= NM_INVALID_SLOT_NUM || group_num >= NM_INVALID_SLOT_NUM)
-		goto leave;
-
-       	inode = nm_get_group_by_num(group_num);
-	if (!inode)
-		goto leave;
-	if (list_empty(&inode->i_dentry))
-		goto leave;
-	dentry = dget(list_entry(inode->i_dentry.next, struct dentry, d_alias));
-	if (!dentry)
-		goto leave;
-	g = inode->u.generic_ip;
-	if (!g)
-		goto leave;
-
-	if (g->state == NM_GROUP_NOT_READY) {
-		ret = sprintf(buf, "%d: group disk has not been discovered.  "
-			      "cannot add nodes.", -EROFS);
-		goto leave;
+	down(&nm_cb_sem);
+	list_for_each_safe(iter, tmpiter, &nm_callbacks[type]) {
+		f = list_entry (iter, nm_callback_func, list);
+		if (f->func == func) {
+			list_del(&f->list);
+			kfree(f);
+			ret = 0;
+			break;
+		}
 	}
-
-	spin_lock(&g->bitmap_lock);
-	slot_num = nm_find_next_slot(&(g->slot_bitmap[0]), 255, slot_num);
-	spin_unlock(&g->bitmap_lock);
-	if (slot_num < 0)
-		goto leave;
-
-	/* create hardlink to ino with name "slot_num" */
-	sprintf(tmpname, "%03u", slot_num);
-	desc.name = &(tmpname[0]);
-	desc.ops = NULL;
-	desc.mode = 0;
-	child = nm_add_link(single_sb, dentry, &desc, 
-			    NM_NODE_INODE_START+ino);
-	if (IS_ERR(child)) {
-		nmprintk("error adding link for %s\n", tmpname);
-		child = NULL;
-		goto leave;
-	}
-
-	ret = sprintf(buf, "0: node %u added to group: %.*s", 
-		      ino, dentry->d_name.len, dentry->d_name.name);
-
-	if (!igrab(child->d_inode))
-		goto leave;
-	nm_do_callbacks(NM_GROUP_NODE_ADD_CB, inode, child->d_inode, slot_num);
-	iput(child->d_inode);
-
-leave:
-	if (dentry)
-		dput(dentry);
-	if (child)
-		dput(child);
-	if (inode)
-		iput(inode);
+	up(&nm_cb_sem);
 	return ret;
 }
-EXPORT_SYMBOL(nm_add_node_to_group);
+EXPORT_SYMBOL(nm_unregister_callback);
 
-int nm_remove_node_from_group(struct inode *group, struct inode *node)
+static void nm_do_callbacks(int type, void *ptr1, void *ptr2, u8 idx)
 {
-	struct dentry *child = NULL;
-	nm_group_inode_private *g = NULL;
-	int slot_num;
-	int ret = -EINVAL;
-
-	nmprintk("remove node from group...\n");
-
-	slot_num = nm_get_group_index(group, node, &child);
-
-	if (slot_num == NM_MAX_NODES || !child)
-		goto leave;
-
-	g = group->u.generic_ip;
-	if (!g)
-		goto leave;
-		
-	nmprintk("killing the dentry now!!\n");
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)
-	down(&group->i_zombie);
-	node->i_nlink--;
-	d_delete(child);
-	up(&group->i_zombie);
-#else
-	down(&group->i_sem);
-	node->i_nlink--;
-	d_delete(child);
-	up(&group->i_sem);
-#endif
-	nmprintk("done killing the dentry!!\n");
-
-
-	if (!igrab(node))
-		goto leave;
-	nm_do_callbacks(NM_GROUP_NODE_DEL_CB, group, node, slot_num);
-	iput(node);
+	struct list_head *iter;
+	nm_callback_func *f;
 	
-	spin_lock(&g->bitmap_lock);
-	clear_bit(slot_num, (void *)(&g->slot_bitmap[0]));
-	spin_unlock(&g->bitmap_lock);
-
-	ret = 0;
-
-leave:
-	if (child)
-		dput(child);
-	return ret;
+	down(&nm_cb_sem);
+	list_for_each(iter, &nm_callbacks[type]) {
+		f = list_entry (iter, nm_callback_func, list);
+		(f->func) (ptr1, ptr2, idx);
+	}
+	up(&nm_cb_sem);
 }
 
+/* node usysfs bits */
 
-
-int nm_name_cluster(char *buf, nm_op *data)
+static struct nm_cluster *to_nm_cluster(struct kobject *kobj)
 {
-	int ret = -EINVAL;
-
-	nmprintk("name cluster...\n");
-	spin_lock(&nm_lock);
-	if (cluster.state == NM_CLUSTER_UP) {
-		ret = sprintf(buf, "%d: cluster name could not be set.  "
-			      "cluster already up.", -EINVAL);
-		goto leave;
-	}
-	memset(cluster.name, 0, NM_MAX_NAME_LEN+1);
-	memcpy(cluster.name, data->arg_u.name, NM_MAX_NAME_LEN);
-	ret = sprintf(buf, "0: cluster name set: %s", cluster.name);
-leave:
-	spin_unlock(&nm_lock);
-	return ret;
+	return kobj ? container_of(kobj, struct nm_cluster, cl_kobj) : NULL;
 }
 
-int nm_destroy_cluster(char *buf)
+static struct nm_node *to_nm_node(struct kobject *kobj)
 {
-	int ret;
-	nmprintk("destroy cluster...\n");
-
-	/* TODO */
-	spin_lock(&nm_lock);
-	nm_init_cluster(&cluster);
-	ret = sprintf(buf, "0: rudely destroyed cluster!!!");
-	spin_unlock(&nm_lock);
-	return ret;
+	return kobj ? container_of(kobj, struct nm_node, nd_kobj) : NULL;
 }
 
-
-int nm_get_cluster_num_nodes(char *buf)
+static void nm_node_release(struct kobject *kobj)
 {
-	int num_nodes=0, i;
-	
-	nmprintk("get cluster num nodes...\n");
-
-	spin_lock(&cluster.bitmap_lock); 
-	for (i=0; i<ARRAY_SIZE(cluster.node_bitmap); i++)
-		num_nodes += hweight_long(cluster.node_bitmap[i]);
-	spin_unlock(&cluster.bitmap_lock);
-
-	return sprintf(buf, "0: %d", num_nodes);
+	struct nm_node *node = to_nm_node(kobj);
+	printk("releasing node %p\n", node);
 }
 
-int nm_get_cluster_num_groups(char *buf)
+static ssize_t nm_node_num_read(struct nm_node *node, char *page)
 {
-	int num_groups=0, i;
-	
-	nmprintk("get cluster num groups...\n");
-
-	spin_lock(&cluster.bitmap_lock);
-	for (i=0; i<ARRAY_SIZE(cluster.group_bitmap); i++)
-		num_groups += hweight_long(cluster.group_bitmap[i]);
-	spin_unlock(&cluster.bitmap_lock);
-
-	return sprintf(buf, "0: %d", num_groups);
+	return sprintf(page, "%d\n", node->nd_num);
 }
 
-int nm_get_group_num_nodes(struct inode *group)
+static ssize_t nm_node_num_write(struct nm_node *node, const char *page,
+				 size_t count)
 {
-	int num_nodes=0, i;
-	nm_group_inode_private *g;
-	
-	nmprintk("get group num nodes...\n");
-	
-	g = group->u.generic_ip;
-	if (!g)
+	struct nm_cluster *cluster = to_nm_cluster(node->nd_kobj.parent);
+	unsigned long tmp;
+	char *p = (char *)page;
+	int node_num;
+
+	tmp = simple_strtoul(p, &p, 0);
+	if (!p || (*p && (*p != '\n')))
 		return -EINVAL;
 
-	spin_lock(&g->bitmap_lock);
-	for (i=0; i<ARRAY_SIZE(g->slot_bitmap); i++)
-		num_nodes += hweight_long(g->slot_bitmap[i]);
-	spin_unlock(&g->bitmap_lock);
+	if (tmp >= NM_MAX_NODES)
+		return -ERANGE;
 
-	return num_nodes;
-}
+	node_num = nm_find_next_slot(&cluster->cl_bitmap_lock,
+				     &(cluster->cl_node_bitmap[0]), 255, tmp);
+	if (node_num < 0)
+		return node_num;
 
-void * nm_iterate_group_disk_slots(struct inode *group, int *idx)
-{
-	nm_group_inode_private *priv;
-	int next;
+	node->nd_num = tmp;
 
-	if (*idx >= 255)
-		return NULL;
-	priv = group->u.generic_ip;
-	if (!priv)
-		return NULL;
-	next = find_next_bit(priv->slot_bitmap, 255, *idx);
-	if (next >= 255)
-		return NULL;
-	*idx = next;
-	return util_rarray_idx_to_slot(&priv->disk.slots, next);
+	return count;
 }
-EXPORT_SYMBOL(nm_iterate_group_disk_slots);
-
-int nm_get_node_info(char *buf, nm_op *data)
+static ssize_t nm_node_ipv4_port_read(struct nm_node *node, char *page)
 {
-	int ret, tmpret, i;
-	nm_node_inode_private *priv;
-	nm_network_iface *n;
-	struct inode *inode = NULL;
-	struct dentry *dentry;
-	u8 node_num;
-	u16 vers;
-
-	ret = -EINVAL;
-	node_num = data->arg_u.index;
-	inode = nm_get_node_by_num(node_num);
-	if (inode) {
-		dentry = list_entry(inode->i_dentry.next, struct dentry, 
-				    d_alias);
-		priv = inode->u.generic_ip;
-		ret = sprintf(buf, "0: global_index=%u\n"
-			           "name=%.*s\n",
-				priv->node.node_num, dentry->d_name.len, 
-				dentry->d_name.name);
-		buf += ret;
-		for (i=0; i<NM_MAX_IFACES; i++) {
-			n = &priv->node.ifaces[i];
-			vers = ntohs(n->ip_version);
-			nmprintk("ip_version=%u, vers=%u\n", 
-				 n->ip_version, vers);
-			if (vers!=4 && vers!=6)
-				continue;
-			/* TODO: how to print ipv6? */
-			tmpret = sprintf(buf, "iface%d.port=%u\n"
-				            "iface%d.version=%d\n"
-					    "iface%d.addr=%d.%d.%d.%d\n",
-				      i, ntohs(n->ip_port), i, vers, i,
-				      NIPQUAD(n->addr_u.ip_addr4));
-			buf += tmpret;
-			ret += tmpret;
-		}
-		iput(inode);
-	}
-	return ret;
+	return sprintf(page, "%u\n", ntohs(node->nd_ipv4_port));
 }
 
-int nm_get_group_info(char *buf, nm_op *data)
+static ssize_t nm_node_ipv4_port_write(struct nm_node *node, const char *page,
+				 size_t count)
 {
-	int ret, tmpret;
-	nm_group_inode_private *g = NULL;
-	struct inode *inode = NULL;
-	u8 group_num;
-	struct dentry *dentry, *child;
+	unsigned long tmp;
+	char *p = (char *)page;
 
-	ret = -EINVAL;
-	group_num = data->arg_u.index;
-	inode = nm_get_group_by_num(group_num);
-	if (inode) {
-		g = inode->u.generic_ip;
-		dentry = list_entry(inode->i_dentry.next, struct dentry, 
-				    d_alias);
-		ret = sprintf(buf, "0: group_num=%u\n"
-		        	   "name=%.*s\n"
-				   "disk_uuid=%s\n",
-			      group_num, dentry->d_name.len, 
-			      dentry->d_name.name, g->disk.uuid);
-		buf += ret;
+	tmp = simple_strtoul(p, &p, 0);
+	if (!p || (*p && (*p != '\n')))
+		return -EINVAL;
 
-		spin_lock(&dcache_lock);
-		list_for_each_entry(child, &dentry->d_subdirs, d_child) {
-			tmpret = sprintf(buf, "%.*s\n", child->d_name.len, 
-					 child->d_name.name);
-			buf += tmpret;
-			ret += tmpret;
-		}
-		spin_unlock(&dcache_lock);
-		iput(inode);
-	}
-	return ret;
-}
-
-	
-
-static ssize_t write_cluster(struct file *file, char *buf, size_t size)
-{
-	nm_op *data;
-	int ret;
-	u8 me;
-	
-	nmprintk("write_cluster\n");
-
-        if (size < sizeof(*data))
-                return -EINVAL;
-        data = (nm_op *) buf;
-	if (data->magic != NM_OP_MAGIC)
+	if (tmp == 0)
 		return -EINVAL;
+	if (tmp >= (u16)-1)
+		return -ERANGE;
 
-	switch (data->opcode) {
-		case NM_OP_CREATE_CLUSTER:
-			ret = nm_create_cluster(buf);
-			break;
-		case NM_OP_CREATE_GROUP:
-			ret = nm_create_group(buf, data);
-			break;
-		case NM_OP_NAME_CLUSTER:
-			ret = nm_name_cluster(buf, data);
-			break;
-		case NM_OP_DESTROY_CLUSTER:
-			ret = nm_destroy_cluster(buf);
-			break;
-		case NM_OP_ADD_CLUSTER_NODE:
-			ret = nm_create_node(buf, data);
-			break;
-		case NM_OP_GET_CLUSTER_NUM_NODES:
-			ret = nm_get_cluster_num_nodes(buf);
-			break;
-		case NM_OP_GET_GLOBAL_NODE_NUM:
-			ret = 0;
-			me = nm_this_node(NULL);
-			if (me >= NM_MAX_NODES)
-				ret = -EINVAL;
-			ret = sprintf(buf, "%d: %u", ret, me);
-			break;
-		default:
-			ret = sprintf(buf, "%d: bad opcode: %u", -EINVAL, 
-				      data->opcode);
-			break;
-	}
-	nmprintk("leaving!\n");
-	return ret;
+	node->nd_ipv4_port = htons(tmp);
+
+	return count;
 }
 
-static ssize_t write_node(struct file *file, char *buf, size_t size)
+static ssize_t nm_node_ipv4_address_read(struct nm_node *node, char *page)
 {
-	nm_op *data;
-	int ret;
-	
-	nmprintk("write_node\n");
-
-        if (size < sizeof(*data))
-                return -EINVAL;
-        data = (nm_op *) buf;
-	if (data->magic != NM_OP_MAGIC)
-		return -EINVAL;
-
-	switch (data->opcode) {
-		case NM_OP_GET_NODE_INFO:
-			ret = nm_get_node_info(buf, data);
-			break;
-		default:
-			ret = sprintf(buf, "%d: bad opcode: %u", -EINVAL, 
-				      data->opcode);
-			break;
-	}
-	nmprintk("leaving!\n");
-	return ret;
+	return sprintf(page, "%u.%u.%u.%u\n", NIPQUAD(node->nd_ipv4_address));
 }
 
-static ssize_t write_group(struct file *file, char *buf, size_t size)
+/* "ipv4_address" store: parse a dotted quad into nd_ipv4_address (network
+ * order).  XXX acts as commit until commit lands; will be hoisted there. */
+static ssize_t nm_node_ipv4_address_write(struct nm_node *node,
+					  const char *page,
+					  size_t count)
 {
-	nm_op *data;
 	int ret;
-	
-	nmprintk("write_group\n");
+	/* Octets are parsed as decimal numbers; "%c" would have stored the
+	 * raw characters of the string ('1' -> 0x31) rather than the values
+	 * they spell. */
+	unsigned int a, b, c, d;
 
-        if (size < sizeof(*data))
-                return -EINVAL;
-        data = (nm_op *) buf;
-	if (data->magic != NM_OP_MAGIC)
+	ret = sscanf(page, "%u.%u.%u.%u", &a, &b, &c, &d);
+	/* need all four octets, and each has to fit in a single byte */
+	if (ret != 4 || a > 255 || b > 255 || c > 255 || d > 255)
 		return -EINVAL;
 
-	nmprintk("opcode is %u, add_group is %u\n", data->opcode, 
-		 NM_OP_ADD_GROUP_NODE);
-	switch (data->opcode) {
-		case NM_OP_GET_GROUP_INFO:
-			ret = nm_get_group_info(buf, data);
-			break;
+	node->nd_ipv4_address = htonl(a << 24 | b << 16 | c << 8 | d);
 
-		case NM_OP_ADD_GROUP_NODE:
-			ret = nm_add_node_to_group(buf, data);
-			break;
+#if 0
+	/* XXX kick off to a thread? */
+	/* XXX bubble inode arg down to callbacks? :/ */
+	nm_do_callbacks(NM_NODE_ADD_CB, node);
+#endif
 
-		default:
-			ret = sprintf(buf, "%d: bad opcode: %u", 
-				      -EINVAL, data->opcode);
-			break;
-	}
-	nmprintk("leaving!\n");
-	return ret;
+	return count;
 }
 
+struct nm_node_attribute {
+	struct attribute attr;
+	ssize_t (*show)(struct nm_node *, char *);
+	ssize_t (*store)(struct nm_node *, const char *, size_t);
+};
 
+static struct nm_node_attribute nm_node_attr_num = {
+	.attr	= { .name = "num", .mode = S_IRUGO | S_IWUSR },
+	.show	= nm_node_num_read,
+	.store	= nm_node_num_write,
+};
+static struct nm_node_attribute nm_node_attr_ipv4_port = {
+	.attr	= { .name = "ipv4_port", .mode = S_IRUGO | S_IWUSR },
+	.show	= nm_node_ipv4_port_read,
+	.store	= nm_node_ipv4_port_write,
+};
+static struct nm_node_attribute nm_node_attr_ipv4_address = {
+	.attr	= { .name = "ipv4_address", .mode = S_IRUGO | S_IWUSR },
+	.show	= nm_node_ipv4_address_read,
+	.store	= nm_node_ipv4_address_write,
+};
+static struct attribute *nm_node_default_attrs[] = {
+	&nm_node_attr_num.attr,
+	&nm_node_attr_ipv4_port.attr,
+	&nm_node_attr_ipv4_address.attr,
+	NULL,
+};
 
-struct inode * nm_get_group_by_num(u8 group_num)
+static ssize_t nm_node_show(struct kobject *kobj,
+			    struct attribute *attr,
+			    char *page)
 {
-	struct inode *inode = iget(single_sb, group_num + NM_GROUP_INODE_START);
-	if (!inode)
-		return NULL;
-	if (!inode->u.generic_ip) {
-		iput(inode);
-		return NULL;
-	}
-	return inode;
-}	
-EXPORT_SYMBOL(nm_get_group_by_num);
+	struct nm_node *node = to_nm_node(kobj);
+	struct nm_node_attribute *nm_node_attr =
+		container_of(attr, struct nm_node_attribute, attr);
+	ssize_t ret = 0;
 
-struct inode * nm_get_node_by_num(u8 node_num)
-{
-	struct inode *inode = iget(single_sb, node_num + NM_NODE_INODE_START);
-	if (!inode)
-		return NULL;
-	if (!inode->u.generic_ip) {
-		iput(inode);
-		return NULL;
-	}
-	return inode;
+	if (nm_node_attr->show)
+		ret = nm_node_attr->show(node, page);
+	return ret;
 }
-EXPORT_SYMBOL(nm_get_node_by_num);
 
-/* ipv4 only for now... */
-struct inode * nm_get_node_by_ip(u32 addr)
+static ssize_t nm_node_store(struct kobject *kobj,
+			      struct attribute *attr,
+			      const char *page, size_t count)
 {
-	int bucket;
-	struct list_head *iter;
-	nm_node_inode_private *priv;
-	struct inode *ret = NULL;
-	
-	bucket = hash_long(addr, NM_HASH_BITS);
+	struct nm_node *node = to_nm_node(kobj);
+	struct nm_node_attribute *nm_node_attr =
+		container_of(attr, struct nm_node_attribute, attr);
+	ssize_t ret = -EINVAL;
 
-	spin_lock(&nm_ip_hash_lock);
-	list_for_each(iter, &nm_ip_hash[bucket]) {
-		priv = list_entry(iter, nm_node_inode_private, ip_hash);
-		if (priv->node.ifaces[0].addr_u.ip_addr4 == addr) {
-			ret = igrab(priv->inode);
-			break;
-		}
-		    
-	}
-	spin_unlock(&nm_ip_hash_lock);
+	if (nm_node_attr->store)
+		ret = nm_node_attr->store(node, page, count);
 	return ret;
 }
-EXPORT_SYMBOL(nm_get_node_by_ip);
 
-nm_cluster * nm_get_cluster(void)
-{
-	return &cluster;
-}
+struct sysfs_ops nm_node_sysfs_ops = {
+	.show	= &nm_node_show,
+	.store	= &nm_node_store,
+};
 
-struct inode * nm_get_group_node_by_index(struct inode *group, u8 index)
-{
-	struct dentry *dentry = NULL, *parent;
-	struct inode *inode = NULL;
-	char tmpname[6];
+static struct ukobj_type nm_node_type = {
+	.ktype	= {
+		.release	= nm_node_release,
+		.sysfs_ops	= &nm_node_sysfs_ops,
+		.default_attrs	= nm_node_default_attrs,
+	},
+	.owner	= THIS_MODULE,
+};
 
-	if (list_empty(&group->i_dentry))
-		return NULL;
-	parent = dget(list_entry(group->i_dentry.next, struct dentry, d_alias));
-	if (!parent)
-		return NULL;
-	
-	sprintf(tmpname, "%03u", index);
-	dentry = lookup_one_len(tmpname, parent, strlen(tmpname));
-	if (!IS_ERR(dentry)) {
-		inode = dentry->d_inode;
-		if (inode) {
-			inode = igrab(inode);
-			if (!inode->u.generic_ip || !S_ISREG (inode->i_mode)) {
-				nmprintk("bad inode!\n");
-				iput(inode);
-				inode = NULL;
-			}
-		}
-		if (!inode)
-			dput(dentry);
-	}
-	dput(parent);
-	return inode;
+/* node set */
+
+struct nm_node_set {
+	struct kset ns_kset;
+	/* some stuff? */
+};
+
+static struct nm_node_set *to_nm_node_set(struct kset *kset)
+{
+	return kset ? container_of(kset, struct nm_node_set, ns_kset) : NULL;
 }
-EXPORT_SYMBOL(nm_get_group_node_by_index);
 
-struct inode * __nm_get_node_by_name(const char *node_name, int dir)
+
+static struct kobject *nm_node_set_make_object(struct kset *kset,
+					      const char *name)
 {
-	struct dentry *dentry = NULL;
-	struct inode *inode = NULL;
+	struct nm_node *node = NULL;
+	struct nm_cluster *cluster = to_nm_cluster(kset->kobj.parent);
+	struct kobject *ret = NULL;
+	net_inode_private *nip;
+	struct page *page;
 
-	NM_ASSERT(node_name);
-	NM_ASSERT(single_sb);
-	NM_ASSERT(single_sb->s_root);
+	printk("trying to make a node object under cluster %p\n", cluster);
 
-	dentry = lookup_one_len(node_name, single_sb->s_root, 
-				strlen(node_name));
-	if (!IS_ERR(dentry)) {
-		inode = dentry->d_inode;
-		if (inode) {
-			inode = igrab(inode);
-			if (!inode->u.generic_ip ||
-		    	(dir && !S_ISDIR (inode->i_mode)) ||
-		    	(!dir && !S_ISREG (inode->i_mode))) {
-				nmprintk("bad inode!\n");
-				iput(inode);
-				inode = NULL;
-			}
-		}
-	}
-	return inode;
-}
-EXPORT_SYMBOL(__nm_get_node_by_name);
+	if (strlen(name) > NM_MAX_NAME_LEN)
+		goto out; /* ENAMETOOLONG */
 
+	node = kcalloc(1, sizeof(struct nm_node), GFP_KERNEL);
+	if (node == NULL)
+		goto out; /* ENOMEM */
 
-/* 
- * if group is NULL: return the global index for this node
- * if group is non NULL: return the index within the group of this node
- *
- * NOTE: currently getting the group index is slow
- *       will need to change this somehow
- */
-u8 nm_this_node(struct inode *group)
-{
-	struct inode *inode = NULL;
-	struct dentry *child = NULL;
-	u8 node_num = NM_MAX_NODES;
+	strcpy(node->nd_name, name); /* use kobj.name instead? */
 
-       	inode = nm_get_node_by_name(nm_nodename);
-	if (inode && inode->u.generic_ip) {
-		if (group)
-			node_num = nm_get_group_index(group, inode, &child);
-		else 
-			node_num = nm_get_node_global_index(inode);
+	/* this should be somewhere else */
+	nip = &node->nd_net_inode_private;
+	spin_lock_init(&nip->sock_lock);
+	nip->sock = NULL;
+	nip->sock_refs = 0;
+	nip->sock_pending = 0;
+	nip->defer_release = 0;
+	INIT_LIST_HEAD(&nip->pending_waiters);
+	init_waitqueue_head(&nip->waitq);
+	INIT_LIST_HEAD(&nip->handlers);
+	INIT_LIST_HEAD(&nip->active_item);
+	nip->page = NULL;
+	nip->page_off = 0;
 
+	page = alloc_page(GFP_KERNEL);
+	if (page == NULL) {
+		nmprintk("page allocation failed\n");
+		goto out; /* ENOMEM */
 	}
-	iput(inode);
-	dput(child);
-	//nmprintk("for group=%p, this node is %u\n", group, node_num);
-	return node_num;
-}
-EXPORT_SYMBOL(nm_this_node);
+	nip->page = page;
 
-/* slow */
-static u8 nm_get_group_index(struct inode *group, struct inode *inode, 
-			      struct dentry **child)
-{
-	struct dentry *tmp = NULL, *parent = NULL;
-	u8 slot_num = NM_MAX_NODES;
-	struct list_head *iter;
-	char tmpname[6];
-	char *err;
+	kobject_set_name(&node->nd_kobj, name);
+	node->nd_kobj.ktype = &nm_node_type.ktype;
+	kobject_init(&node->nd_kobj);
 
-	*child = NULL;
-	parent = NULL;
-	if (list_empty(&group->i_dentry))
-		goto leave;
-	parent = dget(list_entry(group->i_dentry.next, struct dentry, d_alias));
-	if (!parent)
-		goto leave;
-		
-	spin_lock(&dcache_lock);
-	list_for_each(iter, &parent->d_subdirs) {
-		tmp = list_entry(iter, struct dentry, d_child);
-		if (tmp->d_inode == inode)
-			break;
-		tmp = NULL;
-	}
-	if (tmp)
-		dget_locked(tmp);
-	spin_unlock(&dcache_lock);
+	ret = &node->nd_kobj;
 
-	if (!tmp || tmp->d_name.len > 3)
-		goto leave;
-	strncpy(tmpname, tmp->d_name.name, tmp->d_name.len);
-	tmpname[tmp->d_name.len] = '\0';
-	err=NULL;
-	slot_num = simple_strtoul(tmpname, &err, 10);
-	
-	if (*err != '\0')
-		slot_num = NM_MAX_NODES;  // error
-	else
-		*child = dget(tmp);  // done, get extra ref for child
-		
-leave:
-	dput(parent);
-	dput(tmp);
+out:
+	if (ret == NULL)
+		kfree(node);
 
-	return slot_num;
+	return ret;
 }
 
-int nm_register_callback(int type, void (*func)(void *, void *, u8))
+static void nm_node_set_drop_object(struct kset *kset, struct kobject *kobj)
 {
-	nm_callback_func *f;
+	struct nm_node_set *ns = to_nm_node_set(kset);
 
-	if (type < NM_NODE_ADD_CB || type > NM_GROUP_NODE_DEL_CB)
-		return -EINVAL;
-	f = kmalloc(sizeof(nm_callback_func), GFP_KERNEL);
-	if (f == NULL)
-		return -ENOMEM;
-	memset(f, 0, sizeof(nm_callback_func));
-	f->func = func;
-	down(&nm_cb_sem);
-	list_add_tail(&f->list, &nm_callbacks[type]);
-	up(&nm_cb_sem);
-	return 0;
+	printk("dropping nm_node_set %p\n", ns);
+	kobject_put(kobj);
 }
-EXPORT_SYMBOL(nm_register_callback);
 
-int nm_unregister_callback(int type, void (*func)(void *, void *, u8))
-{
-	struct list_head *iter, *tmpiter;
-	int ret = -EINVAL;
-	nm_callback_func *f;
+static struct ukobj_type nm_node_set_type = {
+	.ktype = {
+		.sysfs_ops	= NULL, /* no attributes */
+	},
+	.make_object	= nm_node_set_make_object,
+	.drop_object	= nm_node_set_drop_object,
+	.owner		= THIS_MODULE,
+};
 
-	if (type < NM_NODE_ADD_CB || type > NM_GROUP_NODE_DEL_CB)
-		return ret;
+static struct nm_node_set nm_node_set = {
+	.ns_kset = {
+		.kobj = {
+			.name = "cluster", /* NOTE(review): same name as nm_cluster_set; presumably meant "node" - confirm */
+			.ktype = &nm_node_set_type.ktype,
+		},
+	},
+};
 
-	down(&nm_cb_sem);
-	list_for_each_safe(iter, tmpiter, &nm_callbacks[type]) {
-		f = list_entry (iter, nm_callback_func, list);
-		if (f->func == func) {
-			list_del(&f->list);
-			kfree(f);
-			ret = 0;
-			break;
-		}
-	}
-	up(&nm_cb_sem);
-	return ret;
-}
-EXPORT_SYMBOL(nm_unregister_callback);
+/* cluster */
 
-static void nm_do_callbacks(int type, void *ptr1, void *ptr2, u8 idx)
+
+static void nm_cluster_release(struct kobject *kobj)
 {
-	struct list_head *iter;
-	nm_callback_func *f;
-	
-	down(&nm_cb_sem);
-	list_for_each(iter, &nm_callbacks[type]) {
-		f = list_entry (iter, nm_callback_func, list);
-		(f->func) (ptr1, ptr2, idx);
-	}
-	up(&nm_cb_sem);
+	struct nm_cluster *cluster = to_nm_cluster(kobj);
+	printk("releasing cluster %p\n", cluster);
 }
 
+static struct ukobj_type nm_cluster_type = {
+	.ktype	= {
+		.release	= nm_cluster_release,
+		.sysfs_ops	= NULL, /* no attributes */
+	},
+	.owner	= THIS_MODULE,
+};
 
-static void nm_teardown(void)
-{
-	remove_proc_entry("cluster/nm", NULL);
-	remove_proc_entry("cluster", NULL);
-}
+/* cluster set */
 
-static void nm_init_cluster(nm_cluster *cluster)
+struct nm_cluster_set {
+	struct kset cs_kset;
+	/* some stuff? */
+};
+
+static struct nm_cluster_set *to_nm_cluster_set(struct kset *kset)
 {
-	int i;
-	memset(cluster, 0, sizeof(nm_cluster));
-	cluster->state = NM_CLUSTER_DOWN;
-	spin_lock_init(&cluster->bitmap_lock);
-	
-	for (i=NM_NODE_ADD_CB; i<=NM_GROUP_NODE_DEL_CB; i++)
-		INIT_LIST_HEAD(&nm_callbacks[i]);
+	return kset ? container_of(kset, struct nm_cluster_set, cs_kset) : NULL;
 }
 
+static struct kset *nm_cluster_default_sets[] = {	/* NULL-terminated */
+	&nm_node_set.ns_kset,		/* the kset inside the wrapper struct */
+	&hb_heartbeat_set.hs_kset,	/* same object init_nm() initialises */
+	NULL,
+};
 
+static struct kobject *nm_cluster_set_make_object(struct kset *kset,
+					      const char *name)
+{
+	struct nm_cluster *cluster;	/* zeroed allocation, NULL on failure */
 
+	printk("trying to make a cluster object\n");
 
+	cluster = kcalloc(1, sizeof(struct nm_cluster), GFP_KERNEL);
+	if (!cluster)
+		return NULL;
 
-/*----------------------------------------------------------------------------*/
-/*
- *	populating the filesystem.
- */
-static int nm_fill_super(struct super_block * sb, void * data, int silent)
-{
-	int ret, sz;
-	struct TA_write_ops *ops;
-	static struct tree_descr nm_files[] = {
-		[NM_Cluster] = {".cluster", &transaction_ops, S_IWUSR},
-		[NM_Node] = {".node", &transaction_ops, S_IWUSR},
-		[NM_Group] = {".group", &transaction_ops, S_IWUSR},
-		/* last one */ {""}
-	};
-	
-	sz = sizeof(nm_files) / sizeof(struct tree_descr);
-	ops = kmalloc(sizeof(struct TA_write_ops) +
-		      (sz * sizeof(ops->write_op[0])), 
-		      GFP_KERNEL);
-	if (!ops)
-		return -ENOMEM;
+	spin_lock_init(&cluster->cl_bitmap_lock); /* field is cl_-prefixed */
 
-	ops->num_ops = sz;
-	ops->write_op[NM_Cluster] = write_cluster;
-	ops->write_op[NM_Node] = write_node;
-	ops->write_op[NM_Group] = write_group;
+	kobject_set_name(&cluster->cl_kobj, name);
+	cluster->cl_kobj.ktype = &nm_cluster_type.ktype;
+	kobject_init(&cluster->cl_kobj);
 
-	single_sb = NULL;
-	nmprintk("calling simple_fill_super...\n");
-	ret = simple_fill_super(sb, 0x98675309, nm_files);
-	if (ret >= 0) {
-		TA_GENERIC_SB_MEMBER(sb) = ops;
-		single_sb = sb;
-	} else {
-		kfree(ops);
-	}
-	return ret;
+	return &cluster->cl_kobj;	/* the embedded kobject just initialised */
 }
 
-/* blindly copied from ocfs2 */
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
-static struct super_block *nm_get_sb(struct file_system_type *fs_type,
-				     int flags,
-				     const char *dev_name,
-				     void *data)
+static void nm_cluster_set_drop_object(struct kset *kset, struct kobject *kobj)
 {
-	return get_sb_single(fs_type, flags, data, nm_fill_super);
+	struct nm_cluster_set *cs = to_nm_cluster_set(kset);
+
+	printk("dropping nm_cluster_set %p\n", cs);
+	kobject_put(kobj);
 }
 
-static struct file_system_type nm_fs_type = {
-	.owner          = THIS_MODULE,
-	.name           = "nm",
-	.get_sb         = nm_get_sb,
-	.kill_sb        = kill_anon_super,
+static struct ukobj_type nm_cluster_set_type = {
+	.ktype = {
+		.sysfs_ops	= NULL, /* no attributes */
+	},
+	.make_object	= nm_cluster_set_make_object,
+	.drop_object	= nm_cluster_set_drop_object,
+	.default_sets	= nm_cluster_default_sets,
+	.owner	= THIS_MODULE,
 };
-#else
-static struct super_block *nm_read_super(struct super_block *sb,
-					 void *data,
-					 int silent)
+
+static struct nm_cluster_set nm_cluster_set = {
+	.cs_kset = {
+		.kobj = {
+			.name = "cluster",
+			.ktype = &nm_cluster_set_type.ktype,
+		},
+	},
+};
+
+static void __exit exit_nm(void)
 {
-	nmprintk("welcome to nm_read_super!!!\n");
-	return (nm_fill_super(sb, data, silent) < 0) ? NULL : sb;
+	nmprintk("unloading nm module\n");
+	usysfs_unregister_subsystem(&nm_cluster_set.cs_kset);
+	nm_destroy_ip_hash();
 }
 
-static DECLARE_FSTYPE (nm_fs_type, "nm", nm_read_super, FS_SINGLE|FS_LITTER);
-#endif
+static void kset_init(struct kset *k)
+{
+	kobject_init(&k->kobj);
+	INIT_LIST_HEAD(&k->list);
+}
 
 static int __init init_nm(void)
 {
-	int retval;
-	nm_nodename = kmalloc(strlen(system_utsname.nodename) + 1, GFP_KERNEL);
-	if (nm_nodename==NULL) {
-		nmprintk("could not allocate a few bytes for nodename!\n");
-		return -ENOMEM;
+	int ret;
+	unsigned i;
+	/* '<', not '<=': index ARRAY_SIZE() is one past the last list head */
+	for (i = 0; i < ARRAY_SIZE(nm_callbacks); i++)
+		INIT_LIST_HEAD(&nm_callbacks[i]);
+
+	kset_init(&nm_cluster_set.cs_kset);
+	kset_init(&nm_node_set.ns_kset);
+	kset_init(&hb_heartbeat_set.hs_kset);
+	ret = usysfs_register_subsystem(&nm_cluster_set.cs_kset);
+	if (ret) {
+		printk(KERN_ERR "nodemanager: Registration returned %d\n", ret);
+		goto out;
 	}
-	strcpy(nm_nodename, system_utsname.nodename);
-	nmprintk("loading nm module: nodename is %s\n", nm_nodename);
 
-	if (nm_init_ip_hash() < 0) {
-		nmprintk("failed to allocate node IP hash\n");
-		return -ENOMEM;
+	ret = nm_init_ip_hash();
+	if (ret) {
+		printk(KERN_ERR "nodemanager: IP hash initialization "
+			"returned %d\n", ret);
+		goto out;
 	}
 
-	nm_init_cluster(&cluster);
+out:
+	if (ret)
+		exit_nm();
 
-	if (proc_mkdir("cluster", 0)) {
-		if (proc_mkdir("cluster/nm", 0)) {
-		}
-	}
-	nmprintk("calling register_filesystem\n");
-	retval = register_filesystem(&nm_fs_type);
-	nmprintk("done calling register_filesystem: ret=%d\n", retval);
-	if (retval)
-		nm_teardown();
-	return retval;
+	return ret;
 }
 
-static void __exit exit_nm(void)
-{
-	nm_teardown();
-	unregister_filesystem(&nm_fs_type);
-	nm_destroy_ip_hash();
-	kfree(nm_nodename);
-	nmprintk("unloading nm module\n");
-}
-
-
-
-
 MODULE_LICENSE("GPL");
 module_init(init_nm)
 module_exit(exit_nm)

Modified: branches/usysfsify/fs/ocfs2/cluster/nodemanager.h
===================================================================
--- branches/usysfsify/fs/ocfs2/cluster/nodemanager.h	2005-03-15 22:35:08 UTC (rev 1978)
+++ branches/usysfsify/fs/ocfs2/cluster/nodemanager.h	2005-03-15 23:35:32 UTC (rev 1979)
@@ -32,51 +32,13 @@
 
 #include "ocfs2_nodemanager.h"
 
-
-
-struct _nm_ctxt
-{
-	int dummy;
+struct nm_cluster {
+	struct kobject	cl_kobj;
+	char		cl_name[NM_MAX_NAME_LEN+1];
+	spinlock_t	cl_bitmap_lock;
+	unsigned long	cl_node_bitmap[BITS_TO_LONGS(NM_MAX_NODES)];
 };
 
-enum {
-	NM_CLUSTER_DOWN=0,
-	NM_CLUSTER_UP
-};
-
-enum {
-	NM_GROUP_NOT_READY=0,
-	NM_GROUP_READY
-};
-
-enum {
-	NM_Root = 1,
-	NM_Cluster,
-	NM_Node,
-	NM_Group,
-};
-
-typedef struct _nm_cluster
-{
-	char name[NM_MAX_NAME_LEN+1];
-	int state;
-	spinlock_t bitmap_lock;
-	unsigned long group_bitmap[BITS_TO_LONGS(NM_MAX_NODES)];
-	unsigned long node_bitmap[BITS_TO_LONGS(NM_MAX_NODES)];
-} nm_cluster;
-
-
-typedef struct _nm_group_inode_private
-{
-	struct inode *inode;
-	struct list_head net_list;
-	struct list_head disk_list;
-	cluster_disk disk;
-	int state;
-	spinlock_t bitmap_lock;
-	unsigned long slot_bitmap[BITS_TO_LONGS(NM_MAX_NODES)];
-} nm_group_inode_private;
-
 /* TODO: move this */
 struct sock;
 typedef struct _net_inode_private
@@ -102,24 +64,28 @@
 	void			(*orig_data_ready)(struct sock *sk, int bytes);
 } net_inode_private;
 
-typedef struct _nm_node_inode_private
-{
-	struct inode *inode;
-	nm_node_info node;
-	struct list_head ip_hash;
-	net_inode_private net;
-} nm_node_inode_private;
+/* XXX this is now different from the userspace stuff, but that's ok,
+ * because they're not used by an ioctl interface anymore.  userspace
+ * should move to something more natural. */
+struct nm_node {
+	struct kobject		nd_kobj; 
+	char			nd_name[NM_MAX_NAME_LEN+1]; /* replace? */
+	__u8			nd_num;
+	/* only one address per node, as attributes, for now.  both
+	 * in network order */
+	__u32			nd_ipv4_address;
+	__u16			nd_ipv4_port;
 
+	/* we're making simple assertions that a node can only have one network
+	 * identity and report at one place in a heartbeat */
+	net_inode_private	nd_net_inode_private;
+};
 
 /* callback stuff */
 
 enum {
 	NM_NODE_ADD_CB = 0,
 	NM_NODE_DEL_CB,
-	NM_GROUP_ADD_CB,
-	NM_GROUP_DEL_CB,
-	NM_GROUP_NODE_ADD_CB,
-	NM_GROUP_NODE_DEL_CB,
 	NM_NUM_CB
 };
 
@@ -129,67 +95,15 @@
 {
 	struct list_head list;
 	nm_cb_func *func;
-	//void (*func)(void *, void *, u8);
 } nm_callback_func;
 
 
+u8 nm_this_node(void);
+int nm_register_callback(unsigned int type, void (*func)(void *, void *, u8));
+int nm_unregister_callback(unsigned int type, void (*func)(void *, void *, u8));
 
+struct nm_node * nm_get_node_by_num(u8 node_num);
+struct nm_node * nm_get_node_by_ip(u32 addr);
+void nm_node_put(struct nm_node *node);
 
-u8 nm_this_node(struct inode *group);
-nm_cluster * nm_get_cluster(void);
-int nm_register_callback(int type, void (*func)(void *, void *, u8));
-int nm_unregister_callback(int type, void (*func)(void *, void *, u8));
-int nm_get_group_num_nodes(struct inode *group);
-int nm_make_group_ready(struct inode *group);
-void * nm_iterate_group_disk_slots(struct inode *group, int *idx);
-int nm_remove_node_from_group(struct inode *group, struct inode *node);
-int nm_create_group(char *buf, nm_op *data);
-int nm_add_node_to_group(char *buf, nm_op *data);
-
-extern char *nm_nodename;
-
-
-struct inode * nm_get_group_by_num(u8 group_num);
-struct inode * nm_get_node_by_num(u8 node_num);
-struct inode * __nm_get_node_by_name(const char *node_name, int dir);
-struct inode * nm_get_node_by_ip(u32 addr);
-struct inode * nm_get_group_node_by_index(struct inode *group, u8 index);
-
-static inline struct inode * nm_get_node_by_name(char *node_name)
-{
-	return __nm_get_node_by_name(node_name, 0);
-}
-static inline struct inode * nm_get_group_by_name(const char *group_name)
-{
-	return __nm_get_node_by_name(group_name, 1);
-}
-
-
-static inline int nm_get_node_global_index(struct inode *node)
-{
-	return (node->i_ino - NM_NODE_INODE_START);
-}
-static inline int nm_get_group_global_index(struct inode *group)
-{
-	return (group->i_ino - NM_GROUP_INODE_START);
-}
-
-static inline int nm_valid_ino(int ino)
-{
-#if 0
-	// these should never be referred to in kernel
-	if (ino >= NM_Cluster && ino <= NM_Group)
-		return 1;
-#endif
-	if (ino >= NM_NODE_INODE_START &&
-	    ino < NM_NODE_INODE_START + NM_MAX_NODES)
-		return 1;
-	if (ino >= NM_GROUP_INODE_START &&
-	    ino < NM_GROUP_INODE_START + NM_MAX_NODES)
-		return 1;
-	return 0;
-}
-
-
-	
 #endif /* CLUSTER_NODEMANAGER_H */

Modified: branches/usysfsify/fs/ocfs2/cluster/ocfs2_nodemanager.h
===================================================================
--- branches/usysfsify/fs/ocfs2/cluster/ocfs2_nodemanager.h	2005-03-15 22:35:08 UTC (rev 1978)
+++ branches/usysfsify/fs/ocfs2/cluster/ocfs2_nodemanager.h	2005-03-15 23:35:32 UTC (rev 1979)
@@ -43,29 +43,6 @@
 #define NM_GROUP_INODE_START    200000
 #define NM_NODE_INODE_START     100000
 
-
-
-typedef struct _nm_network_iface
-{
-	__u16 ip_port;			/* for simplicity, just define exactly one port for this if */
-	__u16 ip_version;
-	union {
-		__u32 ip_addr4;		/* IPv4 address in NBO */
-		__u32 ip_addr6[4];	/* IPv6 address in NBO */
-	} addr_u;
-} nm_network_iface;
-
-typedef struct _nm_node_info 
-{
-	__u8  node_num;
-	__u8  pad1;
-	__u16 pad2;
-	__u32 pad3;
-	char node_name[NM_MAX_NAME_LEN+1];
-	char pad4[63];
-	nm_network_iface ifaces[NM_MAX_IFACES];
-} nm_node_info;
-
 /* transaction file nm_op stuff */
 
 #define NM_OP_MAGIC      0xbeaf
@@ -101,7 +78,7 @@
 	union {
 		__u8 index;
 		char name[NM_MAX_NAME_LEN+1];
-		nm_node_info node;
+//		nm_node_info node;
 		nm_group_change gc;
 	} arg_u;
 } nm_op;

Modified: branches/usysfsify/fs/ocfs2/cluster/tcp.c
===================================================================
--- branches/usysfsify/fs/ocfs2/cluster/tcp.c	2005-03-15 22:35:08 UTC (rev 1978)
+++ branches/usysfsify/fs/ocfs2/cluster/tcp.c	2005-03-15 23:35:32 UTC (rev 1979)
@@ -175,7 +175,6 @@
 /////////////////////
 static void net_shutdown(void);
 static int net_startup(void);
-static int __init net_driver_entry (void);
 static int net_init_driver(void);
 static void __exit net_driver_exit (void);
 static int net_add_handler(net_msg_handler *nmh);
@@ -187,8 +186,7 @@
 static int net_process_message(struct socket *sock, net_msg *hdr);
 static int net_ioctl (struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg);
 
-static int net_sock_addref_or_connect(struct inode *inode,
-				      struct socket **sock_ret);
+static int net_sock_addref_or_connect(u8 node_num, struct socket **sock_ret);
 static void net_sock_decref(struct inode *inode, int error);
 
 //////////////////////
@@ -218,12 +216,6 @@
 u32 net_driver_state = NET_DRIVER_UNINITED;
 u32 net_num_dispatched = 0;
 
-
-/*
- * net_driver_entry()
- *
- * Driver entry point. Called on insmod.
- */
 static int __init net_driver_entry (void)
 {
 	struct proc_dir_entry *de;
@@ -613,7 +605,7 @@
 }
 
 int net_send_message_iov(u32 msg_type, u32 key, struct iovec *caller_iov,
-			 size_t caller_iovlen, struct inode *inode,
+			 size_t caller_iovlen, u8 target_node,
 			 int *status)
 {
 	int ret;
@@ -627,11 +619,6 @@
 
 	BUG_ON(current == net_recv_task);
 
-	if (!inode || !inode->u.generic_ip) {
-		netprintk0("bad inode, cannot send message\n");
-		ret = -EINVAL;
-		goto out;
-	}
 	if (caller_iovlen == 0) {
 		netprintk0("bad iovec array length\n");
 		ret = -EINVAL;
@@ -647,7 +634,7 @@
 		goto out;
 	}
 
-	ret = net_sock_addref_or_connect(inode, &sock);
+	ret = net_sock_addref_or_connect(target_node, &sock);
 	if (ret)
 		goto out;
 
@@ -754,13 +741,14 @@
  *   - status will not be set on return code != 0
  */
 int net_send_message(u32 msg_type, u32 key, void *data, u32 len,
-		     struct inode *inode, int *status)
+		     u8 target_node, int *status)
 {
 	struct iovec iov = {
 		.iov_base = data,
 		.iov_len = len,
 	};
-	return net_send_message_iov(msg_type, key, &iov, 1, inode, status);
+	return net_send_message_iov(msg_type, key, &iov, 1,
+				    target_node, status);
 }
 EXPORT_SYMBOL(net_send_message);
 
@@ -1436,24 +1424,23 @@
 	return empty;
 }
 
-static int net_sock_addref_or_connect(struct inode *inode,
-				      struct socket **sock_ret)
+static int net_sock_addref_or_connect(u8 target_node, struct socket **sock_ret)
 {
 	nm_node_inode_private *priv;
-	nm_node_info *node;
+	struct nm_node *node = NULL;
 	net_inode_private *net = NULL;
 	struct socket *sock = NULL;
 	int ret = 0, wait = 0, set_pending = 0;
 	struct waiting_for_sock wfs;
 
-	priv = inode->u.generic_ip;
-	if (!priv) {
-		netprintk("bad inode %p\n", inode);
+	/* XXX think about passing refs around.. */
+	node = get_node_by_num(target_node);
+	if (node == NULL) {
+		netprintk("node %u unknown\n", target_node);
 		ret = -EINVAL;
 		goto out;
 	}
-	net = &priv->net;
-	node = &priv->node;
+	net = &node->nd_net_inode_private;
 
 	spin_lock_bh(&net->sock_lock); 
 	if (net->sock && !net->sock_pending) {
@@ -1513,6 +1500,8 @@
 	}
 	if (sock)
 		*sock_ret = sock;
+	if (node)
+		nm_node_put(node);
 
 	BUG_ON(ret == 0 && sock == NULL);
 	netprintk("addref for net %p gave %d\n", net, ret);
@@ -1624,7 +1613,3 @@
 	BUG_ON(sock == NULL);
 	return sock;
 }
-
-MODULE_LICENSE("GPL");
-module_init (net_driver_entry);
-module_exit (net_driver_exit);

Modified: branches/usysfsify/fs/ocfs2/cluster/tcp.h
===================================================================
--- branches/usysfsify/fs/ocfs2/cluster/tcp.h	2005-03-15 22:35:08 UTC (rev 1978)
+++ branches/usysfsify/fs/ocfs2/cluster/tcp.h	2005-03-15 23:35:32 UTC (rev 1979)
@@ -213,7 +213,8 @@
 int net_register_handler(u32 msg_type, u32 key, int flags, 
 			 u32 max_len, net_msg_handler_func *func, void *data);
 int net_init_tcp_sock(struct inode *inode);
-int net_send_message(u32 msg_type, u32 key, void *data, u32 len, struct inode *inode, int *status);
+int net_send_message(u32 msg_type, u32 key, void *data, u32 len,
+		     u8 target_node, int *status);
 int net_send_message_iov(u32 msg_type, u32 key, struct iovec *iov,
 			 size_t iovlen, struct inode *inode, int *status);
 int net_broadcast_message(u32 msg_type, u32 key, void *data, u32 len, struct inode *group);

Modified: branches/usysfsify/fs/ocfs2/dlm/dlmmod.c
===================================================================
--- branches/usysfsify/fs/ocfs2/dlm/dlmmod.c	2005-03-15 22:35:08 UTC (rev 1978)
+++ branches/usysfsify/fs/ocfs2/dlm/dlmmod.c	2005-03-15 23:35:32 UTC (rev 1979)
@@ -1285,17 +1285,11 @@
 }
 
 static dlm_ctxt *dlm_alloc_ctxt(const char *domain,
-				struct inode *group,
 				u32 key)
 {
 	int i;
 	dlm_ctxt *dlm = NULL;
 
-	/* if for some reason we can't get a reference on the group
-	 * inode (required) then don't even try the rest. */
-	if (!igrab(group))
-		goto leave;
-
 	dlm = kmalloc(sizeof(dlm_ctxt), GFP_KERNEL);
 	if (!dlm) {
 		dlmprintk0("could not allocate dlm_ctxt\n");
@@ -1341,10 +1335,6 @@
 	INIT_LIST_HEAD(&dlm->mle_hb_events);
 	init_rwsem(&dlm->recovery_sem);
 
-	/* this eats the reference we got above. */
-	dlm->group = group;
-	dlm->group_index = nm_this_node(group);
-
 	dlm->joining_node = DLM_LOCK_RES_OWNER_UNKNOWN;
 
 	dlm->reco.new_master = NM_INVALID_SLOT_NUM;
@@ -1369,26 +1359,17 @@
  * dlm_register_domain: one-time setup per "domain"
  */
 dlm_ctxt * dlm_register_domain(const char *domain,
-			       const char *group_name,
 			       u32 key)
 {
 	int ret;
 	dlm_ctxt *dlm = NULL;
 	dlm_ctxt *new_ctxt = NULL;
-	struct inode *group = NULL;
 
 	if (strlen(domain) > NM_MAX_NAME_LEN) {
 		dlmprintk0("domain name length too long\n");
 		goto leave;
 	}
 
-	group = nm_get_group_by_name(group_name);
-	if (!group) {
-		dlmprintk("no nm group %s for domain %s!\n", 
-			  group_name, domain);
-		goto leave;
-	}
-
 	dlmprintk("register called for domain \"%s\"\n", domain);
 
 retry:
@@ -1421,7 +1402,7 @@
 	if (!new_ctxt) {
 		spin_unlock(&dlm_domain_lock);
 
-		new_ctxt = dlm_alloc_ctxt(domain, group, key);
+		new_ctxt = dlm_alloc_ctxt(domain, key);
 		if (new_ctxt)
 			goto retry;
 		goto leave;
@@ -1446,9 +1427,6 @@
 	if (new_ctxt)
 		dlm_free_ctxt_mem(new_ctxt);
 
-	if (group)
-		iput(group);
-
 	return dlm;
 }
 EXPORT_SYMBOL(dlm_register_domain);

Modified: branches/usysfsify/fs/ocfs2/dlm/dlmmod.h
===================================================================
--- branches/usysfsify/fs/ocfs2/dlm/dlmmod.h	2005-03-15 22:35:08 UTC (rev 1978)
+++ branches/usysfsify/fs/ocfs2/dlm/dlmmod.h	2005-03-15 23:35:32 UTC (rev 1979)
@@ -212,9 +212,7 @@
 	spinlock_t spinlock;
 	struct rw_semaphore recovery_sem;
 	char *name;
-	struct inode *group;
 	u32 key;
-	u8  group_index;
 	u8  joining_node;
 	unsigned long live_nodes_map[BITS_TO_LONGS(NM_MAX_NODES)];
 	unsigned long domain_map[BITS_TO_LONGS(NM_MAX_NODES)];
@@ -691,7 +689,6 @@
 }
 
 dlm_ctxt * dlm_register_domain(const char *domain,
-			       const char *group_name,
 			       u32 key);
 void dlm_unregister_domain(dlm_ctxt *dlm);
 void dlm_get(dlm_ctxt *dlm);
@@ -755,8 +752,8 @@
 
 int dlm_nm_init(dlm_ctxt *dlm);
 int dlm_heartbeat_init(dlm_ctxt *dlm);
-void dlm_hb_node_down_cb(struct inode *group, struct inode *node, int idx, void *data);
-void dlm_hb_node_up_cb(struct inode *group, struct inode *node, int idx, void *data);
+void dlm_hb_node_down_cb(struct nm_node *node, int idx, void *data);
+void dlm_hb_node_up_cb(struct nm_node *node, int idx, void *data);
 int dlm_hb_node_dead(dlm_ctxt *dlm, int node);
 int __dlm_hb_node_dead(dlm_ctxt *dlm, int node);
 
@@ -778,9 +775,9 @@
 void dlm_init_lock(dlm_lock *newlock, int type, u8 node, u64 cookie);
 
 void dlm_mle_node_down(dlm_ctxt *dlm, dlm_master_list_entry *mle,
-		       struct inode *group, struct inode *node, int idx);
+		       struct nm_node *node, int idx);
 void dlm_mle_node_up(dlm_ctxt *dlm, dlm_master_list_entry *mle,
-		       struct inode *group, struct inode *node, int idx);
+		       struct nm_node *node, int idx);
 int dlm_do_assert_master(dlm_ctxt *dlm, const char *lockname, 
 			 unsigned int namelen, void *nodemap);
 

Modified: branches/usysfsify/fs/ocfs2/dlmglue.c
===================================================================
--- branches/usysfsify/fs/ocfs2/dlmglue.c	2005-03-15 22:35:08 UTC (rev 1978)
+++ branches/usysfsify/fs/ocfs2/dlmglue.c	2005-03-15 23:35:32 UTC (rev 1979)
@@ -1617,10 +1617,10 @@
 
 	/* used by the dlm code to make message headers unique, each
 	 * node in this domain must agree on this. */
-	dlm_key = crc32(0, osb->group_name, strlen(osb->group_name));
+	dlm_key = crc32(0, osb->uuid, strlen(osb->uuid));
 
-	/* for now, group_name == domain */
-	dlm = dlm_register_domain(osb->group_name, osb->group_name, dlm_key);
+	/* for now, uuid == domain */
+	dlm = dlm_register_domain(osb->uuid, dlm_key);
 	if (!dlm) {
 		/* This is a best guess on return value... */
 		status = -ENOMEM;

Modified: branches/usysfsify/fs/ocfs2/heartbeat.c
===================================================================
--- branches/usysfsify/fs/ocfs2/heartbeat.c	2005-03-15 22:35:08 UTC (rev 1978)
+++ branches/usysfsify/fs/ocfs2/heartbeat.c	2005-03-15 23:35:32 UTC (rev 1979)
@@ -34,6 +34,7 @@
 #include <cluster/util.h>
 #include <cluster/clcommon.h>
 #include <cluster/heartbeat.h>
+#include <cluster/nodemanager.h>
 #include <dlm/dlmcommon.h>
 
 #include "ocfs_log.h"
@@ -53,6 +54,7 @@
 #define OCFS2_HB_NODE_DOWN_PRI     (0x0000002)
 #define OCFS2_HB_NODE_UP_PRI	   OCFS2_HB_NODE_DOWN_PRI
 
+#if 0
 static void ocfs2_hb_node_down_cb(struct inode *group,
 				  struct inode *node,
 				  int node_num,
@@ -61,6 +63,7 @@
 				struct inode *node,
 				int node_num,
 				void *data);
+#endif
 
 static inline void __ocfs_node_map_set_bit(ocfs_node_map *map,
 					   int bit);
@@ -79,16 +82,12 @@
 	ocfs_node_map_init(&osb->umount_map);
 }
 
-static void ocfs2_hb_node_down_cb(struct inode *group,
-				  struct inode *node,
+static void ocfs2_hb_node_down_cb(struct nm_node *node,
 				  int node_num,
 				  void *data)
 {
 	ocfs_super *osb = data;
 
-	if (osb->group_inode != group)
-		return;
-
 	OCFS_ASSERT(osb->node_num != node_num);
 
 	printk("ocfs2: node down event for %d\n", node_num);
@@ -104,16 +103,12 @@
 	ocfs_recovery_thread(osb, node_num);
 }
 
-static void ocfs2_hb_node_up_cb(struct inode *group,
-				struct inode *node,
+static void ocfs2_hb_node_up_cb(struct nm_node *node,
 				int node_num,
 				void *data)
 {
 	ocfs_super *osb = data;
 
-	if (osb->group_inode != group)
-		return;
-
 	OCFS_ASSERT(osb->node_num != node_num);
 
 	printk("ocfs2: node up event for %d\n", node_num);

Modified: branches/usysfsify/fs/ocfs2/ocfs.h
===================================================================
--- branches/usysfsify/fs/ocfs2/ocfs.h	2005-03-15 22:35:08 UTC (rev 1978)
+++ branches/usysfsify/fs/ocfs2/ocfs.h	2005-03-15 23:35:32 UTC (rev 1979)
@@ -373,8 +373,6 @@
 	ocfs_alloc_stats alloc_stats;
 	char dev_str[20];		/* "major,minor" of the device */
 
-	char *group_name;
-	struct inode *group_inode;
 	dlm_ctxt *dlm;
 	ocfs2_lock_res super_lockres;
 

Modified: branches/usysfsify/fs/ocfs2/super.c
===================================================================
--- branches/usysfsify/fs/ocfs2/super.c	2005-03-15 22:35:08 UTC (rev 1978)
+++ branches/usysfsify/fs/ocfs2/super.c	2005-03-15 23:35:32 UTC (rev 1979)
@@ -129,12 +129,12 @@
 #endif /* Linux 2.4 stuff */
 
 
-static int ocfs_parse_options (char *options, u32 * uid, u32 * gid, int * reclaim_id, char **group_name);
+static int ocfs_parse_options (char *options, u32 * uid, u32 * gid, int * reclaim_id);
 static int __init ocfs_driver_entry (void);
 static void __exit ocfs_driver_exit (void);
 static void ocfs_put_super (struct super_block *sb);
 static int ocfs_mount_volume (struct super_block *sb, int reclaim_id,
-			      char **group_name, struct inode *root);
+			      struct inode *root);
 static void ocfs_dismount_volume(struct super_block *sb);
 static int ocfs_initialize_mem_lists (void);
 static void ocfs_free_mem_lists (void);
@@ -151,7 +151,7 @@
 static int ocfs_init_global_system_inodes(ocfs_super *osb);
 static int ocfs_init_local_system_inodes(ocfs_super *osb);
 static int ocfs_release_system_inodes(ocfs_super *osb);
-static int ocfs2_fill_local_node_info(ocfs_super *osb, char **group_name);
+static int ocfs2_fill_local_node_info(ocfs_super *osb);
 static int ocfs2_complete_mount_recovery(ocfs_super *osb);
 static int ocfs_check_volume(ocfs_super * osb);
 static int ocfs_verify_volume(ocfs2_dinode *di, struct buffer_head *bh,
@@ -316,12 +316,11 @@
 	u32 uid = current->fsuid;
 	u32 gid = current->fsgid;
 	int reclaim_id;
-	char *group_name = NULL;
 	ocfs_super *osb = NULL;
 
 	LOG_ENTRY_ARGS ("%p, %p, %i", sb, data, silent);
 
-	if (ocfs_parse_options (data, &uid, &gid, &reclaim_id, &group_name) != 0) {
+	if (ocfs_parse_options (data, &uid, &gid, &reclaim_id) != 0) {
 		status = -EINVAL;
 		LOG_ERROR_STR ("ocfs_read_super: bad mount option");
 		goto read_super_error;
@@ -334,7 +333,7 @@
 	/* this is needed to support O_LARGE_FILE */
 	sb->s_maxbytes = OCFS_LINUX_MAX_FILE_SIZE;
 
-	status = ocfs_mount_volume (sb, reclaim_id, &group_name, NULL);
+	status = ocfs_mount_volume (sb, reclaim_id, NULL);
 	if (status < 0)
 		goto read_super_error;
 
@@ -376,9 +375,6 @@
 			LOG_EXIT_STATUS(status);
 	}
 
-	if (group_name)
-		kfree(group_name);
-
 	LOG_EXIT_STATUS(status);
 	return status;		
 
@@ -391,9 +387,6 @@
 	if (inode)
 		iput (inode);
 
-	if (group_name)
-		kfree(group_name);
-
 	LOG_EXIT_STATUS(status);
 	return status;
 }
@@ -430,12 +423,11 @@
  *
  * e.g., gid=9999,uid=9999,[no]cache,reclaimid
  */
-static int ocfs_parse_options (char *options, u32 * uid, u32 * gid, int * reclaim_id, char **group_name)
+static int ocfs_parse_options (char *options, u32 * uid, u32 * gid, int * reclaim_id)
 {
 	char *c;
 	char *value;
 	int ret = 1;
-	int size;
 
 	LOG_ENTRY ();
 	
@@ -476,24 +468,6 @@
 			}
 		} else if (!strcmp (c, "reclaimid")) {
 			*reclaim_id = 1;
-		} else if (!strcmp(c, "group")) {
-			if (!value || !*value) {
-				LOG_ERROR_STR
-					("group option requires an argument");
-				goto bail;
-			}
-			LOG_TRACE_ARGS("group name passed = %s\n", value);
-
-			size = strlen(value) + 1;
-			*group_name = kmalloc(size, GFP_KERNEL);
-			if (!(*group_name)) {
-				LOG_ERROR_STATUS(-ENOMEM);
-				goto bail;
-			}
-			memset(*group_name, 0, size);
-			printk("ocfs2: group name passed = %s, size = %d\n",
-			       value, size);
-			strcpy(*group_name, value);
 		} else {
 			LOG_ERROR_ARGS ("Invalid mount option: %s", c);
 			goto bail;
@@ -844,44 +818,24 @@
 	return 0;
 }
 
-static int ocfs2_fill_local_node_info(ocfs_super *osb, char **group_name)
+/* ocfs2 1.0 only allows one cluster and node identity per kernel image. */
+static int ocfs2_fill_local_node_info(ocfs_super *osb)
 {
-	int status, i;
-	struct inode *group = NULL;
-	char *p;
+	int status;
 
-	if (group_name) {
-		osb->group_name = *group_name;
-		*group_name = NULL;
-	} else {
-		osb->group_name = kmalloc(NM_MAX_NAME_LEN + 1, GFP_KERNEL);
-		if (!osb->group_name) {
-			status = -ENOMEM;
-			LOG_ERROR_STATUS(status);
-			goto bail;
-		}
-		memset(osb->group_name, 0, NM_MAX_NAME_LEN + 1);
-		for (i = 0, p = osb->uuid; i < MAX_VOL_ID_LENGTH; i++, p += 2)
-			sprintf(p, "%02X", osb->uuid[i]);
-	}
-
-	group = nm_get_group_by_name(osb->group_name);
-	if (!group) {
-		printk("ocfs2: could not join group \"%s\"\n",
-		       osb->group_name);
-		status = -EINVAL;
+	/* XXX hold a ref on the node while mounted?  easy enough, if
+	 * desirable. */
+	osb->node_num = nm_this_node();
+	if (osb->node_num == NM_MAX_NODES) {
+		printk("ocfs2: could not find this host's node number\n");
+		status = -ENOENT;
 		goto bail;
 	}
 
-	osb->group_inode = group;
-	osb->node_num = nm_this_node(group);
+	printk("ocfs2: I am node %d\n", osb->node_num);
 
-	printk("ocfs2: I am node %d, a member of group %s\n", osb->node_num,
-	       osb->group_name);
-
 	status = 0;
 bail:
-
 	return status;
 }
 
@@ -890,7 +844,7 @@
  *
  */
 static int ocfs_mount_volume (struct super_block *sb, int reclaim_id,
-			      char **group_name, struct inode *root)
+			      struct inode *root)
 {
 	int status, sector_size;
 	int unlock_super = 0;
@@ -930,7 +884,7 @@
 		goto leave;
 	}
 
-	status = ocfs2_fill_local_node_info(osb, group_name);
+	status = ocfs2_fill_local_node_info(osb);
 	if (status < 0) {
 		LOG_ERROR_STATUS (status);
 		goto leave;
@@ -1500,17 +1454,12 @@
 	if (osb->slot_info)
 		ocfs2_free_slot_info(osb->slot_info);
 
-	if (osb->group_inode)
-		iput(osb->group_inode);
-
 	/* FIXME
 	 * This belongs in journal shutdown, but because we have to
 	 * allocate osb->journal at the start of ocfs_initalize_osb(),
 	 * we free it here.
 	 */
 	kfree(osb->journal);
-	if (osb->group_name)
-		kfree(osb->group_name);
 	if (osb->local_alloc_copy)
 		kfree(osb->local_alloc_copy);
 	memset (osb, 0, sizeof (ocfs_super));

Modified: branches/usysfsify/fs/ocfs2/vote.c
===================================================================
--- branches/usysfsify/fs/ocfs2/vote.c	2005-03-15 22:35:08 UTC (rev 1978)
+++ branches/usysfsify/fs/ocfs2/vote.c	2005-03-15 23:35:32 UTC (rev 1979)
@@ -223,7 +223,6 @@
 	u64 blkno;
 	enum ocfs2_vote_request request;
 	struct inode *inode = NULL;
-	struct inode *remote_node;
 	ocfs2_msg_hdr *hdr = &msg->v_hdr;
 	ocfs2_response_msg response;
 
@@ -304,21 +303,15 @@
 	response.r_hdr.h_node_num = htonl(osb->node_num);
 	response.r_response = htonl(vote_response);
 
-	remote_node = nm_get_node_by_num(node_num);
-	if (!remote_node) {
-		LOG_ERROR_ARGS("Couldn't get inode for node %u!\n", node_num);
-	} else {
-		net_status = net_send_message(OCFS2_MESSAGE_TYPE_RESPONSE,
-					      osb->net_key,
-					      &response,
-					      sizeof(ocfs2_response_msg),
-					      remote_node,
-					      NULL);
-		if (net_status < 0)
-			LOG_ERROR_ARGS("message to node %u fails with error "
-				       "%d!\n", node_num, net_status);
-		iput(remote_node);
-	}
+	net_status = net_send_message(OCFS2_MESSAGE_TYPE_RESPONSE,
+				      osb->net_key,
+				      &response,
+				      sizeof(ocfs2_response_msg),
+				      node_num,
+				      NULL);
+	if (net_status < 0)
+		LOG_ERROR_ARGS("message to node %u fails with error "
+			       "%d!\n", node_num, net_status);
 
 	if (inode)
 		iput(inode);
@@ -510,7 +503,6 @@
 {
 	int status, i, remote_err;
 	ocfs2_net_wait_ctxt *w = NULL;
-	struct inode *remote_node;
 	int dequeued = 0;
 
 	LOG_ENTRY();
@@ -534,21 +526,13 @@
 				       i);
 			ocfs_node_map_set_bit(osb, &w->n_node_map, i);
 
-			remote_node = nm_get_node_by_num(i);
-			if (!remote_node) {
-				status = -EINVAL;
-				LOG_ERROR_STATUS(status);
-				goto bail;
-			}
-
 			remote_err = 0;
 			status = net_send_message(OCFS2_MESSAGE_TYPE_VOTE,
 						  osb->net_key,
 						  request,
 						  sizeof(*request),
-						  remote_node,
+						  i,
 						  &remote_err);
-			iput(remote_node);
 			if (status == -ETIMEDOUT) {
 				LOG_TRACE_ARGS("remote node %d timed out!\n",
 					       i);



More information about the Ocfs2-commits mailing list