[Ocfs2-commits] jlbec commits r2006 - in trunk/fs: ocfs2 ocfs2/cluster ocfs2/dlm usysfs

svn-commits at oss.oracle.com svn-commits at oss.oracle.com
Fri Mar 18 00:01:11 CST 2005


Author: jlbec
Signed-off-by: mfasheh
Date: 2005-03-18 00:01:10 -0600 (Fri, 18 Mar 2005)
New Revision: 2006

Removed:
   trunk/fs/ocfs2/cluster/clcommon.h
   trunk/fs/ocfs2/cluster/gsd.c
   trunk/fs/ocfs2/cluster/gsd.h
   trunk/fs/ocfs2/cluster/ocfs2_tcp.h
Modified:
   trunk/fs/ocfs2/cluster/Makefile
   trunk/fs/ocfs2/cluster/heartbeat.c
   trunk/fs/ocfs2/cluster/heartbeat.h
   trunk/fs/ocfs2/cluster/nodemanager.c
   trunk/fs/ocfs2/cluster/nodemanager.h
   trunk/fs/ocfs2/cluster/ocfs2_nodemanager.h
   trunk/fs/ocfs2/cluster/tcp.c
   trunk/fs/ocfs2/cluster/tcp.h
   trunk/fs/ocfs2/dlm/dlmast.c
   trunk/fs/ocfs2/dlm/dlmconvert.c
   trunk/fs/ocfs2/dlm/dlmfs.c
   trunk/fs/ocfs2/dlm/dlmfs_compat.c
   trunk/fs/ocfs2/dlm/dlmlock.c
   trunk/fs/ocfs2/dlm/dlmmaster.c
   trunk/fs/ocfs2/dlm/dlmmod.c
   trunk/fs/ocfs2/dlm/dlmmod.h
   trunk/fs/ocfs2/dlm/dlmrecovery.c
   trunk/fs/ocfs2/dlm/dlmthread.c
   trunk/fs/ocfs2/dlm/dlmunlock.c
   trunk/fs/ocfs2/dlm/userdlm.c
   trunk/fs/ocfs2/dlmglue.c
   trunk/fs/ocfs2/heartbeat.c
   trunk/fs/ocfs2/ocfs.h
   trunk/fs/ocfs2/super.c
   trunk/fs/ocfs2/vote.c
   trunk/fs/usysfs/dir.c
   trunk/fs/usysfs/mount.c
   trunk/fs/usysfs/usysfs.h
Log:

o Merge the usysfsify branch.  The O2CB ABI is now usysfs.  You must
  have revision 668 of ocfs2-tools or better.

Signed-off-by: mfasheh



Modified: trunk/fs/ocfs2/cluster/Makefile
===================================================================
--- trunk/fs/ocfs2/cluster/Makefile	2005-03-18 04:25:45 UTC (rev 2005)
+++ trunk/fs/ocfs2/cluster/Makefile	2005-03-18 06:01:10 UTC (rev 2006)
@@ -16,6 +16,11 @@
 EXTRA_CFLAGS += -DMISSING_SOCK_CREATE_LITE
 endif
 
+# this is going to get exciting if usysfs is merged
+EXTRA_CFLAGS += -I$(OUR_TOPDIR)/fs/usysfs/
+# XXX should be folded into the kapi stuff
+EXTRA_CFLAGS += -I$(OUR_TOPDIR)/fs/usysfs/compatinclude
+
 ifeq ($(KERNELRELEASE),)
 ifeq ($(KERNEL_26),)
 COMPAT_LIBFS := compat_libfs.o
@@ -24,15 +29,10 @@
 
 INSTALL_MOD_DIR := fs/ocfs2
 
-obj-m := ocfs2_heartbeat.o ocfs2_nodemanager.o ocfs2_tcp.o
+obj-m := ocfs2_nodemanager.o
 
-ocfs2_nodemanager-objs := nodemanager.o util.o transaction_file.o $(COMPAT_LIBFS)
+ocfs2_nodemanager-objs := nodemanager.o heartbeat.o tcp.o $(COMPAT_LIBFS)
 
-ocfs2_heartbeat-objs := heartbeat.o util.o transaction_file.o $(COMPAT_LIBFS)
-
-ocfs2_tcp-objs := gsd.o tcp.o util.o $(COMPAT_LIBFS)
-
-
 ifeq ($(KERNELRELEASE),)
 #
 # Called from a regular "make".
@@ -40,25 +40,20 @@
 
 SOURCES =			\
 	compat_libfs.c		\
-	gsd.c			\
 	heartbeat.c		\
 	nodemanager.c		\
 	tcp.c			\
-	transaction_file.c	\
 	util.c
 
 HEADERS = 			\
-	clcommon.h		\
 	cl_compat.h		\
 	compat_libfs.h		\
-	gsd.h			\
 	heartbeat.h		\
 	nodemanager.h		\
 	ocfs2_heartbeat.h	\
 	ocfs2_nodemanager.h	\
 	ocfs2_tcp.h		\
 	tcp.h			\
-	transaction_file.h	\
 	util.h
 
 DIST_FILES = $(SOURCES) $(HEADERS)

Deleted: trunk/fs/ocfs2/cluster/clcommon.h
===================================================================
--- trunk/fs/ocfs2/cluster/clcommon.h	2005-03-18 04:25:45 UTC (rev 2005)
+++ trunk/fs/ocfs2/cluster/clcommon.h	2005-03-18 06:01:10 UTC (rev 2006)
@@ -1,49 +0,0 @@
-/* -*- mode: c; c-basic-offset: 8; -*-
- * vim: noexpandtab sw=8 ts=8 sts=0:
- *
- * clcommon.h
- *
- * Common stuff
- *
- * Copyright (C) 2004 Oracle.  All rights reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- * 
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- * 
- * You should have received a copy of the GNU General Public
- * License along with this program; if not, write to the
- * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- * Boston, MA 021110-1307, USA.
- *
- * Authors: Kurt Hackel
- */
-
-#ifndef CLUSTER_CLCOMMON_H
-#define CLUSTER_CLCOMMON_H
-
-typedef struct _nm_ctxt nm_ctxt;
-typedef struct _heartbeat_ctxt heartbeat_ctxt;
-
-#define CLUSTER_DISK_UUID_LEN      32      // 16 byte binary == 32 char hex string
-
-typedef struct _cluster_disk
-{
-	// uuid of disk
-	char uuid[CLUSTER_DISK_UUID_LEN+1];
-	// all the rest are for heartbeat
-	dev_t dev;
-	u32 blocksize_bits;
-	u32 num_blocks;
-	u64 start_block;
-	util_rarray slots;
-} cluster_disk;
-
-
-#endif /* CLUSTER_CLCOMMON_H */

Deleted: trunk/fs/ocfs2/cluster/gsd.c
===================================================================
--- trunk/fs/ocfs2/cluster/gsd.c	2005-03-18 04:25:45 UTC (rev 2005)
+++ trunk/fs/ocfs2/cluster/gsd.c	2005-03-18 06:01:10 UTC (rev 2006)
@@ -1,245 +0,0 @@
-/* -*- mode: c; c-basic-offset: 8; -*-
- * vim: noexpandtab sw=8 ts=8 sts=0:
- *
- * Copyright (C) 2005 Oracle.  All rights reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- * 
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- * 
- * You should have received a copy of the GNU General Public
- * License along with this program; if not, write to the
- * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- * Boston, MA 021110-1307, USA.
- *
- */
-#include <linux/config.h>
-#include <linux/module.h>
-#include <linux/fs.h>
-#include <linux/sched.h>
-#include <asm/uaccess.h>
-#include <linux/file.h>
-
-#include "cl_compat.h"
-#include "util.h"
-#include "clcommon.h"
-#include "nodemanager.h"
-#include "tcp.h"
-#include "gsd.h"
-
-static char *gsd_handler_buf = NULL;
-/* sigh.  these will be claned up, I'm just sure of it. */
-static u8 gsd_node_num;
-static struct inode *gsd_inode;
-
-static int gsd_message_action(gsd_message *g)
-{
-	int ret;
-	nm_op op;
-	int namelen = g->namelen;
-	struct inode *node=NULL, *group=NULL;
-	char name[NM_MAX_NAME_LEN+1];
-	
-	if (namelen > NM_MAX_NAME_LEN)
-		return -EINVAL;
-	strncpy(name, g->name, namelen);
-	name[namelen] = '\0';
-	
-	memset(&op, 0, sizeof(op));
-	switch (g->action) {
-		case GSD_ACTION_ADD_GROUP:
-			group = nm_get_group_by_name(name);
-			if (group) {
-				ret = 0;
-				break;
-			}
-			op.arg_u.gc.group_num = NM_INVALID_SLOT_NUM;
-			memcpy(op.arg_u.gc.name, name, namelen);
-			memcpy(op.arg_u.gc.disk_uuid, name, namelen);
-
-			ret = nm_create_group(gsd_handler_buf, &op);
-			if (ret >= 0)
-				ret = 0;
-			break;
-
-		case GSD_ACTION_ADD_GROUP_NODE:
-			group = nm_get_group_by_name(name);
-			if (!group) {
-				ret = -EINVAL;
-				break;
-			}
-			node = nm_get_group_node_by_index(group, g->from);
-			if (node) {
-				ret = 0;
-				if (nm_get_node_global_index(node) != g->from)
-					ret = -EINVAL;
-				break;
-			}
-			op.arg_u.gc.group_num = nm_get_group_global_index(group);
-			op.arg_u.gc.node_num = g->from;
-			op.arg_u.gc.slot_num = g->from;
-			ret = nm_add_node_to_group(gsd_handler_buf, &op);
-			if (ret >= 0)
-				ret = 0;
-			break;
-		default:
-			ret = -EINVAL;
-			break;
-	}
-
-	if (node)
-		iput(node);
-	if (group)
-		iput(group);
-	return ret;
-}
-
-static int gsd_message_handler(net_msg *msg, u32 len, void *data)
-{
-	gsd_message *g = (gsd_message *)msg->buf;
-	gsd_message_to_host(g);
-	return gsd_message_action(g);
-}
-
-int gsd_ioctl(struct inode *inode, struct file *filp, unsigned int cmd,
-	      unsigned long arg)
-{
-	gsd_ioc gsd_data;
-	int ret = 0;
-	gsd_message g;
-	int response = 0;
-	struct inode *to = NULL;
-	struct file *file = NULL;
-
-	switch (cmd) {
-	    case GSD_IOC_CREATE_GROUP:
-		    memset(&gsd_data, 0, sizeof(gsd_ioc));
-		    ret = copy_from_user(&gsd_data, (gsd_ioc *)arg,
-				         sizeof(gsd_ioc));
-		   
-		    file = fget(gsd_data.fd); 
-		    if (!file || !file->f_dentry || !file->f_dentry->d_inode) { 
-			    ret = -EINVAL;
-			    break; 
-		    } 
-		    to = file->f_dentry->d_inode;
-
-		    g.action = GSD_ACTION_ADD_GROUP;
-		    g.from = gsd_node_num;
-		    g.namelen = gsd_data.namelen;
-		    memcpy(g.name, gsd_data.name, gsd_data.namelen);
-
-		    if (to == gsd_inode) { 
-			    /* create the group locally */
-			    ret = gsd_message_action(&g);
-		    } else { 
-			    /* create the group on remote node */
-			    gsd_message_to_net(&g);
-			    ret = net_send_message(GSD_MESSAGE, 0, &g,
-					    	   sizeof(g), to, &response); 
-			    if (ret == 0) 
-				    ret = response;
-		    }
-
-		    memset(&gsd_data, 0, sizeof(gsd_ioc));
-		    gsd_data.status = ret;
-		    ret = copy_to_user((gsd_ioc *)arg, &gsd_data,
-				       sizeof(gsd_ioc));
-		    break;
-
-	    case GSD_IOC_ADD_GROUP_NODE:
-		    memset(&gsd_data, 0, sizeof(gsd_ioc));
-		    ret = copy_from_user(&gsd_data, (gsd_ioc *)arg,
-				         sizeof(gsd_ioc));
-		   
-		    file = fget(gsd_data.fd); 
-		    if (!file || !file->f_dentry || !file->f_dentry->d_inode) { 
-			    ret = -EINVAL;
-			    break; 
-		    } 
-		    to = file->f_dentry->d_inode;
-
-		    g.action = GSD_ACTION_ADD_GROUP_NODE;
-		    g.from = gsd_node_num;
-		    g.namelen = gsd_data.namelen;
-		    memcpy(g.name, gsd_data.name, gsd_data.namelen);
-
-		    if (to == gsd_inode) {
-			    /* create the group locally */
-			    ret = gsd_message_action(&g);
-		    } else { 
-			    /* create the group on remote node */
-			    gsd_message_to_net(&g);
-			    ret = net_send_message(GSD_MESSAGE, 0, &g,
-						   sizeof(g), to, &response); 
-			    if (ret == 0) 
-				    ret = response;
-		    }
-		    memset(&gsd_data, 0, sizeof(gsd_ioc));
-		    gsd_data.status = ret;
-		    ret = copy_to_user((gsd_ioc *)arg, &gsd_data,
-				       sizeof(gsd_ioc));
-		    break;
-	    default:
-		    BUG();
-		    break;
-	}
-
-	if (file)
-		fput(file);
-
-	return ret;
-}				/* net_ioctl */
-
-int gsd_setup(void)
-{
-	int ret;
-
-	gsd_node_num = nm_this_node(NULL);
-	if (gsd_node_num >= NM_MAX_NODES) {
-		printk("local nm node number not initialized!\n");
-		ret = -EINVAL;
-		goto out;
-	}
-
-	gsd_inode = nm_get_node_by_num(gsd_node_num);
-	if (!gsd_inode) {
-		printk("local nm node inode not initialized!\n");
-		return -1;
-	}
-
-	/* need this stupidity until I can divorce the actual nm actions
-	 * from the output they send to their user buffer */
-	gsd_handler_buf = (char *) __get_free_page(GFP_KERNEL);
-	if (!gsd_handler_buf) {
-		ret = -ENOMEM;
-		goto out;
-	}
-
-	ret = net_register_handler(GSD_MESSAGE, 0, 0, sizeof(gsd_message),
-				   gsd_message_handler, NULL);
-out:
-	if (ret)
-		if (gsd_inode) {
-			iput(gsd_inode);
-			gsd_inode = NULL;
-		}
-
-	return ret;
-}
-
-void gsd_teardown(void)
-{
-	free_page((unsigned long)gsd_handler_buf);
-	if (gsd_inode) {
-		iput(gsd_inode);
-		gsd_inode = NULL;
-	}
-}
-

Deleted: trunk/fs/ocfs2/cluster/gsd.h
===================================================================
--- trunk/fs/ocfs2/cluster/gsd.h	2005-03-18 04:25:45 UTC (rev 2005)
+++ trunk/fs/ocfs2/cluster/gsd.h	2005-03-18 06:01:10 UTC (rev 2006)
@@ -1,31 +0,0 @@
-/* -*- mode: c; c-basic-offset: 8; -*-
- * vim: noexpandtab sw=8 ts=8 sts=0:
- *
- * Copyright (C) 2005 Oracle.  All rights reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- * 
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- * 
- * You should have received a copy of the GNU General Public
- * License along with this program; if not, write to the
- * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- * Boston, MA 021110-1307, USA.
- */
-
-#ifndef CLUSTER_GSD_H
-#define CLUSTER_GSD_H
-
-int gsd_ioctl(struct inode *inode, struct file *filp, unsigned int cmd,
-	      unsigned long arg);
-
-int gsd_setup(void);
-void gsd_teardown(void);
-
-#endif

Modified: trunk/fs/ocfs2/cluster/heartbeat.c
===================================================================
--- trunk/fs/ocfs2/cluster/heartbeat.c	2005-03-18 04:25:45 UTC (rev 2005)
+++ trunk/fs/ocfs2/cluster/heartbeat.c	2005-03-18 06:01:10 UTC (rev 2006)
@@ -1,12 +1,8 @@
 /* -*- mode: c; c-basic-offset: 8; -*-
  * vim: noexpandtab sw=8 ts=8 sts=0:
  *
- * heartbeat.c
+ * Copyright (C) 2004, 2005 Oracle.  All rights reserved.
  *
- * Keeps track of alive nodes in the cluster.
- *
- * Copyright (C) 2004 Oracle.  All rights reserved.
- *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public
  * License as published by the Free Software Foundation; either
@@ -22,11 +18,10 @@
  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  * Boston, MA 021110-1307, USA.
  *
- * Authors: Kurt Hackel
+ * TODO:
+ * 	- make sure attributes can't be written to after object committal 
  */
 
-
-
 #include <linux/module.h>
 #include <linux/kernel.h>
 #include <linux/fs.h>
@@ -61,12 +56,13 @@
 #include <linux/file.h>
 #include <linux/bitops.h>
 #include <linux/kthread.h>
+#include "usysfs.h"
 
 #include <asm/uaccess.h>
+#include <asm/bitops.h>
 
 #include "cl_compat.h"
 #include "util.h"
-#include "clcommon.h"
 
 #include "heartbeat.h"
 #include "tcp.h"
@@ -79,32 +75,21 @@
 #define __user
 #endif
 
+static void hb_do_callbacks(int type, struct nm_node *node, int idx);
 
-static void hb_teardown(void);
-static void hb_nm_group_node_add_cb(void *ptr1, void *ptr2, u8 idx);
-static void hb_nm_group_node_del_cb(void *ptr1, void *ptr2, u8 idx);
-static void hb_nm_node_add_cb(void *ptr1, void *ptr2, u8 idx);
-static void hb_nm_group_add_cb(void *ptr1, void *ptr2, u8 idx);
-static int hb_init_disk_hb_group(struct inode *group, dev_t dev, u32 bits, 
-				 u32 blocks, u64 start);
-static ssize_t write_disk(struct file *file, char *buf, size_t size);
-static void hb_do_callbacks(int type, void *ptr1, void *ptr2, int idx);
-static void hb_end_buffer_io_sync(struct buffer_head *bh, int uptodate);
-static int hb_do_node_down(struct inode *group, struct inode *node, int idx);
-static int hb_do_node_up(struct inode *group, struct inode *node, int idx);
-static int hb_do_disk_heartbeat(void *page);
-static int hb_thread(void *data);
-static void hb_complete_thread(void);
-static int hb_launch_thread(void);
-static int hb_fill_node_bytemap(struct inode *group, void *map, int size);
+/* 
+ * The first heartbeat pass had one global thread that would serialize all hb
+ * callback calls.  This global serializing sem should only be removed once
+ * we've made sure that all callees can deal with being called concurrently
+ * from multiple hb region threads.
+ */
+static DECLARE_RWSEM(hb_callback_sem);
+/*
+ * region setup and teardown races with node_fill_map here.  We use
+ * the callback sem to protect them.
+ */
+static LIST_HEAD(hb_active_regions);
 
-
-/* globals */
-static spinlock_t hb_lock = SPIN_LOCK_UNLOCKED;
-static LIST_HEAD(hb_net_groups);
-static LIST_HEAD(hb_disk_groups);
-static struct task_struct *hb_task = NULL;
-
 static struct hb_callback {
 	struct list_head list;
 	struct semaphore sem;
@@ -119,8 +104,38 @@
 #define hbprintk0(x)           
 #endif
 
+#define HB_THREAD_MS                  2000   // every 2 seconds
 
+struct hb_disk_slot {
+	struct buffer_head	*ds_bh;
+	u64			ds_block;
+	u8			ds_node_num;
+	unsigned long		ds_last_time;
+	u16			ds_margin;
+	/* the single hb-thread only ever touches these items, no locking */
+	struct list_head	ds_dead_item;
+	struct list_head	ds_alive_item;
+};
 
+/* each thread owns a region.. when we're asked to tear down the region
+ * we ask the thread to stop, who cleans up the region */
+struct hb_region {
+	struct kobject		hr_kobj;
+	struct list_head	hr_active_item;
+	struct task_struct 	*hr_task;
+	u64			hr_block_bytes;
+	u64			hr_block_bits;
+	u64			hr_start_block;
+	u8			hr_blocks;
+
+	struct block_device	*hr_bdev;
+	struct hb_disk_slot	*hr_slots;
+	/* a single hb-thread writer and many fill_node readers are protected */
+	rwlock_t		hr_slot_list_lock;
+	struct list_head	hr_alive_list;
+	struct list_head	hr_dead_list;
+};
+
 static void hb_end_buffer_io_sync(struct buffer_head *bh, int uptodate)
 {
 	if (uptodate)
@@ -132,24 +147,17 @@
 	unlock_buffer(bh);
 }
 
-
-
-static int hb_do_node_down(struct inode *group, struct inode *node, int idx)
+static int hb_do_node_down(struct nm_node *node, int idx)
 {
-	//int ret;
-	hbprintk("hb_do_node_down: group=%lu, node=%lu\n", group->i_ino, 
-		 node->i_ino);
-	hbprintk("NOT removing node from group\n");
-	//ret = nm_remove_node_from_group(group, node);
-	hb_do_callbacks(HB_NODE_DOWN_CB, group, node, idx);
+	hbprintk("hb_do_node_down:  node=%u\n", node->nd_num);
+	hb_do_callbacks(HB_NODE_DOWN_CB, node, idx);
 	return 0;
 }
 
-static int hb_do_node_up(struct inode *group, struct inode *node, int idx)
+static int hb_do_node_up(struct nm_node *node, int idx)
 {
-	hbprintk("hb_do_node_up: group=%lu, node=%lu\n", group->i_ino, 
-		 node->i_ino);
-	hb_do_callbacks(HB_NODE_UP_CB, group, node, idx);
+	hbprintk("hb_do_node_up: node=%u\n", node->nd_num);
+	hb_do_callbacks(HB_NODE_UP_CB, node, idx);
 	return 0;
 }
 
@@ -167,28 +175,27 @@
 }
 
 /* 
- * hb_init_disk_hb_group() sets disk->blocksize_bits which defines the block
- * size that we'll use to calculate block offsets.  The blocksize of the
- * device might change under us, though.  this detects when that happens
- * and tries to lookup the bh again with the newer blocksize.  The exciting
- * wrinkle here is that its fatal to call __getblk() with the wrong block
- * size in some 2.6 kernels.
+ * hr_region's block_bytes attribute defines the block size that we'll use to
+ * calculate block offsets.  The blocksize of the device might change under us,
+ * though.  this detects when that happens and tries to lookup the bh again
+ * with the newer blocksize.  The exciting wrinkle here is that it's fatal to
+ * call __getblk() with the wrong block size in some 2.6 kernels.
  *
- * We only ever use a few bytes of the block so it is ok that we return
- * a smaller bh than what is implied by blocksize_bits.  We're just fixing
- * up addressing here.
+ * We only ever use a few bytes of the block so it is ok that we return a
+ * smaller bh than what is implied by blocksize_bits.  We're just fixing up
+ * addressing here.
  */
-static struct buffer_head *hb_getblk(int orig_blkno, cluster_disk *disk)
+static struct buffer_head *hb_getblk(struct block_device *bdev, int orig_blkno,
+				     int bits)
 {
 	/* XXX getblk() takes an int block in 2.4 :/ */
 	int blkno;
-	int bits, dev_bits;
+	int dev_bits;
 	struct buffer_head *bh = NULL;
 
-	bits = disk->blocksize_bits;
 	blkno = orig_blkno;
 
-	dev_bits = ocfs_dev_bits(disk->dev);
+	dev_bits = bdev->bd_inode->i_blkbits;
 	if (dev_bits < 0)
 		goto out;
 
@@ -197,697 +204,586 @@
 	else if (dev_bits > bits)
 		blkno >>= dev_bits - bits;
 
-	bh = getblk(disk->dev, blkno, 1 << dev_bits);
+	bh = __getblk(bdev, blkno, 1 << dev_bits);
 out:
 	return bh;
 }
 
-static struct buffer_head *hb_get_locked_mapped(hb_disk_slot *slot,
-						int ino,
-						cluster_disk *disk)
+static struct buffer_head *hb_get_locked_mapped(struct hb_region *reg,
+						struct hb_disk_slot *slot)
 {
 	struct buffer_head *bh = NULL;
 
-	bh = slot->bh;
+	bh = slot->ds_bh;
 	if (bh) {
 		lock_buffer(bh);
 		if (buffer_mapped(bh))
 			goto out;
-		slot->bh = NULL;
+		slot->ds_bh = NULL;
 		unlock_buffer(bh);
 		brelse(bh);
 	}
 
-	slot->bh = hb_getblk(ino + disk->start_block, disk);
-	if (slot->bh) {
-		bh = slot->bh;
+	slot->ds_bh = hb_getblk(reg->hr_bdev, slot->ds_block,
+				reg->hr_block_bits);
+	if (slot->ds_bh) {
+		bh = slot->ds_bh;
 		lock_buffer(bh);
 	}
 out:
 	return bh;
 }
 
-static int hb_do_disk_heartbeat(void *page)
+static void hb_do_disk_heartbeat(struct hb_region *reg)
 {
-	nm_group_inode_private *priv;
-	struct inode *group, *node;
-	struct list_head *iter;
+	struct nm_node *node;
 	struct buffer_head *bh;
-	hb_disk_slot *slot;
+	struct hb_disk_slot *slot;
 	hb_disk_heartbeat_block *hb_block;
-	int rw, ino, idx, ret, i;
-	struct inode **dead_nodes, **live_nodes;
-	LIST_HEAD(tmplist);
-	cluster_disk *disk;
+	/* only need to worry about locking when we touch the reg lists
+	 * which fill_node_map sees.  otherwise only we touch these
+	 * lists and the slot items */
+	LIST_HEAD(newborn);
+	LIST_HEAD(deceased);
 	u64 cputime;
+	int i, rw;
 
-	// NM_MAX_NODES is 255
-	dead_nodes = page;
-	live_nodes = page + (sizeof(struct inode *) * 256);
-	
-	spin_lock(&hb_lock);
-	list_splice_init(&hb_disk_groups, &tmplist);
-	spin_unlock(&hb_lock);
+	/* first we clear uptodate on other nodes slots and write our slot */
+	for(i = 0; i < reg->hr_blocks; i++) {
+		slot = &reg->hr_slots[i];
 
-	list_for_each(iter, &tmplist) {
-		priv = list_entry(iter, nm_group_inode_private, disk_list);
-		group = priv->inode;
-		disk = &priv->disk;
+		bh = hb_get_locked_mapped(reg, slot);
+		if (bh == NULL) {
+			hbprintk("getblk failed in slot %d!\n", i);
+			continue;
+		}
 
-		memset(page, 0, PAGE_SIZE);
-		down(&group->i_sem);
+		if (slot->ds_node_num == nm_this_node()) {
+			memset(bh->b_data, 0, bh->b_size);
+			hb_block = (hb_disk_heartbeat_block *)bh->b_data;
+			BUG_ON(bh->b_size < sizeof(*hb_block));
 
-		idx = 0;
-		while ((slot = nm_iterate_group_disk_slots(group, &idx))) {
+			/* TODO: time stuff */
+			cputime = OCFS_CURRENT_SECONDS;
+			if (!cputime)
+				cputime = 1;
+			hb_block->time = cpu_to_le64(cputime);
 
-			node = slot->inode;
-			if (!node) {
-				hbprintk("no inode in slot %d!\n", idx);
-				idx++;
-				continue;
-			}
-			ino = nm_get_node_global_index(node);
+			set_buffer_uptodate(bh);
+			clear_buffer_dirty(bh);
+			rw = WRITE;
+		} else {
+			clear_buffer_uptodate(bh);
+			rw = READ;
+		}
 
-			bh = hb_get_locked_mapped(slot, ino, disk);
-			if (bh == NULL) {
-				hbprintk("getblk failed in slot %d!\n",
-					 idx);
-				idx++;
-				continue;
-			}
-			
-			if (ino == nm_this_node(group)) {
-				memset(bh->b_data, 0, bh->b_size);
-				hb_block= (hb_disk_heartbeat_block *)bh->b_data;
-				BUG_ON(bh->b_size < sizeof(*hb_block));
+		bh->b_end_io = hb_end_buffer_io_sync;
+		hb_submit_bh(rw, bh);
+	}
 
-				/* TODO: time stuff */
-				cputime = OCFS_CURRENT_SECONDS;
-				if (!cputime)
-					cputime = 1;
-				hb_block->time = cpu_to_le64(cputime);
+	/* now we read again and see what other nodes have done */
+	for(i = 0; i < reg->hr_blocks; i++) {
+		/* never, ever, generate events for our node */
+		if (i == nm_this_node())
+			continue;
+		slot = &reg->hr_slots[i];
 
-				set_buffer_uptodate(bh);
-				clear_buffer_dirty(bh);
-				rw = WRITE;
-			} else {
-				clear_buffer_uptodate(bh);
-				rw = READ;
-			}
-
-			bh->b_end_io = hb_end_buffer_io_sync;
-			hb_submit_bh(rw, bh);
-			idx++;
+		bh = slot->ds_bh;
+		if (bh == NULL) {
+			hbprintk("no bh in slot %d!\n", i);
+			continue;
 		}
-	
-		idx = 0;
-		while ((slot = nm_iterate_group_disk_slots(group, &idx))) {
-			bh = slot->bh;
-			if (!bh) {
-				hbprintk("no bh in slot %d!\n", idx);
-				idx++;
-				continue;
-			}
+		wait_on_buffer(bh);
+		hb_block = (hb_disk_heartbeat_block *)bh->b_data;
 
-			node = slot->inode;
-			if (!node) {
-				hbprintk("no inode in slot %d!\n", idx);
-				idx++;
-				continue;
-			}
+		cputime = le64_to_cpu(hb_block->time);
+		if (slot->ds_last_time != cputime) {
+			/* the node is active */
+			if (!list_empty(&slot->ds_dead_item))
+				list_del_init(&slot->ds_dead_item);
 
-			ino = nm_get_node_global_index(node);
+			if (list_empty(&slot->ds_alive_item))
+				list_add_tail(&slot->ds_alive_item, &newborn);
 
-			wait_on_buffer(bh);
-			hb_block = (hb_disk_heartbeat_block *)bh->b_data;
+			slot->ds_last_time = cputime;
+			slot->ds_margin = HB_DISK_MARGIN;
+			continue;
+		} 
 
-			/* TODO: time stuff */
-			cputime = le64_to_cpu(hb_block->time);
-			if (slot->last_time != cputime) {
-				if (slot->state == HB_NODE_STATE_INIT) {
-					hbprintk("first time for this node!\n");
-					live_nodes[ino] = node;
-					slot->state = HB_NODE_STATE_UP;
-				}
-				node->i_atime = mk_inode_time(cputime, 0);
-				slot->last_time = cputime;
-				slot->margin = HB_DISK_MARGIN;
-				hb_do_callbacks(HB_NODE_RESPONDED_CB, 
-						group, node, idx);
-			} else {
-				slot->margin--;
-				hbprintk("node %d missed.  margin=%d\n", 
-					 ino, slot->margin);
-			}
+		/* only tick down on idle nodes that we think are alive.
+		 * this stops us from getting a sea of node down events for
+		 * nodes that have never been active. */
+		if (!list_empty(&slot->ds_dead_item) ||
+		    list_empty(&slot->ds_alive_item))
+			continue;
 
-			if (ino != nm_this_node(group) && slot->margin <= 0) {
-				hbprintk("node %d JUST DIED!!!!\n", ino);
-				dead_nodes[ino] = node;
-				slot->state = HB_NODE_STATE_DOWN;
-			}
-			idx++;
+		/* decrease slot margin to zero as long as we don't
+		 * see any updates */
+		if (slot->ds_margin) {
+			if (--slot->ds_margin)
+				continue;
 		}
 
-		up(&group->i_sem);
+		/* ok, margin is 0, it's really dead */ 
+		if (list_empty(&slot->ds_dead_item)) {
+			hbprintk("node %d JUST DIED!!!!\n", i);
+			list_add_tail(&slot->ds_dead_item, &deceased);
+		}
+	}
 
-		/* Do holding group i_sem while doing node-up/down.
-		 * Changes may need to be made to the group, so 
-		 * i_sem will be needed... */
-		for (i=0; i<NM_MAX_NODES; i++) {
-			if (live_nodes[i])
-				ret = hb_do_node_up(group, live_nodes[i], i);
-			else if (dead_nodes[i])
-				ret = hb_do_node_down(group, dead_nodes[i], i);
+	/* we're the only thing that modifies the lists, we don't have to lock
+	 * while we're just reading them.  the write locks protect the
+	 * fill_node_map readers.  */
+	list_for_each_entry(slot, &newborn, ds_alive_item) {
+		node = nm_get_node_by_num(slot->ds_node_num);
+		if (node == NULL) {
+			hbprintk("saw hb for node %d but don't have a node\n",
+				 slot->ds_node_num);
+			continue;	
 		}
+		hb_do_node_up(node, slot->ds_node_num);
+		nm_node_put(node);
 	}
-	
-	spin_lock(&hb_lock);
-	list_splice(&tmplist, &hb_disk_groups);
-	spin_unlock(&hb_lock);
-	return 0;
+	list_for_each_entry(slot, &deceased, ds_dead_item) {
+		node = nm_get_node_by_num(slot->ds_node_num);
+		if (node == NULL) {
+			hbprintk("node %d went down but don't have a node\n",
+				 slot->ds_node_num);
+			continue;	
+		}
+		hb_do_node_down(node, slot->ds_node_num);
+		nm_node_put(node);
+	}
+
+	write_lock(&reg->hr_slot_list_lock);
+	list_splice_init(&newborn, &reg->hr_alive_list);
+	list_splice_init(&deceased, &reg->hr_dead_list);
+	write_unlock(&reg->hr_slot_list_lock);
 }
 
+/*
+ * we ride the region ref that the region dir holds.  before the region
+ * dir is removed and drops its ref it will wait to tear down this
+ * thread.
+ */
 static int hb_thread(void *data)
 {
-	void *page = data;
+	struct hb_region *reg = data;
 
 	hbprintk("hb thread running\n");
 	
 	while (!kthread_should_stop()) {
-		hb_do_disk_heartbeat(page);
+		hb_do_disk_heartbeat(reg);
 		set_current_state(TASK_UNINTERRUPTIBLE);
 		schedule_timeout(msecs_to_jiffies(HB_THREAD_MS));
 	}
 
 	hbprintk("hb thread exiting\n");
-	free_page((unsigned long)page);
+
 	return 0;
 }
 
-/* Launch the hb thread for the mounted volume */
-static int hb_launch_thread(void)
+void hb_init(void)
 {
-	void *page;
-	int ret;
+	int i;
 
-	page = (void *)__get_free_page(GFP_KERNEL);
-	if (!page) {
-		ret = -ENOMEM;
-		goto out;
+	for (i = 0; i < ARRAY_SIZE(hb_callbacks); i++) {
+		INIT_LIST_HEAD(&hb_callbacks[i].list);
+		init_MUTEX(&hb_callbacks[i].sem);
 	}
+}
 
-	hbprintk("starting hb thread...\n");
-	hb_task = kthread_run(hb_thread, page, "hb_thread");
-	if (IS_ERR(hb_task)) {
-		hb_task = NULL;
-		hbprintk("unable to launch hb thread, error=%ld",
-			 PTR_ERR(hb_task));
-		ret = -EINVAL;
-		goto out;
+/*
+ * get a map of all nodes that are heartbeating in any regions
+ */
+void hb_fill_node_map(unsigned long *map, unsigned bytes)
+{
+	struct hb_region *reg;
+	struct hb_disk_slot *slot;
+
+	BUG_ON(bytes < (BITS_TO_LONGS(NM_MAX_NODES) * sizeof(unsigned long)));
+
+	memset(map, 0, bytes);
+
+	/* callers want to serialize this map and callbacks so that they
+	 * can trust that they don't miss nodes coming to the party */
+	down_read(&hb_callback_sem);
+
+	list_for_each_entry(reg, &hb_active_regions, hr_active_item) {
+		read_lock(&reg->hr_slot_list_lock);
+		list_for_each_entry(slot, &reg->hr_alive_list, ds_alive_item)
+			set_bit(slot->ds_node_num, map);
+		read_unlock(&reg->hr_slot_list_lock);
 	}
 
-	/* hb_thread is responsible for freeing the page if it runs */
-	page = NULL;
-	ret = 0;
+	up_read(&hb_callback_sem);
 
-out:
-	if (page)
-		free_page((unsigned long)page);
-	return ret;
+	/* our node is Always Up */
+	set_bit(nm_this_node(), map);
 }
+EXPORT_SYMBOL(hb_fill_node_map);
 
-static void hb_complete_thread(void)
+/* 
+ * heartbeat usysfs bits.  The heartbeat set is a default set under
+ * the cluster set in nodemanager.c.
+ */
+
+static struct hb_region *to_hb_region(struct kobject *kobj)
 {
-	if (hb_task) {
-		hbprintk("waiting for hb thread to exit\n");
-		kthread_stop(hb_task);
-		hb_task = NULL;
-	}
+	return kobj ? container_of(kobj, struct hb_region, hr_kobj) : NULL;
 }
 
-static int hb_init_disk_hb_group(struct inode *group, dev_t dev, u32 bits, 
-				 u32 blocks, u64 start)
+/* drop_object only drops its ref after killing the thread, nothing should
+ * be using the region anymore.  this has to clean up any state that
+ * attributes might have built up. */
+static void hb_region_release(struct kobject *kobj)
 {
-	int ret = -EINVAL;
-	cluster_disk *disk;
-	nm_group_inode_private *priv;
+	struct hb_region *reg = to_hb_region(kobj);
+	printk("releasing reg %p\n", reg);
 
-	priv = group->u.generic_ip;
-	if (!priv)
-		goto leave;
+	if (reg->hr_bdev)
+		blkdev_put(reg->hr_bdev);
+	kfree(reg->hr_slots); /* might be null if never activated */
+	kfree(reg);
+}
 
-	if (priv->state == NM_GROUP_READY)
-		return 0;
+static ssize_t hb_region_block_bytes_read(struct hb_region *reg, char *page)
+{
+	return sprintf(page, "%lld\n", (long long)reg->hr_block_bytes);
+}
 
-	/* hold an extra ref as long as hb keeps track of the group */
-	igrab(group);
+static ssize_t hb_region_block_bytes_write(struct hb_region *reg,
+					   const char *page,
+					   size_t count)
+{
+	unsigned long long tmp;
+	char *p = (char *)page;
 
-	disk = &priv->disk;
-	if (blocks > NM_MAX_NODES)
-	       blocks = NM_MAX_NODES;
-	disk->dev = dev;
-	disk->blocksize_bits = bits;
-	disk->num_blocks = blocks;
-	disk->start_block = start;
-	util_init_rarray(&disk->slots, sizeof(hb_disk_slot));
+	tmp = simple_strtoull(p, &p, 0);
+	if (!p || (*p && (*p != '\n')))
+		return -EINVAL;
 
-	/* start allowing group additions */
-	ret = nm_make_group_ready(group);
+	/* XXX probably very stupid. */
+	if (tmp >= 65536) /* just so we can use hweight16 */
+		return -ERANGE;
+	if (hweight16(tmp) != 1)
+		return -EINVAL;
 
-leave:
-	if (ret < 0)
-		iput(group);
+	/* XXX compare blocks against dev later on commit? */
+	reg->hr_block_bytes = tmp;
+	/* XXX is this right? */
+	reg->hr_block_bits = ffs(reg->hr_block_bytes) - 1;
 
-	return ret;
+	return count;
 }
-	
+static ssize_t hb_region_start_block_read(struct hb_region *reg, char *page)
+{
+	return sprintf(page, "%lld\n", (long long)reg->hr_start_block);
+}
 
-static ssize_t write_disk(struct file *file, char *buf, size_t size)
+static ssize_t hb_region_start_block_write(struct hb_region *reg,
+					   const char *page,
+					   size_t count)
 {
-	hb_op *data;
-	struct inode *group = NULL;
-	struct file *filp = NULL;
-	dev_t dev;
-	int ret, tmpret;
-	nm_group_inode_private *priv;
-	u8 tmpmap[NM_MAX_NODES];
-	
-	hbprintk("write_disk\n");
+	unsigned long long tmp;
+	char *p = (char *)page;
 
-        if (size < sizeof(*data))
-                return -EINVAL;
-	data = (hb_op *) buf; if (data->magic != HB_OP_MAGIC)
+	tmp = simple_strtoull(p, &p, 0);
+	if (!p || (*p && (*p != '\n')))
 		return -EINVAL;
 
-	switch (data->opcode)
-	{
-		case HB_OP_START_DISK_HEARTBEAT:
-			if (data->bits < 9 || data->bits > 12) {
-				ret = sprintf(buf, "%d: bad blocksize bits! %u",
-					      -EINVAL, data->bits);
-				break;
-			}
-			group = nm_get_group_by_num(data->group_num);
-			if (!group || !group->u.generic_ip) {
-				ret = sprintf(buf, "%d: bad group number! %u", 
-					      -EINVAL, data->group_num);
-				break;
-			}
-			priv = group->u.generic_ip;
-			if (strncmp(priv->disk.uuid, data->disk_uuid, 
-				    CLUSTER_DISK_UUID_LEN) != 0) {
-				ret = sprintf(buf, "%d: bad disk uuid!", 
-					      -EINVAL);
-				break;
-			}
-			filp = fget(data->fd);
-			if (!filp) {
-				ret = sprintf(buf, "%d: bad fd!", -EINVAL);
-				break;
-			}
-			dev = filp->f_dentry->d_inode->i_rdev;
-			tmpret = hb_init_disk_hb_group(group, dev, data->bits, 
-						       data->blocks, 
-						       data->start);
-			if (tmpret < 0) {
-				fput(filp);
-				ret = sprintf(buf, "%d: failed to init disk "
-					      "heartbeat for group %u!", 
-					      -EINVAL, data->group_num);
-			} else {
-				ret = sprintf(buf, "0: disk heartbeat started "
-					      "for group %u!", data->group_num);
-			}
-			break;
+	/* XXX compare blocks against dev later on commit? */
+	reg->hr_start_block = tmp;
 
-		case HB_OP_GET_NODE_MAP:
-			group = nm_get_group_by_num(data->group_num);
-			if (!group || !group->u.generic_ip) {
-				ret = sprintf(buf, "%d: bad group number! %u", 
-					      -EINVAL, data->group_num);
-				break;
-			}
-
-			memset(tmpmap, 0, sizeof(tmpmap));
-			if ((ret = hb_fill_node_bytemap(group, tmpmap,
-							NM_MAX_NODES)) == 0) {
-				ret = sprintf(buf, "0: ");
-				buf += ret;
-				memcpy(buf, tmpmap, sizeof(tmpmap));
-				ret += sizeof(tmpmap);
-			} else {
-				ret = sprintf(buf, "%d: error occurred in "
-					      "hb_fill_node_bytemap", ret);
-			}
-			break;
-
-		default:
-			ret = sprintf(buf, "%d: bad opcode! %u", -EINVAL, 
-				      data->opcode);
-			break;
-	}
-
-	if (group)
-		iput(group);
-	
-	return ret;
+	return count;
 }
+static ssize_t hb_region_blocks_read(struct hb_region *reg, char *page)
+{
+	return sprintf(page, "%d\n", reg->hr_blocks);
+}
 
+static ssize_t hb_region_blocks_write(struct hb_region *reg,
+					   const char *page,
+					   size_t count)
+{
+	unsigned long long tmp;
+	char *p = (char *)page;
 
-extern struct file_operations transaction_ops;
+	tmp = simple_strtoull(p, &p, 0);
+	if (!p || (*p && (*p != '\n')))
+		return -EINVAL;
 
-/*----------------------------------------------------------------------------*/
-/*
- *	populating the filesystem.
- */
-static int hb_fill_super(struct super_block * sb, void * data, int silent)
-{
-	int ret;
-	struct TA_write_ops *ops;
-	static struct tree_descr hb_files[] = {
-		[HB_Disk] = {".disk", &transaction_ops, S_IWUSR},
-		/* last one */ {""}
-	};
-	
-	ops = kmalloc(sizeof(struct TA_write_ops) +
-		      (sizeof(ops->write_op[0])), 
-		      GFP_KERNEL);
-	if (!ops)
+	if (tmp >= NM_MAX_NODES)
+		return -ERANGE;
+
+	reg->hr_slots = kcalloc(tmp, sizeof(struct hb_disk_slot), GFP_KERNEL);
+	if (reg->hr_slots == NULL)
 		return -ENOMEM;
 
-	ops->num_ops = HB_WriteOpArraySize;
-	ops->write_op[HB_Disk] = write_disk;
+	reg->hr_blocks = tmp;
 
-	hbprintk("calling simple_fill_super...\n");
-	ret = simple_fill_super(sb, 0x5551212f, hb_files);
-	if (ret >= 0)
-		TA_GENERIC_SB_MEMBER(sb) = ops;
-	else 
-		kfree(ops);
-	return ret;
+	return count;
 }
 
-/* blindly copied from ocfs2 */
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
-static struct super_block *hb_get_sb(struct file_system_type *fs_type,
-				     int flags,
-				     const char *dev_name,
-				     void *data)
+static ssize_t hb_region_dev_read(struct hb_region *reg, char *page)
 {
-	return get_sb_single(fs_type, flags, data, hb_fill_super);
+	char name[BDEVNAME_SIZE]; /* hr_bdev is NULL until "dev" is written */
+	return reg->hr_bdev ? sprintf(page, "%s\n", bdevname(reg->hr_bdev, name)) : 0;
 }
 
-static struct file_system_type hb_fs_type = {
-	.owner          = THIS_MODULE,
-	.name           = "hb",
-	.get_sb         = hb_get_sb,
-	.kill_sb        = kill_anon_super,
-};
-#else
-static struct super_block *hb_read_super(struct super_block *sb,
-					 void *data,
-					 int silent)
+/* this is acting as commit; we set up all of hr_bdev and hr_task or nothing */
+static ssize_t hb_region_dev_write(struct hb_region *reg, const char *page,
+				   size_t count)
 {
-	hbprintk("welcome to hb_read_super!!!\n");
-	return (hb_fill_super(sb, data, silent) < 0) ? NULL : sb;
-}
-static DECLARE_FSTYPE (hb_fs_type, "hb", hb_read_super, FS_SINGLE|FS_LITTER);
-#endif
+	long fd;
+	char *p = (char *)page;
+	struct file *filp = NULL;
+	struct inode *inode = NULL;
+	ssize_t ret = -EINVAL;
+	struct hb_disk_slot *slot;
+	int i;
 
-/* TODO: make callbacks all return int */
-static void hb_nm_group_node_add_cb(void *ptr1, void *ptr2, u8 idx)
-{
-	hb_disk_slot *slot;
-	struct inode *group = ptr1;
-	struct inode *node = ptr2;
-	cluster_disk *disk;
-	nm_group_inode_private *priv;
-	int ino, ret = 0;
-	u64 block;
+	fd = simple_strtol(p, &p, 0);
+	if (!p || (*p && (*p != '\n')))
+		goto out;
 
-	hbprintk("hb_nm_group_node_add_cb: group=%lu, node=%lu, idx=%u\n",
-	       group->i_ino, node->i_ino, idx);
+	if (fd < 0 || fd >= INT_MAX)
+		goto out;
 
-	down(&group->i_sem);	
-	priv = group->u.generic_ip;
-	if (!priv) {
-		hbprintk("eek! bad group inode!\n");
-		goto leave;
-	}
-	disk = &priv->disk;
-	if (disk->uuid[0]) {
-		ret = util_resize_rarray(&disk->slots, idx+1);
-		if (ret < 0) {
-			hbprintk("eeeeeeek!!!! failed to resize disk state "
-				 "data\n");
-			goto leave;
-		}
-	
-		ino = nm_get_node_global_index(node);
-		if (ino > disk->num_blocks) {
-			hbprintk("disk heartbeat area does not have enough "
-				 "blocks!\n");
-			goto leave;
-		}
-		block = ino + disk->start_block;
-	
-		slot = util_rarray_idx_to_slot(&disk->slots, idx);
-		if (!slot) {
-			hbprintk("eeeeeeek!!!! failed to get disk state data "
-				 "pointer: %d\n", idx);
-			goto leave;
-		}
-		slot->inode = igrab(node);
-		slot->last_time = 0;
-		slot->margin = HB_INITIAL_DISK_MARGIN;
-#warning needs to change for 2.6
-		slot->bh = hb_getblk(block, disk);
-		slot->state = HB_NODE_STATE_INIT;
-	} else {
-		hbprintk("doing nothing for group add for non-disk heartbeat "
-			 "group\n");
-	}
-	
-leave:
-	up(&group->i_sem);
-	return;	
-}
+	filp = fget(fd);
+	if (filp == NULL)
+		goto out;
 
-static void hb_nm_group_node_del_cb(void *ptr1, void *ptr2, u8 idx)
-{
-	hb_disk_slot *slot;
-	struct inode *group = ptr1;
-	struct inode *node = ptr2;
-	cluster_disk *disk;
-	nm_group_inode_private *priv;
-	int ret = -EINVAL;
+	if (reg->hr_blocks == 0 || reg->hr_slots == NULL)
+		goto out;
 
-	hbprintk("hb_nm_group_node_del_cb: group=%lu, node=%lu, idx=%u\n",
-	       group->i_ino, node->i_ino, idx);
+	inode = igrab(filp->f_mapping->host);
+	if (inode == NULL)
+		goto out;
+	reg->hr_bdev = I_BDEV(filp->f_mapping->host);
 
-	down(&group->i_sem);
-	priv = group->u.generic_ip;
-	if (!priv) {
-		hbprintk("eek! bad group inode!\n");
-		goto leave;
+	ret = blkdev_get(reg->hr_bdev, FMODE_WRITE | FMODE_READ, 0);
+	if (ret) {
+		reg->hr_bdev = NULL;
+		goto out;
 	}
-	disk = &priv->disk;
-	slot = util_rarray_idx_to_slot(&disk->slots, idx);
-	if (!slot) {
-		hbprintk("eeeeeeek!!!! failed to get disk state data "
-			 "pointer: %d\n", idx);
-		goto leave;
+	inode = NULL;
+
+	for(i = 0; i < reg->hr_blocks; i++) {
+		slot = &reg->hr_slots[i];
+		slot->ds_block = reg->hr_start_block + i;
+		slot->ds_node_num = i;
+		slot->ds_margin = HB_INITIAL_DISK_MARGIN;
+		INIT_LIST_HEAD(&slot->ds_alive_item);
+		INIT_LIST_HEAD(&slot->ds_dead_item);
 	}
-	if (slot->inode!=node) {
-		hbprintk("eeeeeeek!!!! node inode changed!\n");
-		goto leave;
+
+	reg->hr_task = kthread_run(hb_thread, reg, "hb-%s",
+				   reg->hr_kobj.k_name);
+	if (IS_ERR(reg->hr_task)) {
+		reg->hr_task = NULL;
+		goto out;
 	}
-	iput(node);
-	if (slot->bh) {
-		wait_on_buffer(slot->bh);
-		brelse(slot->bh);
-	}
-	memset(slot, 0, sizeof(hb_disk_slot));
-	ret = 0;
-leave:
 
-	up(&group->i_sem);
-	hbprintk("hb_nm_group_node_del_cb done: %d\n", ret);
-	return;
-}
+	down_write(&hb_callback_sem);
+	list_add_tail(&reg->hr_active_item, &hb_active_regions);
+	up_write(&hb_callback_sem);
 
-static void hb_nm_node_add_cb(void *ptr1, void *ptr2, u8 idx)
-{
-	//struct inode *node = ptr1;
+	ret = count;
+out:
+	if (filp)
+		fput(filp);
+	if (inode)
+		iput(inode);
+	if (ret < 0) {
+		if (reg->hr_bdev) {
+			blkdev_put(reg->hr_bdev);
+			reg->hr_bdev = NULL;
+		}
+	}
+	return ret;
 }
+struct hb_region_attribute {
+	struct attribute attr;
+	ssize_t (*show)(struct hb_region *, char *);
+	ssize_t (*store)(struct hb_region *, const char *, size_t);
+};
 
-static void hb_nm_group_add_cb(void *ptr1, void *ptr2, u8 idx)
-{
-	struct inode *group = ptr1;
-	nm_group_inode_private *priv;
+static struct hb_region_attribute hb_region_attr_block_bytes = {
+	.attr	= { .name = "block_bytes", .mode = S_IRUGO | S_IWUSR },
+	.show	= hb_region_block_bytes_read,
+	.store	= hb_region_block_bytes_write,
+};
+static struct hb_region_attribute hb_region_attr_start_block = {
+	.attr	= { .name = "start_block", .mode = S_IRUGO | S_IWUSR },
+	.show	= hb_region_start_block_read,
+	.store	= hb_region_start_block_write,
+};
+static struct hb_region_attribute hb_region_attr_blocks = {
+	.attr	= { .name = "blocks", .mode = S_IRUGO | S_IWUSR },
+	.show	= hb_region_blocks_read,
+	.store	= hb_region_blocks_write,
+};
+static struct hb_region_attribute hb_region_attr_dev = {
+	.attr	= { .name = "dev", .mode = S_IRUGO | S_IWUSR },
+	.show	= hb_region_dev_read,
+	.store	= hb_region_dev_write,
+};
 
-	hbprintk("hb_nm_group_add_cb: group=%lu, idx=%u\n",
-	       group->i_ino, idx);
-	
-	priv = group->u.generic_ip;
-	if (!priv) {
-		hbprintk("eek! bad group inode!\n");
-		return;
-	}
+static struct attribute *hb_region_default_attrs[] = {
+	&hb_region_attr_block_bytes.attr,
+	&hb_region_attr_start_block.attr,
+	&hb_region_attr_blocks.attr,
+	&hb_region_attr_dev.attr,
+	NULL,
+};
 
-	spin_lock(&hb_lock);
-	list_add_tail(&priv->net_list, &hb_net_groups);
-	if (priv->disk.uuid[0]) {
-		hbprintk("adding priv=%p inode=%p to disk group list\n", 
-			 priv, group);
-		list_add_tail(&priv->disk_list, &hb_disk_groups);
-	}
-	spin_unlock(&hb_lock);
+static ssize_t hb_region_show(struct kobject *kobj,
+			      struct attribute *attr,
+			      char *page)
+{
+	struct hb_region *reg = to_hb_region(kobj);
+	struct hb_region_attribute *hb_region_attr =
+		container_of(attr, struct hb_region_attribute, attr);
+	ssize_t ret = 0;
+
+	if (hb_region_attr->show)
+		ret = hb_region_attr->show(reg, page);
+	return ret;
 }
 
-static int __init init_hb(void)
+static ssize_t hb_region_store(struct kobject *kobj,
+			       struct attribute *attr,
+			       const char *page, size_t count)
 {
-	int retval=-1, i;
-	hbprintk("loading heartbeat module: nodename is %s\n", nm_nodename);
+	struct hb_region *reg = to_hb_region(kobj);
+	struct hb_region_attribute *hb_region_attr =
+		container_of(attr, struct hb_region_attribute, attr);
+	ssize_t ret = -EINVAL;
 
-	if (proc_mkdir("cluster/heartbeat", 0)) {
-		// ???
-	}
+	if (hb_region_attr->store)
+		ret = hb_region_attr->store(reg, page, count);
+	return ret;
+}
 
-	//hb_net_timestamps = __get_free_page(GFP_KERNEL);
-	//if (!hb_net_timestamps)
-	//	goto done;
+struct sysfs_ops hb_region_sysfs_ops = {
+	.show	= &hb_region_show,
+	.store	= &hb_region_store,
+};
 
-	for (i=HB_NODE_DOWN_CB; i<HB_NUM_CB; i++) {
-		INIT_LIST_HEAD(&hb_callbacks[i].list);
-		init_MUTEX(&hb_callbacks[i].sem);
-	}
+static struct ukobj_type hb_region_type = {
+	.ktype	= {
+		.release	= hb_region_release,
+		.sysfs_ops	= &hb_region_sysfs_ops,
+		.default_attrs	= hb_region_default_attrs,
+	},
+	.owner	= THIS_MODULE,
+};
 
-	if (nm_register_callback(NM_GROUP_NODE_DEL_CB, hb_nm_group_node_del_cb))
-		goto done;
-	if (nm_register_callback(NM_GROUP_NODE_ADD_CB, hb_nm_group_node_add_cb))
-		goto done;
-	if (nm_register_callback(NM_NODE_ADD_CB, hb_nm_node_add_cb))
-		goto done;
-	if (nm_register_callback(NM_GROUP_ADD_CB, hb_nm_group_add_cb))
-		goto done;
- 
-	if (hb_launch_thread() < 0)
-		goto done;
- 
-	retval = register_filesystem(&hb_fs_type);
-done:
-	if (retval)
-		hb_teardown();
-	return retval;
-}
+/* heartbeat set */
 
-static void __exit exit_hb(void)
+struct hb_heartbeat_set {
+	struct ukset hs_ukset;
+	/* some stuff? */
+};
+
+static struct hb_heartbeat_set *to_hb_heartbeat_set(struct kset *kset)
 {
-	hb_complete_thread();
-	hb_teardown();
-	unregister_filesystem(&hb_fs_type);
-	hbprintk("unloading heartbeat module\n");
+	return kset ?
+		container_of(to_ukset(kset), struct hb_heartbeat_set, hs_ukset)
+		: NULL;
 }
 
-static void hb_teardown(void)
+static struct kobject *hb_heartbeat_set_make_object(struct kset *kset,
+						    const char *name)
 {
-	nm_unregister_callback(NM_GROUP_NODE_DEL_CB, hb_nm_group_node_del_cb);
-	nm_unregister_callback(NM_GROUP_NODE_ADD_CB, hb_nm_group_node_add_cb);
-	nm_unregister_callback(NM_NODE_ADD_CB, hb_nm_node_add_cb);
-	nm_unregister_callback(NM_GROUP_ADD_CB, hb_nm_group_add_cb);
-	remove_proc_entry("cluster/heartbeat", NULL);
+	struct hb_region *reg = NULL;
+	struct kobject *ret = NULL;
 
-	/* XXX make sure that we're not being called from any more active
-	 * nm callbacks, then teardown hb_callbacks */
-	//if (hb_net_timestamps)
-	//	kfree(hb_net_timestamps);
-}
+	printk("trying to make a heartbeat object\n");
 
-MODULE_LICENSE("GPL");
-module_init(init_hb)
-module_exit(exit_hb)
+	reg = kcalloc(1, sizeof(struct hb_region), GFP_KERNEL);
+	if (reg == NULL)
+		goto out; /* ENOMEM */
 
-/*
- * hb_fill_node_bytemap()
- * 255 bytes... each byte set to 0 (not mounted) or 1 (mounted)
- *
- */
-static int hb_fill_node_bytemap(struct inode *group, void *map, int size)
-{
-	hb_disk_slot *slot;
-	int idx = 0;
-	nm_group_inode_private *priv;
-	u8 *bytemap = (u8 *)map;
-	
-	priv = group->u.generic_ip;
+	INIT_LIST_HEAD(&reg->hr_active_item);
+	rwlock_init(&reg->hr_slot_list_lock);
+	INIT_LIST_HEAD(&reg->hr_alive_list);
+	INIT_LIST_HEAD(&reg->hr_dead_list);
 
-	down(&group->i_sem);
+	kobject_set_name(&reg->hr_kobj, name);
+	reg->hr_kobj.ktype = &hb_region_type.ktype;
+	kobject_init(&reg->hr_kobj);
 
-	if (priv->disk.uuid[0]) {
-		while ((slot = nm_iterate_group_disk_slots(group, &idx))) {
-			if (idx >= size-1) {
-				hbprintk("map size (%d) too small for "
-					 "index (%d)\n", size, idx);
-				up(&group->i_sem);
-				return -EINVAL;
-			}
-			if (slot->state == HB_NODE_STATE_UP)
-				bytemap[idx] = 1;
-			idx++;
-		}
-	} else {
-		hbprintk("filling straight from slot bitmap for non-disk "
-			 "heartbeat group\n");
-		idx = 0;
-		while ((idx = find_next_bit(priv->slot_bitmap, NM_MAX_NODES,
-					    idx)) != -1) {
-			if (idx == NM_MAX_NODES)
-				break;
-			bytemap[idx] = 1;
-		}
-	}
+	ret = &reg->hr_kobj;
 
-	up(&group->i_sem);
+out:
+	if (ret == NULL)
+		kfree(reg);
 
-	return 0;
+	return ret;
 }
 
-int hb_fill_node_map(struct inode *group, void *map, int size)
+static void hb_heartbeat_set_drop_object(struct kset *kset,
+				         struct kobject *kobj)
 {
-	hb_disk_slot *slot;
-	int idx = 0;
-	nm_group_inode_private *priv;
-	
-	priv = group->u.generic_ip;
+	struct hb_region *reg = to_hb_region(kobj);
 
-	memset(map, 0, size);
-	down(&group->i_sem);
+	printk("dropping hr_region %p\n", reg);
 
-	if (priv->disk.uuid[0]) {
-		while ((slot = nm_iterate_group_disk_slots(group, &idx))) {
-			if (idx >= size-1) {
-				hbprintk("map size (%d) too small for "
-					 "index (%d)\n", size, idx);
-				up(&group->i_sem);
-				return -EINVAL;
-			}
-			if (slot->state == HB_NODE_STATE_UP)
-				set_bit(idx, map);
-			idx++;
-		}
-	} else {
-		hbprintk("filling straight from slot bitmap for non-disk "
-			 "heartbeat group\n");
-		memcpy(map, priv->slot_bitmap, size);
+	down_write(&hb_callback_sem); /* exclusive: the active list is modified */
+	if (!list_empty(&reg->hr_active_item))
+		list_del_init(&reg->hr_active_item);
+	up_write(&hb_callback_sem);
+
+	/* stop the thread when the user removes the region dir */
+	if (reg->hr_task) {
+		kthread_stop(reg->hr_task);
+		reg->hr_task = NULL;
 	}
 
-	up(&group->i_sem);
+	kobject_put(kobj);
+}
 
-	return 0;
+struct ukobj_type hb_heartbeat_set_type = {
+	.ktype = {
+		.sysfs_ops	= NULL, /* no attributes */
+	},
+	.make_object	= hb_heartbeat_set_make_object,
+	.drop_object	= hb_heartbeat_set_drop_object,
+	.owner		= THIS_MODULE,
+};
+
+/* this is just here to avoid touching ukset in heartbeat.h which the
+ * entire damn world #includes */
+struct kset *hb_alloc_hb_set(void)
+{
+	struct hb_heartbeat_set *hs = NULL;
+	struct kset *ret = NULL;
+
+	hs = kcalloc(1, sizeof(struct hb_heartbeat_set), GFP_KERNEL);
+	if (hs == NULL)
+		goto out;
+
+	ukset_init_type_name(&hs->hs_ukset, "heartbeat",
+			     &hb_heartbeat_set_type.ktype);
+
+	ret = &hs->hs_ukset.kset;
+out:
+	if (ret == NULL)
+		kfree(hs);
+	return ret;
 }
-EXPORT_SYMBOL(hb_fill_node_map);
 
+void hb_free_hb_set(struct kset *kset)
+{
+	struct hb_heartbeat_set *hs = to_hb_heartbeat_set(kset);
+	kfree(hs);
+}
+
+
+/* hb callback registration and issuing */
+
 static struct hb_callback *hbcall_from_type(int type)
 {
 	if (type < HB_NODE_DOWN_CB || type >= HB_NUM_CB)
@@ -964,7 +860,7 @@
 }
 EXPORT_SYMBOL(hb_unregister_callback);
 
-static void hb_do_callbacks(int type, void *ptr1, void *ptr2, int idx)
+static void hb_do_callbacks(int type, struct nm_node *node, int idx)
 {
 	struct list_head *iter;
 	struct hb_callback_func *f;
@@ -974,15 +870,19 @@
 	if (IS_ERR(hbcall))
 		return;
 
+	/* XXX not interruptible?  this is in the hb thread.. */
+	down_write(&hb_callback_sem);
+
 	if (down_interruptible(&hbcall->sem)) {
 		hbprintk("missed hb callback(%d) due to EINTR!\n", type);
-		return;
+		goto out;
 	}
 	
 	list_for_each(iter, &hbcall->list) {
 		f = list_entry(iter, struct hb_callback_func, hc_item);
-		(f->hc_func)(ptr1, ptr2, idx, f->hc_data);
+		(f->hc_func)(node, idx, f->hc_data);
 	}
-
 	up(&hbcall->sem);
+out:
+	up_write(&hb_callback_sem);
 }

Modified: trunk/fs/ocfs2/cluster/heartbeat.h
===================================================================
--- trunk/fs/ocfs2/cluster/heartbeat.h	2005-03-18 04:25:45 UTC (rev 2005)
+++ trunk/fs/ocfs2/cluster/heartbeat.h	2005-03-18 06:01:10 UTC (rev 2006)
@@ -30,48 +30,15 @@
 
 #include "ocfs2_heartbeat.h"
 
-enum {
-	HB_NODE_STATE_INIT = 0,
-	HB_NODE_STATE_DOWN,
-	HB_NODE_STATE_UP
-};
-
-struct _heartbeat_ctxt
-{
-	int dummy;
-};
-
-typedef struct _hb_disk_slot
-{
-	struct inode *inode;
-	struct buffer_head *bh;
-	struct list_head list;
-	unsigned long last_time;
-	u16 margin;
-	u16 state;
-} hb_disk_slot;
-
-
-
-#define HB_THREAD_MS                  2000   // every 2 seconds
-
-
-enum {
-	HB_TYPE_DISK = 0,
-	HB_TYPE_NET
-};
-
-
 /* callback stuff */
-
 enum {
 	HB_NODE_DOWN_CB = 0,
 	HB_NODE_UP_CB,
-	HB_NODE_RESPONDED_CB,    // this one is very chatty
 	HB_NUM_CB
 };
 
-typedef void (hb_cb_func)(struct inode *, struct inode *, int, void *);
+struct nm_node;
+typedef void (hb_cb_func)(struct nm_node *, int, void *);
 
 struct hb_callback_func {
 	struct list_head	hc_item;
@@ -81,28 +48,18 @@
 	int			hc_type;
 };
 
-enum {
-	HB_Root = 1,
-	HB_Disk,
-	HB_WriteOpArraySize
-};
-
-
-// number of initial allowed misses 
-#define HB_INITIAL_DISK_MARGIN     60
-#define HB_INITIAL_NET_MARGIN      60
-
 // number of allowed misses in steady state
+#define HB_INITIAL_DISK_MARGIN     60
 #define HB_DISK_MARGIN             30
-#define HB_NET_MARGIN              30
 
+struct kset *hb_alloc_hb_set(void);
+void hb_free_hb_set(struct kset *kset);
 
 void hb_setup_callback(struct hb_callback_func *hc, int type, hb_cb_func *func,
 		      void *data, int priority);
 int hb_register_callback(struct hb_callback_func *hc);
 int hb_unregister_callback(struct hb_callback_func *hc); 
-int hb_fill_node_map(struct inode *group, void *map, int size);
+void hb_fill_node_map(unsigned long *map, unsigned bytes);
+void hb_init(void);
 
-
-
 #endif /* CLUSTER_HEARTBEAT_H */

Modified: trunk/fs/ocfs2/cluster/nodemanager.c
===================================================================
--- trunk/fs/ocfs2/cluster/nodemanager.c	2005-03-18 04:25:45 UTC (rev 2005)
+++ trunk/fs/ocfs2/cluster/nodemanager.c	2005-03-18 06:01:10 UTC (rev 2006)
@@ -1,12 +1,8 @@
 /* -*- mode: c; c-basic-offset: 8; -*-
  * vim: noexpandtab sw=8 ts=8 sts=0:
  *
- * nodemanager.c
+ * Copyright (C) 2004, 2005 Oracle.  All rights reserved.
  *
- * totally lame static node management placeholder
- *
- * Copyright (C) 2004 Oracle.  All rights reserved.
- *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public
  * License as published by the Free Software Foundation; either
@@ -21,14 +17,10 @@
  * License along with this program; if not, write to the
  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  * Boston, MA 021110-1307, USA.
- *
- * Authors: Kurt Hackel
  */
 
-
 #include "cl_compat.h"
 #include "util.h"
-#include "clcommon.h"
 
 #include <linux/module.h>
 #include <linux/kernel.h>
@@ -64,6 +56,7 @@
 #include <linux/pagemap.h>
 #include <linux/hash.h>
 #include <linux/bitops.h>
+#include <linux/kobject.h>
 
 #include <asm/uaccess.h>
 
@@ -73,116 +66,17 @@
 
 #include "compat_libfs.h"
 #include "transaction_file.h"
+#include "usysfs.h"
 
 #ifndef __user
 #define __user
 #endif
 
+/* for now we operate under the assumption that there can be only one
+ * cluster active at a time.  Changing this will require threading
+ * cluster references through every place where nodes are looked up */
+static struct nm_cluster *nm_single_cluster = NULL;
 
-/*
- * This nm module is similar to nfsd/nfsctl.c in that it uses
- * transaction files (in /proc/cluster/nm) to communicate with
- * the kernel module instead of ioctls or other means.
- *
- * Files involved:
- *  /proc/cluster/nm/cluster - used to create/destroy cluster, adds 
- *                             nodes/groups to the cluster, queries info
- *                             about the cluster
- *  /proc/cluster/nm/group   - adds/removes nodes from a group, queries
- *                             info about a group
- *  /proc/cluster/nm/node    - changes info for a node, queries info about
- *                             a node
- *
- * This nm implementation basically allows this node to live in exactly one 
- * cluster.  All "clustered" nodes that are known to this node should be
- * added to the cluster, and all nodes should see the same list of nodes in
- * the same order at all times.  The "slot" number given to a node in this 
- * global cluster list is fixed and never changes.  Groups can be dynamically
- * created within a cluster (TODO: currently static only) and be made up of 
- * one or more nodes (listed at most once) in the global list.  A node may exist
- * in many groups.  Also, a group may have an optional disk UUID which is simply
- * stored for later use by the heartbeat service.  (The heartbeat service will
- * do disk heartbeating only for those groups with valid UUIDs.)  
- *
- * USAGE:
- * For our purposes, the nm service can be autoloaded by an fstab entry or 
- * manually through mount (mount -t nm none /proc/cluster/nm).  Once that is 
- * done, an init script (or single executable on an initrd) should be run to 
- * create the static cluster info, possibly from a file like /etc/nm.conf or 
- * similar.  We should probably create a "dlm" or "everyone" group (with NO disk
- * heartbeating) so that the dlm service can be used with the network only.  
- * This group should contain all known nodes.  After this is done, the net, hb 
- * and dlm modules can come up. The nm service is now ready for use, since 
- * groups don't need to be created till later.
- * 
- * A group services daemon can be written (by someone!? ;-) to run at this 
- * point. Since the "dlm" group has everything it needs for full dlmming (since
- * it uses only network), the dlm itself can be used to arbitrate for group 
- * creation, and additions/deletions from groups.  Callbacks should be 
- * registered with nm by other services that care on each of these events.  For
- * instance, heartbeat should register a callback with nm for group creation, 
- * and addition and deletion from a group so that it can make any necessary 
- * changes to its heartbeating (primarily so that it can begin/end disk 
- * heartbeat for any group/node that needs it).
- *   
- * NOTE NOTE NOTE !!!!:
- * This is intended to be a quickie implementation.  (translation: lame)  I do
- * not want to step on anyone's toes who may have implemented something wayyy 
- * better.  If something out there "wins", we will plug into that instead.  If 
- * nothing really takes off, we at least have a (lame) reference to work off of.
- * However, since this implementation exists solely to make ocfs2 work, and one
- * of the major advantages of ocfs version 1 was ease of setup, we don't want 
- * to move to something substantially more complicated than this (one conf 
- * file).
- *
- */ 
-
-
-
-/* globals */
-nm_cluster cluster;
-struct super_block *single_sb;
-char *nm_nodename;
-EXPORT_SYMBOL(nm_nodename);
-static spinlock_t nm_lock = SPIN_LOCK_UNLOCKED;
-static DECLARE_MUTEX(nm_cb_sem);
-struct list_head nm_callbacks[NM_NUM_CB];
-
-
-static void nm_teardown(void);
-static int nm_create_cluster(char *buf);
-static void nm_init_cluster(nm_cluster *cluster);
-int nm_create_node(char *buf, nm_op *data);
-int nm_name_cluster(char *buf, nm_op *data);
-int nm_destroy_cluster(char *buf);
-int nm_get_cluster_num_nodes(char *buf);
-int nm_get_cluster_num_groups(char *buf);
-int nm_get_node_info(char *buf, nm_op *data);
-int nm_get_group_info(char *buf, nm_op *data);
-nm_cluster *nm_get_cluster(void);
-struct inode *nm_get_node_by_name(char *node_name);
-static void nm_do_callbacks(int type, void *ptr1, void *ptr2, u8 idx);
-
-/* support for adding files, dirs, hardlinks in /proc/cluster/nm/... */
-extern struct file_operations simple_dir_operations;
-extern struct inode_operations simple_dir_inode_operations;
-
-static inline int nm_find_next_slot(void *bitmap, int max, int request);
-static struct dentry * nm_add_file(struct super_block *s, struct dentry *parent, struct tree_descr *file, int ino);
-static struct dentry * nm_add_link(struct super_block *s, struct dentry *parent, struct tree_descr *file, int ino);
-
-static ssize_t write_node(struct file *file, char *buf, size_t size);
-static ssize_t write_group(struct file *file, char *buf, size_t size);
-static ssize_t write_cluster(struct file *file, char *buf, size_t size);
-
-static u8 nm_get_group_index(struct inode *group, struct inode *inode, struct dentry **child);
-
-#define NM_HASH_BITS     7
-#define NM_HASH_SIZE     (1 << NM_HASH_BITS)
-#define NM_HASH_MASK     (NM_HASH_SIZE - 1)
-
-
-
 #define nmprintk(x, arg...)    printk("(nm:%d) " x, current->pid, ##arg)
 #define nmprintk0(x)           printk("(nm:%d) " x, current->pid)
 #if 0
@@ -190,1171 +84,603 @@
 #define nmprintk0(x)
 #endif
 
-static struct list_head *nm_ip_hash = NULL;
-static spinlock_t nm_ip_hash_lock;
+struct nm_cluster {
+	struct ukset	cl_ukset;
+	unsigned	cl_has_local:1;
+	u8		cl_local_node;
+	rwlock_t	cl_nodes_lock;
+	struct nm_node  *cl_nodes[NM_MAX_NODES];
+	struct rb_root	cl_node_ip_tree;
+};
 
-static int nm_init_ip_hash(void);
-static void nm_destroy_ip_hash(void);
+struct nm_node * nm_get_node_by_num(u8 node_num)
+{
+	struct nm_node *node = NULL;
 
+	if (node_num >= NM_MAX_NODES || nm_single_cluster == NULL)
+		goto out;
 
-static void nm_destroy_ip_hash(void)
-{
-	int i;
-	if (!nm_ip_hash)
-		return;
-	for (i=0; i<NM_HASH_SIZE; i++) {
-		/* TODO: cleanup */
-	}
-	free_page((unsigned long)nm_ip_hash);
+	read_lock(&nm_single_cluster->cl_nodes_lock);
+	node = nm_single_cluster->cl_nodes[node_num];
+	if (node) /* XXX get a ref */
+		;
+	read_unlock(&nm_single_cluster->cl_nodes_lock);
+out:
+	return node;
 }
+EXPORT_SYMBOL(nm_get_node_by_num);
 
-static int nm_init_ip_hash(void)
+static struct nm_node * nm_node_ip_tree_lookup(struct nm_cluster *cluster,
+					      u32 ip_needle,
+					      struct rb_node ***ret_p,
+					      struct rb_node **ret_parent)
 {
-	int i;
-	
-	if ((PAGE_SIZE / sizeof(struct list_head)) < NM_HASH_SIZE) {
-		nmprintk("eek!  hash size too big for this arch!\n");
-		BUG();
-	}
+        struct rb_node **p = &cluster->cl_node_ip_tree.rb_node;
+        struct rb_node *parent = NULL;
+	struct nm_node *node, *ret = NULL;
 
-	nm_ip_hash = (struct list_head *) __get_free_page(GFP_KERNEL);
-	if (!nm_ip_hash)
-		return -ENOMEM;
-	for (i=0; i<NM_HASH_SIZE; i++)
-		INIT_LIST_HEAD(&nm_ip_hash[i]);
-	spin_lock_init(&nm_ip_hash_lock);
-	return 0;
-}
+        while (*p) {
+                parent = *p;
+                node = rb_entry(parent, struct nm_node, nd_ip_node);
 
+                if (ip_needle < node->nd_ipv4_address)
+                        p = &(*p)->rb_left;
+                else if (ip_needle > node->nd_ipv4_address)
+                        p = &(*p)->rb_right;
+                else {
+			ret = node;
+                        break;
+		}
+        }
+                                                                                
+        if (ret_p != NULL)
+                *ret_p = p;
+        if (ret_parent != NULL)
+                *ret_parent = parent;
 
-
-
-
-static inline int nm_find_next_slot(void *bitmap, int max, int request)
-{
-	int start = 0, slot_num;
-	if (request != NM_INVALID_SLOT_NUM)
-		start = request;
-	slot_num = find_next_zero_bit (bitmap, max, start);
-	if (slot_num >= max)
-		return -1;
-	if (request != NM_INVALID_SLOT_NUM && slot_num != request)
-		return -1;
-	set_bit(slot_num, bitmap);
-	return slot_num;
+        return ret;
 }
 
-
-
-
-static struct dentry * nm_add_file(struct super_block *s, struct dentry *parent,
-				   struct tree_descr *file, int ino)
+struct nm_node * nm_get_node_by_ip(u32 addr)
 {
-	struct qstr name;
-	struct dentry *dentry = ERR_PTR(-EINVAL);
-	struct inode *inode;
+	struct nm_node *node = NULL;
+	struct nm_cluster *cluster = nm_single_cluster;
 
-	if (!file->name)
+	if (cluster == NULL)
 		goto out;
-	name.name = file->name;
-	name.len = strlen(name.name);
-	nmprintk("adding file %.*s\n", name.len, name.name);
-	name.hash = full_name_hash(name.name, name.len);
-	dentry = d_alloc(parent, &name);
-	if (!dentry) {
-		dentry = ERR_PTR(-EINVAL);
-		goto out;
-	}
-	inode = new_inode(s);
-	if (!inode) {
-		dput(dentry);
-		dentry = ERR_PTR(-EINVAL);
-		goto out;
-	}
-	inode->i_mode = file->mode;
-	inode->i_uid = inode->i_gid = 0;
-	inode->i_blksize = PAGE_CACHE_SIZE;
-	inode->i_blocks = 0;
-	inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
-	if (file->mode & S_IFDIR) {
-		inode->i_op = &simple_dir_inode_operations;
-		inode->i_fop = &simple_dir_operations;
-	} else {
-		inode->i_fop = file->ops;
-	}		    
-	inode->i_ino = ino;
-	insert_inode_hash(inode);
-	d_add(dentry, inode);
 
+	read_lock(&cluster->cl_nodes_lock);
+	node = nm_node_ip_tree_lookup(cluster, addr, NULL, NULL);
+	if (node) /* XXX get a ref */
+		;
+	read_unlock(&cluster->cl_nodes_lock);
+
 out:
-	return dentry;
+	return node;
 }
+EXPORT_SYMBOL(nm_get_node_by_ip);
 
+void nm_node_put(struct nm_node *node)
+{
+	/* XXX do something */
+}
+EXPORT_SYMBOL(nm_node_put);
 
-static struct dentry * nm_add_link(struct super_block *s, struct dentry *parent,
-				   struct tree_descr *file, int ino)
+u8 nm_this_node(void)
 {
-	struct qstr name;
-	struct dentry *dentry = ERR_PTR(-EINVAL);
-	struct inode *inode;
+	u8 node_num = NM_MAX_NODES;
 
-	if (!file->name)
-		goto out;
-	name.name = file->name;
-	name.len = strlen(name.name);
-	nmprintk("adding link %.*s\n", name.len, name.name);
-	name.hash = full_name_hash(name.name, name.len);
-	dentry = d_alloc(parent, &name);
-	if (!dentry) {
-		nmprintk("failed to d_alloc\n");
-		dentry = ERR_PTR(-EINVAL);
-		goto out;
-	}
-	inode = iget(s, ino);
-	if (!inode) {
-		nmprintk("failed to iget\n");
-		dput(dentry);
-		dentry = ERR_PTR(-EINVAL);
-		goto out;
-	}
-	if (!inode->u.generic_ip) {
-		nmprintk("bad inode: %d\n", ino);
-		iput(inode);
-		dput(dentry);
-		dentry = ERR_PTR(-EINVAL);
-		goto out;
-	}
-	inode->i_nlink++;
-	d_add(dentry, inode);
+	if (nm_single_cluster && nm_single_cluster->cl_has_local)
+		node_num = nm_single_cluster->cl_local_node;
 
-out:
-	return dentry;
+	return node_num;
 }
+EXPORT_SYMBOL(nm_this_node);
 
+/* node usysfs bits */
 
+static struct nm_cluster *to_nm_cluster(struct kobject *kobj)
+{
+	return kobj ?
+		container_of(to_ukset(to_kset(kobj)), struct nm_cluster,
+			     cl_ukset)
+		: NULL;
+}
 
+static struct nm_node *to_nm_node(struct kobject *kobj)
+{
+	return kobj ? container_of(kobj, struct nm_node, nd_kobj) : NULL;
+}
 
+static void nm_node_release(struct kobject *kobj)
+{
+	struct nm_node *node = to_nm_node(kobj);
+	printk("releasing node %p\n", node);
+}
 
-/* cluster, node and group transaction files. 
- * here's where the actual work of nm takes place. */
+static ssize_t nm_node_num_read(struct nm_node *node, char *page)
+{
+	return sprintf(page, "%d\n", node->nd_num);
+}
 
-static int nm_create_cluster(char *buf)
+struct nm_cluster *to_nm_cluster_from_node(struct nm_node *node)
 {
-	int ret = -EINVAL;
-	
-	nmprintk("create cluster...\n");
-	
-	spin_lock(&nm_lock);
-	if (cluster.state == NM_CLUSTER_UP) {
-		ret = sprintf(buf, "%d: cluster already up\n", -EINVAL);
-	} else {
-		cluster.state = NM_CLUSTER_UP;
-		ret = sprintf(buf, "0: cluster state: UP");
-	}
-	spin_unlock(&nm_lock);
-	return ret;
+	/* through the first node_set .parent
+	 * mycluster/nodes/mynode == nm_cluster->nm_node_set->nm_node */
+	return to_nm_cluster(node->nd_kobj.parent->parent);
 }
 
-
-
-int nm_create_group(char *buf, nm_op *data)
+static ssize_t nm_node_num_write(struct nm_node *node, const char *page,
+				 size_t count)
 {
-	struct tree_descr desc;
-	struct dentry *dentry = NULL;
-	struct inode *inode = NULL;
-	int ino, group_num;
-	int ret = -EINVAL;
-	nm_group_inode_private *g = NULL;
+	struct nm_cluster *cluster = to_nm_cluster_from_node(node);
+	unsigned long tmp;
+	char *p = (char *)page;
 
-	nmprintk("create group...\n");
+	tmp = simple_strtoul(p, &p, 0);
+	if (!p || (*p && (*p != '\n')))
+		return -EINVAL;
 
-	data->arg_u.gc.name[NM_MAX_NAME_LEN] = '\0';
-	inode = nm_get_group_by_name(data->arg_u.gc.name);
-	if (inode) {
-		ret = sprintf(buf, "%d: group %u (%s) already exists", -EEXIST, 
-			      nm_get_group_global_index(inode), 
-			      data->arg_u.gc.name);
-		iput(inode);
-		return ret;
-	}
+	if (tmp >= NM_MAX_NODES)
+		return -ERANGE;
 
-	group_num = data->arg_u.gc.group_num;
-	if (group_num > NM_INVALID_SLOT_NUM)
-		goto leave;
-
-	spin_lock(&cluster.bitmap_lock);
-	group_num = nm_find_next_slot(&(cluster.group_bitmap[0]), 255, 
-				      group_num);
-	spin_unlock(&cluster.bitmap_lock);
-
-	if (group_num < 0) {
-		nmprintk("out of group slots!\n");
-		goto leave;
+	write_lock(&cluster->cl_nodes_lock);
+	if (cluster->cl_nodes[tmp])
+		p = NULL;
+	else  {
+		cluster->cl_nodes[tmp] = node;
+		node->nd_num = tmp;
 	}
+	write_unlock(&cluster->cl_nodes_lock);
+	if (p == NULL)
+		return -EEXIST;
 
-	ino = group_num + NM_GROUP_INODE_START;
-
-	desc.name = data->arg_u.gc.name;
-	desc.ops = NULL;
-	desc.mode = S_IFDIR | 0755;
-	dentry = nm_add_file(single_sb, single_sb->s_root, &desc, ino);
-	if (IS_ERR(dentry))
-		goto leave;
-	inode = igrab(dentry->d_inode);
-	if (!inode) {
-		nmprintk("igrab failed!\n");
-		goto leave;
-	}
-		
-	g = kmalloc(sizeof(nm_group_inode_private), GFP_KERNEL);
-	if (!g) 
-		goto leave;
-
-	memset(g, 0, sizeof(nm_group_inode_private));
-	memcpy(g->disk.uuid, data->arg_u.gc.disk_uuid, CLUSTER_DISK_UUID_LEN);
-	spin_lock_init(&g->bitmap_lock);
-	if (g->disk.uuid[0])
-		g->state = NM_GROUP_NOT_READY;
-	else
-		g->state = NM_GROUP_READY;
-	g->inode = inode;
-	inode->u.generic_ip = g;
-
-	ret = sprintf(buf, "0: group %u (%s) added, uuid: %s", group_num,
-		      data->arg_u.gc.name, g->disk.uuid);
-	nm_do_callbacks(NM_GROUP_ADD_CB, inode, NULL, group_num);
-
-leave:
-	if (ret < 0) {
-		if (inode) {
-			if (inode->u.generic_ip)
-				kfree(inode->u.generic_ip);
-			iput(inode);
-		}
-		if (dentry)
-			dput(dentry);
-	}
-	return ret;
+	return count;
 }
-EXPORT_SYMBOL(nm_create_group);
+static ssize_t nm_node_ipv4_port_read(struct nm_node *node, char *page)
+{
+	return sprintf(page, "%u\n", ntohs(node->nd_ipv4_port));
+}
 
-int nm_create_node(char *buf, nm_op *data)
+static ssize_t nm_node_ipv4_port_write(struct nm_node *node, const char *page,
+				 size_t count)
 {
-	struct tree_descr desc;
-	struct dentry *dentry = NULL;
-	struct inode *inode = NULL;
-	int ino, node_num, bucket;
-	int ret = -EINVAL;
-	nm_node_inode_private *n = NULL;
-	struct page *page = NULL;
+	unsigned long tmp;
+	char *p = (char *)page;
 
-	nmprintk("add cluster node ...\n");
+	tmp = simple_strtoul(p, &p, 0);
+	if (!p || (*p && (*p != '\n')))
+		return -EINVAL;
 
-	data->arg_u.node.node_name[NM_MAX_NAME_LEN] = '\0';
-	inode = nm_get_node_by_name(data->arg_u.node.node_name);
-	if (inode) {
-		ret = sprintf(buf, "%d: node %u (%s) already exists", -EEXIST, 
-			      nm_get_node_global_index(inode), 
-			      data->arg_u.node.node_name);
-		iput(inode);
-		return ret;
-	}
+	if (tmp == 0)
+		return -EINVAL;
+	if (tmp >= (u16)-1)
+		return -ERANGE;
 
-	node_num = data->arg_u.node.node_num;
-	if (node_num > NM_INVALID_SLOT_NUM) {
-		nmprintk("bad node_num: %d\n", node_num);
-		goto leave;
-	}
+	node->nd_ipv4_port = htons(tmp);
 
-	spin_lock(&cluster.bitmap_lock);
-	node_num = nm_find_next_slot(&(cluster.node_bitmap[0]), 255, node_num);
-	spin_unlock(&cluster.bitmap_lock);
-
-	if (node_num < 0) {
-		nmprintk("out of node slots!\n");
-		goto leave;
-	}
-
-	ino = node_num + NM_NODE_INODE_START;
-
-	desc.name = data->arg_u.node.node_name;
-	desc.ops = NULL;
-	desc.mode = S_IFREG | S_IWUSR;
-	dentry = nm_add_file(single_sb, single_sb->s_root, &desc, ino);
-	if (IS_ERR(dentry)) {
-		nmprintk("bad dentry\n");
-		goto leave;
-	}
-	inode = igrab(dentry->d_inode);
-	if (!inode) {
-		nmprintk("igrab failed!\n");
-		goto leave;
-	}
-		
-	n = kmalloc(sizeof(nm_node_inode_private), GFP_KERNEL);
-	if (!n) {
-		nmprintk("could not kmalloc\n");
-		goto leave;
-	}
-	memcpy(&n->node, &data->arg_u.node, sizeof(nm_node_info));
-	INIT_LIST_HEAD(&n->ip_hash);
-
-	spin_lock_init(&n->net.sock_lock);
-	n->net.sock = NULL;
-	n->net.sock_refs = 0;
-	n->net.sock_pending = 0;
-	n->net.defer_release = 0;
-	INIT_LIST_HEAD(&n->net.pending_waiters);
-	init_waitqueue_head(&n->net.waitq);
-	INIT_LIST_HEAD(&n->net.handlers);
-	INIT_LIST_HEAD(&n->net.active_item);
-	n->net.page = NULL;
-	n->net.page_off = 0;
-
-	page = alloc_page(GFP_KERNEL);
-	if (page == NULL) {
-		nmprintk("page allocation failed\n");
-		goto leave;
-	}
-	n->net.page = page;
-
-	/* hash on first ip address */
-	spin_lock(&nm_ip_hash_lock);
-	bucket = hash_long(n->node.ifaces[0].addr_u.ip_addr4, NM_HASH_BITS);
-	list_add_tail(&n->ip_hash, &nm_ip_hash[bucket]);
-	spin_unlock(&nm_ip_hash_lock);
-	nmprintk("hashed ip %d.%d.%d.%d to bucket %d\n", 
-		 NIPQUAD(n->node.ifaces[0].addr_u.ip_addr4), bucket);
-	n->inode = inode;
-	inode->u.generic_ip = n;
-
-	ret = sprintf(buf, "0: node %u (%s) added", node_num, 
-		      n->node.node_name);
-	nm_do_callbacks(NM_NODE_ADD_CB, inode, NULL, node_num);
-
-leave:
-	if (ret < 0) {
-		if (page)
-			__free_page(page);
-		if (inode) {
-			if (inode->u.generic_ip)
-				kfree(inode->u.generic_ip);
-			iput(inode);
-		}
-		if (dentry)
-			dput(dentry);
-	}
-	return ret;
+	return count;
 }
 
-int nm_make_group_ready(struct inode *group)
+static ssize_t nm_node_ipv4_address_read(struct nm_node *node, char *page)
 {
-	nm_group_inode_private *g = group->u.generic_ip;
-	if (!g)
-		return -EINVAL;
-	g->state = NM_GROUP_READY;
-	return 0;
+	return sprintf(page, "%u.%u.%u.%u\n", NIPQUAD(node->nd_ipv4_address));
 }
-EXPORT_SYMBOL(nm_make_group_ready);
 
-int nm_add_node_to_group(char *buf, nm_op *data)
+/* XXX this is acting as commit until commit really lands.. all this will be
+ * hoisted into the commit method */
+static ssize_t nm_node_ipv4_address_write(struct nm_node *node,
+					  const char *page,
+					  size_t count)
 {
-	struct tree_descr desc;
-	struct inode *inode = NULL;
-	struct dentry *dentry = NULL, *child = NULL;
-	nm_group_inode_private *g = NULL;
-	int group_num, slot_num;
-	int ret = -EINVAL;
-	u8 ino;
-	char tmpname[6];
+	struct nm_cluster *cluster = to_nm_cluster_from_node(node);
+	int ret, i;
+	struct rb_node **p, *parent;
+	unsigned int octets[4];
+	u32 ipv4_addr = 0; /* network order */
 
-	nmprintk("add node to group...\n");
+	ret = sscanf(page, "%3u.%3u.%3u.%3u", &octets[0], &octets[1],
+		     &octets[2], &octets[3]);
+	if (ret != 4)
+		return -EINVAL;
 
-	group_num = data->arg_u.gc.group_num;
-	ino = data->arg_u.gc.node_num;
-	slot_num = data->arg_u.gc.slot_num;
-
-	/* request a certain slot, or NM_INVALID_SLOT_NUM for any slot */
-	if (slot_num > NM_INVALID_SLOT_NUM)
-		goto leave;
-	
-	if (ino >= NM_INVALID_SLOT_NUM || group_num >= NM_INVALID_SLOT_NUM)
-		goto leave;
-
-       	inode = nm_get_group_by_num(group_num);
-	if (!inode)
-		goto leave;
-	if (list_empty(&inode->i_dentry))
-		goto leave;
-	dentry = dget(list_entry(inode->i_dentry.next, struct dentry, d_alias));
-	if (!dentry)
-		goto leave;
-	g = inode->u.generic_ip;
-	if (!g)
-		goto leave;
-
-	if (g->state == NM_GROUP_NOT_READY) {
-		ret = sprintf(buf, "%d: group disk has not been discovered.  "
-			      "cannot add nodes.", -EROFS);
-		goto leave;
+	for (i = 0; i < ARRAY_SIZE(octets); i++) {
+		if (octets[i] > 255)
+			return -ERANGE;
+		ipv4_addr |= octets[i] << (i * 8);
 	}
 
-	spin_lock(&g->bitmap_lock);
-	slot_num = nm_find_next_slot(&(g->slot_bitmap[0]), 255, slot_num);
-	spin_unlock(&g->bitmap_lock);
-	if (slot_num < 0)
-		goto leave;
-
-	/* create hardlink to ino with name "slot_num" */
-	sprintf(tmpname, "%03u", slot_num);
-	desc.name = &(tmpname[0]);
-	desc.ops = NULL;
-	desc.mode = 0;
-	child = nm_add_link(single_sb, dentry, &desc, 
-			    NM_NODE_INODE_START+ino);
-	if (IS_ERR(child)) {
-		nmprintk("error adding link for %s\n", tmpname);
-		child = NULL;
-		goto leave;
+	ret = 0;
+	write_lock(&cluster->cl_nodes_lock);
+	if (nm_node_ip_tree_lookup(cluster, ipv4_addr, &p, &parent))
+		ret = -EEXIST;
+	else {
+	        rb_link_node(&node->nd_ip_node, parent, p);
+		rb_insert_color(&node->nd_ip_node, &cluster->cl_node_ip_tree);
 	}
+	write_unlock(&cluster->cl_nodes_lock);
+	if (ret)
+		return ret;
 
-	ret = sprintf(buf, "0: node %u added to group: %.*s", 
-		      ino, dentry->d_name.len, dentry->d_name.name);
+	memcpy(&node->nd_ipv4_address, &ipv4_addr, sizeof(ipv4_addr));
 
-	if (!igrab(child->d_inode))
-		goto leave;
-	nm_do_callbacks(NM_GROUP_NODE_ADD_CB, inode, child->d_inode, slot_num);
-	iput(child->d_inode);
-
-leave:
-	if (dentry)
-		dput(dentry);
-	if (child)
-		dput(child);
-	if (inode)
-		iput(inode);
-	return ret;
+	return count;
 }
-EXPORT_SYMBOL(nm_add_node_to_group);
+static ssize_t nm_node_local_read(struct nm_node *node, char *page)
+{
+	return sprintf(page, "%d\n", node->nd_local);
+}
 
-int nm_remove_node_from_group(struct inode *group, struct inode *node)
+static ssize_t nm_node_local_write(struct nm_node *node,
+					  const char *page,
+					  size_t count)
 {
-	struct dentry *child = NULL;
-	nm_group_inode_private *g = NULL;
-	int slot_num;
-	int ret = -EINVAL;
+	struct nm_cluster *cluster = to_nm_cluster_from_node(node);
+	unsigned long tmp;
+	char *p = (char *)page;
+	ssize_t ret;
 
-	nmprintk("remove node from group...\n");
+	tmp = simple_strtoul(p, &p, 0);
+	if (!p || (*p && (*p != '\n')))
+		return -EINVAL;
 
-	slot_num = nm_get_group_index(group, node, &child);
+	tmp = !!tmp; /* boolean of whether this node wants to be local */
 
-	if (slot_num == NM_MAX_NODES || !child)
-		goto leave;
+	/* the only failure case is trying to set a new local node
+	 * when a different one is already set */
+	if (tmp && tmp == cluster->cl_has_local &&
+	    cluster->cl_local_node != node->nd_num)
+			return -EBUSY;
 
-	g = group->u.generic_ip;
-	if (!g)
-		goto leave;
-		
-	nmprintk("killing the dentry now!!\n");
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)
-	down(&group->i_zombie);
-	node->i_nlink--;
-	d_delete(child);
-	up(&group->i_zombie);
-#else
-	down(&group->i_sem);
-	node->i_nlink--;
-	d_delete(child);
-	up(&group->i_sem);
-#endif
-	nmprintk("done killing the dentry!!\n");
+	/* bring up the rx thread if we're setting the new local
+	 * node.  XXX make sure port/addr are set */
+	if (tmp && !cluster->cl_has_local) {
+		ret = net_start_rx_thread(node);
+		if (ret)
+			return ret;
+	}
 
+	if (!tmp && cluster->cl_has_local &&
+	    cluster->cl_local_node == node->nd_num) {
+		net_stop_rx_thread(node);
+		cluster->cl_local_node = 0;
+	}
 
-	if (!igrab(node))
-		goto leave;
-	nm_do_callbacks(NM_GROUP_NODE_DEL_CB, group, node, slot_num);
-	iput(node);
-	
-	spin_lock(&g->bitmap_lock);
-	clear_bit(slot_num, (void *)(&g->slot_bitmap[0]));
-	spin_unlock(&g->bitmap_lock);
+	node->nd_local = tmp;
+	if (node->nd_local) {
+		cluster->cl_has_local = tmp;
+		cluster->cl_local_node = node->nd_num;
+	}
 
-	ret = 0;
-
-leave:
-	if (child)
-		dput(child);
-	return ret;
+	return count;
 }
 
+struct nm_node_attribute {
+	struct attribute attr;
+	ssize_t (*show)(struct nm_node *, char *);
+	ssize_t (*store)(struct nm_node *, const char *, size_t);
+};
 
+static struct nm_node_attribute nm_node_attr_num = {
+	.attr	= { .name = "num", .mode = S_IRUGO | S_IWUSR },
+	.show	= nm_node_num_read,
+	.store	= nm_node_num_write,
+};
+static struct nm_node_attribute nm_node_attr_ipv4_port = {
+	.attr	= { .name = "ipv4_port", .mode = S_IRUGO | S_IWUSR },
+	.show	= nm_node_ipv4_port_read,
+	.store	= nm_node_ipv4_port_write,
+};
+static struct nm_node_attribute nm_node_attr_ipv4_address = {
+	.attr	= { .name = "ipv4_address", .mode = S_IRUGO | S_IWUSR },
+	.show	= nm_node_ipv4_address_read,
+	.store	= nm_node_ipv4_address_write,
+};
+static struct nm_node_attribute nm_node_attr_local = {
+	.attr	= { .name = "local", .mode = S_IRUGO | S_IWUSR },
+	.show	= nm_node_local_read,
+	.store	= nm_node_local_write,
+};
+static struct attribute *nm_node_default_attrs[] = {
+	&nm_node_attr_num.attr,
+	&nm_node_attr_ipv4_port.attr,
+	&nm_node_attr_ipv4_address.attr,
+	&nm_node_attr_local.attr,
+	NULL,
+};
 
-int nm_name_cluster(char *buf, nm_op *data)
+static ssize_t nm_node_show(struct kobject *kobj,
+			    struct attribute *attr,
+			    char *page)
 {
-	int ret = -EINVAL;
+	struct nm_node *node = to_nm_node(kobj);
+	struct nm_node_attribute *nm_node_attr =
+		container_of(attr, struct nm_node_attribute, attr);
+	ssize_t ret = 0;
 
-	nmprintk("name cluster...\n");
-	spin_lock(&nm_lock);
-	if (cluster.state == NM_CLUSTER_UP) {
-		ret = sprintf(buf, "%d: cluster name could not be set.  "
-			      "cluster already up.", -EINVAL);
-		goto leave;
-	}
-	memset(cluster.name, 0, NM_MAX_NAME_LEN+1);
-	memcpy(cluster.name, data->arg_u.name, NM_MAX_NAME_LEN);
-	ret = sprintf(buf, "0: cluster name set: %s", cluster.name);
-leave:
-	spin_unlock(&nm_lock);
+	if (nm_node_attr->show)
+		ret = nm_node_attr->show(node, page);
 	return ret;
 }
 
-int nm_destroy_cluster(char *buf)
+static ssize_t nm_node_store(struct kobject *kobj,
+			      struct attribute *attr,
+			      const char *page, size_t count)
 {
-	int ret;
-	nmprintk("destroy cluster...\n");
+	struct nm_node *node = to_nm_node(kobj);
+	struct nm_node_attribute *nm_node_attr =
+		container_of(attr, struct nm_node_attribute, attr);
+	ssize_t ret = -EINVAL;
 
-	/* TODO */
-	spin_lock(&nm_lock);
-	nm_init_cluster(&cluster);
-	ret = sprintf(buf, "0: rudely destroyed cluster!!!");
-	spin_unlock(&nm_lock);
+	if (nm_node_attr->store)
+		ret = nm_node_attr->store(node, page, count);
 	return ret;
 }
 
+struct sysfs_ops nm_node_sysfs_ops = {
+	.show	= &nm_node_show,
+	.store	= &nm_node_store,
+};
 
-int nm_get_cluster_num_nodes(char *buf)
-{
-	int num_nodes=0, i;
-	
-	nmprintk("get cluster num nodes...\n");
+static struct ukobj_type nm_node_type = {
+	.ktype	= {
+		.release	= nm_node_release,
+		.sysfs_ops	= &nm_node_sysfs_ops,
+		.default_attrs	= nm_node_default_attrs,
+	},
+	.make_object = usysfs_make_no_object,
+	.owner	= THIS_MODULE,
+};
 
-	spin_lock(&cluster.bitmap_lock); 
-	for (i=0; i<ARRAY_SIZE(cluster.node_bitmap); i++)
-		num_nodes += hweight_long(cluster.node_bitmap[i]);
-	spin_unlock(&cluster.bitmap_lock);
+/* node set */
 
-	return sprintf(buf, "0: %d", num_nodes);
-}
+struct nm_node_set {
+	struct ukset ns_ukset;
+	/* some stuff? */
+};
 
-int nm_get_cluster_num_groups(char *buf)
+#if 0
+static struct nm_node_set *to_nm_node_set(struct kset *kset)
 {
-	int num_groups=0, i;
-	
-	nmprintk("get cluster num groups...\n");
-
-	spin_lock(&cluster.bitmap_lock);
-	for (i=0; i<ARRAY_SIZE(cluster.group_bitmap); i++)
-		num_groups += hweight_long(cluster.group_bitmap[i]);
-	spin_unlock(&cluster.bitmap_lock);
-
-	return sprintf(buf, "0: %d", num_groups);
+	return kset ? 
+		container_of(to_ukset(kset), struct nm_node_set, ns_ukset) 
+		: NULL;
 }
+#endif
 
-int nm_get_group_num_nodes(struct inode *group)
+static struct kobject *nm_node_set_make_object(struct kset *kset,
+					      const char *name)
 {
-	int num_nodes=0, i;
-	nm_group_inode_private *g;
-	
-	nmprintk("get group num nodes...\n");
-	
-	g = group->u.generic_ip;
-	if (!g)
-		return -EINVAL;
+	struct nm_node *node = NULL;
+	struct nm_cluster *cluster = to_nm_cluster(kset->kobj.parent);
+	struct kobject *ret = NULL;
+	net_inode_private *nip;
 
-	spin_lock(&g->bitmap_lock);
-	for (i=0; i<ARRAY_SIZE(g->slot_bitmap); i++)
-		num_nodes += hweight_long(g->slot_bitmap[i]);
-	spin_unlock(&g->bitmap_lock);
+	printk("trying to make a node object under cluster %p\n", cluster);
 
-	return num_nodes;
-}
+	if (strlen(name) > NM_MAX_NAME_LEN)
+		goto out; /* ENAMETOOLONG */
 
-void * nm_iterate_group_disk_slots(struct inode *group, int *idx)
-{
-	nm_group_inode_private *priv;
-	int next;
+	node = kcalloc(1, sizeof(struct nm_node), GFP_KERNEL);
+	if (node == NULL)
+		goto out; /* ENOMEM */
 
-	if (*idx >= 255)
-		return NULL;
-	priv = group->u.generic_ip;
-	if (!priv)
-		return NULL;
-	next = find_next_bit(priv->slot_bitmap, 255, *idx);
-	if (next >= 255)
-		return NULL;
-	*idx = next;
-	return util_rarray_idx_to_slot(&priv->disk.slots, next);
-}
-EXPORT_SYMBOL(nm_iterate_group_disk_slots);
+	strcpy(node->nd_name, name); /* use kobj.name instead? */
+	node->nd_num = NM_MAX_NODES;
 
-int nm_get_node_info(char *buf, nm_op *data)
-{
-	int ret, tmpret, i;
-	nm_node_inode_private *priv;
-	nm_network_iface *n;
-	struct inode *inode = NULL;
-	struct dentry *dentry;
-	u8 node_num;
-	u16 vers;
-
-	ret = -EINVAL;
-	node_num = data->arg_u.index;
-	inode = nm_get_node_by_num(node_num);
-	if (inode) {
-		dentry = list_entry(inode->i_dentry.next, struct dentry, 
-				    d_alias);
-		priv = inode->u.generic_ip;
-		ret = sprintf(buf, "0: global_index=%u\n"
-			           "name=%.*s\n",
-				priv->node.node_num, dentry->d_name.len, 
-				dentry->d_name.name);
-		buf += ret;
-		for (i=0; i<NM_MAX_IFACES; i++) {
-			n = &priv->node.ifaces[i];
-			vers = ntohs(n->ip_version);
-			nmprintk("ip_version=%u, vers=%u\n", 
-				 n->ip_version, vers);
-			if (vers!=4 && vers!=6)
-				continue;
-			/* TODO: how to print ipv6? */
-			tmpret = sprintf(buf, "iface%d.port=%u\n"
-				            "iface%d.version=%d\n"
-					    "iface%d.addr=%d.%d.%d.%d\n",
-				      i, ntohs(n->ip_port), i, vers, i,
-				      NIPQUAD(n->addr_u.ip_addr4));
-			buf += tmpret;
-			ret += tmpret;
-		}
-		iput(inode);
+	/* this should be somewhere else */
+	nip = &node->nd_net_inode_private;
+	spin_lock_init(&nip->sock_lock);
+	INIT_LIST_HEAD(&nip->pending_waiters);
+	init_waitqueue_head(&nip->waitq);
+	INIT_LIST_HEAD(&nip->handlers);
+	INIT_LIST_HEAD(&nip->active_item);
+	nip->page = alloc_page(GFP_KERNEL);
+	if (nip->page == NULL) {
+		nmprintk("page allocation failed\n");
+		goto out; /* ENOMEM */
 	}
-	return ret;
-}
 
-int nm_get_group_info(char *buf, nm_op *data)
-{
-	int ret, tmpret;
-	nm_group_inode_private *g = NULL;
-	struct inode *inode = NULL;
-	u8 group_num;
-	struct dentry *dentry, *child;
+	kobject_set_name(&node->nd_kobj, name);
+	node->nd_kobj.ktype = &nm_node_type.ktype;
+	kobject_init(&node->nd_kobj);
 
-	ret = -EINVAL;
-	group_num = data->arg_u.index;
-	inode = nm_get_group_by_num(group_num);
-	if (inode) {
-		g = inode->u.generic_ip;
-		dentry = list_entry(inode->i_dentry.next, struct dentry, 
-				    d_alias);
-		ret = sprintf(buf, "0: group_num=%u\n"
-		        	   "name=%.*s\n"
-				   "disk_uuid=%s\n",
-			      group_num, dentry->d_name.len, 
-			      dentry->d_name.name, g->disk.uuid);
-		buf += ret;
+	ret = &node->nd_kobj;
 
-		spin_lock(&dcache_lock);
-		list_for_each_entry(child, &dentry->d_subdirs, d_child) {
-			tmpret = sprintf(buf, "%.*s\n", child->d_name.len, 
-					 child->d_name.name);
-			buf += tmpret;
-			ret += tmpret;
-		}
-		spin_unlock(&dcache_lock);
-		iput(inode);
-	}
-	return ret;
-}
+out:
+	if (ret == NULL)
+		kfree(node);
 
-	
-
-static ssize_t write_cluster(struct file *file, char *buf, size_t size)
-{
-	nm_op *data;
-	int ret;
-	u8 me;
-	
-	nmprintk("write_cluster\n");
-
-        if (size < sizeof(*data))
-                return -EINVAL;
-        data = (nm_op *) buf;
-	if (data->magic != NM_OP_MAGIC)
-		return -EINVAL;
-
-	switch (data->opcode) {
-		case NM_OP_CREATE_CLUSTER:
-			ret = nm_create_cluster(buf);
-			break;
-		case NM_OP_CREATE_GROUP:
-			ret = nm_create_group(buf, data);
-			break;
-		case NM_OP_NAME_CLUSTER:
-			ret = nm_name_cluster(buf, data);
-			break;
-		case NM_OP_DESTROY_CLUSTER:
-			ret = nm_destroy_cluster(buf);
-			break;
-		case NM_OP_ADD_CLUSTER_NODE:
-			ret = nm_create_node(buf, data);
-			break;
-		case NM_OP_GET_CLUSTER_NUM_NODES:
-			ret = nm_get_cluster_num_nodes(buf);
-			break;
-		case NM_OP_GET_GLOBAL_NODE_NUM:
-			ret = 0;
-			me = nm_this_node(NULL);
-			if (me >= NM_MAX_NODES)
-				ret = -EINVAL;
-			ret = sprintf(buf, "%d: %u", ret, me);
-			break;
-		default:
-			ret = sprintf(buf, "%d: bad opcode: %u", -EINVAL, 
-				      data->opcode);
-			break;
-	}
-	nmprintk("leaving!\n");
 	return ret;
 }
 
-static ssize_t write_node(struct file *file, char *buf, size_t size)
+static void nm_node_set_drop_object(struct kset *kset, struct kobject *kobj)
 {
-	nm_op *data;
-	int ret;
-	
-	nmprintk("write_node\n");
+	struct nm_node *node = to_nm_node(kobj);
+	struct nm_cluster *cluster = to_nm_cluster(node->nd_kobj.parent);
+	int node_not_in_nodes_array = 0;
 
-        if (size < sizeof(*data))
-                return -EINVAL;
-        data = (nm_op *) buf;
-	if (data->magic != NM_OP_MAGIC)
-		return -EINVAL;
+	/* please don't try this yet, needs proper refcounts of nodes too */
+	BUG();
 
-	switch (data->opcode) {
-		case NM_OP_GET_NODE_INFO:
-			ret = nm_get_node_info(buf, data);
-			break;
-		default:
-			ret = sprintf(buf, "%d: bad opcode: %u", -EINVAL, 
-				      data->opcode);
-			break;
-	}
-	nmprintk("leaving!\n");
-	return ret;
-}
+	if (cluster->cl_has_local)
+		net_stop_rx_thread(node);
 
-static ssize_t write_group(struct file *file, char *buf, size_t size)
-{
-	nm_op *data;
-	int ret;
-	
-	nmprintk("write_group\n");
+	/* XXX sloppy */
+	if (node->nd_ipv4_address)
+		rb_erase(&node->nd_ip_node, &cluster->cl_node_ip_tree);
 
-        if (size < sizeof(*data))
-                return -EINVAL;
-        data = (nm_op *) buf;
-	if (data->magic != NM_OP_MAGIC)
-		return -EINVAL;
+	/* XXX call into net to stop this node from trading messages */
 
-	nmprintk("opcode is %u, add_group is %u\n", data->opcode, 
-		 NM_OP_ADD_GROUP_NODE);
-	switch (data->opcode) {
-		case NM_OP_GET_GROUP_INFO:
-			ret = nm_get_group_info(buf, data);
-			break;
-
-		case NM_OP_ADD_GROUP_NODE:
-			ret = nm_add_node_to_group(buf, data);
-			break;
-
-		default:
-			ret = sprintf(buf, "%d: bad opcode: %u", 
-				      -EINVAL, data->opcode);
-			break;
+	write_lock(&cluster->cl_nodes_lock);
+	if (node->nd_num != NM_MAX_NODES) {
+		if (cluster->cl_nodes[node->nd_num] != node)
+			node_not_in_nodes_array = 1;
+		else  {
+			cluster->cl_nodes[node->nd_num] = NULL;
+			node->nd_num = NM_MAX_NODES;
+		}
 	}
-	nmprintk("leaving!\n");
-	return ret;
-}
+	write_unlock(&cluster->cl_nodes_lock);
 
+	BUG_ON(node_not_in_nodes_array);
 
-
-struct inode * nm_get_group_by_num(u8 group_num)
-{
-	struct inode *inode = iget(single_sb, group_num + NM_GROUP_INODE_START);
-	if (!inode)
-		return NULL;
-	if (!inode->u.generic_ip) {
-		iput(inode);
-		return NULL;
-	}
-	return inode;
-}	
-EXPORT_SYMBOL(nm_get_group_by_num);
-
-struct inode * nm_get_node_by_num(u8 node_num)
-{
-	struct inode *inode = iget(single_sb, node_num + NM_NODE_INODE_START);
-	if (!inode)
-		return NULL;
-	if (!inode->u.generic_ip) {
-		iput(inode);
-		return NULL;
-	}
-	return inode;
+	kobject_put(kobj);
 }
-EXPORT_SYMBOL(nm_get_node_by_num);
 
-/* ipv4 only for now... */
-struct inode * nm_get_node_by_ip(u32 addr)
-{
-	int bucket;
-	struct list_head *iter;
-	nm_node_inode_private *priv;
-	struct inode *ret = NULL;
-	
-	bucket = hash_long(addr, NM_HASH_BITS);
+static struct ukobj_type nm_node_set_type = {
+	.ktype = {
+		.sysfs_ops	= NULL, /* no attributes */
+	},
+	.make_object	= nm_node_set_make_object,
+	.drop_object	= nm_node_set_drop_object,
+	.owner		= THIS_MODULE,
+};
 
-	spin_lock(&nm_ip_hash_lock);
-	list_for_each(iter, &nm_ip_hash[bucket]) {
-		priv = list_entry(iter, nm_node_inode_private, ip_hash);
-		if (priv->node.ifaces[0].addr_u.ip_addr4 == addr) {
-			ret = igrab(priv->inode);
-			break;
-		}
-		    
-	}
-	spin_unlock(&nm_ip_hash_lock);
-	return ret;
-}
-EXPORT_SYMBOL(nm_get_node_by_ip);
+/* cluster */
 
-nm_cluster * nm_get_cluster(void)
+static void nm_cluster_release(struct kobject *kobj)
 {
-	return &cluster;
-}
+	struct nm_cluster *cluster = to_nm_cluster(kobj);
 
-struct inode * nm_get_group_node_by_index(struct inode *group, u8 index)
-{
-	struct dentry *dentry = NULL, *parent;
-	struct inode *inode = NULL;
-	char tmpname[6];
+	printk("releasing cluster %p\n", cluster);
 
-	if (list_empty(&group->i_dentry))
-		return NULL;
-	parent = dget(list_entry(group->i_dentry.next, struct dentry, d_alias));
-	if (!parent)
-		return NULL;
-	
-	sprintf(tmpname, "%03u", index);
-	dentry = lookup_one_len(tmpname, parent, strlen(tmpname));
-	if (!IS_ERR(dentry)) {
-		inode = dentry->d_inode;
-		if (inode) {
-			inode = igrab(inode);
-			if (!inode->u.generic_ip || !S_ISREG (inode->i_mode)) {
-				nmprintk("bad inode!\n");
-				iput(inode);
-				inode = NULL;
-			}
-		}
-		if (!inode)
-			dput(dentry);
-	}
-	dput(parent);
-	return inode;
+	kfree(cluster->cl_ukset.default_sets);
+	kfree(cluster);
 }
-EXPORT_SYMBOL(nm_get_group_node_by_index);
 
-struct inode * __nm_get_node_by_name(const char *node_name, int dir)
-{
-	struct dentry *dentry = NULL;
-	struct inode *inode = NULL;
+static struct ukobj_type nm_cluster_type = {
+	.ktype	= {
+		.release	= nm_cluster_release,
+		.sysfs_ops	= NULL, /* no attributes */
+	},
+	.make_object = usysfs_make_no_object,
+	.owner	= THIS_MODULE,
+};
 
-	NM_ASSERT(node_name);
-	NM_ASSERT(single_sb);
-	NM_ASSERT(single_sb->s_root);
+/* cluster set */
 
-	dentry = lookup_one_len(node_name, single_sb->s_root, 
-				strlen(node_name));
-	if (!IS_ERR(dentry)) {
-		inode = dentry->d_inode;
-		if (inode) {
-			inode = igrab(inode);
-			if (!inode->u.generic_ip ||
-		    	(dir && !S_ISDIR (inode->i_mode)) ||
-		    	(!dir && !S_ISREG (inode->i_mode))) {
-				nmprintk("bad inode!\n");
-				iput(inode);
-				inode = NULL;
-			}
-		}
-	}
-	return inode;
-}
-EXPORT_SYMBOL(__nm_get_node_by_name);
+struct nm_cluster_set {
+	struct ukset cs_ukset;
+	/* some stuff? */
+};
 
-
-/* 
- * if group is NULL: return the global index for this node
- * if group is non NULL: return the index within the group of this node
- *
- * NOTE: currently getting the group index is slow
- *       will need to change this somehow
- */
-u8 nm_this_node(struct inode *group)
+#if 0
+static struct nm_cluster_set *to_nm_cluster_set(struct kset *kset)
 {
-	struct inode *inode = NULL;
-	struct dentry *child = NULL;
-	u8 node_num = NM_MAX_NODES;
-
-       	inode = nm_get_node_by_name(nm_nodename);
-	if (inode && inode->u.generic_ip) {
-		if (group)
-			node_num = nm_get_group_index(group, inode, &child);
-		else 
-			node_num = nm_get_node_global_index(inode);
-
-	}
-	iput(inode);
-	dput(child);
-	//nmprintk("for group=%p, this node is %u\n", group, node_num);
-	return node_num;
+	return kset ?
+		container_of(to_ukset(kset), struct nm_cluster_set, cs_ukset)
+	       : NULL;
 }
-EXPORT_SYMBOL(nm_this_node);
+#endif
 
-/* slow */
-static u8 nm_get_group_index(struct inode *group, struct inode *inode, 
-			      struct dentry **child)
+static struct kset *nm_cluster_set_make_kset(struct kset *kset,
+					     const char *name)
 {
-	struct dentry *tmp = NULL, *parent = NULL;
-	u8 slot_num = NM_MAX_NODES;
-	struct list_head *iter;
-	char tmpname[6];
-	char *err;
+	struct nm_cluster *cluster = NULL;
+	struct nm_node_set *ns = NULL;
+	struct kset *hb_kset = NULL, *ret = NULL;
+	void *defs = NULL;
 
-	*child = NULL;
-	parent = NULL;
-	if (list_empty(&group->i_dentry))
-		goto leave;
-	parent = dget(list_entry(group->i_dentry.next, struct dentry, d_alias));
-	if (!parent)
-		goto leave;
-		
-	spin_lock(&dcache_lock);
-	list_for_each(iter, &parent->d_subdirs) {
-		tmp = list_entry(iter, struct dentry, d_child);
-		if (tmp->d_inode == inode)
-			break;
-		tmp = NULL;
-	}
-	if (tmp)
-		dget_locked(tmp);
-	spin_unlock(&dcache_lock);
+	printk("trying to make a cluster object\n");
 
-	if (!tmp || tmp->d_name.len > 3)
-		goto leave;
-	strncpy(tmpname, tmp->d_name.name, tmp->d_name.len);
-	tmpname[tmp->d_name.len] = '\0';
-	err=NULL;
-	slot_num = simple_strtoul(tmpname, &err, 10);
-	
-	if (*err != '\0')
-		slot_num = NM_MAX_NODES;  // error
-	else
-		*child = dget(tmp);  // done, get extra ref for child
-		
-leave:
-	dput(parent);
-	dput(tmp);
+	/* this runs under the parent dir's i_sem; there can be only
+	 * one caller in here at a time */
+	if (nm_single_cluster)
+		goto out; /* ENOSPC */
 
-	return slot_num;
-}
+	cluster = kcalloc(1, sizeof(struct nm_cluster), GFP_KERNEL);
+	ns = kcalloc(1, sizeof(struct nm_node_set), GFP_KERNEL);
+	defs = kcalloc(3, sizeof(struct kset *), GFP_KERNEL);
+	hb_kset = hb_alloc_hb_set();
+	if (cluster == NULL || ns == NULL || hb_kset == NULL || defs == NULL)
+		goto out;
 
-int nm_register_callback(int type, void (*func)(void *, void *, u8))
-{
-	nm_callback_func *f;
+	ukset_init_type_name(&cluster->cl_ukset, name, &nm_cluster_type.ktype);
+	ukset_init_type_name(&ns->ns_ukset, "nodes", &nm_node_set_type.ktype);
 
-	if (type < NM_NODE_ADD_CB || type > NM_GROUP_NODE_DEL_CB)
-		return -EINVAL;
-	f = kmalloc(sizeof(nm_callback_func), GFP_KERNEL);
-	if (f == NULL)
-		return -ENOMEM;
-	memset(f, 0, sizeof(nm_callback_func));
-	f->func = func;
-	down(&nm_cb_sem);
-	list_add_tail(&f->list, &nm_callbacks[type]);
-	up(&nm_cb_sem);
-	return 0;
-}
-EXPORT_SYMBOL(nm_register_callback);
+	cluster->cl_ukset.default_sets = defs;
+	cluster->cl_ukset.default_sets[0] = &ns->ns_ukset.kset;
+	cluster->cl_ukset.default_sets[1] = hb_kset;
+	cluster->cl_ukset.default_sets[2] = NULL;
+	rwlock_init(&cluster->cl_nodes_lock);
+	cluster->cl_node_ip_tree = RB_ROOT;
 
-int nm_unregister_callback(int type, void (*func)(void *, void *, u8))
-{
-	struct list_head *iter, *tmpiter;
-	int ret = -EINVAL;
-	nm_callback_func *f;
+	ret = &cluster->cl_ukset.kset;
+	nm_single_cluster = cluster;
 
-	if (type < NM_NODE_ADD_CB || type > NM_GROUP_NODE_DEL_CB)
-		return ret;
-
-	down(&nm_cb_sem);
-	list_for_each_safe(iter, tmpiter, &nm_callbacks[type]) {
-		f = list_entry (iter, nm_callback_func, list);
-		if (f->func == func) {
-			list_del(&f->list);
-			kfree(f);
-			ret = 0;
-			break;
-		}
+out:
+	if (ret == NULL) {
+		kfree(cluster);
+		kfree(ns);
+		hb_free_hb_set(hb_kset);
+		kfree(defs);
 	}
-	up(&nm_cb_sem);
+
 	return ret;
 }
-EXPORT_SYMBOL(nm_unregister_callback);
 
-static void nm_do_callbacks(int type, void *ptr1, void *ptr2, u8 idx)
+static void nm_cluster_set_drop_object(struct kset *kset, struct kobject *kobj)
 {
-	struct list_head *iter;
-	nm_callback_func *f;
-	
-	down(&nm_cb_sem);
-	list_for_each(iter, &nm_callbacks[type]) {
-		f = list_entry (iter, nm_callback_func, list);
-		(f->func) (ptr1, ptr2, idx);
-	}
-	up(&nm_cb_sem);
-}
+	struct nm_cluster *cluster = to_nm_cluster(kobj);
 
+	BUG_ON(nm_single_cluster != cluster);
+	nm_single_cluster = NULL;
 
-static void nm_teardown(void)
-{
-	remove_proc_entry("cluster/nm", NULL);
-	remove_proc_entry("cluster", NULL);
+	kobject_put(kobj);
 }
 
-static void nm_init_cluster(nm_cluster *cluster)
-{
-	int i;
-	memset(cluster, 0, sizeof(nm_cluster));
-	cluster->state = NM_CLUSTER_DOWN;
-	spin_lock_init(&cluster->bitmap_lock);
-	
-	for (i=NM_NODE_ADD_CB; i<=NM_GROUP_NODE_DEL_CB; i++)
-		INIT_LIST_HEAD(&nm_callbacks[i]);
-}
+static struct ukobj_type nm_cluster_set_type = {
+	.ktype = {
+		.sysfs_ops	= NULL, /* no attributes */
+	},
+	.make_kset	= nm_cluster_set_make_kset,
+	.drop_object	= nm_cluster_set_drop_object,
+	.owner	= THIS_MODULE,
+};
 
+static struct nm_cluster_set nm_cluster_set = {
+	.cs_ukset = {
+		.kset = {
+			.kobj = {
+				.name = "cluster",
+				.ktype = &nm_cluster_set_type.ktype,
+			},
+		},
+	},
+};
 
-
-
-
-/*----------------------------------------------------------------------------*/
-/*
- *	populating the filesystem.
- */
-static int nm_fill_super(struct super_block * sb, void * data, int silent)
+static void __exit exit_nm(void)
 {
-	int ret, sz;
-	struct TA_write_ops *ops;
-	static struct tree_descr nm_files[] = {
-		[NM_Cluster] = {".cluster", &transaction_ops, S_IWUSR},
-		[NM_Node] = {".node", &transaction_ops, S_IWUSR},
-		[NM_Group] = {".group", &transaction_ops, S_IWUSR},
-		/* last one */ {""}
-	};
-	
-	sz = sizeof(nm_files) / sizeof(struct tree_descr);
-	ops = kmalloc(sizeof(struct TA_write_ops) +
-		      (sz * sizeof(ops->write_op[0])), 
-		      GFP_KERNEL);
-	if (!ops)
-		return -ENOMEM;
-
-	ops->num_ops = sz;
-	ops->write_op[NM_Cluster] = write_cluster;
-	ops->write_op[NM_Node] = write_node;
-	ops->write_op[NM_Group] = write_group;
-
-	single_sb = NULL;
-	nmprintk("calling simple_fill_super...\n");
-	ret = simple_fill_super(sb, 0x98675309, nm_files);
-	if (ret >= 0) {
-		TA_GENERIC_SB_MEMBER(sb) = ops;
-		single_sb = sb;
-	} else {
-		kfree(ops);
-	}
-	return ret;
+	nmprintk("unloading nm module\n");
+	/* XXX sync with hb callbacks and shut down hb? */
+	net_unregister_hb_callbacks();
+	usysfs_unregister_subsystem(&nm_cluster_set.cs_ukset.kset);
 }
 
-/* blindly copied from ocfs2 */
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
-static struct super_block *nm_get_sb(struct file_system_type *fs_type,
-				     int flags,
-				     const char *dev_name,
-				     void *data)
+static int __init init_nm(void)
 {
-	return get_sb_single(fs_type, flags, data, nm_fill_super);
-}
+	int ret;
 
-static struct file_system_type nm_fs_type = {
-	.owner          = THIS_MODULE,
-	.name           = "nm",
-	.get_sb         = nm_get_sb,
-	.kill_sb        = kill_anon_super,
-};
-#else
-static struct super_block *nm_read_super(struct super_block *sb,
-					 void *data,
-					 int silent)
-{
-	nmprintk("welcome to nm_read_super!!!\n");
-	return (nm_fill_super(sb, data, silent) < 0) ? NULL : sb;
-}
+	hb_init();
+	ret = net_register_hb_callbacks();
+	if (ret)
+		goto out;
 
-static DECLARE_FSTYPE (nm_fs_type, "nm", nm_read_super, FS_SINGLE|FS_LITTER);
-#endif
-
-static int __init init_nm(void)
-{
-	int retval;
-	nm_nodename = kmalloc(strlen(system_utsname.nodename) + 1, GFP_KERNEL);
-	if (nm_nodename==NULL) {
-		nmprintk("could not allocate a few bytes for nodename!\n");
-		return -ENOMEM;
+	ukset_init(&nm_cluster_set.cs_ukset);
+	ret = usysfs_register_subsystem(&nm_cluster_set.cs_ukset.kset);
+	if (ret) {
+		printk(KERN_ERR "nodemanager: Registration returned %d\n", ret);
+		goto out;
 	}
-	strcpy(nm_nodename, system_utsname.nodename);
-	nmprintk("loading nm module: nodename is %s\n", nm_nodename);
 
-	if (nm_init_ip_hash() < 0) {
-		nmprintk("failed to allocate node IP hash\n");
-		return -ENOMEM;
-	}
+out:
+	if (ret)
+		exit_nm();
 
-	nm_init_cluster(&cluster);
-
-	if (proc_mkdir("cluster", 0)) {
-		if (proc_mkdir("cluster/nm", 0)) {
-		}
-	}
-	nmprintk("calling register_filesystem\n");
-	retval = register_filesystem(&nm_fs_type);
-	nmprintk("done calling register_filesystem: ret=%d\n", retval);
-	if (retval)
-		nm_teardown();
-	return retval;
+	return ret;
 }
 
-static void __exit exit_nm(void)
-{
-	nm_teardown();
-	unregister_filesystem(&nm_fs_type);
-	nm_destroy_ip_hash();
-	kfree(nm_nodename);
-	nmprintk("unloading nm module\n");
-}
-
-
-
-
 MODULE_LICENSE("GPL");
 module_init(init_nm)
 module_exit(exit_nm)

Modified: trunk/fs/ocfs2/cluster/nodemanager.h
===================================================================
--- trunk/fs/ocfs2/cluster/nodemanager.h	2005-03-18 04:25:45 UTC (rev 2005)
+++ trunk/fs/ocfs2/cluster/nodemanager.h	2005-03-18 06:01:10 UTC (rev 2006)
@@ -33,54 +33,22 @@
 #include "ocfs2_nodemanager.h"
 
 
-
-struct _nm_ctxt
-{
-	int dummy;
-};
-
-enum {
-	NM_CLUSTER_DOWN=0,
-	NM_CLUSTER_UP
-};
-
-enum {
-	NM_GROUP_NOT_READY=0,
-	NM_GROUP_READY
-};
-
-enum {
-	NM_Root = 1,
-	NM_Cluster,
-	NM_Node,
-	NM_Group,
-};
-
-typedef struct _nm_cluster
-{
-	char name[NM_MAX_NAME_LEN+1];
-	int state;
-	spinlock_t bitmap_lock;
-	unsigned long group_bitmap[BITS_TO_LONGS(NM_MAX_NODES)];
-	unsigned long node_bitmap[BITS_TO_LONGS(NM_MAX_NODES)];
-} nm_cluster;
-
-
-typedef struct _nm_group_inode_private
-{
-	struct inode *inode;
-	struct list_head net_list;
-	struct list_head disk_list;
-	cluster_disk disk;
-	int state;
-	spinlock_t bitmap_lock;
-	unsigned long slot_bitmap[BITS_TO_LONGS(NM_MAX_NODES)];
-} nm_group_inode_private;
-
 /* TODO: move this */
+/*
+ * this stores the per-socket state for each socket that we associate
+ * with a node.  for remote nodes this is a socket that is established
+ * on demand and trades messages.  For a local node this is just a listening
+ * socket that spawns message sockets from other nodes.
+ */
 struct sock;
+/* this is still called net_inode_private for hysterical raisins.  one
+ * has to draw the cleanup line somewhere.. */
 typedef struct _net_inode_private
 {
+	/* only used by the local node. */
+	struct task_struct	*rx_thread;
+	/* the rest is for remote nodes */
+
 	/* sockets themselves don't seem to have a nice way to refcount them
 	 * above sock_release.  one could use iget/iput, but that seems
 	 * to interact poory with sock_release() itself calling iput. */
@@ -97,99 +65,33 @@
 	struct page 		*page;
 	size_t			page_off;
 
+
 	void			(*orig_state_change)(struct sock *sk);
 	void                    (*orig_error_report)(struct sock *sk);
 	void			(*orig_data_ready)(struct sock *sk, int bytes);
 } net_inode_private;
 
-typedef struct _nm_node_inode_private
-{
-	struct inode *inode;
-	nm_node_info node;
-	struct list_head ip_hash;
-	net_inode_private net;
-} nm_node_inode_private;
+struct nm_node {
+	struct kobject		nd_kobj; 
+	char			nd_name[NM_MAX_NAME_LEN+1]; /* replace? */
+	__u8			nd_num;
+	/* only one address per node, as attributes, for now.  both
+	 * in network order */
+	__u32			nd_ipv4_address;
+	__u16			nd_ipv4_port;
+	struct rb_node		nd_ip_node;
+	/* there can be only one local node for now */
+	int			nd_local;
 
-
-/* callback stuff */
-
-enum {
-	NM_NODE_ADD_CB = 0,
-	NM_NODE_DEL_CB,
-	NM_GROUP_ADD_CB,
-	NM_GROUP_DEL_CB,
-	NM_GROUP_NODE_ADD_CB,
-	NM_GROUP_NODE_DEL_CB,
-	NM_NUM_CB
+	/* we're making simple assertions that a node can only have one network
+	 * identity and report at one place in a heartbeat */
+	net_inode_private	nd_net_inode_private;
 };
 
-typedef void (nm_cb_func)(void *, void *, u8);
+u8 nm_this_node(void);
 
-typedef struct _nm_callback_func
-{
-	struct list_head list;
-	nm_cb_func *func;
-	//void (*func)(void *, void *, u8);
-} nm_callback_func;
+struct nm_node * nm_get_node_by_num(u8 node_num);
+struct nm_node * nm_get_node_by_ip(u32 addr);
+void nm_node_put(struct nm_node *node);
 
-
-
-
-u8 nm_this_node(struct inode *group);
-nm_cluster * nm_get_cluster(void);
-int nm_register_callback(int type, void (*func)(void *, void *, u8));
-int nm_unregister_callback(int type, void (*func)(void *, void *, u8));
-int nm_get_group_num_nodes(struct inode *group);
-int nm_make_group_ready(struct inode *group);
-void * nm_iterate_group_disk_slots(struct inode *group, int *idx);
-int nm_remove_node_from_group(struct inode *group, struct inode *node);
-int nm_create_group(char *buf, nm_op *data);
-int nm_add_node_to_group(char *buf, nm_op *data);
-
-extern char *nm_nodename;
-
-
-struct inode * nm_get_group_by_num(u8 group_num);
-struct inode * nm_get_node_by_num(u8 node_num);
-struct inode * __nm_get_node_by_name(const char *node_name, int dir);
-struct inode * nm_get_node_by_ip(u32 addr);
-struct inode * nm_get_group_node_by_index(struct inode *group, u8 index);
-
-static inline struct inode * nm_get_node_by_name(char *node_name)
-{
-	return __nm_get_node_by_name(node_name, 0);
-}
-static inline struct inode * nm_get_group_by_name(const char *group_name)
-{
-	return __nm_get_node_by_name(group_name, 1);
-}
-
-
-static inline int nm_get_node_global_index(struct inode *node)
-{
-	return (node->i_ino - NM_NODE_INODE_START);
-}
-static inline int nm_get_group_global_index(struct inode *group)
-{
-	return (group->i_ino - NM_GROUP_INODE_START);
-}
-
-static inline int nm_valid_ino(int ino)
-{
-#if 0
-	// these should never be referred to in kernel
-	if (ino >= NM_Cluster && ino <= NM_Group)
-		return 1;
-#endif
-	if (ino >= NM_NODE_INODE_START &&
-	    ino < NM_NODE_INODE_START + NM_MAX_NODES)
-		return 1;
-	if (ino >= NM_GROUP_INODE_START &&
-	    ino < NM_GROUP_INODE_START + NM_MAX_NODES)
-		return 1;
-	return 0;
-}
-
-
-	
 #endif /* CLUSTER_NODEMANAGER_H */

Modified: trunk/fs/ocfs2/cluster/ocfs2_nodemanager.h
===================================================================
--- trunk/fs/ocfs2/cluster/ocfs2_nodemanager.h	2005-03-18 04:25:45 UTC (rev 2005)
+++ trunk/fs/ocfs2/cluster/ocfs2_nodemanager.h	2005-03-18 06:01:10 UTC (rev 2006)
@@ -43,29 +43,6 @@
 #define NM_GROUP_INODE_START    200000
 #define NM_NODE_INODE_START     100000
 
-
-
-typedef struct _nm_network_iface
-{
-	__u16 ip_port;			/* for simplicity, just define exactly one port for this if */
-	__u16 ip_version;
-	union {
-		__u32 ip_addr4;		/* IPv4 address in NBO */
-		__u32 ip_addr6[4];	/* IPv6 address in NBO */
-	} addr_u;
-} nm_network_iface;
-
-typedef struct _nm_node_info 
-{
-	__u8  node_num;
-	__u8  pad1;
-	__u16 pad2;
-	__u32 pad3;
-	char node_name[NM_MAX_NAME_LEN+1];
-	char pad4[63];
-	nm_network_iface ifaces[NM_MAX_IFACES];
-} nm_node_info;
-
 /* transaction file nm_op stuff */
 
 #define NM_OP_MAGIC      0xbeaf
@@ -101,7 +78,7 @@
 	union {
 		__u8 index;
 		char name[NM_MAX_NAME_LEN+1];
-		nm_node_info node;
+//		nm_node_info node;
 		nm_group_change gc;
 	} arg_u;
 } nm_op;

Deleted: trunk/fs/ocfs2/cluster/ocfs2_tcp.h
===================================================================
--- trunk/fs/ocfs2/cluster/ocfs2_tcp.h	2005-03-18 04:25:45 UTC (rev 2005)
+++ trunk/fs/ocfs2/cluster/ocfs2_tcp.h	2005-03-18 06:01:10 UTC (rev 2006)
@@ -1,46 +0,0 @@
-/* -*- mode: c; c-basic-offset: 8; -*-
- * vim: noexpandtab sw=8 ts=8 sts=0:
- *
- * ocfs2_tcp.h
- *
- * Copyright (C) 2002, 2004 Oracle.  All rights reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- * 
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- * 
- * You should have received a copy of the GNU General Public
- * License along with this program; if not, write to the
- * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- * Boston, MA 021110-1307, USA.
- */
-
-#ifndef _OCFS2_TCP_H
-#define _OCFS2_TCP_H
-
-typedef struct _gsd_ioc
-{
-	int fd;
-	int namelen;
-	char name[NM_MAX_NAME_LEN+1];
-	int status;
-} gsd_ioc;
-
-typedef struct _net_ioc
-{
-	__u32 status;
-} net_ioc;
-
-#define  NET_IOC_MAGIC          'O'
-#define  NET_IOC_ACTIVATE       _IOR(NET_IOC_MAGIC, 1, net_ioc)
-#define  NET_IOC_GETSTATE       _IOR(NET_IOC_MAGIC, 2, net_ioc)
-#define  GSD_IOC_CREATE_GROUP   _IOR(NET_IOC_MAGIC, 3, gsd_ioc)
-#define  GSD_IOC_ADD_GROUP_NODE _IOR(NET_IOC_MAGIC, 4, gsd_ioc)
-
-#endif /* _OCFS2_TCP_H */

Modified: trunk/fs/ocfs2/cluster/tcp.c
===================================================================
--- trunk/fs/ocfs2/cluster/tcp.c	2005-03-18 04:25:45 UTC (rev 2005)
+++ trunk/fs/ocfs2/cluster/tcp.c	2005-03-18 06:01:10 UTC (rev 2006)
@@ -63,17 +63,22 @@
  *      - handers must be callable from bh context
  * but it really depends on what the semantics and messages are.
  *
- * XXX we should resolve these before release
+ * asap
+ * 	- only have lookup succeed for active nodes (fully configured)
+ * 	- only initiate connections if rx thread is running?
+ * 	- don't allow node rmdir if it has socket and rx thread is running
+ * 	- tear down all node sockets on rx thread exit
+ * 	- have rx thread stop active tx and wait for them
+ * 	- make sure ->net.page gets torn down with net_inode_private
+ * 	- tear down sockets on exit.. via removing their inodes?
+ *
+ * XXX
  * 	- disable preemt before calling rx handler when debugging
  * 	- find explicit stack call to drain rx queue
  * 	- add trivial version trading message at the start of a conn
  * 	- go nuts adding static
  * 	- nsc waiting is buggy, should be on socket.. wake w/err if socket dies
  * 	- compare socks in attach_sock so both size don't close
- * 	- implement net_remove_handlers
- * 	- make sure ->net.page gets torn down with net_inode_private
- * 	- tear down sockets on exit.. via removing their inodes?
- * 	- simplify rx thread exit path (completion, etc)
  */
 
 #include <linux/module.h>
@@ -101,7 +106,6 @@
 #include "util.h"
 
 
-#include "gsd.h"
 #include "heartbeat.h"
 #include "tcp.h"
 #include "nodemanager.h"
@@ -145,10 +149,6 @@
 #define sk_state_change		state_change
 #endif
 
-static u16 ip_version, ip_port;
-static struct inode *net_inode = NULL;
-static u8 net_node_num;
-
 /* all this state should eventually be brought up by object activation
  * and tied to that object rather than being globally valid at insmod */
 static spinlock_t net_handler_lock = SPIN_LOCK_UNLOCKED;
@@ -159,6 +159,7 @@
 static spinlock_t net_active_lock = SPIN_LOCK_UNLOCKED;
 static LIST_HEAD(net_active_list);
 
+/* XXX someday we'll need better accounting */
 static struct task_struct *net_recv_task = NULL;
 
 static inline void net_abort_status_return(net_status_ctxt *nsc)
@@ -169,27 +170,16 @@
 	spin_unlock(&net_status_lock);
 }
 
-static int net_register_hb_callbacks(void);
-static void net_unregister_hb_callbacks(void);
-
 /////////////////////
-static void net_shutdown(void);
-static int net_startup(void);
-static int __init net_driver_entry (void);
-static int net_init_driver(void);
-static void __exit net_driver_exit (void);
 static int net_add_handler(net_msg_handler *nmh);
-static void net_remove_handlers(void);
-static struct socket *net_init_tcp_recv_sock(void);
+static struct socket *net_init_tcp_recv_sock(u16 port);
 static int net_receive_thread(void *data);
 static int net_receive(void);
 static void net_try_accept(struct socket *sock);
 static int net_process_message(struct socket *sock, net_msg *hdr);
-static int net_ioctl (struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg);
 
-static int net_sock_addref_or_connect(struct inode *inode,
-				      struct socket **sock_ret);
-static void net_sock_decref(struct inode *inode, int error);
+static int net_sock_addref_or_connect(u8 node_num, struct socket **sock_ret);
+static void net_sock_decref(struct socket *sock, int error);
 
 //////////////////////
 
@@ -213,161 +203,19 @@
 	spin_unlock(&net_handler_lock);
 }
 
-
-DECLARE_MUTEX(net_state_lock);
-u32 net_driver_state = NET_DRIVER_UNINITED;
-u32 net_num_dispatched = 0;
-
-
-/*
- * net_driver_entry()
- *
- * Driver entry point. Called on insmod.
- */
-static int __init net_driver_entry (void)
+int net_start_rx_thread(struct nm_node *node)
 {
-	struct proc_dir_entry *de;
-	de = proc_mkdir("cluster/net", 0);
-	if (!de)
-		return -1;
-	de->proc_fops->ioctl = net_ioctl;
-
-	if (net_register_hb_callbacks())
-		return -1;
-
-	netprintk0("Loaded net Driver module\n");
-	return 0;
-}				/* net_driver_entry */
-
-static int net_ioctl(struct inode *inode, struct file *filp, unsigned int cmd,
-		     unsigned long arg)
-{
-	net_ioc data;
-	int ret = 0;
-	struct file *file = NULL;
-
-	if (_IOC_TYPE (cmd) != NET_IOC_MAGIC) {
-		ret = -ENOTTY;
-		goto exit_ioctl;
-	}
-
-	switch (cmd) {
-	    case NET_IOC_ACTIVATE:
-		    memset(&data, 0, sizeof(net_ioc));
-		    down(&net_state_lock);
-		    data.status = net_driver_state;
-		    if (net_driver_state == NET_DRIVER_UNINITED) {
-			    ret = net_init_driver();
-			    if (ret < 0) {
-				    netprintk("error trying to activate net driver: %d\n", ret);
-				    data.status = NET_DRIVER_UNINITED;
-			    } else {
-				    netprintk0("activated net driver!\n");
-				    net_driver_state = data.status = NET_DRIVER_READY;
-			    }
-		    }
-		    up(&net_state_lock);
-
-		    ret = copy_to_user ((net_ioc *) arg, &data,
-					sizeof (net_ioc));
-		    break;
-	    case NET_IOC_GETSTATE:
-		    memset(&data, 0, sizeof(net_ioc));
-		    down(&net_state_lock);
-		    data.status = net_driver_state;
-		    up(&net_state_lock);
-		    ret = copy_to_user ((net_ioc *) arg, &data,
-					sizeof (net_ioc));
-		    break;
-		    
-	    case GSD_IOC_CREATE_GROUP:
-	    case GSD_IOC_ADD_GROUP_NODE:
-		    ret = gsd_ioctl(inode, filp, cmd, arg);
-		    break;
-	    default:
-		    ret = -ENOTTY;
-		    break;
-	}
-
-exit_ioctl:
-
-	if (file)
-		fput(file);
-
-	return ret;
-}				/* net_ioctl */
-
-static int net_init_driver(void)
-{
-	nm_node_info *info;
-	nm_node_inode_private *priv;
-
-	/* get the global node number for this node */
-	net_node_num = nm_this_node(NULL);
-	if (net_node_num >= NM_MAX_NODES) {
-		netprintk0("local nm node number not initialized!\n");
-		return -1;
-	}
-	net_inode = nm_get_node_by_num(net_node_num);
-	if (!net_inode) {
-		netprintk0("local nm node inode not initialized!\n");
-		return -1;
-	}
-	priv = (nm_node_inode_private *)net_inode->u.generic_ip;
-	if (!priv) {
-		iput(net_inode);
-		netprintk0("local nm node info not initialized!\n");
-		return -1;
-	}
-	info = &priv->node;
-	ip_version = info->ifaces[0].ip_version;
-	ip_port = info->ifaces[0].ip_port;
-
-	if (net_startup() < 0)
-		return -1;
-
-	if (gsd_setup() < 0)
-		return -1;
-
-	return 0;
-}				/* net_init_driver*/
-
-
-/*
- * net_driver_exit()
- *
- * Called on rmmod
- */
-static void __exit net_driver_exit (void)
-{
-	down(&net_state_lock);
-	if (net_driver_state == NET_DRIVER_READY) {
-		netprintk0("shutting down network\n");
-		net_shutdown();
-		netprintk0("removing all net driver handlers\n");
-		net_remove_handlers();
-		gsd_teardown();
-		if (net_inode)
-			iput(net_inode);
-		net_driver_state = NET_DRIVER_UNINITED;
-	}
-	up(&net_state_lock);
-	remove_proc_entry("cluster/net", NULL);
-	net_unregister_hb_callbacks();
-	netprintk0("Unloading net driver module\n");
-	return;
-}				/* net_driver_exit */
-
-
-static int net_startup(void)
-{
 	struct socket *sock;
+	net_inode_private *net = &node->nd_net_inode_private;
 	int ret = 0;
 
+	BUG_ON(net->rx_thread != NULL);
+	BUG_ON(net_recv_task != NULL);
+
 	/* if the thread was setting up the rx socket we'd like to have it
 	 * communicate errors back to us here.  us setting up the socket
 	 * and passing it to the thread is easier */
-	sock = net_init_tcp_recv_sock();
+	sock = net_init_tcp_recv_sock(node->nd_ipv4_port);
 	if (IS_ERR(sock)) {
 		ret = PTR_ERR(sock);
 		goto out;
@@ -375,10 +223,11 @@
 
 	netprintk0("starting net receive thread...\n");
 
-	net_recv_task = kthread_run(net_receive_thread, sock, "netrecv");
-	if (IS_ERR(net_recv_task)) {
-		ret = PTR_ERR(net_recv_task);
-		net_recv_task = NULL;
+	net->rx_thread = kthread_run(net_receive_thread, sock,
+				     "netrecv-%s", node->nd_name);
+	if (IS_ERR(net->rx_thread)) {
+		ret = PTR_ERR(net->rx_thread);
+		net->rx_thread = NULL;
 		netprintk("unable to launch net receive thread, error=%ld\n",
 			  (long)ret);
 		goto out;
@@ -386,6 +235,7 @@
 
 	/* once the thread is running it has ownership of the sock */
 	sock = NULL;
+	net_recv_task = net->rx_thread;
 
 out:
 	if (sock)
@@ -393,12 +243,21 @@
 	return 0;
 }
 
-static void net_shutdown(void)
+void net_stop_rx_thread(struct nm_node *node)
 {
-	if (net_recv_task) {
+	net_inode_private *net = &node->nd_net_inode_private;
+	if (net->rx_thread) {
 		netprintk("waiting for net thread to exit....\n");
-		kthread_stop(net_recv_task);
+		kthread_stop(net->rx_thread);
+		net->rx_thread = NULL;
+		net_recv_task = NULL;
 	}
+
+	/* XXX if we stop the thread we've cut off the rx path for all the
+	 * nodes.. we should walk their net_inode_privates and tear down their
+	 * sockets.   tx shouldn't bring up a conn if there is no
+	 * rx thread and rmdir should sync with the rx therad and tx 
+	 * references.. ugh. */
 }
 
 static int net_rx_should_wake(struct socket *sock)
@@ -534,16 +393,6 @@
 	return ret;
 }	      
 
-
-/* TODO Fix */
-static void net_remove_handlers(void)
-{
-	/* TODO: make an iterator in nm for running over each global inode
-	 * do I have this already?  then call destroy on each.  last put
-	 * will do the work.  doesnt matter if it's slow.  this is only
-	 * on shutdown... */
-}
-
 static int net_recv_tcp_msg(struct socket *sock, void *data, size_t len)
 {
 	int ret;
@@ -613,7 +462,7 @@
 }
 
 int net_send_message_iov(u32 msg_type, u32 key, struct iovec *caller_iov,
-			 size_t caller_iovlen, struct inode *inode,
+			 size_t caller_iovlen, u8 target_node,
 			 int *status)
 {
 	int ret;
@@ -626,13 +475,8 @@
 	struct iovec *iov = NULL;
 	struct socket *sock = NULL;
 
-	BUG_ON(current == net_recv_task);
+	BUG_ON(net_recv_task && (current == net_recv_task));
 
-	if (!inode || !inode->u.generic_ip) {
-		netprintk0("bad inode, cannot send message\n");
-		ret = -EINVAL;
-		goto out;
-	}
 	if (caller_iovlen == 0) {
 		netprintk0("bad iovec array length\n");
 		ret = -EINVAL;
@@ -648,7 +492,7 @@
 		goto out;
 	}
 
-	ret = net_sock_addref_or_connect(inode, &sock);
+	ret = net_sock_addref_or_connect(target_node, &sock);
 	if (ret)
 		goto out;
 
@@ -688,8 +532,7 @@
 	nsc.msg_num = msg->msg_num;
 	nsc.sys_status = NET_ERR_NONE;
 	nsc.status = 0;
-	/* XXX: Should be using group index here. */
-	nsc.target_node = nm_get_node_global_index(inode);
+	nsc.target_node = target_node;
 
 	init_waitqueue_entry(&sleep, current);
 	add_wait_queue(&nsc.wq, &sleep);
@@ -732,7 +575,7 @@
 	if (cleanup_wq)
 		remove_wait_queue(&nsc.wq, &sleep);
 	if (sock)
-		net_sock_decref(inode, cleanup_sock);
+		net_sock_decref(sock, cleanup_sock);
 	if (iov)
 		kfree(iov);
 	if (msg)
@@ -758,13 +601,14 @@
  *   - status will not be set on return code != 0
  */
 int net_send_message(u32 msg_type, u32 key, void *data, u32 len,
-		     struct inode *inode, int *status)
+		     u8 target_node, int *status)
 {
 	struct iovec iov = {
 		.iov_base = data,
 		.iov_len = len,
 	};
-	return net_send_message_iov(msg_type, key, &iov, 1, inode, status);
+	return net_send_message_iov(msg_type, key, &iov, 1,
+				    target_node, status);
 }
 EXPORT_SYMBOL(net_send_message);
 
@@ -887,9 +731,7 @@
 
 static int net_receive(void)
 {
-	struct inode *inode;
 	LIST_HEAD(snapshot_list);
-	nm_node_inode_private *priv;
 	net_inode_private *net;
 	struct socket *sock;
 	net_msg *hdr;
@@ -915,14 +757,13 @@
 		list_del_init(&net->active_item);
 		spin_unlock_bh(&net_active_lock);
 
-		priv = container_of(net, nm_node_inode_private, net);
-	       	inode = priv->inode;
 		sock = NULL;
 
 		err = 0;
 		read_eagain = 0;
 		read_some = 0;
 
+		/* basically a manual addref that doesn't connect :/ */
 		spin_lock_bh(&net->sock_lock);
 		if (net->sock && !net->sock_pending) {
 			sock = net->sock;
@@ -1009,9 +850,9 @@
 		spin_unlock_bh(&net_active_lock);
 
 		netprintk("net %p finished reading with %d\n", net, err);
-		if (err < 0 && err != -EAGAIN) {
+		if (sock && err < 0 && err != -EAGAIN) {
 			netprintk("socket saw err %d, closing\n", err);
-			net_sock_decref(inode, err);
+			net_sock_decref(sock, err);
 		}
 	}
 
@@ -1064,8 +905,11 @@
 	netprintk("node %u died, killed %d messages\n", node, num_kills);
 }
 
-static void net_hb_node_down_cb(struct inode *group,
-				struct inode *node,
+/* this callback is registered on insmod and torn down on rmmod.  
+ * the list and locks that it uses to kill messages are statically
+ * defined so it should be ok.. it just has to carefully be called
+ * after hb is ready and before hb is torn down */
+static void net_hb_node_down_cb(struct nm_node *node,
 				int node_num,
 				void *data)
 {
@@ -1075,7 +919,7 @@
 static struct hb_callback_func	*net_hb_down = NULL;
 #define NET_HB_NODE_DOWN_PRI     (0x1)
 
-static int net_register_hb_callbacks(void)
+int net_register_hb_callbacks(void)
 {
 	net_hb_down = kmalloc(sizeof(*net_hb_down), GFP_KERNEL);
 	if (!net_hb_down)
@@ -1087,7 +931,7 @@
 	return hb_register_callback(net_hb_down);
 }
 
-static void net_unregister_hb_callbacks(void)
+void net_unregister_hb_callbacks(void)
 {
 	int status;
 
@@ -1153,7 +997,6 @@
 	if (syserr != NET_ERR_NONE)
 		goto out_respond;
 
-	net_num_dispatched++;
 	handler_status = (hnd->func)(hdr, sizeof(net_msg) + hdr->data_len, hnd->data);
 
 out_respond:
@@ -1301,7 +1144,7 @@
 	state_change(sk);
 }	
 
-static int net_start_connect(net_inode_private *net, nm_node_info *node)
+static int net_start_connect(net_inode_private *net, u32 addr, u16 port)
 {
 	struct socket *sock = NULL;
 	struct sock *sk;
@@ -1325,9 +1168,9 @@
 	}
 	
 	memset (&remoteaddr, 0, sizeof (remoteaddr));
-	remoteaddr.sin_family = net_ip_version_to_family(node->ifaces[0].ip_version);
-	remoteaddr.sin_addr.s_addr = node->ifaces[0].addr_u.ip_addr4;
-	remoteaddr.sin_port = node->ifaces[0].ip_port;
+	remoteaddr.sin_family = AF_INET;
+	remoteaddr.sin_addr.s_addr = addr;
+	remoteaddr.sin_port = port;
 
 	net->sock = sock;
 
@@ -1380,19 +1223,14 @@
 	set_fs(oldfs);
 }
 
-static void net_sock_decref(struct inode *inode, int error)
+static void net_sock_decref(struct socket *sock, int error)
 {
 	net_inode_private *net = NULL;
-	nm_node_inode_private *priv;
-	struct socket *sock = NULL;
 	int release = 0;
 
-	priv = inode->u.generic_ip;
-	if (!priv) {
-		netprintk("bad inode %p\n", inode);
-		return;
-	}
-	net = &priv->net;
+	/* we hold a ref, this should be stable */
+	net = sock->sk->sk_user_data;
+	BUG_ON(net == NULL);
 
 	spin_lock_bh(&net->sock_lock); 
 
@@ -1441,24 +1279,23 @@
 	return empty;
 }
 
-static int net_sock_addref_or_connect(struct inode *inode,
-				      struct socket **sock_ret)
+static int net_sock_addref_or_connect(u8 target_node, struct socket **sock_ret)
 {
-	nm_node_inode_private *priv;
-	nm_node_info *node;
+	struct nm_node *node = NULL;
 	net_inode_private *net = NULL;
 	struct socket *sock = NULL;
 	int ret = 0, wait = 0, set_pending = 0;
 	struct waiting_for_sock wfs;
 
-	priv = inode->u.generic_ip;
-	if (!priv) {
-		netprintk("bad inode %p\n", inode);
+	/* XXX think about passing refs around.. */
+	node = nm_get_node_by_num(target_node);
+	if (node == NULL) {
+		netprintk("node %u unknown\n", target_node);
 		ret = -EINVAL;
 		goto out;
 	}
-	net = &priv->net;
-	node = &priv->node;
+	/* XXX verify that node is fully configured, rx thread is going */
+	net = &node->nd_net_inode_private;
 
 	spin_lock_bh(&net->sock_lock); 
 	if (net->sock && !net->sock_pending) {
@@ -1481,7 +1318,8 @@
 	spin_unlock_bh(&net->sock_lock); 
 
 	if (set_pending) {
-		ret = net_start_connect(net, node);
+		ret = net_start_connect(net, node->nd_ipv4_address,
+					node->nd_ipv4_port);
 		if (ret)
 			goto out;
 	}
@@ -1518,6 +1356,8 @@
 	}
 	if (sock)
 		*sock_ret = sock;
+	if (node)
+		nm_node_put(node);
 
 	BUG_ON(ret == 0 && sock == NULL);
 	netprintk("addref for net %p gave %d\n", net, ret);
@@ -1529,8 +1369,7 @@
 	int error, slen;
 	struct sockaddr_in sin;
 	struct socket *new_sock = NULL;
-	struct inode *inode = NULL;
-	nm_node_inode_private *priv;
+	struct nm_node *node = NULL;
 
 	BUG_ON(sock == NULL);
 	error = sock_create_lite(sock->sk->sk_family,
@@ -1555,24 +1394,21 @@
 	netprintk("attempt to connect from %u.%u.%u.%u:%04x\n", 
 		NIPQUAD(sin.sin_addr.s_addr), ntohs(sin.sin_port));
 
-	inode = nm_get_node_by_ip(sin.sin_addr.s_addr);
-	if (inode == NULL) {
+	node = nm_get_node_by_ip(sin.sin_addr.s_addr);
+	if (node == NULL) {
 		netprintk0("connect from unknown host...\n");
 		net_send_error(new_sock, NET_UNKNOWN_HOST);
 		goto out;
 	}
 
-	priv = inode->u.generic_ip;
-	BUG_ON(priv == NULL);
+	netprintk("connect from known host: %s\n", node->nd_name);
 
-	netprintk("connect from known host: %s\n", priv->node.node_name);
-
 	if (ntohs(sin.sin_port) >= 1024)
 		netprintk("warning: connect from unprivileged port: "
 			  "%u.%u.%u.%u:%d\n", NIPQUAD(sin.sin_addr.s_addr),
 			  ntohs(sin.sin_port));
 
-	error = net_attach_sock(&priv->net, new_sock);
+	error = net_attach_sock(&node->nd_net_inode_private, new_sock);
 	if (error == -EEXIST)
 		net_send_error(new_sock, NET_ALREADY_CONNECTED);
 
@@ -1582,36 +1418,33 @@
 			net_sock_drain(new_sock);
 			sock_release(new_sock);
 		}
-		if (inode)
-			iput(inode);
 	}
+	if (node)
+		nm_node_put(node);
 	return;
 }
 
-static struct socket *net_init_tcp_recv_sock(void)
+static struct socket *net_init_tcp_recv_sock(u16 port)
 {
 	struct sockaddr_in sin;
 	struct socket *sock;
 	int error;
 
-	error = sock_create(net_ip_version_to_family(ip_version),
-			     SOCK_STREAM, IPPROTO_TCP,
-			     &sock);
+	error = sock_create(PF_INET, SOCK_STREAM, IPPROTO_TCP, &sock);
 	if (error < 0) {
 		netprintk("unable to create socket, error=%d\n", error);
 		goto bail;
 	}
 
 	memset(&sin, 0, sizeof(sin));
-	sin.sin_family = net_ip_version_to_family(ip_version);
+	sin.sin_family = PF_INET;
 	sin.sin_addr.s_addr = htonl(INADDR_ANY);
-	sin.sin_port = ip_port;
+	sin.sin_port = port;
 
-	error = sock->ops->bind(sock, (struct sockaddr *)&sin,
-				sizeof(sin));
+	error = sock->ops->bind(sock, (struct sockaddr *)&sin, sizeof(sin));
 	if (error < 0) {
 		netprintk ("unable to bind socket to port %d, error=%d\n", 
-			ntohs(ip_port), error);
+			ntohs(port), error);
 		goto bail;
 	}
 
@@ -1629,7 +1462,3 @@
 	BUG_ON(sock == NULL);
 	return sock;
 }
-
-MODULE_LICENSE("GPL");
-module_init (net_driver_entry);
-module_exit (net_driver_exit);

Modified: trunk/fs/ocfs2/cluster/tcp.h
===================================================================
--- trunk/fs/ocfs2/cluster/tcp.h	2005-03-18 04:25:45 UTC (rev 2005)
+++ trunk/fs/ocfs2/cluster/tcp.h	2005-03-18 06:01:10 UTC (rev 2006)
@@ -41,17 +41,11 @@
 #include <linux/in.h>
 
 /*
- * FIXME: no need for clcommon.h and nodemanager.h except for
+ * FIXME: no need for nodemanager.h except for
  * NM_MAX_NAME_LEN...shouldn't that be something or somewhere else?
  */
-#include "clcommon.h"
 #include "nodemanager.h"
-#include "ocfs2_tcp.h"
 
-
-#define NET_DISP_THREAD_MS   5000   /* TODO */
-#define NET_RECV_THREAD_MS   5000   /* TODO */
-
 enum net_system_error {
 	NET_ERR_NONE = 0,
 	NET_ERR_NO_HNDLR,
@@ -155,24 +149,6 @@
 }
 
 
-static inline int net_ip_version_to_family(u16 ip_version)
-{
-	printk("ip_version passed: %u, host byteorder: %u\n", ip_version, ntohs(ip_version));
-	return PF_INET;
-	switch (ntohs(ip_version)) {
-		case 4:
-			return PF_INET;
-		case 6:
-			return PF_INET6;
-		default:
-			BUG();
-	}
-
-	return 4;
-}
-
-
-
 /* TODO: figure this out.... */
 static inline int net_link_down(int err, struct socket *sock)
 {
@@ -213,33 +189,16 @@
 int net_register_handler(u32 msg_type, u32 key, int flags, 
 			 u32 max_len, net_msg_handler_func *func, void *data);
 int net_init_tcp_sock(struct inode *inode);
-int net_send_message(u32 msg_type, u32 key, void *data, u32 len, struct inode *inode, int *status);
+int net_send_message(u32 msg_type, u32 key, void *data, u32 len,
+		     u8 target_node, int *status);
 int net_send_message_iov(u32 msg_type, u32 key, struct iovec *iov,
-			 size_t iovlen, struct inode *inode, int *status);
+			 size_t iovlen, u8 target_node, int *status);
 int net_broadcast_message(u32 msg_type, u32 key, void *data, u32 len, struct inode *group);
 net_msg_handler * net_lookup_handler(u32 msg_type, u32 key);
 
-#define GSD_MESSAGE   130
-#define GSD_ACTION_ADD_GROUP        (0x01)
-#define GSD_ACTION_ADD_GROUP_NODE   (0x02)
+int net_register_hb_callbacks(void);
+void net_unregister_hb_callbacks(void);
+int net_start_rx_thread(struct nm_node *node);
+void net_stop_rx_thread(struct nm_node *node);
 
-typedef struct _gsd_message
-{
-	u8 from;
-	u8 action;
-	u8 namelen;
-	u8 pad1;
-	u32 pad2;
-	u8 name[NM_MAX_NAME_LEN];
-} gsd_message;
-
-static inline void gsd_message_to_net(gsd_message *g)
-{
-	/* do nothing */
-}
-static inline void gsd_message_to_host(gsd_message *g)
-{
-	/* do nothing */
-}
-
 #endif /* CLUSTER_TCP_H */

Modified: trunk/fs/ocfs2/dlm/dlmast.c
===================================================================
--- trunk/fs/ocfs2/dlm/dlmast.c	2005-03-18 04:25:45 UTC (rev 2005)
+++ trunk/fs/ocfs2/dlm/dlmast.c	2005-03-18 06:01:10 UTC (rev 2006)
@@ -42,7 +42,6 @@
 
 #include "util.h"
 
-#include "cluster/clcommon.h"
 #include "cluster/heartbeat.h"
 #include "cluster/nodemanager.h"
 #include "cluster/tcp.h"
@@ -101,18 +100,18 @@
 	DLM_ASSERT(lksb);
 
 	/* only updates if this node masters the lockres */
-	if (res->owner == dlm->group_index) {
+	if (res->owner == dlm->node_num) {
 
 		spin_lock(&res->spinlock);
 		/* check the lksb flags for the direction */
 		if (lksb->flags & DLM_LKSB_GET_LVB) {
 			dlmprintk("getting lvb from lockres for %s node\n",
-				  lock->ml.node == dlm->group_index ? "master" :
+				  lock->ml.node == dlm->node_num ? "master" :
 				  "remote");
 			memcpy(lksb->lvb, res->lvb, DLM_LVB_LEN);
 		} else if (lksb->flags & DLM_LKSB_PUT_LVB) {
 			dlmprintk("setting lvb from lockres for %s node\n",
-				  lock->ml.node == dlm->group_index ? "master" :
+				  lock->ml.node == dlm->node_num ? "master" :
 				  "remote");
 			memcpy(res->lvb, lksb->lvb, DLM_LVB_LEN);
 		}
@@ -136,7 +135,7 @@
 	DLM_ASSERT(lksb);
 	fn = lock->ast;
 	DLM_ASSERT(fn);
-	DLM_ASSERT(lock->ml.node == dlm->group_index);
+	DLM_ASSERT(lock->ml.node == dlm->node_num);
 
 	dlm_update_lvb(dlm, res, lock);
 	(*fn)(lock->astdata);
@@ -155,7 +154,7 @@
 	DLM_ASSERT(res);
 	lksb = lock->lksb;
 	DLM_ASSERT(lksb);
-	DLM_ASSERT(lock->ml.node != dlm->group_index);
+	DLM_ASSERT(lock->ml.node != dlm->node_num);
 
 	ret = 0;
 	dlm_update_lvb(dlm, res, lock);
@@ -172,7 +171,7 @@
 	dlm_bastlockfunc_t *fn = lock->bast;
 	dlmprintk0("\n");
 
-	DLM_ASSERT(lock->ml.node == dlm->group_index);
+	DLM_ASSERT(lock->ml.node == dlm->node_num);
 	DLM_ASSERT(fn);
 	
 	(*fn)(lock->astdata, blocked_type);
@@ -242,7 +241,7 @@
 	}
 
 	/* cannot get a proxy ast message if this node owns it */
-	DLM_ASSERT(res->owner != dlm->group_index);
+	DLM_ASSERT(res->owner != dlm->node_num);
 
 	dlmprintk("lockres %.*s\n", res->lockname.len, res->lockname.name);
 	if (!dlm_is_recovery_lock(past->name, past->namelen))
@@ -329,7 +328,6 @@
 {
 	int ret = 0;
 	dlm_proxy_ast past;
-	struct inode *inode = NULL;
 	struct iovec iov[2];
 	size_t iovlen = 1;
 
@@ -338,7 +336,7 @@
 		  msg_type, blocked_type);
 
 	memset(&past, 0, sizeof(dlm_proxy_ast));
-	past.node_idx = dlm->group_index;
+	past.node_idx = dlm->node_num;
 	past.type = msg_type;
 	past.blocked_type = blocked_type;
 	past.namelen = res->lockname.len;
@@ -355,14 +353,9 @@
 		iovlen++;
 	}
 
-	ret = -EINVAL;
-	inode = nm_get_group_node_by_index(dlm->group, lock->ml.node);
-	if (inode) {
-		dlm_proxy_ast_to_net(&past);
-		ret = net_send_message_iov(DLM_PROXY_AST_MSG, dlm->key, 
-					   iov, iovlen, inode, NULL);
-		iput(inode);
-	}
+	dlm_proxy_ast_to_net(&past);
+	ret = net_send_message_iov(DLM_PROXY_AST_MSG, dlm->key, iov, iovlen,
+				   lock->ml.node, NULL);
 	if (ret < 0)
 		dlmprintk("(%d) dlm_send_proxy_ast: returning %d\n", 
 			  current->pid, ret);

Modified: trunk/fs/ocfs2/dlm/dlmconvert.c
===================================================================
--- trunk/fs/ocfs2/dlm/dlmconvert.c	2005-03-18 04:25:45 UTC (rev 2005)
+++ trunk/fs/ocfs2/dlm/dlmconvert.c	2005-03-18 06:01:10 UTC (rev 2006)
@@ -42,7 +42,6 @@
 
 #include "util.h"
 
-#include "cluster/clcommon.h"
 #include "cluster/heartbeat.h"
 #include "cluster/nodemanager.h"
 #include "cluster/tcp.h"
@@ -189,7 +188,7 @@
 		  res->lockname.name, dlm_lock_mode_name(type));
 	/* immediately grant the new lock type */
 	lock->lksb->status = DLM_NORMAL;
-	if (lock->ml.node == dlm->group_index)
+	if (lock->ml.node == dlm->node_num)
 		dlmprintk0("doing in-place convert for nonlocal lock\n");
 	lock->ml.type = type;
 	status = DLM_NORMAL;
@@ -306,7 +305,6 @@
 					   dlm_lock_resource *res, 
 					   dlm_lock *lock, int flags, int type)
 {
-	struct inode *inode = NULL;
 	dlm_convert_lock convert;
 	int tmpret;
 	dlm_status ret;
@@ -317,7 +315,7 @@
 	dlmprintk0("\n");
 
 	memset(&convert, 0, sizeof(dlm_convert_lock));
-	convert.node_idx = dlm->group_index;
+	convert.node_idx = dlm->node_num;
 	convert.requested_type = type;
 	convert.cookie = lock->ml.cookie;
 	convert.namelen = res->lockname.len;
@@ -334,21 +332,16 @@
 		iovlen++;
 	}
 
-	ret = DLM_NOLOCKMGR;
-	inode = nm_get_group_node_by_index(dlm->group, res->owner);
-	if (inode) {
-		dlm_convert_lock_to_net(&convert);
-		tmpret = net_send_message_iov(DLM_CONVERT_LOCK_MSG, dlm->key, 
-					      iov, iovlen, inode, &status);
-		if (tmpret >= 0) {
-			// successfully sent and received
-			ret = status;  // this is already a dlm_status
-		} else {
-			dlmprintk("error occurred in net_send_message: %d\n", 
-				  tmpret);
-			ret = dlm_err_to_dlm_status(tmpret);
-		}
-		iput(inode);
+	dlm_convert_lock_to_net(&convert);
+	tmpret = net_send_message_iov(DLM_CONVERT_LOCK_MSG, dlm->key, 
+				      iov, iovlen, res->owner, &status);
+	if (tmpret >= 0) {
+		// successfully sent and received
+		ret = status;  // this is already a dlm_status
+	} else {
+		dlmprintk("error occurred in net_send_message: %d\n", 
+			  tmpret);
+		ret = dlm_err_to_dlm_status(tmpret);
 	}
 
 	return ret;

Modified: trunk/fs/ocfs2/dlm/dlmfs.c
===================================================================
--- trunk/fs/ocfs2/dlm/dlmfs.c	2005-03-18 04:25:45 UTC (rev 2005)
+++ trunk/fs/ocfs2/dlm/dlmfs.c	2005-03-18 06:01:10 UTC (rev 2006)
@@ -40,7 +40,6 @@
 
 #include "util.h"
 
-#include "cluster/clcommon.h"
 #include "cluster/nodemanager.h"
 #include "cluster/heartbeat.h"
 #include "cluster/tcp.h"

Modified: trunk/fs/ocfs2/dlm/dlmfs_compat.c
===================================================================
--- trunk/fs/ocfs2/dlm/dlmfs_compat.c	2005-03-18 04:25:45 UTC (rev 2005)
+++ trunk/fs/ocfs2/dlm/dlmfs_compat.c	2005-03-18 06:01:10 UTC (rev 2006)
@@ -5,7 +5,6 @@
 
 #include "util.h"
 
-#include "cluster/clcommon.h"
 #include "cluster/nodemanager.h"
 #include "cluster/tcp.h"
 

Modified: trunk/fs/ocfs2/dlm/dlmlock.c
===================================================================
--- trunk/fs/ocfs2/dlm/dlmlock.c	2005-03-18 04:25:45 UTC (rev 2005)
+++ trunk/fs/ocfs2/dlm/dlmlock.c	2005-03-18 06:01:10 UTC (rev 2006)
@@ -42,7 +42,6 @@
 
 #include "util.h"
 
-#include "cluster/clcommon.h"
 #include "cluster/heartbeat.h"
 #include "cluster/nodemanager.h"
 #include "cluster/tcp.h"
@@ -197,7 +196,6 @@
 					       dlm_lock_resource *res, 
 					       dlm_lock *lock, int flags)
 {
-	struct inode *inode = NULL;
 	dlm_create_lock create;
 	int tmpret, status = 0;
 	dlm_status ret;
@@ -205,29 +203,23 @@
 	dlmprintk0("\n");
 
 	memset(&create, 0, sizeof(create));
-	create.node_idx = dlm->group_index;
+	create.node_idx = dlm->node_num;
 	create.requested_type = lock->ml.type;
 	create.cookie = lock->ml.cookie;
 	create.namelen = res->lockname.len;
 	create.flags = flags;
 	strncpy(create.name, res->lockname.name, create.namelen);
 
-	ret = DLM_NOLOCKMGR;
-	inode = nm_get_group_node_by_index(dlm->group, res->owner);
-	if (inode) {
-		dlm_create_lock_to_net(&create);
-		tmpret = net_send_message(DLM_CREATE_LOCK_MSG, dlm->key, 
-					  &create, sizeof(create), 
-					  inode, &status);
-		if (tmpret >= 0) {
-			// successfully sent and received
-			ret = status;  // this is already a dlm_status
-		} else {
-			dlmprintk("error occurred in net_send_message: %d\n", 
-				  tmpret);
-			ret = dlm_err_to_dlm_status(tmpret);
-		}
-		iput(inode);
+	dlm_create_lock_to_net(&create);
+	tmpret = net_send_message(DLM_CREATE_LOCK_MSG, dlm->key, &create,
+				  sizeof(create), res->owner, &status);
+	if (tmpret >= 0) {
+		// successfully sent and received
+		ret = status;  // this is already a dlm_status
+	} else {
+		dlmprintk("error occurred in net_send_message: %d\n", 
+			  tmpret);
+		ret = dlm_err_to_dlm_status(tmpret);
 	}
 
 	return ret;

Modified: trunk/fs/ocfs2/dlm/dlmmaster.c
===================================================================
--- trunk/fs/ocfs2/dlm/dlmmaster.c	2005-03-18 04:25:45 UTC (rev 2005)
+++ trunk/fs/ocfs2/dlm/dlmmaster.c	2005-03-18 06:01:10 UTC (rev 2006)
@@ -42,7 +42,6 @@
 
 #include "util.h"
 
-#include "cluster/clcommon.h"
 #include "cluster/heartbeat.h"
 #include "cluster/nodemanager.h"
 #include "cluster/tcp.h"
@@ -243,8 +242,8 @@
 	/* copy off the node_map and register hb callbacks on our copy */
 	memcpy(mle->node_map, dlm->domain_map, sizeof(mle->node_map));
 	memcpy(mle->vote_map, dlm->domain_map, sizeof(mle->vote_map));
-	clear_bit(dlm->group_index, mle->vote_map);
-	clear_bit(dlm->group_index, mle->node_map);
+	clear_bit(dlm->node_num, mle->vote_map);
+	clear_bit(dlm->node_num, mle->node_map);
 
 	/* attach the mle to the domain node up/down events */
 	__dlm_mle_attach_hb_events(dlm, mle);
@@ -273,14 +272,11 @@
 
 
 void dlm_mle_node_down(dlm_ctxt *dlm, dlm_master_list_entry *mle,
-		       struct inode *group, struct inode *node, int idx)
+		       struct nm_node *node, int idx)
 {
 	DLM_ASSERT(mle);
 	DLM_ASSERT(dlm);
 
-	if (dlm->group != group)
-		return;
-
 	spin_lock(&mle->spinlock);
 
 	if (!test_bit(idx, mle->node_map))
@@ -298,14 +294,11 @@
 }
 
 void dlm_mle_node_up(dlm_ctxt *dlm, dlm_master_list_entry *mle,
-		       struct inode *group, struct inode *node, int idx)
+		     struct nm_node *node, int idx)
 {
 	DLM_ASSERT(mle);
 	DLM_ASSERT(dlm);
 
-	if (dlm->group != group)
-		return;
-
 	spin_lock(&mle->spinlock);
 
 #if 0	
@@ -365,7 +358,7 @@
 {
 	assert_spin_locked(&res->spinlock);
 
-	if (owner == dlm->group_index)
+	if (owner == dlm->node_num)
 		atomic_inc(&dlm->local_resources);
 	else if (owner == DLM_LOCK_RES_OWNER_UNKNOWN)
 		atomic_inc(&dlm->unknown_resources);
@@ -384,7 +377,7 @@
 	if (owner == res->owner)
 		return;
 
-	if (res->owner == dlm->group_index)
+	if (res->owner == dlm->node_num)
 		atomic_dec(&dlm->local_resources);
 	else if (res->owner == DLM_LOCK_RES_OWNER_UNKNOWN)
 		atomic_dec(&dlm->unknown_resources);
@@ -590,7 +583,7 @@
 		/* caller knows it's safe to assume it's not mastered elsewhere
 		 * DONE!  return right away */
 		spin_lock(&res->spinlock);
-		dlm_change_lockres_owner(dlm, res, dlm->group_index);
+		dlm_change_lockres_owner(dlm, res, dlm->node_num);
 		__dlm_insert_lock(dlm, res);
 		spin_unlock(&res->spinlock);
 		spin_unlock(&dlm->spinlock);
@@ -730,11 +723,11 @@
 		/* have all nodes responded? */
 		if (voting_done) {
 			bit = find_next_bit(mle->maybe_map, NM_MAX_NODES, 0);
-			if (dlm->group_index <= bit) {
+			if (dlm->node_num <= bit) {
 				/* my node number is lowest.
 			 	 * now tell other nodes that I am 
 				 * mastering this. */
-				mle->master = dlm->group_index;
+				mle->master = dlm->node_num;
 				assert = 1;
 				sleep = 0;
 			}
@@ -766,7 +759,7 @@
 
 	ret = 0;   /* done */	
 	if (assert) {
-		m = dlm->group_index;
+		m = dlm->node_num;
 		ret = dlm_do_assert_master(dlm, res->lockname.name, 
 					   res->lockname.len, mle->vote_map);
 		if (ret) {
@@ -809,13 +802,12 @@
 
 static int dlm_do_master_request(dlm_master_list_entry *mle, int to)
 {
-	struct inode *inode = NULL;
 	dlm_ctxt *dlm = mle->dlm;
 	dlm_master_request request;
 	int ret, response=0;
 
 	memset(&request, 0, sizeof(request));
-	request.node_idx = dlm->group_index;
+	request.node_idx = dlm->node_num;
 	if (mle->type == DLM_MLE_BLOCK) {
 		request.namelen = mle->u.name.len;
 		strncpy(request.name, mle->u.name.name, request.namelen);
@@ -825,53 +817,47 @@
 			request.namelen);
 	}
 
-	ret = -EINVAL;
-	inode = nm_get_group_node_by_index(dlm->group, to);
-	if (inode) {
-		dlm_master_request_to_net(&request);
-		ret = net_send_message(DLM_MASTER_REQUEST_MSG, dlm->key, 
-				       &request, sizeof(request), 
-				       inode, &response);
-		iput(inode);
-		if (ret >= 0) {
-			spin_lock(&mle->spinlock);
-			switch (response) {
-				case DLM_MASTER_RESP_YES:
-					set_bit(to, mle->response_map);
-					// dlmprintk("woot!  node %u is the "
-					// "master!\n", to);
-					mle->master = to;
-					break;
-				case DLM_MASTER_RESP_NO:
-					// dlmprintk("node %u is not the "
-					// "master, not in-progress\n", to);
-					set_bit(to, mle->response_map);
-					break;
-				case DLM_MASTER_RESP_MAYBE:
-					// dlmprintk("node %u is not the "
-					// "master, but IS in-progress\n", to);
-					set_bit(to, mle->response_map);
-					set_bit(to, mle->maybe_map);
-					break;
-				case DLM_MASTER_RESP_ERROR:
-					dlmprintk("node %u hit an -ENOMEM!  "
-						  "try everything again\n", to);
-					mle->error = 1;
-					break;
-				default:
-					dlmprintk("bad response! %u\n", 
-						  response);
-					ret = -EINVAL;
-					break;
-			}
-			spin_unlock(&mle->spinlock);
-		} else {
-			dlmprintk("net_send_message returned %d!\n", ret);
-		}
-	} else {
-		dlmprintk("nm_get_group_node_by_index failed to find inode "
-			  "for node %d!\n", to);
-	}	
+	dlm_master_request_to_net(&request);
+	ret = net_send_message(DLM_MASTER_REQUEST_MSG, dlm->key, &request,
+			       sizeof(request), to, &response);
+	if (ret < 0)  {
+		dlmprintk("net_send_message returned %d!\n", ret);
+		goto out;
+	}
+
+	spin_lock(&mle->spinlock);
+	switch (response) {
+		case DLM_MASTER_RESP_YES:
+			set_bit(to, mle->response_map);
+			// dlmprintk("woot!  node %u is the "
+			// "master!\n", to);
+			mle->master = to;
+			break;
+		case DLM_MASTER_RESP_NO:
+			// dlmprintk("node %u is not the "
+			// "master, not in-progress\n", to);
+			set_bit(to, mle->response_map);
+			break;
+		case DLM_MASTER_RESP_MAYBE:
+			// dlmprintk("node %u is not the "
+			// "master, but IS in-progress\n", to);
+			set_bit(to, mle->response_map);
+			set_bit(to, mle->maybe_map);
+			break;
+		case DLM_MASTER_RESP_ERROR:
+			dlmprintk("node %u hit an -ENOMEM!  "
+				  "try everything again\n", to);
+			mle->error = 1;
+			break;
+		default:
+			dlmprintk("bad response! %u\n", 
+				  response);
+			ret = -EINVAL;
+			break;
+	}
+	spin_unlock(&mle->spinlock);
+
+out:
 	return ret;
 }
 
@@ -923,16 +909,16 @@
 		 * the node that called us */
 		memcpy(nodemap, dlm->domain_map, sizeof(nodemap));
 		clear_bit(request->node_idx, nodemap);
-		clear_bit(dlm->group_index, nodemap);
+		clear_bit(dlm->node_num, nodemap);
 		while ((bit = find_next_bit(nodemap, NM_MAX_NODES,
-				    dlm->group_index)) < NM_MAX_NODES) {
+				    dlm->node_num)) < NM_MAX_NODES) {
 			clear_bit(bit, nodemap);
 		}
 		spin_unlock(&dlm->spinlock);
 
 		/* take care of the easy cases up front */
 		spin_lock(&res->spinlock);
-		if (res->owner == dlm->group_index) {
+		if (res->owner == dlm->node_num) {
 			spin_unlock(&res->spinlock);
 			// dlmprintk0("this node is the master\n");
 			response = DLM_MASTER_RESP_YES;
@@ -1067,7 +1053,6 @@
 int dlm_do_assert_master(dlm_ctxt *dlm, const char *lockname, 
 			 unsigned int namelen, void *nodemap)
 {
-	struct inode *inode = NULL;
 	dlm_assert_master assert;
 	int to, tmpret;
 	dlm_node_iter iter;
@@ -1083,24 +1068,13 @@
 	while ((to = dlm_node_iter_next(&iter)) >= 0) {
 		// dlmprintk("sending assert master to %d\n", to);
 		memset(&assert, 0, sizeof(assert));
-		assert.node_idx = dlm->group_index;
+		assert.node_idx = dlm->node_num;
 		assert.namelen = namelen;
 		strncpy(assert.name, lockname, namelen);
 
-		inode = nm_get_group_node_by_index(dlm->group, to);
-		if (!inode) {
-			tmpret = -EINVAL;
-			dlmprintk("could not get nm info for node %d!  "
-				  "need to retry this whole thing\n", to);
-			ret = tmpret;
-			break;
-		}
-
 		dlm_assert_master_to_net(&assert);
 		tmpret = net_send_message(DLM_ASSERT_MASTER_MSG, dlm->key, 
-					  &assert, sizeof(assert), inode, NULL);
-		iput(inode);
-
+					  &assert, sizeof(assert), to, NULL);
 		if (tmpret < 0) {
 			// TODO
 			// dlmprintk("assert_master returned %d!\n", tmpret);

Modified: trunk/fs/ocfs2/dlm/dlmmod.c
===================================================================
--- trunk/fs/ocfs2/dlm/dlmmod.c	2005-03-18 04:25:45 UTC (rev 2005)
+++ trunk/fs/ocfs2/dlm/dlmmod.c	2005-03-18 06:01:10 UTC (rev 2006)
@@ -43,7 +43,6 @@
 
 #include "util.h"
 
-#include "cluster/clcommon.h"
 #include "cluster/heartbeat.h"
 #include "cluster/nodemanager.h"
 #include "cluster/tcp.h"
@@ -82,7 +81,6 @@
 LIST_HEAD(dlm_domains);
 spinlock_t dlm_domain_lock = SPIN_LOCK_UNLOCKED;
 DECLARE_WAIT_QUEUE_HEAD(dlm_domain_events);
-u8 dlm_global_index = NM_MAX_NODES;
 static spinlock_t dlm_cookie_lock = SPIN_LOCK_UNLOCKED;
 static u64 dlm_next_cookie = 1;
 
@@ -196,10 +194,6 @@
 	if (status < 0)
 		return -1;
 
-	dlm_global_index = nm_this_node(NULL);
-	if (dlm_global_index == NM_MAX_NODES)
-		return -1;
-
 	status = dlm_register_net_handlers();
 	if (status)
 		return -1;
@@ -307,7 +301,7 @@
 		res = dlm_lockres_grab(dlm, lock->lockres);
 		down_read(&dlm->recovery_sem);
 
-		if (res->owner == dlm->group_index)
+		if (res->owner == dlm->node_num)
 			status = dlmconvert_master(dlm, res, lock, flags, mode);
 		else 
 			status = dlmconvert_remote(dlm, res, lock, flags, mode);
@@ -350,7 +344,7 @@
 		lock->ml.type = mode;
 		lock->ml.convert_type = LKM_IVMODE;
 		lock->ml.highest_blocked = LKM_IVMODE;
-		lock->ml.node = dlm->group_index;
+		lock->ml.node = dlm->node_num;
 		lock->ast = ast;
 		lock->bast = bast;
 		lock->astdata = data;
@@ -371,7 +365,7 @@
 			}
 		}
 
-		if (res->owner == dlm->group_index)
+		if (res->owner == dlm->node_num)
 			status = dlmlock_master(dlm, res, lock, flags);
 		else 
 			status = dlmlock_remote(dlm, res, lock, flags);
@@ -439,7 +433,7 @@
 	DLM_ASSERT(res);
 	dlmprintk("lock=%p res=%p\n", lock, res);
 
-	if (res->owner == dlm->group_index) {
+	if (res->owner == dlm->node_num) {
 		status = dlmunlock_master(dlm, res, lock, lksb, flags, 
 					  &call_ast);
 		dlmprintk("done calling dlmunlock_master: returned %d, "
@@ -597,9 +591,6 @@
 	if (dlm->name)
 		kfree(dlm->name);
 
-	if (dlm->group)
-		iput(dlm->group);
-
 	kfree(dlm);
 }
 
@@ -783,31 +774,22 @@
 				    unsigned int node)
 {
 	int status;
-	struct inode *node_inode;
 	dlm_exit_domain leave_msg;
 
 	dlmprintk("Asking node %u if we can leave the domain %s me = %u\n",
-		  node, dlm->name, dlm->group_index);
+		  node, dlm->name, dlm->node_num);
 
-	node_inode = nm_get_group_node_by_index(dlm->group, node);
-	if (!node_inode) {
-		status = -EINVAL;
-		dlmprintk("Could not get inode for node %u!\n", node);
-		goto bail;
-	}
-
 	memset(&leave_msg, 0, sizeof(leave_msg));
-	leave_msg.node_idx = dlm->group_index;
+	leave_msg.node_idx = dlm->node_num;
 
 	dlm_exit_domin_to_net(&leave_msg);
 
 	status = net_send_message(DLM_EXIT_DOMAIN_MSG, dlm->key,
-				  &leave_msg, sizeof(leave_msg), node_inode,
+				  &leave_msg, sizeof(leave_msg), node,
 				  NULL);
-	iput(node_inode);
+
 	dlmprintk("status return %d from net_send_message\n", status);
 
-bail:
 	return status;
 }
 
@@ -822,7 +804,7 @@
 
 	spin_lock(&dlm->spinlock);
 	/* Clear ourselves from the domain map */
-	clear_bit(dlm->group_index, dlm->domain_map);
+	clear_bit(dlm->node_num, dlm->domain_map);
 	while ((node = find_next_bit(dlm->domain_map, NM_MAX_NODES, 0)) 
 	       != -1) {
 		if (node > NM_MAX_NODES)
@@ -1026,27 +1008,18 @@
 				    unsigned int node)
 {
 	int status;
-	struct inode *node_inode;
 	dlm_cancel_join cancel_msg;
 
-	node_inode = nm_get_group_node_by_index(dlm->group, node);
-	if (!node_inode) {
-		status = -EINVAL;
-		dlmprintk("Could not get inode for node %u!\n", node);
-		goto bail;
-	}
-
 	memset(&cancel_msg, 0, sizeof(cancel_msg));
-	cancel_msg.node_idx = dlm->group_index;
+	cancel_msg.node_idx = dlm->node_num;
 	cancel_msg.name_len = strlen(dlm->name);
 	strncpy(cancel_msg.domain, dlm->name, cancel_msg.name_len);
 
 	dlm_cancel_join_to_net(&cancel_msg);
 
 	status = net_send_message(DLM_CANCEL_JOIN_MSG, DLM_MOD_KEY,
-				  &cancel_msg, sizeof(cancel_msg), node_inode,
+				  &cancel_msg, sizeof(cancel_msg), node,
 				  NULL);
-	iput(node_inode);
 	if (status < 0) {
 		dlmprintk("net_send_message returned %d!\n", status);
 		goto bail;
@@ -1074,7 +1047,7 @@
 		if (node >= NM_MAX_NODES)
 			break;
 
-		if (node == dlm->group_index)
+		if (node == dlm->node_num)
 			continue;
 
 		tmpstat = dlm_send_one_join_cancel(dlm, node);
@@ -1095,27 +1068,18 @@
 {
 	int status, retval;
 	dlm_query_join_request join_msg;
-	struct inode *node_inode;
 
 	dlmprintk("querying node %d\n", node);
 
-	node_inode = nm_get_group_node_by_index(dlm->group, node);
-	if (!node_inode) {
-		status = -EINVAL;
-		dlmprintk("Could not get inode for node %u!\n", node);
-		goto bail;
-	}
-
 	memset(&join_msg, 0, sizeof(join_msg));
-	join_msg.node_idx = dlm->group_index;
+	join_msg.node_idx = dlm->node_num;
 	join_msg.name_len = strlen(dlm->name);
 	strncpy(join_msg.domain, dlm->name, join_msg.name_len);
 
 	dlm_query_join_request_to_net(&join_msg);
 
 	status = net_send_message(DLM_QUERY_JOIN_MSG, DLM_MOD_KEY, &join_msg,
-				  sizeof(join_msg), node_inode, &retval);
-	iput(node_inode);
+				  sizeof(join_msg), node, &retval);
 	if (status < 0 && status != -ENOPROTOOPT && status != -ENOTCONN) {
 		dlmprintk("net_send_message returned %d!\n", status);
 		goto bail;
@@ -1152,33 +1116,22 @@
 				    unsigned int node)
 {
 	int status;
-	struct inode *node_inode;
 	dlm_assert_joined assert_msg;
 
 	dlmprintk("Sending join assert to node %u\n", node);
 
-	node_inode = nm_get_group_node_by_index(dlm->group, node);
-	if (!node_inode) {
-		status = -EINVAL;
-		dlmprintk("Could not get inode for node %u!\n", node);
-		goto bail;
-	}
-
 	memset(&assert_msg, 0, sizeof(assert_msg));
-	assert_msg.node_idx = dlm->group_index;
+	assert_msg.node_idx = dlm->node_num;
 	assert_msg.name_len = strlen(dlm->name);
 	strncpy(assert_msg.domain, dlm->name, assert_msg.name_len);
 
 	dlm_assert_joined_to_net(&assert_msg);
 
 	status = net_send_message(DLM_ASSERT_JOINED_MSG, DLM_MOD_KEY,
-				  &assert_msg, sizeof(assert_msg), node_inode,
-				  NULL);
-	iput(node_inode);
+				  &assert_msg, sizeof(assert_msg), node, NULL);
 	if (status < 0)
 		dlmprintk("net_send_message returned %d!\n", status);
 
-bail:
 	return status;
 }
 
@@ -1194,7 +1147,7 @@
 		if (node >= NM_MAX_NODES)
 			break;
 
-		if (node == dlm->group_index)
+		if (node == dlm->node_num)
 			continue;
 
 		do {
@@ -1250,7 +1203,7 @@
 
 static int dlm_try_to_join_domain(dlm_ctxt *dlm)
 {
-	int status, tmpstat, node;
+	int status = 0, tmpstat, node;
 	struct domain_join_ctxt *ctxt;
 	enum dlm_query_join_response response;
 
@@ -1265,17 +1218,12 @@
 	/* group sem locking should work for us here -- we're already
 	 * registered for heartbeat events so filling this should be
 	 * atomic wrt getting those handlers called. */
-	status = hb_fill_node_map(dlm->group, dlm->live_nodes_map,
-				  sizeof(dlm->live_nodes_map));
-	if (status < 0) {
-		dlmprintk("I couldn't fill my node map!\n");
-		goto bail;
-	}
+	hb_fill_node_map(dlm->live_nodes_map, sizeof(dlm->live_nodes_map));
 
 	spin_lock(&dlm->spinlock);
 	memcpy(ctxt->live_map, dlm->live_nodes_map, sizeof(ctxt->live_map));
 
-	__dlm_set_joining_node(dlm, dlm->group_index);
+	__dlm_set_joining_node(dlm, dlm->node_num);
 
 	spin_unlock(&dlm->spinlock);
 
@@ -1285,7 +1233,7 @@
 		if (node >= NM_MAX_NODES)
 			break;
 
-		if (node == dlm->group_index)
+		if (node == dlm->node_num)
 			continue;
 
 		status = dlm_request_join(dlm, node, &response);
@@ -1314,7 +1262,7 @@
 	spin_lock(&dlm->spinlock);
 	memcpy(dlm->domain_map, ctxt->yes_resp_map,
 	       sizeof(ctxt->yes_resp_map));
-	set_bit(dlm->group_index, dlm->domain_map);
+	set_bit(dlm->node_num, dlm->domain_map);
 	spin_unlock(&dlm->spinlock);
 
 	dlm_send_join_asserts(dlm, ctxt->yes_resp_map);
@@ -1467,17 +1415,11 @@
 }
 
 static dlm_ctxt *dlm_alloc_ctxt(const char *domain,
-				struct inode *group,
 				u32 key)
 {
 	int i;
 	dlm_ctxt *dlm = NULL;
 
-	/* if for some reason we can't get a reference on the group
-	 * inode (required) then don't even try the rest. */
-	if (!igrab(group))
-		goto leave;
-
 	dlm = kmalloc(sizeof(dlm_ctxt), GFP_KERNEL);
 	if (!dlm) {
 		dlmprintk0("could not allocate dlm_ctxt\n");
@@ -1508,6 +1450,7 @@
 
 	strcpy(dlm->name, domain);
 	dlm->key = key;
+	dlm->node_num = nm_this_node();
 
 	spin_lock_init(&dlm->spinlock);
 	spin_lock_init(&dlm->master_lock);
@@ -1526,10 +1469,6 @@
 	INIT_LIST_HEAD(&dlm->mle_hb_events);
 	init_rwsem(&dlm->recovery_sem);
 
-	/* this eats the reference we got above. */
-	dlm->group = group;
-	dlm->group_index = nm_this_node(group);
-
 	dlm->joining_node = DLM_LOCK_RES_OWNER_UNKNOWN;
 	init_waitqueue_head(&dlm->dlm_join_events);
 
@@ -1555,23 +1494,19 @@
  * dlm_register_domain: one-time setup per "domain"
  */
 dlm_ctxt * dlm_register_domain(const char *domain,
-			       const char *group_name,
 			       u32 key)
 {
 	int ret;
 	dlm_ctxt *dlm = NULL;
 	dlm_ctxt *new_ctxt = NULL;
-	struct inode *group = NULL;
 
 	if (strlen(domain) > NM_MAX_NAME_LEN) {
 		dlmprintk0("domain name length too long\n");
 		goto leave;
 	}
 
-	group = nm_get_group_by_name(group_name);
-	if (!group) {
-		dlmprintk("no nm group %s for domain %s!\n", 
-			  group_name, domain);
+	if (nm_this_node() == NM_MAX_NODES) {
+		dlmprintk0("a local node has not been configured\n");
 		goto leave;
 	}
 
@@ -1607,7 +1542,7 @@
 	if (!new_ctxt) {
 		spin_unlock(&dlm_domain_lock);
 
-		new_ctxt = dlm_alloc_ctxt(domain, group, key);
+		new_ctxt = dlm_alloc_ctxt(domain, key);
 		if (new_ctxt)
 			goto retry;
 		goto leave;
@@ -1632,9 +1567,6 @@
 	if (new_ctxt)
 		dlm_free_ctxt_mem(new_ctxt);
 
-	if (group)
-		iput(group);
-
 	return dlm;
 }
 EXPORT_SYMBOL(dlm_register_domain);
@@ -1682,8 +1614,8 @@
 	struct list_head *bucket;
 	int i;
 
-	printk("dlm_ctxt: %s, group=%u, key=%u\n", 
-		  dlm->name, dlm->group_index, dlm->key);
+	printk("dlm_ctxt: %s, node=%u, key=%u\n", 
+		  dlm->name, dlm->node_num, dlm->key);
 	printk("some bug here... should not have to check for this...\n");
 	if (!dlm || !dlm->name) {
 		printk("wtf... dlm=%p\n", dlm);

Modified: trunk/fs/ocfs2/dlm/dlmmod.h
===================================================================
--- trunk/fs/ocfs2/dlm/dlmmod.h	2005-03-18 04:25:45 UTC (rev 2005)
+++ trunk/fs/ocfs2/dlm/dlmmod.h	2005-03-18 06:01:10 UTC (rev 2006)
@@ -213,9 +213,8 @@
 	spinlock_t spinlock;
 	struct rw_semaphore recovery_sem;
 	char *name;
-	struct inode *group;
+	u8 node_num;
 	u32 key;
-	u8  group_index;
 	u8  joining_node;
 	wait_queue_head_t dlm_join_events;
 	unsigned long live_nodes_map[BITS_TO_LONGS(NM_MAX_NODES)];
@@ -718,7 +717,6 @@
 }
 
 dlm_ctxt * dlm_register_domain(const char *domain,
-			       const char *group_name,
 			       u32 key);
 void dlm_unregister_domain(dlm_ctxt *dlm);
 void dlm_get(dlm_ctxt *dlm);
@@ -783,8 +781,8 @@
 
 int dlm_nm_init(dlm_ctxt *dlm);
 int dlm_heartbeat_init(dlm_ctxt *dlm);
-void dlm_hb_node_down_cb(struct inode *group, struct inode *node, int idx, void *data);
-void dlm_hb_node_up_cb(struct inode *group, struct inode *node, int idx, void *data);
+void dlm_hb_node_down_cb(struct nm_node *node, int idx, void *data);
+void dlm_hb_node_up_cb(struct nm_node *node, int idx, void *data);
 int dlm_hb_node_dead(dlm_ctxt *dlm, int node);
 int __dlm_hb_node_dead(dlm_ctxt *dlm, int node);
 
@@ -806,9 +804,9 @@
 void dlm_init_lock(dlm_lock *newlock, int type, u8 node, u64 cookie);
 
 void dlm_mle_node_down(dlm_ctxt *dlm, dlm_master_list_entry *mle,
-		       struct inode *group, struct inode *node, int idx);
+		       struct nm_node *node, int idx);
 void dlm_mle_node_up(dlm_ctxt *dlm, dlm_master_list_entry *mle,
-		       struct inode *group, struct inode *node, int idx);
+		       struct nm_node *node, int idx);
 int dlm_do_assert_master(dlm_ctxt *dlm, const char *lockname, 
 			 unsigned int namelen, void *nodemap);
 

Modified: trunk/fs/ocfs2/dlm/dlmrecovery.c
===================================================================
--- trunk/fs/ocfs2/dlm/dlmrecovery.c	2005-03-18 04:25:45 UTC (rev 2005)
+++ trunk/fs/ocfs2/dlm/dlmrecovery.c	2005-03-18 06:01:10 UTC (rev 2006)
@@ -41,7 +41,6 @@
 
 #include "util.h"
 
-#include "cluster/clcommon.h"
 #include "cluster/heartbeat.h"
 #include "cluster/nodemanager.h"
 #include "cluster/tcp.h"
@@ -84,7 +83,7 @@
 				res->state |= DLM_LOCK_RES_RECOVERING;
 				list_del(&res->recovering);
 				list_add_tail(&res->recovering, &dlm->reco.resources);
-			} else if (res->owner == dlm->group_index) {
+			} else if (res->owner == dlm->node_num) {
 				list_for_each_safe(iter2, tmpiter, &res->granted) {
 					lock = list_entry (iter2, dlm_lock, list);
 					if (lock->ml.node == dead_node) {
@@ -114,7 +113,7 @@
 }
 
 
-void dlm_hb_node_down_cb(struct inode *group, struct inode *node, int idx, void *data)
+void dlm_hb_node_down_cb(struct nm_node *node, int idx, void *data)
 {
 	dlm_ctxt *dlm = data;
 	dlm_master_list_entry *mle;
@@ -136,7 +135,7 @@
 	/* notify any mles attached to the heartbeat events */
 	list_for_each(iter, &dlm->mle_hb_events) {
 		mle = list_entry(iter, dlm_master_list_entry, hb_events);
-		dlm_mle_node_down(dlm, mle, group, node, idx);
+		dlm_mle_node_down(dlm, mle, node, idx);
 	}
 
 	if (!test_bit(idx, dlm->domain_map)) {
@@ -162,7 +161,7 @@
 	dlm_put(dlm);
 }
 
-void dlm_hb_node_up_cb(struct inode *group, struct inode *node, int idx, void *data)
+void dlm_hb_node_up_cb(struct nm_node *node, int idx, void *data)
 {
 	dlm_ctxt *dlm = data;
 	dlm_master_list_entry *mle;
@@ -178,7 +177,7 @@
 	/* notify any mles attached to the heartbeat events */
 	list_for_each(iter, &dlm->mle_hb_events) {
 		mle = list_entry(iter, dlm_master_list_entry, hb_events);
-		dlm_mle_node_up(dlm, mle, group, node, idx);
+		dlm_mle_node_up(dlm, mle, node, idx);
 	}
 
 	spin_unlock(&dlm->spinlock);

Modified: trunk/fs/ocfs2/dlm/dlmthread.c
===================================================================
--- trunk/fs/ocfs2/dlm/dlmthread.c	2005-03-18 04:25:45 UTC (rev 2005)
+++ trunk/fs/ocfs2/dlm/dlmthread.c	2005-03-18 06:01:10 UTC (rev 2006)
@@ -44,7 +44,6 @@
 #include "util.h"
 
 #include "cluster/cl_compat.h"
-#include "cluster/clcommon.h"
 #include "cluster/heartbeat.h"
 #include "cluster/nodemanager.h"
 #include "cluster/tcp.h"
@@ -54,7 +53,6 @@
 
 extern spinlock_t dlm_domain_lock;
 extern struct list_head dlm_domains;
-extern u8 dlm_global_index;
 
 static int dlm_thread(void *data);
 struct task_struct *dlm_thread_task;
@@ -119,7 +117,7 @@
 	/* Since we can't migrate locks yet, for now we only handle
 	 * non locally mastered locks. */
 	spin_lock(&lockres->spinlock);
-	master = lockres->owner == dlm->group_index;
+	master = lockres->owner == dlm->node_num;
 	spin_unlock(&lockres->spinlock);
 
 	dlmprintk("purging lockres %.*s, master = %d\n", lockres->lockname.len,
@@ -185,9 +183,8 @@
 void dlm_shuffle_lists(dlm_ctxt *dlm, dlm_lock_resource *res)
 {
 	dlm_lock *lock, *target;
-	struct list_head *iter, *tmpiter;
+	struct list_head *iter;
 	struct list_head *head;
-	s8 hi;
 	int can_grant = 1;
 
 	dlmprintk("shuffle res %.*s\n", res->lockname.len, res->lockname.name);
@@ -329,7 +326,7 @@
 		spin_lock(&res->spinlock);
 
 		/* don't shuffle secondary queues */
-		if ((res->owner == dlm->group_index) &&
+		if ((res->owner == dlm->node_num) &&
 		    !(res->state & DLM_LOCK_RES_DIRTY)) {
 			list_add_tail(&res->dirty, &dlm->dirty_list);
 			res->state |= DLM_LOCK_RES_DIRTY;
@@ -400,7 +397,7 @@
 		dlmprintk0("delivering an ast for this lockres\n");
 
 		list_del_init(&lock->ast_list);
-		if (lock->ml.node != dlm->group_index) {
+		if (lock->ml.node != dlm->node_num) {
 			if (dlm_do_remote_ast(dlm, res, lock) < 0)
 				dlmprintk("eek\n");
 		} else
@@ -422,7 +419,7 @@
 
 		dlmprintk("delivering a bast for this lockres "
 			  "(blocked = %d\n", hi);
-		if (lock->ml.node != dlm->group_index) {
+		if (lock->ml.node != dlm->node_num) {
 			if (dlm_send_proxy_bast(dlm, res, lock, hi) < 0)
 				dlmprintk0("eeek\n");
 		} else
@@ -453,7 +450,7 @@
 			spin_lock(&res->spinlock);
 			list_del_init(&res->dirty);
 			res->state &= ~DLM_LOCK_RES_DIRTY;
-			BUG_ON(res->owner != dlm->group_index);
+			BUG_ON(res->owner != dlm->node_num);
 			spin_unlock(&res->spinlock);
 
 			dlm_shuffle_lists(dlm, res);

Modified: trunk/fs/ocfs2/dlm/dlmunlock.c
===================================================================
--- trunk/fs/ocfs2/dlm/dlmunlock.c	2005-03-18 04:25:45 UTC (rev 2005)
+++ trunk/fs/ocfs2/dlm/dlmunlock.c	2005-03-18 06:01:10 UTC (rev 2006)
@@ -42,7 +42,6 @@
 
 #include "util.h"
 
-#include "cluster/clcommon.h"
 #include "cluster/heartbeat.h"
 #include "cluster/nodemanager.h"
 #include "cluster/tcp.h"
@@ -91,9 +90,9 @@
 		  flags & LKM_VALBLK);
 
 	if (master_node)
-		DLM_ASSERT(res->owner == dlm->group_index);
+		DLM_ASSERT(res->owner == dlm->node_num);
 	else
-		DLM_ASSERT(res->owner != dlm->group_index);
+		DLM_ASSERT(res->owner != dlm->node_num);
 
 	spin_lock(&dlm->spinlock);
 	/* We want to be sure that we're not freeing a lock
@@ -202,7 +201,6 @@
 						 dlm_lockstatus *lksb, 
 						 int flags)
 {
-	struct inode *inode = NULL;
 	dlm_unlock_lock unlock;
 	int tmpret;
 	dlm_status ret;
@@ -213,7 +211,7 @@
 	dlmprintk0("\n");
 
 	memset(&unlock, 0, sizeof(unlock));
-	unlock.node_idx = dlm->group_index;
+	unlock.node_idx = dlm->node_num;
 	unlock.flags = flags;
 	unlock.cookie = lock->ml.cookie;
 	unlock.namelen = res->lockname.len;
@@ -229,27 +227,21 @@
 		iovlen++;
 	}
 
-	ret = DLM_NOLOCKMGR;
-	lksb->status = DLM_NOLOCKMGR;
-	inode = nm_get_group_node_by_index(dlm->group, res->owner);
-	if (inode) {
-		dlm_unlock_lock_to_net(&unlock);
-		tmpret = net_send_message_iov(DLM_UNLOCK_LOCK_MSG, dlm->key, 
-					      iov, iovlen, inode, &status);
-		if (tmpret >= 0) {
-			// successfully sent and received
-			if (status == DLM_CANCELGRANT)
-				ret = DLM_NORMAL;
-			else
-				ret = status;
-			lksb->status = status;
-		} else {
-			dlmprintk("error occurred in net_send_message: %d\n",
-				  tmpret);
-			ret = dlm_err_to_dlm_status(tmpret);
-			lksb->status = ret;
-		}
-		iput(inode);
+	dlm_unlock_lock_to_net(&unlock);
+	tmpret = net_send_message_iov(DLM_UNLOCK_LOCK_MSG, dlm->key, 
+				      iov, iovlen, res->owner, &status);
+	if (tmpret >= 0) {
+		// successfully sent and received
+		if (status == DLM_CANCELGRANT)
+			ret = DLM_NORMAL;
+		else
+			ret = status;
+		lksb->status = status;
+	} else {
+		dlmprintk("error occurred in net_send_message: %d\n",
+			  tmpret);
+		ret = dlm_err_to_dlm_status(tmpret);
+		lksb->status = ret;
 	}
 
 	return ret;

Modified: trunk/fs/ocfs2/dlm/userdlm.c
===================================================================
--- trunk/fs/ocfs2/dlm/userdlm.c	2005-03-18 04:25:45 UTC (rev 2005)
+++ trunk/fs/ocfs2/dlm/userdlm.c	2005-03-18 06:01:10 UTC (rev 2006)
@@ -37,7 +37,6 @@
 
 #include "util.h"
 
-#include "cluster/clcommon.h"
 #include "cluster/nodemanager.h"
 #include "cluster/heartbeat.h"
 #include "cluster/tcp.h"
@@ -637,7 +636,7 @@
 
 	snprintf(domain, name->len + 1, "%.*s", name->len, name->name);
 
-	dlm = dlm_register_domain(domain, domain, dlm_key);
+	dlm = dlm_register_domain(domain, dlm_key);
 
 	kfree(domain);
 	return dlm;

Modified: trunk/fs/ocfs2/dlmglue.c
===================================================================
--- trunk/fs/ocfs2/dlmglue.c	2005-03-18 04:25:45 UTC (rev 2005)
+++ trunk/fs/ocfs2/dlmglue.c	2005-03-18 06:01:10 UTC (rev 2006)
@@ -34,7 +34,6 @@
 #include <linux/kthread.h>
 
 #include <cluster/util.h>
-#include <cluster/clcommon.h>
 #include <cluster/heartbeat.h>
 #include <cluster/nodemanager.h>
 #include <cluster/tcp.h>
@@ -1617,10 +1616,10 @@
 
 	/* used by the dlm code to make message headers unique, each
 	 * node in this domain must agree on this. */
-	dlm_key = crc32(0, osb->group_name, strlen(osb->group_name));
+	dlm_key = crc32(0, osb->uuid_str, strlen(osb->uuid_str));
 
-	/* for now, group_name == domain */
-	dlm = dlm_register_domain(osb->group_name, osb->group_name, dlm_key);
+	/* for now, uuid == domain */
+	dlm = dlm_register_domain(osb->uuid_str, dlm_key);
 	if (!dlm) {
 		/* This is a best guess on return value... */
 		status = -ENOMEM;

Modified: trunk/fs/ocfs2/heartbeat.c
===================================================================
--- trunk/fs/ocfs2/heartbeat.c	2005-03-18 04:25:45 UTC (rev 2005)
+++ trunk/fs/ocfs2/heartbeat.c	2005-03-18 06:01:10 UTC (rev 2006)
@@ -32,8 +32,8 @@
 #include <linux/highmem.h>
 
 #include <cluster/util.h>
-#include <cluster/clcommon.h>
 #include <cluster/heartbeat.h>
+#include <cluster/nodemanager.h>
 #include <dlm/dlmcommon.h>
 
 #include "ocfs_log.h"
@@ -53,6 +53,7 @@
 #define OCFS2_HB_NODE_DOWN_PRI     (0x0000002)
 #define OCFS2_HB_NODE_UP_PRI	   OCFS2_HB_NODE_DOWN_PRI
 
+#if 0
 static void ocfs2_hb_node_down_cb(struct inode *group,
 				  struct inode *node,
 				  int node_num,
@@ -61,6 +62,7 @@
 				struct inode *node,
 				int node_num,
 				void *data);
+#endif
 
 static inline void __ocfs_node_map_set_bit(ocfs_node_map *map,
 					   int bit);
@@ -79,16 +81,12 @@
 	ocfs_node_map_init(&osb->umount_map);
 }
 
-static void ocfs2_hb_node_down_cb(struct inode *group,
-				  struct inode *node,
+static void ocfs2_hb_node_down_cb(struct nm_node *node,
 				  int node_num,
 				  void *data)
 {
 	ocfs_super *osb = data;
 
-	if (osb->group_inode != group)
-		return;
-
 	OCFS_ASSERT(osb->node_num != node_num);
 
 	printk("ocfs2: node down event for %d\n", node_num);
@@ -104,16 +102,12 @@
 	ocfs_recovery_thread(osb, node_num);
 }
 
-static void ocfs2_hb_node_up_cb(struct inode *group,
-				struct inode *node,
+static void ocfs2_hb_node_up_cb(struct nm_node *node,
 				int node_num,
 				void *data)
 {
 	ocfs_super *osb = data;
 
-	if (osb->group_inode != group)
-		return;
-
 	OCFS_ASSERT(osb->node_num != node_num);
 
 	printk("ocfs2: node up event for %d\n", node_num);

Modified: trunk/fs/ocfs2/ocfs.h
===================================================================
--- trunk/fs/ocfs2/ocfs.h	2005-03-18 04:25:45 UTC (rev 2005)
+++ trunk/fs/ocfs2/ocfs.h	2005-03-18 06:01:10 UTC (rev 2006)
@@ -43,7 +43,6 @@
 #endif
 
 #include "cluster/util.h"
-#include "cluster/clcommon.h"
 #include "cluster/nodemanager.h"
 #include "cluster/heartbeat.h"
 #include "cluster/tcp.h"
@@ -331,6 +330,7 @@
 	u64 bitmap_blkno;
 	u32 bitmap_cpg;
 	u8 *uuid;
+	char *uuid_str;
 	u8 *vol_label;
 	u64 first_cluster_group_blkno;
 	u32 fs_generation;
@@ -373,8 +373,6 @@
 	ocfs_alloc_stats alloc_stats;
 	char dev_str[20];		/* "major,minor" of the device */
 
-	char *group_name;
-	struct inode *group_inode;
 	dlm_ctxt *dlm;
 	ocfs2_lock_res super_lockres;
 

Modified: trunk/fs/ocfs2/super.c
===================================================================
--- trunk/fs/ocfs2/super.c	2005-03-18 04:25:45 UTC (rev 2005)
+++ trunk/fs/ocfs2/super.c	2005-03-18 06:01:10 UTC (rev 2006)
@@ -43,7 +43,6 @@
 #include <linux/inet.h>
 
 #include <cluster/util.h>
-#include <cluster/clcommon.h>
 #include <cluster/nodemanager.h>
 #include <dlm/dlmcommon.h>
 
@@ -129,12 +128,12 @@
 #endif /* Linux 2.4 stuff */
 
 
-static int ocfs_parse_options (char *options, u32 * uid, u32 * gid, int * reclaim_id, char **group_name);
+static int ocfs_parse_options (char *options, u32 * uid, u32 * gid, int * reclaim_id);
 static int __init ocfs_driver_entry (void);
 static void __exit ocfs_driver_exit (void);
 static void ocfs_put_super (struct super_block *sb);
 static int ocfs_mount_volume (struct super_block *sb, int reclaim_id,
-			      char **group_name, struct inode *root);
+			      struct inode *root);
 static void ocfs_dismount_volume(struct super_block *sb);
 static int ocfs_initialize_mem_lists (void);
 static void ocfs_free_mem_lists (void);
@@ -151,7 +150,7 @@
 static int ocfs_init_global_system_inodes(ocfs_super *osb);
 static int ocfs_init_local_system_inodes(ocfs_super *osb);
 static int ocfs_release_system_inodes(ocfs_super *osb);
-static int ocfs2_fill_local_node_info(ocfs_super *osb, char **group_name);
+static int ocfs2_fill_local_node_info(ocfs_super *osb);
 static int ocfs2_complete_mount_recovery(ocfs_super *osb);
 static int ocfs_check_volume(ocfs_super * osb);
 static int ocfs_verify_volume(ocfs2_dinode *di, struct buffer_head *bh,
@@ -316,12 +315,11 @@
 	u32 uid = current->fsuid;
 	u32 gid = current->fsgid;
 	int reclaim_id;
-	char *group_name = NULL;
 	ocfs_super *osb = NULL;
 
 	LOG_ENTRY_ARGS ("%p, %p, %i", sb, data, silent);
 
-	if (ocfs_parse_options (data, &uid, &gid, &reclaim_id, &group_name) != 0) {
+	if (ocfs_parse_options (data, &uid, &gid, &reclaim_id) != 0) {
 		status = -EINVAL;
 		LOG_ERROR_STR ("ocfs_read_super: bad mount option");
 		goto read_super_error;
@@ -334,7 +332,7 @@
 	/* this is needed to support O_LARGE_FILE */
 	sb->s_maxbytes = OCFS_LINUX_MAX_FILE_SIZE;
 
-	status = ocfs_mount_volume (sb, reclaim_id, &group_name, NULL);
+	status = ocfs_mount_volume (sb, reclaim_id, NULL);
 	if (status < 0)
 		goto read_super_error;
 
@@ -376,9 +374,6 @@
 			LOG_EXIT_STATUS(status);
 	}
 
-	if (group_name)
-		kfree(group_name);
-
 	LOG_EXIT_STATUS(status);
 	return status;		
 
@@ -391,9 +386,6 @@
 	if (inode)
 		iput (inode);
 
-	if (group_name)
-		kfree(group_name);
-
 	LOG_EXIT_STATUS(status);
 	return status;
 }
@@ -430,12 +422,11 @@
  *
  * e.g., gid=9999,uid=9999,[no]cache,reclaimid
  */
-static int ocfs_parse_options (char *options, u32 * uid, u32 * gid, int * reclaim_id, char **group_name)
+static int ocfs_parse_options (char *options, u32 * uid, u32 * gid, int * reclaim_id)
 {
 	char *c;
 	char *value;
 	int ret = 1;
-	int size;
 
 	LOG_ENTRY ();
 	
@@ -476,24 +467,6 @@
 			}
 		} else if (!strcmp (c, "reclaimid")) {
 			*reclaim_id = 1;
-		} else if (!strcmp(c, "group")) {
-			if (!value || !*value) {
-				LOG_ERROR_STR
-					("group option requires an argument");
-				goto bail;
-			}
-			LOG_TRACE_ARGS("group name passed = %s\n", value);
-
-			size = strlen(value) + 1;
-			*group_name = kmalloc(size, GFP_KERNEL);
-			if (!(*group_name)) {
-				LOG_ERROR_STATUS(-ENOMEM);
-				goto bail;
-			}
-			memset(*group_name, 0, size);
-			printk("ocfs2: group name passed = %s, size = %d\n",
-			       value, size);
-			strcpy(*group_name, value);
 		} else {
 			LOG_ERROR_ARGS ("Invalid mount option: %s", c);
 			goto bail;
@@ -844,44 +817,24 @@
 	return 0;
 }
 
-static int ocfs2_fill_local_node_info(ocfs_super *osb, char **group_name)
+/* ocfs2 1.0 only allows one cluster and node identity per kernel image. */
+static int ocfs2_fill_local_node_info(ocfs_super *osb)
 {
-	int status, i;
-	struct inode *group = NULL;
-	char *p;
+	int status;
 
-	if (group_name) {
-		osb->group_name = *group_name;
-		*group_name = NULL;
-	} else {
-		osb->group_name = kmalloc(NM_MAX_NAME_LEN + 1, GFP_KERNEL);
-		if (!osb->group_name) {
-			status = -ENOMEM;
-			LOG_ERROR_STATUS(status);
-			goto bail;
-		}
-		memset(osb->group_name, 0, NM_MAX_NAME_LEN + 1);
-		for (i = 0, p = osb->uuid; i < MAX_VOL_ID_LENGTH; i++, p += 2)
-			sprintf(p, "%02X", osb->uuid[i]);
-	}
-
-	group = nm_get_group_by_name(osb->group_name);
-	if (!group) {
-		printk("ocfs2: could not join group \"%s\"\n",
-		       osb->group_name);
-		status = -EINVAL;
+	/* XXX hold a ref on the node while mounted?  easy enough, if
+	 * desirable. */
+	osb->node_num = nm_this_node();
+	if (osb->node_num == NM_MAX_NODES) {
+		printk("ocfs2: could not find this host's node number\n");
+		status = -ENOENT;
 		goto bail;
 	}
 
-	osb->group_inode = group;
-	osb->node_num = nm_this_node(group);
+	printk("ocfs2: I am node %d\n", osb->node_num);
 
-	printk("ocfs2: I am node %d, a member of group %s\n", osb->node_num,
-	       osb->group_name);
-
 	status = 0;
 bail:
-
 	return status;
 }
 
@@ -890,7 +843,7 @@
  *
  */
 static int ocfs_mount_volume (struct super_block *sb, int reclaim_id,
-			      char **group_name, struct inode *root)
+			      struct inode *root)
 {
 	int status, sector_size;
 	int unlock_super = 0;
@@ -930,7 +883,7 @@
 		goto leave;
 	}
 
-	status = ocfs2_fill_local_node_info(osb, group_name);
+	status = ocfs2_fill_local_node_info(osb);
 	if (status < 0) {
 		LOG_ERROR_STATUS (status);
 		goto leave;
@@ -1062,6 +1015,8 @@
 
 	ocfs2_put_slot(osb);
 
+	ocfs_release_system_inodes(osb);
+
 	ocfs2_dlm_shutdown(osb);
 
 	ocfs2_clear_hb_callbacks(osb);
@@ -1072,13 +1027,37 @@
 		MAJOR(osb->sb->s_dev), MINOR(osb->sb->s_dev),
 		OcfsGlobalCtxt.node_name, osb->node_num);
 
-	ocfs_release_system_inodes(osb);
-
 	ocfs_delete_osb (osb);
 	kfree(osb);
 	sb->s_dev = 0;
 }				/* ocfs_dismount_volume */
 
+static int osb_setup_uuid(ocfs_super *osb, const unsigned char *uuid,
+			  unsigned uuid_bytes)
+{
+	int i, ret;
+	char *ptr;
+
+	BUG_ON(uuid_bytes != MAX_VOL_ID_LENGTH);
+
+	osb->uuid_str = kcalloc(1, MAX_VOL_ID_LENGTH*2 + 1, GFP_KERNEL);
+	if (osb->uuid_str == NULL)
+		return -ENOMEM;
+
+	memcpy(osb->uuid, uuid, MAX_VOL_ID_LENGTH);
+
+	for (i = 0, ptr = osb->uuid_str; i < MAX_VOL_ID_LENGTH; i++) {
+		/* print with null */
+		ret = snprintf(ptr, 3, "%02X", uuid[i]);
+		if (ret != 2) /* drop super cleans up */
+			return -EINVAL;
+		/* then only advance past the last char */
+		ptr += 2;
+	}
+
+	return 0;
+}
+
 /*
  * ocfs_initialize_osb()
  *
@@ -1205,21 +1184,21 @@
 		goto bail;
 	}
 
+	if (osb_setup_uuid(osb, di->id2.i_super.s_uuid,
+			   sizeof(di->id2.i_super.s_uuid))) {
+		LOG_ERROR_ARGS("Out of memory trying to setup our uuid.\n"); 
+		status = -ENOMEM;
+		goto bail;
+	}
+
 	strncpy(osb->vol_label, di->id2.i_super.s_label, 63);
 	osb->vol_label[63] = '\0';
-	memcpy(osb->uuid, di->id2.i_super.s_uuid, MAX_VOL_ID_LENGTH);
 	osb->root_blkno = le64_to_cpu(di->id2.i_super.s_root_blkno);
 	osb->system_dir_blkno = le64_to_cpu(di->id2.i_super.s_system_dir_blkno);
 	osb->first_cluster_group_blkno = le64_to_cpu(di->id2.i_super.s_first_cluster_group);
 	osb->fs_generation = le32_to_cpu(di->i_fs_generation);
 	printk("vol_label: %s\n", osb->vol_label);
-	{
-		int ttt;
-		printk("uuid: ");
-		for (ttt=0; ttt<16; ttt++)
-		printk("%02x ", di->id2.i_super.s_uuid[ttt]);
-		printk("\n");
-	}
+	printk("uuid: %s\n", osb->uuid_str);
 	printk("root_blkno=%llu, system_dir_blkno=%llu\n", osb->root_blkno, osb->system_dir_blkno);
 
 	atomic_set (&osb->vol_state, VOLUME_INIT);
@@ -1500,19 +1479,15 @@
 	if (osb->slot_info)
 		ocfs2_free_slot_info(osb->slot_info);
 
-	if (osb->group_inode)
-		iput(osb->group_inode);
-
 	/* FIXME
 	 * This belongs in journal shutdown, but because we have to
 	 * allocate osb->journal at the start of ocfs_initalize_osb(),
 	 * we free it here.
 	 */
 	kfree(osb->journal);
-	if (osb->group_name)
-		kfree(osb->group_name);
 	if (osb->local_alloc_copy)
 		kfree(osb->local_alloc_copy);
+	kfree(osb->uuid_str);
 	memset (osb, 0, sizeof (ocfs_super));
 
 	LOG_EXIT ();

Modified: trunk/fs/ocfs2/vote.c
===================================================================
--- trunk/fs/ocfs2/vote.c	2005-03-18 04:25:45 UTC (rev 2005)
+++ trunk/fs/ocfs2/vote.c	2005-03-18 06:01:10 UTC (rev 2006)
@@ -32,7 +32,6 @@
 #include <linux/kthread.h>
 
 #include <cluster/util.h>
-#include <cluster/clcommon.h>
 #include <cluster/heartbeat.h>
 #include <cluster/nodemanager.h>
 #include <cluster/tcp.h>
@@ -223,7 +222,6 @@
 	u64 blkno;
 	enum ocfs2_vote_request request;
 	struct inode *inode = NULL;
-	struct inode *remote_node;
 	ocfs2_msg_hdr *hdr = &msg->v_hdr;
 	ocfs2_response_msg response;
 
@@ -304,21 +302,15 @@
 	response.r_hdr.h_node_num = htonl(osb->node_num);
 	response.r_response = htonl(vote_response);
 
-	remote_node = nm_get_node_by_num(node_num);
-	if (!remote_node) {
-		LOG_ERROR_ARGS("Couldn't get inode for node %u!\n", node_num);
-	} else {
-		net_status = net_send_message(OCFS2_MESSAGE_TYPE_RESPONSE,
-					      osb->net_key,
-					      &response,
-					      sizeof(ocfs2_response_msg),
-					      remote_node,
-					      NULL);
-		if (net_status < 0)
-			LOG_ERROR_ARGS("message to node %u fails with error "
-				       "%d!\n", node_num, net_status);
-		iput(remote_node);
-	}
+	net_status = net_send_message(OCFS2_MESSAGE_TYPE_RESPONSE,
+				      osb->net_key,
+				      &response,
+				      sizeof(ocfs2_response_msg),
+				      node_num,
+				      NULL);
+	if (net_status < 0)
+		LOG_ERROR_ARGS("message to node %u fails with error "
+			       "%d!\n", node_num, net_status);
 
 	if (inode)
 		iput(inode);
@@ -510,7 +502,6 @@
 {
 	int status, i, remote_err;
 	ocfs2_net_wait_ctxt *w = NULL;
-	struct inode *remote_node;
 	int dequeued = 0;
 
 	LOG_ENTRY();
@@ -534,21 +525,13 @@
 				       i);
 			ocfs_node_map_set_bit(osb, &w->n_node_map, i);
 
-			remote_node = nm_get_node_by_num(i);
-			if (!remote_node) {
-				status = -EINVAL;
-				LOG_ERROR_STATUS(status);
-				goto bail;
-			}
-
 			remote_err = 0;
 			status = net_send_message(OCFS2_MESSAGE_TYPE_VOTE,
 						  osb->net_key,
 						  request,
 						  sizeof(*request),
-						  remote_node,
+						  i,
 						  &remote_err);
-			iput(remote_node);
 			if (status == -ETIMEDOUT) {
 				LOG_TRACE_ARGS("remote node %d timed out!\n",
 					       i);

Modified: trunk/fs/usysfs/dir.c
===================================================================
--- trunk/fs/usysfs/dir.c	2005-03-18 04:25:45 UTC (rev 2005)
+++ trunk/fs/usysfs/dir.c	2005-03-18 06:01:10 UTC (rev 2006)
@@ -385,6 +385,7 @@
 			continue;
 		list_del_init(&sd->s_sibling);
 		child = sd->s_dentry;
+		/* And now we're faking rmdir.  Viro must hate me */
 		down(&child->d_inode->i_sem);
 		usysfs_drop_set(sd->s_element);
 		child->d_inode->i_flags |= S_DEAD;
@@ -449,6 +450,7 @@
 	int i;
 
 	if (ukset && ukset->default_sets) {
+		/* FYI, we're faking mkdir here */
 		down(&dentry->d_inode->i_sem);
 
 		for (i = 0; ukset->default_sets[i]; i++) {

Modified: trunk/fs/usysfs/mount.c
===================================================================
--- trunk/fs/usysfs/mount.c	2005-03-18 04:25:45 UTC (rev 2005)
+++ trunk/fs/usysfs/mount.c	2005-03-18 06:01:10 UTC (rev 2006)
@@ -66,9 +66,17 @@
 {
 	kset_init_copy(&ukset->kset);
 }
-
 EXPORT_SYMBOL_GPL(ukset_init);
 
+void ukset_init_type_name(struct ukset *ukset, const char *name,
+			  struct kobj_type *ktype)
+{
+	kobject_set_name(&ukset->kset.kobj, name);
+	ukset->kset.kobj.ktype = ktype;
+	ukset_init(ukset);
+}
+EXPORT_SYMBOL(ukset_init_type_name);
+
 static int usysfs_fill_super(struct super_block *sb, void *data, int silent)
 {
 	struct inode *inode;

Modified: trunk/fs/usysfs/usysfs.h
===================================================================
--- trunk/fs/usysfs/usysfs.h	2005-03-18 04:25:45 UTC (rev 2005)
+++ trunk/fs/usysfs/usysfs.h	2005-03-18 06:01:10 UTC (rev 2006)
@@ -85,7 +85,9 @@
 	return kset ? container_of(kset, struct ukset, kset) : NULL;
 }
 
-extern void ukset_init(struct ukset *ukset);
+void ukset_init(struct ukset *ukset);
+void ukset_init_type_name(struct ukset *ukset, const char *name,
+			  struct kobj_type *ktype);
 
 
 /**



More information about the Ocfs2-commits mailing list