[Ocfs2-commits] jlbec commits r1128 - in trunk: . src

svn-commits at oss.oracle.com svn-commits at oss.oracle.com
Thu Jun 17 17:06:35 CDT 2004


Author: jlbec
Date: 2004-06-17 16:06:33 -0500 (Thu, 17 Jun 2004)
New Revision: 1128

Added:
   trunk/src/alloc.h
   trunk/src/bitmap.h
   trunk/src/buffer_head_io.c
   trunk/src/dcache.h
   trunk/src/dir.h
   trunk/src/dlm.h
   trunk/src/extmap.h
   trunk/src/file.h
   trunk/src/heartbeat.h
   trunk/src/inode.h
   trunk/src/ioctl.h
   trunk/src/journal.h
   trunk/src/lockres.h
   trunk/src/namei.h
   trunk/src/nm.h
   trunk/src/ocfs.h
   trunk/src/ocfs1_fs_compat.h
   trunk/src/ocfs2_fs.h
   trunk/src/ocfs_buffer_head.h
   trunk/src/ocfs_compat.h
   trunk/src/ocfs_journal.h
   trunk/src/ocfs_log.h
   trunk/src/proc.h
   trunk/src/super.h
   trunk/src/symlink.h
   trunk/src/sysfile.h
   trunk/src/util.h
   trunk/src/ver.h
   trunk/src/volcfg.h
   trunk/src/vote.h
Removed:
   trunk/src/inc/
   trunk/src/io.c
Modified:
   trunk/Config.make.in
   trunk/TODO
   trunk/configure.in
   trunk/on_disk_changes
   trunk/src/Makefile
   trunk/src/alloc.c
   trunk/src/bitmap.c
   trunk/src/dcache.c
   trunk/src/dir.c
   trunk/src/dlm.c
   trunk/src/extmap.c
   trunk/src/file.c
   trunk/src/heartbeat.c
   trunk/src/inode.c
   trunk/src/ioctl.c
   trunk/src/journal.c
   trunk/src/lockres.c
   trunk/src/namei.c
   trunk/src/nm.c
   trunk/src/proc.c
   trunk/src/super.c
   trunk/src/symlink.c
   trunk/src/sysfile.c
   trunk/src/util.c
   trunk/src/ver.c
   trunk/src/volcfg.c
   trunk/src/vote.c
Log:

o Land format-changes branch to trunk.
	- Complete reorganization of the disk format.
	- See http://oss.oracle.com/projects/ocfs2/documentation/ocfs2-whats-new.txt for details.



Modified: trunk/Config.make.in
===================================================================
--- trunk/Config.make.in	2004-06-17 16:59:59 UTC (rev 1127)
+++ trunk/Config.make.in	2004-06-17 21:06:33 UTC (rev 1128)
@@ -60,7 +60,6 @@
 OCFS_LARGEIO = @OCFS_LARGEIO@
 OCFS_AIO = @OCFS_AIO@
 OCFS_MEMDEBUG = @OCFS_MEMDEBUG@
-OCFS_DBG_LOCKRES = @OCFS_DBG_LOCKRES@
 OCFS_TRACE = @OCFS_TRACE@
 OCFS_PROCESSOR = @OCFS_PROCESSOR@
 

Modified: trunk/TODO
===================================================================
--- trunk/TODO	2004-06-17 16:59:59 UTC (rev 1127)
+++ trunk/TODO	2004-06-17 21:06:33 UTC (rev 1128)
@@ -1,5 +1,6 @@
 * figure out what to do about old style cdsl. Either we error when we see it
   (in the module), or we provide an fsck to find and fix them.
+	- Probably should be handled by the conversion.
 
 * fsck must be able to replay the journal
 
@@ -13,13 +14,6 @@
 * Make bitmap free functions do their job without relocking the bitmaps for
   each record.
 
-* get rid of osb->curr_trans_id as it was never used (always zero)
-
-* get rid of all the:
-	changeSeqNum = osb->curr_trans_id; 
-	DISK_LOCK_SEQNUM(fe) = changeSeqNum;
-  code as it is equally useless.
-
 * make slabs for: ocfs_journal_handle, and ocfs_journal_copyout and maybe
   ocfs_journal_lock
 
@@ -36,6 +30,8 @@
 * Local alloc structure needs to be turned into a file entry so creating an
   inode for it is trivial. This way we can use the inodes locking primitives
   instead of special casing stuff and using osb->local_alloc_sem.
+	- It's an inode now, but we need to finish making it use inode
+	  locking primitives.
 
 * We need to be able to delete stuff from the inode alloc system file(s).
 

Modified: trunk/configure.in
===================================================================
--- trunk/configure.in	2004-06-17 16:59:59 UTC (rev 1127)
+++ trunk/configure.in	2004-06-17 21:06:33 UTC (rev 1128)
@@ -60,6 +60,9 @@
   i386|i486|i586|i686|i786|k6|k7)
     OCFS_PROCESSOR="i686"
     ;;
+  s390x)
+    OCFS_PROCESSOR="s390x"
+    ;;
 esac
 
 AC_SUBST(OCFS_PROCESSOR)
@@ -107,15 +110,6 @@
 AC_SUBST(OCFS_MEMDEBUG)
 AC_MSG_RESULT($enable_memdebug)
 
-AC_MSG_CHECKING(for lockres debugging)
-AC_ARG_ENABLE(lockres-debug, [  --enable-lockres-debug=[yes/no]     Turn on lock resource debugging [default=no]],,enable_lockres_debug=no)
-OCFS_DBG_LOCKRES=
-if test "x$enable_lockres_debug" = "xyes"; then
-  OCFS_DBG_LOCKRES=yes
-fi
-AC_SUBST(OCFS_DBG_LOCKRES)
-AC_MSG_RESULT($enable_lockres_debug)
-
 AC_MSG_CHECKING(for tracing)
 AC_ARG_ENABLE(trace, [  --enable-trace=[yes/no]         Turn on tracing [default=yes]],,enable_trace=yes)
 OCFS_TRACE=

Modified: trunk/on_disk_changes
===================================================================
--- trunk/on_disk_changes	2004-06-17 16:59:59 UTC (rev 1127)
+++ trunk/on_disk_changes	2004-06-17 21:06:33 UTC (rev 1128)
@@ -10,3 +10,12 @@
 Dir index became a used/unused bytemap for file entries in a dir node
 File entry for bitmap (potentially)
 added fe_off field to publish/vote stuff (net and disk)
+
+All fe fields renamed and moved around.  There is *no* commonality with
+ocfsv1.
+
+extent_group is now extent_header.  There is now no longer a difference
+between EXTENT_DATA and EXTENT_HEADER.  All extent headers have
+EXTENT_HEADER_SIGNATURE.  h_tree_depth of -1 means that the extent_recs
+point to actual data.  Note that the v1->v2 converter must set
+h_tree_depth to -1 on those blocks, as v1 did not.

Modified: trunk/src/Makefile
===================================================================
--- trunk/src/Makefile	2004-06-17 16:59:59 UTC (rev 1127)
+++ trunk/src/Makefile	2004-06-17 21:06:33 UTC (rev 1128)
@@ -33,10 +33,6 @@
 GLOBAL_DEFINES += -DOCFS_LINUX_MEM_DEBUG -DDEBUG_SLAB_ALLOCS
 endif
 
-ifdef OCFS_DBG_LOCKRES
-GLOBAL_DEFINES += -DOCFS_DBG_LOCKRES
-endif
-
 ifdef OCFS_AIO
 GLOBAL_DEFINES += -DAIO_ENABLED
 endif
@@ -99,7 +95,6 @@
 endif
 
 ifeq ($(OCFS_PROCESSOR),ppc64)
-  GLOBAL_DEFINES += -D__LP64__
   MACH_CFLAGS += -m64 -fsigned-char -fno-builtin -msoft-float -mminimal-toc
   LDADD += -m elf64ppc
 endif
@@ -114,40 +109,67 @@
 ifeq ($(OCFS_PROCESSOR),i586)
   GLOBAL_DEFINES += -D__ILP32__
 endif
+ifeq ($(OCFS_PROCESSOR),s390x)
+  GLOBAL_DEFINES += -D__LP64__
+endif
 
 DEFINES += $(BASE_DEFINES) $(GLOBAL_DEFINES)
 
-CFILES =		\
-	alloc.c		\
-	bitmap.c	\
-	dcache.c	\
-	dir.c		\
-	dlm.c		\
-	extmap.c	\
-	file.c		\
-	heartbeat.c	\
-	inode.c		\
-	io.c		\
-	ioctl.c		\
-	journal.c	\
-	lockres.c	\
-	namei.c		\
-	nm.c		\
-	proc.c		\
-	super.c		\
-	symlink.c	\
-	sysfile.c	\
-	util.c		\
-	ver.c		\
-	volcfg.c	\
+CFILES =			\
+	alloc.c			\
+	bitmap.c		\
+	buffer_head_io.c	\
+	dcache.c		\
+	dir.c			\
+	dlm.c			\
+	extmap.c		\
+	file.c			\
+	heartbeat.c		\
+	inode.c			\
+	ioctl.c			\
+	journal.c		\
+	lockres.c		\
+	namei.c			\
+	nm.c			\
+	proc.c			\
+	super.c			\
+	symlink.c		\
+	sysfile.c		\
+	util.c			\
+	ver.c			\
+	volcfg.c		\
 	vote.c
 
-HFILES = \
-	inc/ocfs.h		\
-	inc/ocfs_log.h		\
-	inc/ocfs_journal.h	\
-	inc/io.h		\
-	inc/proto.h
+HFILES =			\
+	ocfs2_fs.h		\
+	ocfs1_fs_compat.h	\
+	ocfs.h			\
+	ocfs_log.h		\
+	ocfs_compat.h		\
+	ocfs_journal.h		\
+	ocfs_buffer_head.h	\
+	alloc.h			\
+	bitmap.h		\
+	dcache.h		\
+	dir.h			\
+	dlm.h			\
+	extmap.h		\
+	file.h			\
+	heartbeat.h		\
+	inode.h			\
+	ioctl.h			\
+	journal.h		\
+	lockres.h		\
+	namei.h			\
+	nm.h			\
+	proc.h			\
+	super.h			\
+	symlink.h		\
+	sysfile.h		\
+	util.h			\
+	ver.h			\
+	volcfg.h		\
+	vote.h
 
 OBJS = $(subst .c,.o,$(CFILES))
 
@@ -162,11 +184,14 @@
 ifndef KERNEL_26
 # Building for a 2.4.x kernel
 
+# Make dependencies work
+$(OBJS): $(HFILES)
+
 ifneq ($(OCFS_PROCESSOR),x86_64)
 WARNINGS += -Wmissing-prototypes -Wmissing-declarations
 endif
 
-INCLUDES = -Iinc -I$(KERNELINC) -I$(GCCINC)
+INCLUDES = -I. -I$(KERNELINC) -I$(GCCINC)
 
 CFLAGS = $(OPTS) $(MACH_CFLAGS) -pipe -nostdinc -fno-strict-aliasing \
 	-fno-common -fomit-frame-pointer $(MODVERSIONS) $(WARNINGS)
@@ -194,12 +219,7 @@
 clean-ocfs:
 	rm -f *.o *.p *.s
 
-DIST_RULES = dist-subdircreate
 
-dist-subdircreate:
-	$(TOPDIR)/mkinstalldirs $(DIST_DIR)/inc
-
-
 else # ifndef KERNEL_26
 # The 2.6.x kernel makefile
 

Modified: trunk/src/alloc.c
===================================================================
--- trunk/src/alloc.c	2004-06-17 16:59:59 UTC (rev 1127)
+++ trunk/src/alloc.c	2004-06-17 21:06:33 UTC (rev 1128)
@@ -1,4 +1,6 @@
-/*
+/* -*- mode: c; c-basic-offset: 8; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
  * alloc.c
  *
  * Extent allocs and frees
@@ -24,50 +26,71 @@
  *	    Manish Singh, Neeraj Goyal, Suchit Kaura
  */
 
-#ifdef __KERNEL__
+#include "ocfs_compat.h"
+
+#include <linux/fs.h>
 #include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/highmem.h>
 
-#include "inc/ocfs_log.h"
-#include "inc/ocfs.h"
-#include "inc/ocfs_journal.h"
-#else
-#include <debugocfs.h>
-#endif
+#include "ocfs_log.h"
+#include "ocfs.h"
 
+#include "alloc.h"
+#include "bitmap.h"
+#include "dlm.h"
+#include "extmap.h"
+#include "inode.h"
+#include "util.h"
+#include "sysfile.h"
+#include "file.h"
+
+#include "ocfs_journal.h"
+#include "ocfs_buffer_head.h"
+
 #ifndef USERSPACE_TOOL
 
 
 /* Tracing */
 #define OCFS_DEBUG_CONTEXT    OCFS_DEBUG_CONTEXT_ALLOC
 
-static int ocfs_kill_this_tree(ocfs_super *osb, struct buffer_head *extent_grp_bh, 
-			       ocfs_journal_handle *handle, struct inode *inode);
-static int ocfs_allocate_new_data_node (ocfs_super * osb, 
-				 ocfs_file_entry * FileEntry,
-				 __u64 actualDiskOffset, __u64 actualLength, 
-				 struct buffer_head * extent_header_bh, 
-				 __u64 * NewExtentOffset, 
-				 ocfs_journal_handle *handle, struct inode *inode);
+static int ocfs_kill_this_tree(ocfs_super *osb,
+			       struct buffer_head *extent_grp_bh, 
+			       ocfs_journal_handle *handle,
+			       struct inode *inode);
+static int ocfs_allocate_new_data_node(ocfs_super *osb, 
+				       ocfs2_dinode *fe,
+				       u32 new_clusters, 
+				       struct buffer_head *eb_bh, 
+				       u64 *new_eb_blkno, 
+      				       ocfs_journal_handle *handle,
+				       struct inode *inode);
 
-static int ocfs_grow_extent_tree (ocfs_super * osb, struct buffer_head *fe_bh,
-				  ocfs_journal_handle *handle, __u64 disk_off,
-				  __u64 length, struct inode *inode);
+static int ocfs_grow_extent_tree(ocfs_super *osb,
+				 struct buffer_head *fe_bh,
+				 ocfs_journal_handle *handle,
+				 u64 blkno,
+			       	 u32 new_clusters, struct inode *inode);
 
-static int _squish_extent_entries(ocfs_super *osb, ocfs_alloc_ext *extarr, 
-				  __u8 *freeExtent, 
+static int _squish_extent_entries(ocfs_super *osb,
+				  ocfs2_extent_rec *extarr, 
+				  __u16 *freeExtent, 
 				  ocfs_journal_handle *handle,
-				  __u64 FileSize, int flag, 
+				  u32 num_clusters, int flag, 
 				  struct inode *inode);
 
-static int ocfs_fix_extent_group(ocfs_super *osb, struct buffer_head *group_bh, struct inode *inode);
+static int ocfs_fix_extent_block(ocfs_super *osb,
+				 struct buffer_head *eb_bh,
+				 struct inode *inode);
 
 static int ocfs_split_this_tree(ocfs_super * osb, 
-				struct buffer_head *extent_grp_bh, 
+				struct buffer_head *eb_bh, 
 				ocfs_journal_handle *handle,
-				ocfs_file_entry *fe, 
+				ocfs2_dinode *fe, 
 				struct inode *inode);
 
-static int ocfs_update_last_ext_ptr(ocfs_super *osb, ocfs_file_entry *fe, struct inode *inode);
+static int ocfs_update_last_eb_blk(ocfs_super *osb, ocfs2_dinode *fe,
+				   struct inode *inode);
 
 static int ocfs_free_vol_block (ocfs_super * osb, ocfs_journal_handle *handle,
 				ocfs_free_rec * FreeLog, __u32 NodeNum, 
@@ -86,13 +109,27 @@
 					 ocfs_bitmap_free_head **f, 
 					 struct buffer_head *local_alloc_bh, 
 					 int in_recovery);
-static __u32 ocfs_alloc_count_bits(ocfs_local_alloc *alloc);
-static void ocfs_clear_local_alloc(ocfs_local_alloc *alloc);
-static int ocfs_find_space_from_local(ocfs_super *osb, __u32 bitswanted, 
+static __u32 ocfs_alloc_count_bits(ocfs2_dinode *alloc);
+static void ocfs_clear_local_alloc(ocfs2_dinode *alloc);
+static int ocfs_find_space_from_local(ocfs_super *osb, __u32 bitswanted,
 				      __u64 * bitoff, __u64 * bitcount, 
 				      ocfs_journal_handle *handle);
-static int ocfs_local_find_clear_bits(ocfs_super *osb, ocfs_local_alloc *alloc, __u32 numbits);
+static int ocfs_local_find_clear_bits(ocfs_super *osb,
+				      ocfs2_dinode *alloc,
+				      __u32 numbits);
+static int ocfs_extent_contig(struct inode *inode, ocfs2_extent_rec *ext,
+			      u64 blkno);
 
+static int ocfs_extent_contig(struct inode *inode, ocfs2_extent_rec *ext,
+			      u64 blkno)
+{
+	u64 start_blkno = ext->e_blkno;
+
+	start_blkno += ocfs_clusters_to_blocks(inode->i_sb,
+					       ext->e_clusters);
+	return (start_blkno == blkno);
+}
+
 /* this is a good candidate for an inline function. */
 int ocfs_process_bitmap_free_head(ocfs_super *osb, ocfs_bitmap_free_head *f) 
 {
@@ -191,7 +228,6 @@
 	struct inode **extnode_inode = NULL;
 	struct inode *vol_inode = NULL;
 	__u32 tmp_indx;
-	__u64 lock_id;
 	struct buffer_head *globalbh = NULL;
 	ocfs_journal_handle *handle = NULL;
 	int credits = 33; /* one for each potential sysfile fe. This
@@ -210,12 +246,12 @@
 	} while (0)
 
 	ALLOC_BLOCK(free_ext_node,
-		    OCFS_MAXIMUM_NODES * sizeof (ocfs_free_rec *), status);
+		    osb->max_nodes * sizeof (ocfs_free_rec *), status);
 	ALLOC_BLOCK(extnode_inode,
-		    OCFS_MAXIMUM_NODES * sizeof (struct inode *), status);
+		    osb->max_nodes * sizeof (struct inode *), status);
 
 	/* init */
-	for (i = 0; i < OCFS_MAXIMUM_NODES; i++) {
+	for (i = 0; i < osb->max_nodes; i++) {
 		free_ext_node[i] = NULL;
 		extnode_inode[i] = NULL;
 	}
@@ -251,8 +287,8 @@
 			    }
 			    tmp_log = free_vol_bits;
 
-			    credits += 1 + free_log->update[i].length / 
-				    OCFS_BITS_IN_CHUNK;
+			    credits += ocfs_blocks_for_bits(osb->sb,
+							    free_log->update[i].length);
 			    break;
 
 		    default:
@@ -289,11 +325,10 @@
 
 	/* Get all the locks we need. do global bitmap last to
 	 * preserve lock ordering with extend/create */
-	lock_id = (OCFS_FILE_FILE_ALLOC_BITMAP * osb->sect_size) +
-		  osb->vol_layout.root_int_off;
-	for (i = 0; i < OCFS_MAXIMUM_NODES; i++, lock_id += osb->sect_size) {
+	for (i = 0; i < osb->max_nodes; i++) {
 		if (free_ext_node[i] != NULL) {
-			extnode_inode[i] = ocfs_iget(osb, lock_id);
+			extnode_inode[i] = 
+				ocfs_get_system_file_inode(osb, EXTENT_ALLOC_BITMAP_SYSTEM_INODE, i);
 			if (!extnode_inode[i]) {
 				status = -EINVAL;
 				LOG_ERROR_STATUS (status);
@@ -318,7 +353,7 @@
 	}
 
 	if (free_vol_bits != NULL) {
-		vol_inode = igrab(osb->system_inodes[GLOBAL_BITMAP_SYSTEM_INODE]);
+		vol_inode = ocfs_get_system_file_inode(osb, GLOBAL_BITMAP_SYSTEM_INODE, -1);
 		if (!vol_inode) {
 			status = -EINVAL;
 			LOG_ERROR_STATUS (status);
@@ -326,9 +361,9 @@
 		}
 		down_write(&OCFS_I(vol_inode)->ip_io_sem);
 
-		status = ocfs_acquire_lock (osb, OCFS_DLM_EXCLUSIVE_LOCK,
-					    FLAG_FILE_CREATE,
-					    &globalbh, vol_inode);
+		status = ocfs_acquire_lock(osb, OCFS_DLM_EXCLUSIVE_LOCK,
+					   FLAG_FILE_CREATE,
+					   &globalbh, vol_inode);
 		if (status < 0) {
 			up_write(&OCFS_I(vol_inode)->ip_io_sem);
 			iput(vol_inode);
@@ -343,20 +378,21 @@
 
 	/* free vol block */
 	if (free_vol_bits != NULL)
-		ocfs_free_vol_block (osb, handle, free_vol_bits, -1, 
-				     DISK_ALLOC_VOLUME);
+		ocfs_free_vol_block(osb, handle, free_vol_bits, -1, 
+				    DISK_ALLOC_VOLUME);
 
 	/* We can potentiallly loose some allocation for dirNodes or extent */
 	/* nodes but they should not be much...  */
-	for (i = 0; i < OCFS_MAXIMUM_NODES; i++) {
+	for (i = 0; i < osb->max_nodes; i++) {
 		if (free_ext_node[i] != NULL)
-			ocfs_free_vol_block (osb, handle, free_ext_node[i], i,
-					     DISK_ALLOC_EXTENT_NODE);
+			ocfs_free_vol_block(osb, handle,
+					    free_ext_node[i], i,
+					    DISK_ALLOC_EXTENT_NODE);
 	}
 
 	/* release all locks */
 	if (free_vol_bits) {
-		ocfs_file_entry *bm_lock;
+		ocfs2_dinode *bm_lock;
 
 		status = ocfs_journal_access(handle, globalbh, 
 					     OCFS_JOURNAL_ACCESS_WRITE);
@@ -366,7 +402,8 @@
 		}
 
 		bm_lock = OCFS_BH_GET_DATA_WRITE(globalbh);
-		bm_lock->u.bitinfo.used_bits = ocfs_count_bits(&osb->cluster_bitmap);
+		bm_lock->id1.bitmap1.i_used =
+			ocfs_count_bits(osb->sb, &osb->cluster_bitmap);
 		OCFS_BH_PUT_DATA(globalbh);
 
 		status = ocfs_journal_dirty(handle, globalbh);
@@ -385,18 +422,16 @@
 		ocfs_abort_trans(handle);
 
 	if (free_vol_bits) {
-		status = ocfs_release_lock (osb, OCFS_DLM_EXCLUSIVE_LOCK,
-					    FLAG_FILE_CREATE, globalbh, 
-					    vol_inode);
+		status = ocfs_release_lock(osb, OCFS_DLM_EXCLUSIVE_LOCK,
+					   FLAG_FILE_CREATE, globalbh, 
+					   vol_inode);
 		if (status < 0) {
 			LOG_ERROR_STATUS (status);
 			goto finally;
 		}
 	}
 
-	lock_id = (OCFS_FILE_FILE_ALLOC_BITMAP * osb->sect_size) +
-		  osb->vol_layout.root_int_off;
-	for (i = 0; i < OCFS_MAXIMUM_NODES; i++, lock_id += osb->sect_size) {
+	for (i = 0; i < osb->max_nodes; i++) {
 		if (free_ext_node[i] != NULL) {
 			status = ocfs_release_lock (osb,
 						    OCFS_DLM_EXCLUSIVE_LOCK,
@@ -413,11 +448,11 @@
 finally:
 
 	if (extnode_inode) {
-		for (i = 0; i < OCFS_MAXIMUM_NODES; i++) {
+		for (i = 0; i < osb->max_nodes; i++) {
 			if (extnode_inode[i]) {
 				up_write(&OCFS_I(extnode_inode[i])->ip_io_sem);
 				iput(extnode_inode[i]);
-                        }
+			}
 		}
 		kfree(extnode_inode);
 	}
@@ -431,7 +466,7 @@
 		brelse(globalbh);
 
 	if (free_ext_node) {
-		for (i = 0; i < OCFS_MAXIMUM_NODES; i++) {
+		for (i = 0; i < osb->max_nodes; i++) {
 			if (free_ext_node[i])
 				kfree(free_ext_node[i]);
 		}
@@ -458,10 +493,10 @@
 
 	bitmap = &osb->cluster_bitmap;
 
-	bitmapblocks = (OCFS_ALIGN(bitmap->validbits, OCFS_BITS_IN_CHUNK) / OCFS_BITS_IN_CHUNK);
+	bitmapblocks = ocfs_blocks_for_bits(osb->sb, bitmap->validbits);
 
-	status = ocfs_read_bhs(osb, osb->vol_layout.bitmap_off, 
-			       bitmapblocks * osb->sect_size, 
+	status = ocfs_read_bhs(osb, osb->bitmap_blkno << osb->sb->s_blocksize_bits, 
+			       bitmapblocks << osb->sb->s_blocksize_bits, 
 			       bitmap->chunk, 0, NULL);
 	if (status < 0) {
 		LOG_ERROR_STATUS(status);
@@ -469,7 +504,9 @@
 	}
 
 	for (i = 0; i < freelog->num_updates; i++)
-		ocfs_clear_bits(handle, bitmap, freelog->update[i].file_off, freelog->update[i].length);
+		ocfs_clear_bits(osb->sb, handle, bitmap,
+				freelog->update[i].file_off,
+				freelog->update[i].length);
 
 	status = 0;
 bail:
@@ -481,19 +518,22 @@
  * ocfs_free_vol_block()
  *
  */
-static int ocfs_free_vol_block (ocfs_super * osb, ocfs_journal_handle *handle, ocfs_free_rec * FreeLog, __u32 NodeNum, __u32 Type)
+static int ocfs_free_vol_block(ocfs_super *osb,
+			       ocfs_journal_handle *handle,
+			       ocfs_free_rec *FreeLog, __u32 NodeNum,
+			       __u32 Type)
 {
 	int status = 0;
 	__u64 fileSize = 0;
-	__u64 offset = 0;
 	__u64 allocSize = 0;
 	__u32 foundBit = -1;
 	__u32 blockSize = 0, blockSizeBits = 0;
-	__u32 fileId = 0;
+	int file_type;
 	__u32 bitmapblocks = 0;
 	ocfs_alloc_bm AllocBitmap;
 	ocfs_alloc_bm *tmpbitmap = NULL;
 	__u32 i;
+	struct inode *inode = NULL;
 
 	LOG_ENTRY ();
 
@@ -506,20 +546,15 @@
 
 	switch (Type) {
 	    case DISK_ALLOC_EXTENT_NODE:
-		    fileId = OCFS_FILE_FILE_ALLOC_BITMAP + NodeNum;
-		    blockSize = (__u32) osb->vol_layout.file_node_size;
-		    blockSizeBits = osb->file_alloc_bits;
-	
-		    if (!IS_VALID_NODE_NUM (NodeNum)) {
-			    LOG_ERROR_STATUS(status = -EINVAL);
-			    goto leave;
-		    }
+		    file_type = EXTENT_ALLOC_BITMAP_SYSTEM_INODE;
+		    blockSize = osb->sb->s_blocksize;
+		    blockSizeBits = osb->sb->s_blocksize_bits;
 		    break;
 
 	    case DISK_ALLOC_INODE:
-		    fileId = OCFS_INODE_BITMAP + NodeNum;
-		    blockSize = osb->inode_size;
-		    blockSizeBits = osb->inode_alloc_bits;
+		    file_type = INODE_ALLOC_BITMAP_SYSTEM_INODE;
+		    blockSize = osb->sb->s_blocksize;
+		    blockSizeBits = osb->sb->s_blocksize_bits;
 		    break;
 
 	    case DISK_ALLOC_VOLUME:
@@ -536,21 +571,23 @@
 		goto leave;
 	}
 
-	/* Read in the bitmap file for the dir alloc and look
-	   for the required space, if found */
-	status = ocfs_get_system_file_size (osb, fileId, &fileSize, &allocSize);
-	if (status < 0) {
-		LOG_ERROR_STATUS (status);
+	inode = ocfs_get_system_file_inode(osb, file_type, NodeNum);
+	if (!inode) {
+		LOG_ERROR_STATUS (status = -EINVAL);
 		goto leave;
 	}
+	fileSize = inode->i_size;
+	allocSize = OCFS_I(inode)->alloc_size;
 
-	ocfs_initialize_bitmap(&AllocBitmap, fileSize * 8, allocSize * 8);
+	ocfs_initialize_bitmap(osb->sb, &AllocBitmap, fileSize * 8,
+			       allocSize * 8);
 	tmpbitmap = &AllocBitmap;
-	bitmapblocks = (OCFS_ALIGN(tmpbitmap->validbits, OCFS_BITS_IN_CHUNK) / OCFS_BITS_IN_CHUNK);
+	bitmapblocks = ocfs_blocks_for_bits(osb->sb,
+					    tmpbitmap->validbits);
 
-	status = ocfs_read_system_file(osb, fileId, AllocBitmap.chunk,
-				       bitmapblocks * osb->sect_size, 
-				       offset);
+	status = ocfs_read_system_file(osb, file_type, NodeNum,
+				       AllocBitmap.chunk, 
+				       bitmapblocks << osb->sb->s_blocksize_bits);
 	if (status < 0) {
 		LOG_ERROR_STATUS (status);
 		goto leave;
@@ -563,14 +600,15 @@
 		}
 		
 		foundBit = (__u32) (FreeLog->update[i].file_off >> blockSizeBits);
-		ocfs_clear_bits(handle, tmpbitmap, foundBit,
+		ocfs_clear_bits(osb->sb, handle, tmpbitmap, foundBit,
 				(__u32) FreeLog->update[i].length);
 	}
 
 leave:
 	if (tmpbitmap)
 		ocfs_uninitialize_bitmap(tmpbitmap);
-
+	if (inode)
+		iput(inode);
 	LOG_EXIT_STATUS (status);
 	return status;
 }			/* ocfs_free_vol_block */
@@ -579,32 +617,35 @@
 /* ocfs_allocate_new_data_node()
  *
  */
-static int ocfs_allocate_new_data_node (ocfs_super * osb, 
-				 ocfs_file_entry * FileEntry,
-				 __u64 actualDiskOffset, __u64 actualLength, 
-				 struct buffer_head * extent_header_bh, 
-				 __u64 * NewExtentOffset, ocfs_journal_handle *handle, struct inode *inode)
+static int ocfs_allocate_new_data_node(ocfs_super *osb, 
+				       ocfs2_dinode *fe,
+				       u32 new_clusters, 
+				       struct buffer_head *eb_bh, 
+				       u64 *new_eb_blkno,
+				       ocfs_journal_handle *handle,
+				       struct inode *inode)
 {
 	int status = 0;
 	__u8 *buff = NULL;
 	__u32 k, i;
 	__u32 depth;
-	__u32 allocSize;
-	__u64 upHeaderPtr;
+	int allocSize;
+	u64 parent_blk;
 	__u64 physicalOffset;
+	u64 phys_blkno;
 	__u64 fileOffset = 0;
-	__u64 numSectorsAlloc = 0;
-	__u64 lastExtPointer;
-	ocfs_extent_group* extent_header = NULL;
-	struct buffer_head **header_bhs = NULL;
+	int new_blocks = 0;
+	ocfs2_extent_block *eb = NULL;
+	ocfs2_extent_list *el1, *el2 = NULL;
+	struct buffer_head **eb_bhs = NULL;
 	struct buffer_head *bh = NULL;
 	int bh_locked = 0;
 	int size;
 
 	LOG_ENTRY ();
 	
-	if (extent_header_bh) {
-		status = ocfs_journal_access(handle, extent_header_bh, 
+	if (eb_bh) {
+		status = ocfs_journal_access(handle, eb_bh, 
 					     OCFS_JOURNAL_ACCESS_WRITE);
 
 		if (status < 0) {
@@ -612,169 +653,171 @@
 			goto finally;
 		}
 
-		extent_header = OCFS_BH_GET_DATA_WRITE(extent_header_bh);
+		eb = OCFS_BH_GET_DATA_WRITE(eb_bh);
+		el1 = &eb->h_list;
 		bh_locked = 1;
 	}
-	if (extent_header != NULL) {
-		depth = extent_header->granularity;
-		upHeaderPtr = extent_header->this_ext;
-	} else {
-		depth = FileEntry->granularity;
-		upHeaderPtr = FileEntry->this_sector;
-	}
+	else
+		el1 = &fe->id2.i_list;
 
-	numSectorsAlloc = NUM_SECTORS_IN_LEAF_NODE + depth;
-	allocSize = (numSectorsAlloc * osb->sect_size);
+	depth = el1->l_tree_depth;
 
+	if (eb != NULL)
+		parent_blk = eb->h_blkno;
+	else
+		parent_blk = fe->i_blkno;
+
+	new_blocks = depth + 1;
+	allocSize = new_blocks << osb->sb->s_blocksize_bits;
+
 	/* allocate contiguous blocks on disk */
-	status = ocfs_alloc_node_block (osb, allocSize, &physicalOffset, 
-					&fileOffset, osb->node_num, 
-					DISK_ALLOC_EXTENT_NODE, handle);
+	status = ocfs_alloc_node_block(osb, allocSize, &physicalOffset, 
+				       &fileOffset, osb->node_num, 
+				       DISK_ALLOC_EXTENT_NODE, handle);
 	if (status < 0) {
 		LOG_ERROR_STATUS (status);
 		goto finally;
 	}
+	phys_blkno = physicalOffset >> osb->sb->s_blocksize_bits;
 
-	size = sizeof(struct buffer_head *) * numSectorsAlloc;
-	header_bhs = ocfs_malloc(size);
-	if (header_bhs == NULL) {
+	size = sizeof(struct buffer_head *) * new_blocks;
+	eb_bhs = kmalloc(size, GFP_KERNEL);
+	if (eb_bhs == NULL) {
 		status = -ENOMEM;
 		LOG_ERROR_STATUS(status);
 		goto finally;
 	}
-	memset(header_bhs, 0, size);
+	memset(eb_bhs, 0, size);
 
-	status = ocfs_read_bhs(osb, physicalOffset, numSectorsAlloc * osb->sect_size, header_bhs, OCFS_BH_CACHED, inode);
+	status = ocfs_read_bhs(osb, physicalOffset,
+			       (u64)new_blocks << osb->sb->s_blocksize_bits,
+			       eb_bhs, OCFS_BH_CACHED, inode);
 	if (status < 0) {
 		LOG_ERROR_STATUS (status);
 		goto finally;
 	}
 
 	/* zero them all out */
-	for(i = 0; i < numSectorsAlloc; i++) {
-		status = ocfs_journal_access(handle, header_bhs[i], 
+	for(i = 0; i < new_blocks; i++) {
+		status = ocfs_journal_access(handle, eb_bhs[i], 
 					     OCFS_JOURNAL_ACCESS_CREATE);
 		if (status < 0) {
 			LOG_ERROR_STATUS(status);
 			goto finally;
 		}
 
-		buff = OCFS_BH_GET_DATA_WRITE(header_bhs[i]);
-		memset(buff, 0, osb->sect_size);
-		set_buffer_uptodate(header_bhs[i]);
-		OCFS_BH_PUT_DATA(header_bhs[i]);
+		buff = OCFS_BH_GET_DATA_WRITE(eb_bhs[i]);
+		memset(buff, 0, osb->sb->s_blocksize);
+		set_buffer_uptodate(eb_bhs[i]);
+		OCFS_BH_PUT_DATA(eb_bhs[i]);
 	}
 	
-	if (extent_header != NULL) {
-		k = extent_header->next_free_ext;
-		extent_header->extents[k].file_off = FileEntry->alloc_size;
-		extent_header->extents[k].num_bytes = actualLength;
-		extent_header->extents[k].disk_off = physicalOffset;
-		extent_header->next_free_ext++;
-	} else {
-		k = FileEntry->next_free_ext;
-		FileEntry->extents[k].file_off = FileEntry->alloc_size;
-		FileEntry->extents[k].num_bytes = actualLength;
-		FileEntry->extents[k].disk_off = physicalOffset;
-		FileEntry->next_free_ext++;
-	}
+	k = el1->l_next_free_rec;
+	el1->l_recs[k].e_cpos = fe->i_clusters;
+	el1->l_recs[k].e_clusters = new_clusters;
+	el1->l_recs[k].e_blkno = phys_blkno;
+	el1->l_next_free_rec++;
 
-
-	lastExtPointer = FileEntry->last_ext_ptr;
-
 	/* Fill in all the headers and the leaf */
 	for (i = 0; i <= depth; i++) {
-		ocfs_extent_group *ext;
-		ext = OCFS_BH_GET_DATA_WRITE(header_bhs[i]);
+		ocfs2_extent_block *eb;
 
-		ext->last_ext_ptr = lastExtPointer;
-		ext->up_hdr_node_ptr = upHeaderPtr;
-		ext->next_free_ext = 1;
-		ext->alloc_file_off = fileOffset + (osb->sect_size * i);
-		ext->alloc_node = osb->node_num;
-		ext->extents[0].file_off = FileEntry->alloc_size;
-		ext->extents[0].num_bytes = actualLength;
-		ext->this_ext = physicalOffset + (osb->sect_size * i);
+		eb = OCFS_BH_GET_DATA_WRITE(eb_bhs[i]);
 
+		eb->h_parent_blk = parent_blk;
+		eb->h_suballoc_blkno =
+			(fileOffset >> osb->sb->s_blocksize_bits) + i;
+		eb->h_suballoc_node = osb->node_num;
+		eb->h_blkno =
+			(physicalOffset >> osb->sb->s_blocksize_bits) + i;
+		strcpy(eb->h_signature, OCFS2_EXTENT_BLOCK_SIGNATURE);
+
+		el2 = &eb->h_list;
+		el2->l_count = ocfs2_extent_recs_per_eb(osb->sb);
+		el2->l_next_free_rec = 1;
+		el2->l_recs[0].e_cpos = fe->i_clusters;
+		el2->l_recs[0].e_clusters = new_clusters;
+		el2->l_tree_depth = (depth - 1 - i);
+
 		if (i != depth) {
 			/* fill in each header */
-			ext->type = OCFS_EXTENT_HEADER;
-			ext->extents[0].disk_off = physicalOffset + (osb->sect_size * (i + 1));
-			ext->granularity = (depth - 1 - i);
-			strcpy (ext->signature, OCFS_EXTENT_HEADER_SIGNATURE);
+			el2->l_recs[0].e_blkno = phys_blkno + (i + 1);
 		} else {
 			/* fill in the leaf */
-			ext->type = OCFS_EXTENT_DATA; 
-			ext->extents[0].disk_off = actualDiskOffset;
-			ext->curr_sect = 1; 
-			ext->max_sects = NUM_SECTORS_IN_LEAF_NODE;
-			strcpy (ext->signature, OCFS_EXTENT_DATA_SIGNATURE);
-			(*NewExtentOffset) = FileEntry->last_ext_ptr = ext->this_ext;
+			el2->l_recs[0].e_blkno = phys_blkno;
+			*new_eb_blkno = fe->i_last_eb_blk =
+				eb->h_blkno;
 		}
 
-		upHeaderPtr = lastExtPointer = ext->this_ext;
-		OCFS_BH_PUT_DATA(header_bhs[i]);
+		parent_blk = eb->h_blkno;
+		OCFS_BH_PUT_DATA(eb_bhs[i]);
 	}
 
-	for(i = 0; i < numSectorsAlloc; i++) {
-		status = ocfs_journal_dirty(handle, header_bhs[i]);
+	for(i = 0; i < new_blocks; i++) {
+		status = ocfs_journal_dirty(handle, eb_bhs[i]);
 		if (status < 0) {
 			LOG_ERROR_STATUS (status);
 			goto finally;
 		}
 	}
 
-	if (extent_header != NULL) {
+	if (eb != NULL) {
 		/* both needed below in for loop */
-		__u64 up_hdr_node_ptr = extent_header->up_hdr_node_ptr;
-		int granularity = extent_header->granularity;
+		u64 tmp_blk = eb->h_parent_blk;
+		int tree_depth = el1->l_tree_depth;
 
-		OCFS_BH_PUT_DATA(extent_header_bh);
+		OCFS_BH_PUT_DATA(eb_bh);
 		bh_locked = 0;
-		extent_header = NULL;
+		eb = NULL;
 
-		status = ocfs_journal_dirty(handle, extent_header_bh);
+	       	el1 = &fe->id2.i_list;
+
+		status = ocfs_journal_dirty(handle, eb_bh);
 		if (status < 0) {
 			LOG_ERROR_STATUS (status);
 			goto finally;
 		}
 
 		/* gotta fix up his parent extents now. We totally
-		 * reuse the extent_header variable now as it's no
-		 * longer needed for it's original purpose. */
-		for (i = granularity + 1; i < FileEntry->granularity; i++) {
+		 * reuse the eb and el2 variables now as they're no
+		 * longer needed for their original purpose. */
+		for (i = tree_depth + 1; i < el1->l_tree_depth; i++) {
 			bh = NULL;
-			status = ocfs_read_bh (osb, up_hdr_node_ptr, &bh, OCFS_BH_COND_CACHED, inode);
+			status = ocfs_read_bh(osb,
+					      tmp_blk << osb->sb->s_blocksize_bits,
+					      &bh, OCFS_BH_COND_CACHED,
+					      inode);
 			if (status < 0) {
 				LOG_ERROR_STATUS (status);
 				goto finally;
 			}
 			status = ocfs_journal_access(handle, bh, 
-						    OCFS_JOURNAL_ACCESS_WRITE);
+						     OCFS_JOURNAL_ACCESS_WRITE);
 			if (status < 0) {
 				LOG_ERROR_STATUS(status);
 				goto finally;
 			}
 
-			extent_header = OCFS_BH_GET_DATA_WRITE(bh);
-			if (!IS_VALID_EXTENT_HEADER(extent_header)) {
+			eb = OCFS_BH_GET_DATA_WRITE(bh);
+			if (!IS_VALID_EXTENT_BLOCK(eb)) {
 				OCFS_BH_PUT_DATA(bh);
 				brelse(bh);
 				LOG_ERROR_STATUS (status = -EINVAL);
 				goto finally;
 			}
+			el2 = &eb->h_list;
 
-			if (extent_header->next_free_ext == 0) {
+			if (el2->l_next_free_rec == 0) {
 				OCFS_BH_PUT_DATA(bh);
 				brelse(bh);
 				LOG_ERROR_STATUS (status = -EFAIL);
 				goto finally;
 			}
 
-			k = extent_header->next_free_ext - 1;
-			extent_header->extents[k].num_bytes += actualLength;
+			k = el2->l_next_free_rec - 1;
+			el2->l_recs[k].e_clusters += new_clusters;
 
-			up_hdr_node_ptr = extent_header->up_hdr_node_ptr;
+			tmp_blk = eb->h_parent_blk;
 			OCFS_BH_PUT_DATA(bh);
 
 			status = ocfs_journal_dirty(handle, bh);
@@ -786,17 +829,17 @@
 
 			brelse(bh);
 		}
-		k = FileEntry->next_free_ext - 1;
-		FileEntry->extents[k].num_bytes += actualLength;
+		k = el1->l_next_free_rec - 1;
+		el1->l_recs[k].e_clusters += new_clusters;
 	}
 finally:
 	if (bh_locked)
-		OCFS_BH_PUT_DATA(extent_header_bh);
-	if (header_bhs) {
-		for (i = 0; i < numSectorsAlloc; i++)
-			if (header_bhs[i])
-				brelse(header_bhs[i]);
-		ocfs_free(header_bhs);
+		OCFS_BH_PUT_DATA(eb_bh);
+	if (eb_bhs) {
+		for (i = 0; i < new_blocks; i++)
+			if (eb_bhs[i])
+				brelse(eb_bhs[i]);
+		kfree(eb_bhs);
 	}
 	LOG_EXIT_STATUS (status);
 	return status;
@@ -805,41 +848,46 @@
 /* ocfs_grow_extent_tree()
  *
  */
-static int ocfs_grow_extent_tree (ocfs_super * osb, struct buffer_head *fe_bh, ocfs_journal_handle *handle, __u64 disk_off, __u64 length, struct inode *inode)
+static int ocfs_grow_extent_tree(ocfs_super *osb,
+				 struct buffer_head *fe_bh,
+				 ocfs_journal_handle *handle,
+				 u64 blkno, u32 new_clusters,
+				 struct inode *inode)
 {
 	int status = 0;
 	__s32 k, i;
-	ocfs_extent_group *OcfsExtent = NULL;
-	ocfs_extent_group *ExtentHeader = NULL;
-	ocfs_extent_group *ext = NULL;
+	ocfs2_extent_block *eb1 = NULL;
+	ocfs2_extent_block *eb2 = NULL;
+	ocfs2_extent_list *ebl, *fel;
 	__u64 physicalOffset;
 	__u64 fileOffset = 0;
-	__u64 upHeaderPtr, lastExtentPtr;
-	__u32 AllocSize;
-	__u64 new_up_hdr_ptr = 0;
+	u64 phys_blkno, parent_blk, last_eb_blkno;
+	u64 new_parent_blk = 0;
 	struct buffer_head **bhs = NULL;
 	int numbhs = 0;
 	void *buf;
-	ocfs_file_entry * fe = NULL, *real_fe = NULL;
+	ocfs2_dinode *fe = NULL, *real_fe = NULL;
 
-	LOG_ENTRY_ARGS("(0x%p, 0x%p, %llu, %llu\n", osb, fe, disk_off, length);
+	LOG_ENTRY_ARGS("(0x%p, 0x%p, %llu, %u\n", osb, fe, blkno,
+		       new_clusters);
 
 	/* too complicated to deal with both reads and writes to the structure     */
 	/* just save off a copy and replace the fe_bh with the new data at the end */
-	fe = ocfs_allocate_file_entry();
+#warning markflar, fix the fe copy
+	fe = kmalloc(osb->sb->s_blocksize, GFP_NOFS);
 	if (fe == NULL) {
 		LOG_ERROR_STATUS (status = -ENOMEM);
 		goto finally;
 	}
 	real_fe = OCFS_BH_GET_DATA_READ(fe_bh);
-	memcpy(fe, real_fe, osb->sect_size);
+	memcpy(fe, real_fe, osb->sb->s_blocksize);
 	OCFS_BH_PUT_DATA(fe_bh);
 	real_fe = NULL;
 	
-	AllocSize = ((fe->granularity + 2) * osb->sect_size);
+	fel = &fe->id2.i_list;
+	numbhs = fel->l_tree_depth + 2;
 
-	numbhs = fe->granularity + 2;
-	bhs = ocfs_malloc(numbhs * sizeof(*bhs));
+	bhs = kmalloc(numbhs * sizeof(*bhs), GFP_KERNEL);
 	if (bhs == NULL) {
 		status = -ENOMEM;
 		LOG_ERROR_STATUS(status = -ENOMEM);
@@ -849,129 +897,137 @@
 
 	/* Allocate the space from the Extent file. This function should */
 	/* return contigous disk blocks requested. */
-	status = ocfs_alloc_node_block (osb, AllocSize, &physicalOffset,
-				 &fileOffset, osb->node_num, 
-					DISK_ALLOC_EXTENT_NODE, handle);
+	status = ocfs_alloc_node_block(osb,
+				       numbhs << osb->sb->s_blocksize_bits,
+				       &physicalOffset, &fileOffset,
+				       osb->node_num, 
+			       	       DISK_ALLOC_EXTENT_NODE, handle);
 	if (status < 0) {
 		LOG_ERROR_STATUS (status);
 		goto finally;
 	}
+	phys_blkno = physicalOffset >> osb->sb->s_blocksize_bits;
 
 	for (i = 0; i < numbhs; i++) {
-		bhs[i] = getblk(OCFS_GET_BLOCKDEV(osb->sb), 
-				(physicalOffset + i * osb->sect_size) >> 
-				  osb->sect_size_bits, 
-				osb->sb->s_blocksize);
+		bhs[i] = sb_getblk(osb->sb, phys_blkno + i);
 		if (bhs[i] == NULL) {
 			status = -EIO;
 			LOG_ERROR_STATUS(status);
 			goto finally;
 		}
 		buf = OCFS_BH_GET_DATA_WRITE(bhs[i]);
-		memset(buf, 0, osb->sect_size);
+		memset(buf, 0, osb->sb->s_blocksize);
 		set_buffer_uptodate(bhs[i]);
 		OCFS_BH_PUT_DATA(bhs[i]);
 	}
 
-	if (physicalOffset == 0) {
+	if (phys_blkno == 0) {
 		LOG_ERROR_STATUS(status = -ENOMEM);
 		goto finally;
 	}
 
-	OcfsExtent = OCFS_BH_GET_DATA_WRITE(bhs[0]);
+	eb1 = OCFS_BH_GET_DATA_WRITE(bhs[0]);
 	/* Copy the File Entry information in to the newly allocated sector */
-	for (k = 0; k < OCFS_MAX_FILE_ENTRY_EXTENTS; k++) {
-		OcfsExtent->extents[k].file_off = fe->extents[k].file_off;
-		OcfsExtent->extents[k].num_bytes = fe->extents[k].num_bytes;
-		OcfsExtent->extents[k].disk_off = fe->extents[k].disk_off;
+	ebl = &eb1->h_list;
+	for (k = 0; k < fel->l_count; k++) {
+		ebl->l_recs[k].e_cpos = fel->l_recs[k].e_cpos;
+		ebl->l_recs[k].e_clusters = fel->l_recs[k].e_clusters;
+		ebl->l_recs[k].e_blkno = fel->l_recs[k].e_blkno;
 	}
 
-	OcfsExtent->last_ext_ptr = fe->last_ext_ptr;
-	lastExtentPtr = fe->last_ext_ptr;
-	OcfsExtent->this_ext = new_up_hdr_ptr = physicalOffset;
-	OcfsExtent->alloc_file_off = fileOffset;
-	OcfsExtent->alloc_node = osb->node_num;
-	OcfsExtent->next_data_ext = 0;
-	fe->local_ext = 0;
-	fe->granularity++;
+	last_eb_blkno = fe->i_last_eb_blk;
+	eb1->h_blkno =
+		physicalOffset >> osb->sb->s_blocksize_bits;
+	new_parent_blk = eb1->h_blkno;
+	eb1->h_suballoc_blkno =
+		fileOffset >> osb->sb->s_blocksize_bits;
+	eb1->h_suballoc_node = osb->node_num;
+	eb1->h_next_leaf_blk = 0;
+	fel->l_tree_depth++;
 
-	LOG_TRACE_ARGS ("Granularity is: %d\n", fe->granularity);
+	LOG_TRACE_ARGS ("Tree depth is: %d\n", fel->l_tree_depth);
 
 	OCFS_BH_PUT_DATA(bhs[0]);
 	
-	/* If granularity is zero now, the for loop will not execute. */
-	/* First time a file is created ,granularity = -1 and local_ext flag */
-	/* is set to true */
+	/* If tree_depth is zero now, the for loop will not execute. */
+	/* First time a file is created, tree_depth = -1 */
 
-	upHeaderPtr = fe->this_sector;
+	parent_blk = fe->i_blkno;
 
-	for (i = 0; i < fe->granularity; i++) {
-		ExtentHeader = OCFS_BH_GET_DATA_WRITE(bhs[i]);
+	for (i = 0; i < fel->l_tree_depth; i++) {
+		eb2 = OCFS_BH_GET_DATA_WRITE(bhs[i]);
+		ebl = &eb2->h_list;
 
-		ExtentHeader->type = OCFS_EXTENT_HEADER;
-		ExtentHeader->granularity = (fe->granularity - 1) - i;
+		ebl->l_tree_depth = (fel->l_tree_depth - 1) - i;
+		ebl->l_count = ocfs2_extent_recs_per_eb(osb->sb);
 
-		strcpy (ExtentHeader->signature, OCFS_EXTENT_HEADER_SIGNATURE);
+		strcpy(eb2->h_signature, OCFS2_EXTENT_BLOCK_SIGNATURE);
 
 		if (i == 0) {
-			ExtentHeader->extents[OCFS_MAX_FILE_ENTRY_EXTENTS].disk_off = physicalOffset + osb->sect_size;
-			ExtentHeader->extents[OCFS_MAX_FILE_ENTRY_EXTENTS].file_off = fe->alloc_size;
-			ExtentHeader->extents[OCFS_MAX_FILE_ENTRY_EXTENTS].num_bytes = length;
+			ebl->l_recs[fel->l_count].e_blkno =
+				phys_blkno + 1;
+			ebl->l_recs[fel->l_count].e_cpos =
+				fe->i_clusters;
+			ebl->l_recs[fel->l_count].e_clusters =
+				new_clusters;
+			ebl->l_next_free_rec = fel->l_count + 1;
 
-			ExtentHeader->next_free_ext = OCFS_MAX_FILE_ENTRY_EXTENTS + 1;
-			ExtentHeader->this_ext = physicalOffset;
-			ExtentHeader->last_ext_ptr = lastExtentPtr;
-			ExtentHeader->up_hdr_node_ptr = upHeaderPtr;
+			eb2->h_blkno =
+				physicalOffset >> osb->sb->s_blocksize_bits;
+			eb2->h_parent_blk = parent_blk;
 
-			upHeaderPtr = ExtentHeader->this_ext;
-			lastExtentPtr = ExtentHeader->this_ext;
+			parent_blk = last_eb_blkno = eb2->h_blkno;
 		} else {
-			ExtentHeader->extents[0].disk_off = physicalOffset + (osb->sect_size * (i + 1));
-			ExtentHeader->extents[0].file_off = fe->alloc_size;
-			ExtentHeader->extents[0].num_bytes = length;
-			ExtentHeader->next_free_ext = 1;
-			ExtentHeader->alloc_file_off = fileOffset + (osb->sect_size * i);
-			ExtentHeader->alloc_node = osb->node_num;
-			ExtentHeader->this_ext = physicalOffset + (osb->sect_size * i);
-			ExtentHeader->up_hdr_node_ptr = upHeaderPtr;
-			ExtentHeader->last_ext_ptr = lastExtentPtr;
+			ebl->l_recs[0].e_blkno = phys_blkno + (i + 1);
+			ebl->l_recs[0].e_cpos = fe->i_clusters;
+			ebl->l_recs[0].e_clusters = new_clusters;
+			ebl->l_next_free_rec = 1;
 
-			upHeaderPtr = ExtentHeader->this_ext;
-			lastExtentPtr = ExtentHeader->this_ext;
+			eb2->h_suballoc_blkno =
+				(fileOffset >> osb->sb->s_blocksize_bits) + i;
+			eb2->h_suballoc_node = osb->node_num;
+			eb2->h_blkno = 
+				(physicalOffset >> osb->sb->s_blocksize_bits) + i;
+			eb2->h_parent_blk = parent_blk;
+
+			parent_blk = last_eb_blkno = eb2->h_blkno;
 		}
 		OCFS_BH_PUT_DATA(bhs[i]);
 	}
 
 	/* Update the Data Segment, which is the last one in our array */
-	OcfsExtent = OCFS_BH_GET_DATA_WRITE(bhs[fe->granularity]);
+	eb1 = OCFS_BH_GET_DATA_WRITE(bhs[fel->l_tree_depth]);
+	ebl = &eb1->h_list;
 
-	i = (fe->granularity) ? 0 : OCFS_MAX_FILE_ENTRY_EXTENTS;
+	i = (fel->l_tree_depth) ? 0 : fel->l_count;
 
-	LOG_TRACE_ARGS ("EntryAvailable is: %d\n", OcfsExtent->next_free_ext);
+	LOG_TRACE_ARGS ("EntryAvailable is: %d\n", ebl->l_next_free_rec);
 
 	/* For the time being we are assuming that the newly allocated Extent */
 	/* will have one more entry to accomodate the latest allocation */
 
-	strcpy (OcfsExtent->signature, OCFS_EXTENT_DATA_SIGNATURE);
+	strcpy(eb1->h_signature, OCFS2_EXTENT_BLOCK_SIGNATURE);
+	ebl->l_tree_depth = -1;
+	ebl->l_count = ocfs2_extent_recs_per_eb(osb->sb);
 
-	OcfsExtent->extents[i].file_off = fe->alloc_size;
-	OcfsExtent->extents[i].num_bytes = length;
-	OcfsExtent->extents[i].disk_off = disk_off;
-	OcfsExtent->curr_sect = 1;
-	OcfsExtent->max_sects = NUM_SECTORS_IN_LEAF_NODE;
-	OcfsExtent->type = OCFS_EXTENT_DATA;
-	OcfsExtent->next_free_ext = i + 1;
-	OcfsExtent->alloc_file_off = fileOffset + (fe->granularity * osb->sect_size);
-	OcfsExtent->alloc_node = osb->node_num;
-	OcfsExtent->this_ext = physicalOffset + (fe->granularity * osb->sect_size);
-	OcfsExtent->up_hdr_node_ptr = upHeaderPtr;
-	OcfsExtent->last_ext_ptr = lastExtentPtr;
-	OcfsExtent->next_data_ext = 0;
+	ebl->l_recs[i].e_cpos = fe->i_clusters;
+	ebl->l_recs[i].e_clusters = new_clusters;
+	ebl->l_recs[i].e_blkno = blkno;
+	ebl->l_next_free_rec = i + 1;
 
-	upHeaderPtr = OcfsExtent->this_ext;
-	lastExtentPtr = OcfsExtent->this_ext;
+	eb1->h_suballoc_blkno =
+		(fileOffset >> osb->sb->s_blocksize_bits) +
+		fel->l_tree_depth;
+	eb1->h_suballoc_node = osb->node_num;
+	eb1->h_blkno =
+		(physicalOffset >> osb->sb->s_blocksize_bits) +
+		fel->l_tree_depth;
+	eb1->h_parent_blk = parent_blk;
+	eb1->h_next_leaf_blk = 0;
+
+	parent_blk = last_eb_blkno = eb1->h_blkno;
 	
-	OCFS_BH_PUT_DATA(bhs[fe->granularity]);
+	OCFS_BH_PUT_DATA(bhs[fel->l_tree_depth]);
 
 	/* This needs to be a sync write OR journalled to be safe. */
 	status = ocfs_write_bhs(osb, bhs, numbhs, 0, inode);
@@ -982,10 +1038,12 @@
 
 	/* Update the Previous Last Data Extent with this new Data
 	 * Extent Pointer */
-	if (fe->last_ext_ptr != 0) {
+	if (fe->i_last_eb_blk != 0) {
 		struct buffer_head *bh = NULL;
 
-		status = ocfs_read_bh(osb, fe->last_ext_ptr, &bh, OCFS_BH_COND_CACHED, inode);
+		status = ocfs_read_bh(osb,
+				      fe->i_last_eb_blk << osb->sb->s_blocksize_bits,
+				      &bh, OCFS_BH_COND_CACHED, inode);
 		if (status < 0) {
 			LOG_ERROR_STATUS (status);
 			goto finally;
@@ -998,15 +1056,16 @@
 			goto finally;
 		}
 
-		ext = OCFS_BH_GET_DATA_WRITE(bh);
-		if (!IS_VALID_EXTENT_DATA(ext)) {
+		eb1 = OCFS_BH_GET_DATA_WRITE(bh);
+		if (!IS_VALID_EXTENT_BLOCK(eb1) ||
+		    (eb1->h_list.l_tree_depth != -1)) {
 			OCFS_BH_PUT_DATA(bh);
 			brelse(bh);
 			LOG_ERROR_STATUS (status = -EINVAL);
 			goto finally;
 		}
 
-		ext->next_data_ext = lastExtentPtr;
+		eb1->h_next_leaf_blk = last_eb_blkno;
 		OCFS_BH_PUT_DATA(bh);
 		
 		status = ocfs_journal_dirty(handle, bh);
@@ -1017,16 +1076,17 @@
 		}
 	}
 
-	/* Update the uphdrptr of the three extents pointed to by fe */
-	if (fe->granularity > 0) {
+	/* Update the h_parent_blk of the extent blocks pointed to by fe */
+	if (fel->l_tree_depth > 0) {
 		int i;
-		__u64 offset;
 		struct buffer_head *bh = NULL;
 
 		status = 0;
-		for (i = 0; i < OCFS_MAX_FILE_ENTRY_EXTENTS; ++i) {
-			offset = fe->extents[i].disk_off;
-			status = ocfs_read_bh(osb, offset, &bh, OCFS_BH_COND_CACHED, inode);
+		for (i = 0; i < fel->l_count; ++i) {
+			status = ocfs_read_bh(osb,
+					      fel->l_recs[i].e_blkno << osb->sb->s_blocksize_bits,
+					      &bh, OCFS_BH_COND_CACHED,
+					      inode);
 			if (status < 0) {
 				LOG_ERROR_STATUS(status);
 				brelse(bh);
@@ -1040,8 +1100,8 @@
 				goto finally;
 			}
 
-			ext = OCFS_BH_GET_DATA_WRITE(bh);
-			ext->up_hdr_node_ptr = new_up_hdr_ptr;
+			eb1 = OCFS_BH_GET_DATA_WRITE(bh);
+			eb1->h_parent_blk = new_parent_blk;
 			OCFS_BH_PUT_DATA(bh);
 
 			status = ocfs_journal_dirty(handle, bh);
@@ -1053,31 +1113,31 @@
 	}
 
 	/* Clear all the extent information from File Entry */
-	for (i = 0; i < OCFS_MAX_FILE_ENTRY_EXTENTS; i++) {
-		fe->extents[i].file_off = 0;
-		fe->extents[i].num_bytes = 0;
-		fe->extents[i].disk_off = 0;
+	for (i = 0; i < fel->l_count; i++) {
+		fel->l_recs[i].e_cpos = 0;
+		fel->l_recs[i].e_clusters = 0;
+		fel->l_recs[i].e_blkno = 0;
 	}
 
 	/* Update the File Entry Extent */
-	fe->local_ext = 0;
 
-	LOG_TRACE_ARGS("fe->alloc_size = %llu\n", fe->alloc_size);
-	fe->extents[0].file_off = 0;
-	fe->extents[0].num_bytes = fe->alloc_size + length;
-	fe->extents[0].disk_off = physicalOffset;
-	fe->last_ext_ptr = lastExtentPtr;
-	fe->next_free_ext = 1;
+	LOG_TRACE_ARGS("fe->i_clusters = %u\n", fe->i_clusters);
+	fel->l_recs[0].e_cpos = 0;  /* FIXME: not needed */
+	fel->l_recs[0].e_clusters = fe->i_clusters + new_clusters;
+	fel->l_recs[0].e_blkno = phys_blkno;
+	fel->l_next_free_rec = 1;
+	fe->i_last_eb_blk = last_eb_blkno;
 
 finally:
 
 	if (fe) {
 		real_fe = OCFS_BH_GET_DATA_WRITE(fe_bh);
-		memcpy(real_fe, fe, osb->sect_size);
+		memcpy(real_fe, fe, osb->sb->s_blocksize);
 		OCFS_BH_PUT_DATA(fe_bh);
 		real_fe = NULL;
-		ocfs_release_file_entry(fe);
+		kfree(fe);
 	}
+#warning Leaking bhs here
 	LOG_EXIT_STATUS (status);
 	return (status);
 }				/* ocfs_grow_extent_tree */
@@ -1087,279 +1147,310 @@
  *
  * You need to be holding node_alloc_sem!
  */
-int ocfs_allocate_extent (ocfs_super * osb, struct buffer_head *fe_bh, ocfs_journal_handle *handle, __u64 actualDiskOffset, __u64 actualLength, struct inode *inode)
+int ocfs_allocate_extent(ocfs_super *osb, struct buffer_head *fe_bh,
+			 ocfs_journal_handle *handle,
+			 u64 blkno, u32 new_clusters,
+			 struct inode *inode)
 {
 	int status = 0;
 	int IncreaseTreeDepth = 0;
 	int k = 0, i;
-	ocfs_extent_group *extent = NULL, *extent_header = NULL;
-	struct buffer_head *extent_bh = NULL, *extent_header_bh = NULL;
+	ocfs2_extent_block *eb1 = NULL, *eb2 = NULL;
+	ocfs2_extent_list *fel, *el1 = NULL, *el2 = NULL;
+	struct buffer_head *eb1_bh = NULL, *eb2_bh = NULL;
 	int UpdateParent = 0;
-	__u64 newExtentOff, up_ptr;
-	ocfs_file_entry * FileEntry = NULL;
+	u64 parent_blk, new_eb_blkno;
+	ocfs2_dinode *fe = NULL;
 
-	LOG_ENTRY_ARGS("(actualDiskOffset=%llu, actualLength=%llu)\n", actualDiskOffset, actualLength);
+	LOG_ENTRY_ARGS("(blkno=%llu, new_clusters=%u, inode=%llu)\n", blkno, new_clusters, 
+		       GET_INODE_FEOFF(inode));
 
-	FileEntry = OCFS_BH_GET_DATA_WRITE(fe_bh);
-	OCFS_ASSERT (FileEntry);
+	fe = OCFS_BH_GET_DATA_WRITE(fe_bh);
+	OCFS_ASSERT(fe);
 
-	if (!IS_VALID_FILE_ENTRY (FileEntry)) {
+	if (!IS_VALID_FILE_ENTRY (fe)) {
 		LOG_ERROR_STATUS(status = -EINVAL);
 		goto finally;
 	}
+	fel = &fe->id2.i_list;
 
-	if (FileEntry->local_ext) {
-		LOG_TRACE_STR("Using local extents");
+	if (fel->l_tree_depth < 0) {
+		LOG_TRACE_ARGS("Using local extents: depth=%d, next_free=%u, l_count=%u\n", 
+			       fel->l_tree_depth, fel->l_next_free_rec, fel->l_count);
 		/* We are still using the local extents of File Entry */
-		if (FileEntry->next_free_ext > OCFS_MAX_FILE_ENTRY_EXTENTS) {
+		if (fel->l_next_free_rec > fel->l_count) {
 			LOG_ERROR_STATUS(status = -EINVAL);
 			goto finally;
 		}
 
-		k = FileEntry->next_free_ext - 1;
-		if (k >= 0 && OCFS_EXTENT_MERGEABLE (&FileEntry->extents[k], actualDiskOffset)) {
+		k = fel->l_next_free_rec - 1;
+		if (k >= 0 &&
+		    ocfs_extent_contig(inode, &fel->l_recs[k], blkno)) {
 			/* See if we can merge the extents and just increase the length */
-			LOG_TRACE_ARGS ("Using local_ext for extent Entry = %u\n", k);
-			FileEntry->extents[k].num_bytes += actualLength;
+			LOG_TRACE_ARGS ("Using local extent for extent Entry = %u\n", k);
+			fel->l_recs[k].e_clusters += new_clusters;
 			goto finally;
 		}
 
 		/* We cannot merge try to give him the next extent */
-		k = FileEntry->next_free_ext;
-		if (k != OCFS_MAX_FILE_ENTRY_EXTENTS) {
+		k = fel->l_next_free_rec;
+		if (k != fel->l_count) {
 			/* file_off for the new extent will be equal
 			 * to the previous allocation size of file */
-			FileEntry->extents[k].file_off = FileEntry->alloc_size;
-			FileEntry->extents[k].num_bytes = actualLength;
-			FileEntry->extents[k].disk_off = actualDiskOffset;
-			FileEntry->next_free_ext++;
+			fel->l_recs[k].e_cpos = fe->i_clusters;
+			fel->l_recs[k].e_clusters = new_clusters;
+			fel->l_recs[k].e_blkno = blkno;
+			fel->l_next_free_rec++;
 			goto finally;
 		}
 		/* We have no more room in the fe, must increase
-		 * granularity */
+		 * tree_depth */
 		IncreaseTreeDepth = 1;
 		goto increase_depth;
 	}
 	LOG_TRACE_STR("Using NON-local extents");
 
 	/*** Nonlocal Extents ***/
-	if (FileEntry->granularity > 3)
-		LOG_ERROR_ARGS ("granularity=%d", FileEntry->granularity);
+	/* This is now less likely with OCFSv2 extent lists */
+	if (fel->l_tree_depth > 3)
+		LOG_ERROR_ARGS ("tree_depth=%d", fel->l_tree_depth);
 	
 	/* This File is no longer using Local Extents */
 	IncreaseTreeDepth = 0;
 	
-	status = ocfs_read_bh(osb, FileEntry->last_ext_ptr, &extent_bh, OCFS_BH_COND_CACHED, inode);
+	status = ocfs_read_bh(osb,
+			      fe->i_last_eb_blk << osb->sb->s_blocksize_bits,
+			      &eb1_bh, OCFS_BH_COND_CACHED, inode);
 	if (status < 0) {
 		LOG_ERROR_STATUS (status = -EINVAL);
 		goto finally;
 	}
-	extent = OCFS_BH_GET_DATA_WRITE(extent_bh);
-	if (!IS_VALID_EXTENT_DATA(extent)) {
+	eb1 = OCFS_BH_GET_DATA_WRITE(eb1_bh);
+	el1 = &eb1->h_list;
+	if (!IS_VALID_EXTENT_BLOCK(eb1) ||
+	    (el1->l_tree_depth != -1)) {
 		LOG_ERROR_STATUS (status = -EINVAL);
 		goto finally;
 	}
 	
-	k = extent->next_free_ext - 1;
-	LOG_TRACE_ARGS ("Using local_ext for extent Entry = %u\n", k);
-	if (extent->next_free_ext < 1)
-		LOG_ERROR_ARGS ("next_free_ext=%d", extent->next_free_ext);
+	k = el1->l_next_free_rec - 1;
+	LOG_TRACE_ARGS ("Using local extent for extent Entry = %u\n", k);
+	if (el1->l_next_free_rec < 1)
+		LOG_ERROR_ARGS ("l_next_free_rec=%d",
+				el1->l_next_free_rec);
 	
 	/* See if we can merge the extents and just increase
 	 * the length */
-	if (k >= 0 && OCFS_EXTENT_MERGEABLE (&(extent->extents[k]), actualDiskOffset)) {
-		OCFS_BH_PUT_DATA(extent_bh);
+	if (k >= 0 &&
+	    ocfs_extent_contig(inode, &(el1->l_recs[k]), blkno)) {
+		OCFS_BH_PUT_DATA(eb1_bh);
 
-		status = ocfs_journal_access(handle, extent_bh, 
+		status = ocfs_journal_access(handle, eb1_bh, 
 					     OCFS_JOURNAL_ACCESS_WRITE);
 		if (status < 0) {
 			LOG_ERROR_STATUS (status);
 			goto finally;
 		}
 
-		extent = OCFS_BH_GET_DATA_WRITE(extent_bh);
+		eb1 = OCFS_BH_GET_DATA_WRITE(eb1_bh);
+		el1 = &eb1->h_list;
 
-		extent->extents[k].num_bytes += actualLength;
+		el1->l_recs[k].e_clusters += new_clusters;
 		status = 0;
 		UpdateParent = 1;
 		goto do_update_parent;
 	}
 
 	/* We cannot merge, give him the next extent */
-	k = extent->next_free_ext;
+	k = el1->l_next_free_rec;
 	
-	if (k != OCFS_MAX_DATA_EXTENTS) {
+	if (k != el1->l_count) {
 		/* we can just add next extent */
-		OCFS_BH_PUT_DATA(extent_bh);
+		OCFS_BH_PUT_DATA(eb1_bh);
 
-		status = ocfs_journal_access(handle, extent_bh, 
+		status = ocfs_journal_access(handle, eb1_bh, 
 					     OCFS_JOURNAL_ACCESS_WRITE);
 		if (status < 0) {
 			LOG_ERROR_STATUS (status);
 			goto finally;
 		}
 
-		extent = OCFS_BH_GET_DATA_WRITE(extent_bh);
+		eb1 = OCFS_BH_GET_DATA_WRITE(eb1_bh);
+		el1 = &eb1->h_list;
 
-		extent->extents[k].file_off = FileEntry->alloc_size;
-		extent->extents[k].num_bytes = actualLength;
-		extent->extents[k].disk_off = actualDiskOffset;
-		extent->next_free_ext++;
+		el1->l_recs[k].e_cpos = fe->i_clusters;
+		el1->l_recs[k].e_clusters = new_clusters;
+		el1->l_recs[k].e_blkno = blkno;
+		el1->l_next_free_rec++;
 		UpdateParent = 1;
 	} else {
-		__u64 up_hdr_node_ptr = 0;
-		
 		/* Read the last extent and keep traversing
 		 * upward till we find a free extent or we are
 		 * at the top and need to create another
 		 * level. */
-		if (FileEntry->granularity > 0)
-			up_hdr_node_ptr = extent->up_hdr_node_ptr;
+		if (fel->l_tree_depth > 0)
+			parent_blk = eb1->h_parent_blk;
+		else
+			parent_blk = 0;
 
-		for (i = 0; i < FileEntry->granularity; i++) {
+		for (i = 0; i < fel->l_tree_depth; i++) {
 			/* if we loop back around */
-			if (extent_header) {
-				OCFS_BH_PUT_DATA(extent_header_bh);
-				brelse(extent_header_bh);
-				extent_header = NULL;
-				extent_header_bh =NULL;
+			if (eb2) {
+				OCFS_BH_PUT_DATA(eb2_bh);
+				brelse(eb2_bh);
+				eb2 = NULL;
+				el2 = NULL;
+				eb2_bh =NULL;
 			}
-			status = ocfs_read_bh (osb, up_hdr_node_ptr, &extent_header_bh, OCFS_BH_COND_CACHED, inode); 
+			status = ocfs_read_bh(osb,
+					      parent_blk << osb->sb->s_blocksize_bits,
+					      &eb2_bh,
+					      OCFS_BH_COND_CACHED,
+					      inode); 
 			if (status < 0) {
 				LOG_ERROR_STATUS (status);
 				goto finally;
 			}
-			extent_header = OCFS_BH_GET_DATA_READ(extent_header_bh);
-			if (!IS_VALID_EXTENT_HEADER(extent_header)) {
+			eb2 = OCFS_BH_GET_DATA_READ(eb2_bh);
+			el2 = &eb2->h_list;
+			if (!IS_VALID_EXTENT_BLOCK(eb2) ||
+			    (el2->l_tree_depth < 0)) {
 				LOG_ERROR_STATUS (status = -EINVAL);
 				goto finally;
 			}
 			
-			if (extent_header->granularity != i ||
-			    extent_header->next_free_ext > OCFS_MAX_DATA_EXTENTS) {
+			if ((el2->l_tree_depth != i) ||
+			    (el2->l_next_free_rec > el2->l_count)) {
 				LOG_ERROR_STATUS(status = -EINVAL);
 				goto finally;
 			}
 			
-			if (extent_header->next_free_ext != OCFS_MAX_DATA_EXTENTS)
+			if (el2->l_next_free_rec != el2->l_count)
 				break;
 			
-			up_hdr_node_ptr = extent_header->up_hdr_node_ptr;
-		} /* for (i = 0; i < FileEntry->granularity; i++) */
+			parent_blk = eb2->h_parent_blk;
+		} /* for (i = 0; i < fel->l_tree_depth; i++) */
 		
-		if (extent_header) {
-			OCFS_BH_PUT_DATA(extent_header_bh);
-			extent_header = NULL;
+		if (eb2) {
+			OCFS_BH_PUT_DATA(eb2_bh);
+			eb2 = NULL;
+			el2 = NULL;
 			/* we may still need the bh so don't brelse */
 		}
 
 		/* if we got to the top, then we're at the FE. Check
 		 * if the FE is full -- if so, then we need to
-		 * increase the granularity. */
-		if (i == FileEntry->granularity &&
-		    FileEntry->next_free_ext == OCFS_MAX_FILE_ENTRY_EXTENTS) {
+		 * increase the tree_depth. */
+		if ((i == fel->l_tree_depth) &&
+		    (fel->l_next_free_rec == fel->l_count)) {
 			IncreaseTreeDepth = 1;
 			goto increase_depth;
 		}
 		/* ok, we need to add a branch. pass in NULL
 		 * if we need a whole branch, otherwise the
 		 * extent which needs the new leaf */
-		status = ocfs_allocate_new_data_node (osb, FileEntry, 
-						      actualDiskOffset,
-						      actualLength, 
-						      extent_header_bh,
-						      &newExtentOff, handle, inode);
+		status = ocfs_allocate_new_data_node(osb, fe, 
+						     new_clusters, 
+						     eb2_bh,
+						     &new_eb_blkno,
+						     handle, inode);
 		if (status < 0) {
 			LOG_ERROR_STATUS(status);
 			goto finally;
 		}
 
-		OCFS_BH_PUT_DATA(extent_bh);
+		OCFS_BH_PUT_DATA(eb1_bh);
 
-		status = ocfs_journal_access(handle, extent_bh, 
+		status = ocfs_journal_access(handle, eb1_bh, 
 					     OCFS_JOURNAL_ACCESS_WRITE);
 		if (status < 0) {
 			LOG_ERROR_STATUS (status);
 			goto finally;
 		}
 
-		extent = OCFS_BH_GET_DATA_WRITE(extent_bh);
+		eb1 = OCFS_BH_GET_DATA_WRITE(eb1_bh);
 
-		extent->next_data_ext = newExtentOff;
-		FileEntry->last_ext_ptr = newExtentOff;
+		eb1->h_next_leaf_blk = fe->i_last_eb_blk = new_eb_blkno;
 	}
 	
 do_update_parent:
-	/* before we put the variable away, save off up_ptr as
+	/* before we put the variable away, save off parent_blk as
 	 * we may need it if we update parent */
-	up_ptr = extent->up_hdr_node_ptr;
+	parent_blk = eb1->h_parent_blk;
 	
 	/* gotta put it away to write it ;) */
-	OCFS_BH_PUT_DATA(extent_bh);
-	extent = NULL;
-	status = ocfs_journal_dirty(handle, extent_bh);
+	OCFS_BH_PUT_DATA(eb1_bh);
+	eb1 = NULL;
+	status = ocfs_journal_dirty(handle, eb1_bh);
 	if (status < 0) {
 		LOG_ERROR_STATUS(status);
 		goto finally;
 	}
 	
 	if (!IncreaseTreeDepth && UpdateParent) {
-		for (i = 0; i < FileEntry->granularity; i++) {
+		for (i = 0; i < fel->l_tree_depth; i++) {
 			
 			/* next two if's are for loop around */
-			if (extent_header_bh) {
-				if (extent_header) {
-					OCFS_BH_PUT_DATA(extent_header_bh);
-					extent_header = NULL;
+			if (eb2_bh) {
+				if (eb2) {
+					OCFS_BH_PUT_DATA(eb2_bh);
+					eb2 = NULL;
+					el2 = NULL;
 				}
-				brelse(extent_header_bh);
-				extent_header_bh = NULL;
+				brelse(eb2_bh);
+				eb2_bh = NULL;
 			}
 			/* TODO: Can we do a cached read here? */
-			status = ocfs_read_bh(osb, up_ptr, &extent_header_bh, 
-					      OCFS_BH_COND_CACHED, inode);
+			status = ocfs_read_bh(osb,
+					      parent_blk << osb->sb->s_blocksize_bits,
+					      &eb2_bh, 
+					      OCFS_BH_COND_CACHED,
+					      inode);
 			if (status < 0) {
 				LOG_ERROR_STATUS (status);
 				goto finally;
 			}
 
-			status = ocfs_journal_access(handle, extent_header_bh,
-						    OCFS_JOURNAL_ACCESS_WRITE);
+			status = ocfs_journal_access(handle, eb2_bh,
+						     OCFS_JOURNAL_ACCESS_WRITE);
 			if (status < 0) {
 				LOG_ERROR_STATUS(status);
 				goto finally;
 			}
 
-			extent_header = OCFS_BH_GET_DATA_WRITE(extent_header_bh);
-			if (!IS_VALID_EXTENT_HEADER(extent_header)) {
+			eb2 = OCFS_BH_GET_DATA_WRITE(eb2_bh);
+			el2 = &eb2->h_list;
+			if (!IS_VALID_EXTENT_BLOCK(eb2) ||
+			    (el2->l_tree_depth < 0)) {
 				LOG_ERROR_STATUS (status = -EINVAL);
 				goto finally;
 			}
 			
-			if (extent_header->next_free_ext == 0) {
+			if (el2->l_next_free_rec == 0) {
 				LOG_ERROR_STATUS (status = -EFAIL);
 				goto finally;
 			}
 			
-			k = extent_header->next_free_ext - 1;
+			k = el2->l_next_free_rec - 1;
 			
-			extent_header->extents[k].num_bytes += actualLength;
+			el2->l_recs[k].e_clusters += new_clusters;
 			
 			/* gonna need it if we loop around */
-			up_ptr = extent_header->up_hdr_node_ptr;
+			parent_blk = eb2->h_parent_blk;
 			
-			OCFS_BH_PUT_DATA(extent_header_bh);
-			extent_header = NULL;
+			OCFS_BH_PUT_DATA(eb2_bh);
+			eb2 = NULL;
+			el2 = NULL;
 
-			status = ocfs_journal_dirty(handle, extent_header_bh);
+			status = ocfs_journal_dirty(handle, eb2_bh);
 			if (status < 0) {
 				goto finally;
 			}
 		}
 		
-		k = FileEntry->next_free_ext - 1;
+		k = fel->l_next_free_rec - 1;
 		
-		FileEntry->extents[k].num_bytes += actualLength;
+		fel->l_recs[k].e_clusters += new_clusters;
 	}
 	
 	if (status < 0) {
@@ -1370,17 +1461,21 @@
 increase_depth:
 	if (IncreaseTreeDepth) {
 		OCFS_BH_PUT_DATA(fe_bh);
-		FileEntry = NULL;
-		if (extent_bh && extent) {
-			OCFS_BH_PUT_DATA(extent_bh);
-			extent = NULL;
+		fe = NULL;
+		if (eb1_bh && eb1) {
+			OCFS_BH_PUT_DATA(eb1_bh);
+			eb1 = NULL;
+			el1 = NULL;
 		}
-		if (extent_header_bh && extent_header) {
-			OCFS_BH_PUT_DATA(extent_header_bh);
-			extent_header = NULL;
+		if (eb2_bh && eb2) {
+			OCFS_BH_PUT_DATA(eb2_bh);
+			eb2 = NULL;
+			el2 = NULL;
 		}	
 
-		status = ocfs_grow_extent_tree(osb, fe_bh, handle, actualDiskOffset, actualLength, inode);
+		status = ocfs_grow_extent_tree(osb, fe_bh, handle,
+					       blkno, new_clusters,
+					       inode);
 		if (status < 0) {
 			LOG_ERROR_STATUS(status);
 			goto finally;
@@ -1397,33 +1492,34 @@
 		 * ocfs_add_extent_map_entry merges them into a single
 		 * mapping run.So just adding this entry will be
 		 * fine. */
-		if (FileEntry == NULL)
-			FileEntry = OCFS_BH_GET_DATA_READ(fe_bh);
+		if (fe == NULL)
+			fe = OCFS_BH_GET_DATA_READ(fe_bh);
 
-		Vbo = FileEntry->alloc_size;
-		Lbo = actualDiskOffset;
+		Vbo = (u64)fe->i_clusters << osb->s_clustersize_bits;
+		Lbo = blkno << osb->sb->s_blocksize_bits;
 
 		/* Add the Entry to the extent map list */
-		if (!ocfs_add_extent_map_entry (osb, &OCFS_I(inode)->map, Vbo, 
-						Lbo, actualLength))
+		if (!ocfs_add_extent_map_entry(osb, &OCFS_I(inode)->map,
+					       Vbo, Lbo,
+					       (u64)new_clusters << osb->s_clustersize_bits))
 			LOG_ERROR_STATUS (status = -EFAIL);
 	}
 
 	/* Buffers are always null if they haven't been mapped and
 	 * non-null if they have. 
 	 * Buffer heads are non-NULL if they need to be brelsed */
-	if (extent_bh) {
-		if (extent)
-			OCFS_BH_PUT_DATA(extent_bh);
-		brelse(extent_bh);
+	if (eb1_bh) {
+		if (eb1)
+			OCFS_BH_PUT_DATA(eb1_bh);
+		brelse(eb1_bh);
 	}
 
-	if (extent_header_bh) {
-		if (extent_header)
-			OCFS_BH_PUT_DATA(extent_header_bh);
-		brelse(extent_header_bh);
+	if (eb2_bh) {
+		if (eb2)
+			OCFS_BH_PUT_DATA(eb2_bh);
+		brelse(eb2_bh);
 	}
-	if (FileEntry) {
+	if (fe) {
 		OCFS_BH_PUT_DATA(fe_bh);
 	}
 
@@ -1442,16 +1538,21 @@
  * 'flag' seems to be an indicator that (if true) tells us that we already know
  *	we're gonna have to clear out all of extarr.
  */
-static int _squish_extent_entries(ocfs_super *osb, ocfs_alloc_ext *extarr, __u8 *freeExtent, ocfs_journal_handle *handle, __u64 FileSize, int flag, struct inode *inode) 
+static int _squish_extent_entries(ocfs_super *osb,
+				  ocfs2_extent_rec *extarr,
+				  __u16 *freeExtent,
+				  ocfs_journal_handle *handle,
+				  u32 num_clusters, int flag,
+				  struct inode *inode) 
 {
 	int status = 0;
 	int FirstTime = 1;
-	ocfs_alloc_ext *ext; 
-	__u32 i, csize = osb->cluster_size_bits,
+	__u64 FileSize = (u64)num_clusters << osb->s_clustersize_bits; /* FIXME get rid of this and use num_clusters!! */
+	ocfs2_extent_rec *ext; 
+	__u32 i, csize = osb->s_clustersize_bits,
 	    numBitsAllocated = 0, bitmapOffset = 0, 
 	    firstfree = *freeExtent;
 	__u64 bytes, foff, doff, 
-	    dstart = osb->vol_layout.data_start_off, 
 	    diskOffsetTobeFreed, lengthTobeFreed = 0, 
 	    actualSize = 0, origLength = 0;
 
@@ -1460,12 +1561,12 @@
 
 	firstfree = *freeExtent;
 
-	/* loop through the used alloc_extents */
+	/* loop through the used ocfs2_extent_recs */
 	for (i = 0; i < firstfree; i++) { 
 		ext = &(extarr[i]); 
-		bytes = ext->num_bytes; 
-		foff = ext->file_off; 
-		doff = ext->disk_off; 
+		bytes = (u64)ext->e_clusters << osb->s_clustersize_bits;
+		foff = (u64)ext->e_cpos << osb->s_clustersize_bits; 
+		doff = ext->e_blkno << osb->sb->s_blocksize_bits; 
 		actualSize = (bytes + foff);
 		if (flag || actualSize > FileSize) { 
 			if (flag || foff >= FileSize) { 
@@ -1473,23 +1574,26 @@
 					*freeExtent = i; 
 					FirstTime = 0; 
 				} 
-				numBitsAllocated = (__u32) (bytes>>csize); 
-				bitmapOffset = (__u32) ((doff - dstart) >> csize); 
-				ext->num_bytes = ext->disk_off = ext->file_off = 0; 
+				numBitsAllocated = ext->e_clusters; 
+				bitmapOffset = (__u32) (doff >> csize); 
+				ext->e_clusters = ext->e_blkno =
+					ext->e_cpos = 0; 
 			} else { 
 				if (FirstTime) { 
 					*freeExtent = i + 1; 
 					FirstTime = 0; 
 				} 
 				origLength = bytes; 
-				ext->num_bytes = bytes = FileSize - foff; 
+				bytes = FileSize - foff; 
+				ext->e_clusters =
+					(u32)(bytes >> osb->s_clustersize_bits);
 				lengthTobeFreed = origLength - bytes; 
 				if (lengthTobeFreed == 0) { 
 					continue; 
 				} 
 				numBitsAllocated = (__u32) (lengthTobeFreed >> csize); 
 				diskOffsetTobeFreed = doff + bytes; 
-				bitmapOffset = (__u32) ((diskOffsetTobeFreed - dstart) >> csize); 
+				bitmapOffset = (__u32) (diskOffsetTobeFreed >> csize); 
 			} 
 			status = ocfs_handle_add_commit_bits(handle, 
 							   numBitsAllocated, 
@@ -1510,7 +1614,7 @@
 
 /* used by ocfs_kill_this_tree and ocfs_split_this_tree */
 /* This value needs to be removed in a future version and set to
- * granularity + 1, dynamically */
+ * tree_depth + 1, dynamically */
 #define OCFS_TREE_STACK_SIZE 8
 
 /*
@@ -1518,27 +1622,32 @@
  *
  * Given an extent_group (can be a DAT or header), delete everything,
  * including itself, it's children, and any data blocks they point to.
- * Works fine with any granularity (up to 4, in which case we'd need
+ * Works fine with any tree_depth (up to 4, in which case we'd need
  * more stack space)
  *
  * extent_grp_bh will be unchanged, though it will be marked for
  * deletion in free_head.
  */
 
-/* We can't recurse, so we keep a simple stack of ocfs_extent_groups. */
-static int ocfs_kill_this_tree(ocfs_super *osb, struct buffer_head *extent_grp_bh, ocfs_journal_handle *handle, struct inode *inode) 
+/*
+ * We can't recurse, so we keep a simple stack of ocfs2_extent_blocks.
+ */
+static int ocfs_kill_this_tree(ocfs_super *osb,
+			       struct buffer_head *extent_grp_bh,
+			       ocfs_journal_handle *handle,
+			       struct inode *inode) 
 {
 	int status = -EFAIL;
 	int i;
 	__u32 victim;
-	__u32 csize = osb->cluster_size_bits;
-	__u64 dstart = osb->vol_layout.data_start_off;
 	__u64 tmp_off;
-	__u32 num_sectors = 0, bitmap_offset = 0;
-	ocfs_alloc_ext *ext;
+	u32 num_clusters = 0;
+	__u32 bitmap_offset = 0;
+	ocfs2_extent_rec *ext;
 	struct buffer_head *tmp_bh = NULL;
 	char * stack[OCFS_TREE_STACK_SIZE];
-	ocfs_extent_group * cur_extent; /* convenience, points to TOS */
+	ocfs2_extent_block *cur_eb; /* convenience, points to TOS */
+	ocfs2_extent_list *cur_el;
 	int tos = 0;
 
 	LOG_ENTRY();
@@ -1546,28 +1655,29 @@
 	for (i =0; i < OCFS_TREE_STACK_SIZE; i++)
 		stack[i] = NULL;
 
-	stack[tos] = ocfs_malloc(osb->sect_size);
+	stack[tos] = kmalloc(osb->sb->s_blocksize, GFP_KERNEL);
 	memcpy(stack[tos], OCFS_BH_GET_DATA_READ(extent_grp_bh),
-	       osb->sect_size);
+	       osb->sb->s_blocksize);
 	OCFS_BH_PUT_DATA(extent_grp_bh);
 
 	do {
-		cur_extent = (ocfs_extent_group *) stack[tos];
-
-		if (!IS_VALID_EXTENT_DATA(cur_extent) && 
-		    !IS_VALID_EXTENT_HEADER(cur_extent)) {
-			LOG_ERROR_STR("Invalid extent group!");
+		cur_eb = (ocfs2_extent_block *) stack[tos];
+		if (!IS_VALID_EXTENT_BLOCK(cur_eb)) {
+			LOG_ERROR_STR("Invalid extent block!");
 			goto bail;
 		}
 
-		if (IS_VALID_EXTENT_DATA(cur_extent)) {
-			LOG_TRACE_ARGS("found some data to free (%llu)\n", cur_extent->this_ext);
-			for(i = 0; i < cur_extent->next_free_ext; i++) {
+		cur_el = &cur_eb->h_list;
+
+		if (cur_el->l_tree_depth == -1) {
+			LOG_TRACE_ARGS("found some data to free (%llu)\n", cur_eb->h_blkno);
+			for(i = 0; i < cur_el->l_next_free_rec; i++) {
 				/* Free the data associated with each header */
-				ext = &cur_extent->extents[i];
-				num_sectors = (__u32) (ext->num_bytes >> csize);
-				bitmap_offset = (__u32) ((ext->disk_off - dstart) >> csize);
-				status = ocfs_handle_add_commit_bits(handle, num_sectors, bitmap_offset, -1, DISK_ALLOC_VOLUME);
+				ext = &cur_el->l_recs[i];
+				num_clusters = ext->e_clusters;
+				bitmap_offset =
+					(u32)((ext->e_blkno << osb->sb->s_blocksize_bits) >> osb->s_clustersize_bits);
+				status = ocfs_handle_add_commit_bits(handle, num_clusters, bitmap_offset, -1, DISK_ALLOC_VOLUME);
 				if (status < 0) {
 					LOG_ERROR_STATUS (status);
 					goto bail;
@@ -1578,30 +1688,33 @@
 
 			/* Did we already kill all his children, or
 			 * are they already dead? */
-			if (cur_extent->next_free_ext == 0) {
-				LOG_TRACE_ARGS("Popping this header (%llu)\n", cur_extent->this_ext);
+			if (cur_el->l_next_free_rec == 0) {
+				LOG_TRACE_ARGS("Popping this header (%llu)\n", cur_eb->h_blkno);
 				goto free_meta;
 			}
 
 			/* We're gonna read in our last used extent
 			 * and put him at the top of the stack. We
-			 * also update our next_free_ext so that next
+			 * also update our l_next_free_rec so that next
 			 * time we read in the next to last one and so
 			 * on until we've finished all of them
 			 */
 
 			/* grow the stack, gotta save off a couple
 			 * things 1st. */
-			victim = cur_extent->next_free_ext - 1;
-			tmp_off = cur_extent->extents[victim].disk_off;
-			cur_extent->next_free_ext--;
+			victim = cur_el->l_next_free_rec - 1;
+			tmp_off = cur_el->l_recs[victim].e_blkno;
+			tmp_off <<= osb->sb->s_blocksize_bits;
+			cur_el->l_next_free_rec--;
 
-			cur_extent = NULL;
+			cur_eb = NULL;
+			cur_el = NULL;
 			tos++;
 
 			/* should already be null, but we can do this
 			 * just in case. */
-			stack[tos] = ocfs_malloc(osb->sect_size);
+			stack[tos] = kmalloc(osb->sb->s_blocksize,
+					     GFP_KERNEL);
 
 			status = ocfs_read_bh(osb, tmp_off, &tmp_bh, 
 					      OCFS_BH_COND_CACHED, inode);
@@ -1612,7 +1725,7 @@
 
 			memcpy(stack[tos],
 			       OCFS_BH_GET_DATA_READ(tmp_bh),
-			       osb->sect_size);
+			       osb->sb->s_blocksize);
 			OCFS_BH_PUT_DATA(tmp_bh);
 			brelse(tmp_bh);
 			tmp_bh = NULL;
@@ -1622,15 +1735,19 @@
 
 free_meta:
 		/* Free the metadata associated with this extent group */
-		status = ocfs_handle_add_commit_bits(handle, 1, cur_extent->alloc_file_off, cur_extent->alloc_node, DISK_ALLOC_EXTENT_NODE);
+		status = ocfs_handle_add_commit_bits(handle, 1,
+						     cur_eb->h_suballoc_blkno << osb->sb->s_blocksize_bits,
+						     cur_eb->h_suballoc_node,
+						     DISK_ALLOC_EXTENT_NODE);
 		if (status < 0) {
 			LOG_ERROR_STATUS (status);
 			goto bail;
 		}
 		/* Pop one off the stack */
-		ocfs_free(stack[tos]);
+		kfree(stack[tos]);
 		stack[tos] = NULL;
-		cur_extent = NULL;
+		cur_eb = NULL;
+		cur_el = NULL;
 		tos--;
 	} while (tos >= 0);
 
@@ -1638,7 +1755,7 @@
 bail:
 	for(i = 0; i < OCFS_TREE_STACK_SIZE; i++)
 		if (stack[i])
-			ocfs_free(stack[i]);
+			kfree(stack[i]);
 
 	LOG_EXIT_STATUS (status);
 	return(status);
@@ -1646,43 +1763,44 @@
 
 #ifndef USERSPACE_TOOL
 
-static int ocfs_fix_extent_group(ocfs_super *osb, struct buffer_head *group_bh, struct inode *inode) 
+static int ocfs_fix_extent_block(ocfs_super *osb,
+				 struct buffer_head *eb_bh,
+				 struct inode *inode) 
 {
-	ocfs_alloc_ext *ext;
-	ocfs_extent_group *group = NULL;
-	int status=-EFAIL;
+	ocfs2_extent_block *eb = NULL;
+	ocfs2_extent_list *el;
+	int status = -EFAIL;
 	int i;
 
-	LOG_ENTRY ();
+	LOG_ENTRY();
 
-	if (!group_bh) {
-		LOG_ERROR_STR("Invalid extent group bh (NULL)!");
+	if (!eb_bh) {
+		LOG_ERROR_STR("Invalid extent block bh (NULL)!");
 		goto bail;
 	}
 
-	group = OCFS_BH_GET_DATA_WRITE(group_bh);
+	eb = OCFS_BH_GET_DATA_WRITE(eb_bh);
 
-	if (!IS_VALID_EXTENT_DATA(group) && 
-	    !IS_VALID_EXTENT_HEADER(group)) {
-		LOG_ERROR_STR("Invalid extent group!");
+	if (!IS_VALID_EXTENT_BLOCK(eb)) {
+		LOG_ERROR_STR("Invalid extent block!");
 		goto bail;
 	}
 	
-	ext = group->extents;
+	el = &eb->h_list;
 	
-	for(i=group->next_free_ext; i < OCFS_MAX_DATA_EXTENTS; i++) {
-		ext[i].num_bytes = 0;
-		ext[i].disk_off = 0;
-		ext[i].file_off = 0;
+	for(i = el->l_next_free_rec; i < el->l_count; i++) {
+		el->l_recs[i].e_clusters = 0;
+		el->l_recs[i].e_blkno = 0;
+		el->l_recs[i].e_cpos = 0;
 	}
 
-	OCFS_BH_PUT_DATA(group_bh);
-	group = NULL;
+	OCFS_BH_PUT_DATA(eb_bh);
+	eb = NULL;
 
 	status=0;
 bail:
-	if (group)
-		OCFS_BH_PUT_DATA(group_bh);
+	if (eb)
+		OCFS_BH_PUT_DATA(eb_bh);
 
 	LOG_EXIT_STATUS (status);
 	return(status);
@@ -1699,81 +1817,93 @@
  * write it so that it can.
  * TODO: This function should be split up into a couple smaller ones.
  */
-static int ocfs_split_this_tree(ocfs_super * osb, struct buffer_head *extent_grp_bh, ocfs_journal_handle *handle, ocfs_file_entry *fe, struct inode *inode) 
+static int ocfs_split_this_tree(ocfs_super *osb,
+				struct buffer_head *eb_bh,
+				ocfs_journal_handle *handle,
+				ocfs2_dinode *fe,
+				struct inode *inode) 
 {
 	int status = -EFAIL;
-	__u64 newsize = fe->alloc_size;
-	ocfs_alloc_ext *ext;
+	ocfs2_extent_rec *rec;
 	struct buffer_head * bh_stack[OCFS_TREE_STACK_SIZE];
-	ocfs_extent_group * AllocExtent = NULL;/* convenience, points to TOS */
-	ocfs_extent_group *extent_grp;
+	ocfs2_extent_block *alloc_eb = NULL;  /* convenience, points to TOS */
+	ocfs2_extent_block *eb;
+	ocfs2_extent_list *el;
 	struct buffer_head *tmp_bh = NULL, *tmp_bh2 = NULL;
 	int tos = 0;
 	int i, victim;
-	__u64 bytes, foff, doff, orig_bytes, dstart = osb->vol_layout.data_start_off, total_bytes, tmp_off;
-       	__u32 csize = osb->cluster_size_bits;
-	__u32 num_sectors, bitmap_offset;
+	__u64 bytes, doff, orig_bytes, tmp_off;
+	__u64 total_bytes;  /* FIXME needs to be clusters!!! */
+	__u32 num_clusters, bitmap_offset;
 	int done = 0;
-	int gran = fe->granularity;
+	int depth = fe->id2.i_list.l_tree_depth;
 	int needs_brelse = 0;
 
 	LOG_ENTRY();
 
-	/* This is a similar hack to the one below, untested for gran = 3 files
+	/* This is a similar hack to the one below, untested for depth = 3 files
 	   because I can't recreate one. */
-	if (gran == 3) {
-		LOG_ERROR_STR("Truncating file with granularity 3, this is not tested and may be unsafe!");
-		LOG_TRACE_STR("Found a granularity 3 tree, trimming it.\n");
+	if (depth == 3) {
+		LOG_ERROR_STR("Truncating file with tree_depth 3, this is not tested and may be unsafe!");
+		LOG_TRACE_STR("Found a tree_depth 3 tree, trimming it.\n");
 
-		status = ocfs_journal_access(handle, extent_grp_bh, 
+		status = ocfs_journal_access(handle, eb_bh, 
 					     OCFS_JOURNAL_ACCESS_WRITE);
 		if (status < 0) {
 			LOG_ERROR_STATUS(status);
 			goto bail;
 		}
-		extent_grp = OCFS_BH_GET_DATA_WRITE(extent_grp_bh);
-		for(i = (extent_grp->next_free_ext - 1); i>=0; i--) {
-			ext = &extent_grp->extents[i];
+		eb = OCFS_BH_GET_DATA_WRITE(eb_bh);
+		el = &eb->h_list;
+		for (i = (el->l_next_free_rec - 1); i >= 0; i--) {
+			rec = &el->l_recs[i];
 
 			if (tmp_bh2)
 				brelse(tmp_bh2);
 			tmp_bh2 = NULL;
-			status = ocfs_read_bh(osb, ext->disk_off, &tmp_bh2, 
+			status = ocfs_read_bh(osb,
+					      rec->e_blkno << osb->sb->s_blocksize_bits,
+					      &tmp_bh2, 
 					      OCFS_BH_COND_CACHED, inode);
 			if (status < 0) {
-				OCFS_BH_PUT_DATA(extent_grp_bh);
-				extent_grp = NULL;
+				OCFS_BH_PUT_DATA(eb_bh);
+				eb = NULL;
 				brelse(tmp_bh2);
 				LOG_ERROR_STATUS (status);
 				goto bail;
 			}
 			
-			if (ext->file_off >= newsize) {
+			if (rec->e_cpos >= fe->i_clusters) {
 				/* Trim this whole subtree */
-				status = ocfs_kill_this_tree(osb, tmp_bh2, 
-							     handle, inode);
+				status = ocfs_kill_this_tree(osb,
+							     tmp_bh2, 
+							     handle,
+							     inode);
 				if (status < 0) {
-					OCFS_BH_PUT_DATA(extent_grp_bh);
-					extent_grp = NULL;
+					OCFS_BH_PUT_DATA(eb_bh);
+					eb = NULL;
+					el = NULL;
 					brelse(tmp_bh2);
 					LOG_ERROR_STATUS (status);
 					goto bail;
 				}
-				ext->file_off = 0;
-				ext->disk_off = 0;
-				ext->num_bytes = 0;
-				extent_grp->next_free_ext = i;
+				rec->e_cpos = 0;
+				rec->e_blkno = 0;
+				rec->e_clusters = 0;
+				el->l_next_free_rec = i;
 			} else  { /* This is the one we want to split. */
-				ext->num_bytes = newsize - ext->file_off;
+				rec->e_clusters =
+					fe->i_clusters - rec->e_cpos;
 				break;
 			}
 		}
 
 		/* Write out our new top of the tree duder */
-		OCFS_BH_PUT_DATA(extent_grp_bh);
-		extent_grp = NULL;
+		OCFS_BH_PUT_DATA(eb_bh);
+		eb = NULL;
+		el = NULL;
 
-		status = ocfs_journal_dirty(handle, extent_grp_bh);
+		status = ocfs_journal_dirty(handle, eb_bh);
 		if (status < 0) {
 			LOG_ERROR_STATUS(status);
 			goto bail;
@@ -1784,69 +1914,79 @@
 			LOG_ERROR_STATUS(-EFAIL);
 			goto bail;
 		}
-		extent_grp_bh = tmp_bh2;
+		eb_bh = tmp_bh2;
 
 		/* We want to do the next bit of stuff too */
-		gran = 2;
+		depth = 2;
 		needs_brelse = 1;
 	}
 
 	/* This is a hack, but i have little time to make this function right*/
 	/* get rid of everything from the top level HDR that we can, then
-	   proceeed as if we're granularity 1 (which we know works) */
-	if (gran == 2) {
-		LOG_TRACE_STR("Found a granularity 2 tree, trimming it.\n");
+	   proceed as if we're tree_depth 1 (which we know works) */
+	if (depth == 2) {
+		LOG_TRACE_STR("Found a tree_depth 2 tree, trimming it.\n");
 
-		status = ocfs_journal_access(handle, extent_grp_bh, 
+		status = ocfs_journal_access(handle, eb_bh, 
 					     OCFS_JOURNAL_ACCESS_WRITE);
 		if (status < 0) {
 			LOG_ERROR_STATUS(status);
 			goto bail;
 		}
 
-		extent_grp = OCFS_BH_GET_DATA_WRITE(extent_grp_bh);
-		for(i = (extent_grp->next_free_ext - 1); i>=0; i--) {
-			ext = &extent_grp->extents[i];
+		eb = OCFS_BH_GET_DATA_WRITE(eb_bh);
+		el = &eb->h_list;
+		for(i = (el->l_next_free_rec - 1); i >= 0; i--) {
+			rec = &el->l_recs[i];
 
 			if (tmp_bh)
 				brelse(tmp_bh);
 			tmp_bh = NULL;
 
-			status = ocfs_read_bh(osb, ext->disk_off, &tmp_bh, 
-					      OCFS_BH_COND_CACHED, inode);
+			status = ocfs_read_bh(osb,
+					      rec->e_blkno << osb->sb->s_blocksize_bits,
+					      &tmp_bh, 
+					      OCFS_BH_COND_CACHED,
+					      inode);
 			if (status < 0) {
-				OCFS_BH_PUT_DATA(extent_grp_bh);
-				extent_grp = NULL;
+				OCFS_BH_PUT_DATA(eb_bh);
+				eb = NULL;
+				el = NULL;
 				brelse(tmp_bh);
 				LOG_ERROR_STATUS (status);
 				goto bail;
 			}
 			
-			if (ext->file_off >= newsize) {
+			if (rec->e_cpos >= fe->i_clusters) {
 				/* Trim this whole subtree */
-				status = ocfs_kill_this_tree(osb, tmp_bh, 
-							     handle, inode);
+				status = ocfs_kill_this_tree(osb,
+							     tmp_bh, 
+							     handle,
+							     inode);
 				if (status < 0) {
-					OCFS_BH_PUT_DATA(extent_grp_bh);
-					extent_grp = NULL;
+					OCFS_BH_PUT_DATA(eb_bh);
+					eb = NULL;
+					el = NULL;
 					brelse(tmp_bh);
 					LOG_ERROR_STATUS (status);
 					goto bail;
 				}
-				ext->file_off = 0;
-				ext->disk_off = 0;
-				ext->num_bytes = 0;
-				extent_grp->next_free_ext = i;
+				rec->e_cpos = 0;
+				rec->e_blkno = 0;
+				rec->e_clusters = 0;
+				el->l_next_free_rec = i;
 			} else  { /* This is the one we want to split. */
-				ext->num_bytes = newsize - ext->file_off;
+				rec->e_clusters = 
+					fe->i_clusters - rec->e_cpos;
 				break;
 			}
 		}
 		/* Write out our new top of the tree duder */
-		OCFS_BH_PUT_DATA(extent_grp_bh);
-		extent_grp = NULL;
+		OCFS_BH_PUT_DATA(eb_bh);
+		eb = NULL;
+		el = NULL;
 
-		status = ocfs_journal_dirty(handle, extent_grp_bh);
+		status = ocfs_journal_dirty(handle, eb_bh);
 		if (status < 0) {
 			LOG_ERROR_STATUS(status);
 			goto bail;
@@ -1858,11 +1998,11 @@
 			goto bail;
 		}
 
-		extent_grp_bh = tmp_bh;
+		eb_bh = tmp_bh;
 
-		/* Right now, we don't use 'gran' below here, but just
+		/* Right now, we don't use 'depth' below here, but just
 		 * in case */
-		gran = 1;
+		depth = 1;
 		if (needs_brelse)
 			brelse(tmp_bh2);
 		needs_brelse = 1;
@@ -1871,7 +2011,7 @@
 	for (i =0; i < OCFS_TREE_STACK_SIZE; i++)
 		bh_stack[i] = NULL;
 
-	bh_stack[tos] = extent_grp_bh;
+	bh_stack[tos] = eb_bh;
 
 	/* Ok, find the splitting point (can be a DAT or HDR) */
 	do {
@@ -1885,36 +2025,40 @@
 			goto bail;
 		}
 
-		AllocExtent = OCFS_BH_GET_DATA_WRITE(bh_stack[tos]);
-
-		if (!IS_VALID_EXTENT_DATA(AllocExtent) && 
-		    !IS_VALID_EXTENT_HEADER(AllocExtent)) {
-			LOG_ERROR_STR("Invalid extent group!");
+		alloc_eb = OCFS_BH_GET_DATA_WRITE(bh_stack[tos]);
+		if (!IS_VALID_EXTENT_BLOCK(alloc_eb)) {
+			LOG_ERROR_STR("Invalid extent block!");
 			goto bail;
 		}
 		
-		if (IS_VALID_EXTENT_DATA(AllocExtent)) {
+		el = &alloc_eb->h_list;
+
+		if (el->l_tree_depth == -1) {
 			/* shall we just do away with him? */
-			if (AllocExtent->extents[0].file_off >= newsize) {
-				LOG_TRACE_ARGS("Killing this data extent (%llu)\n", AllocExtent->this_ext);
+			if (el->l_recs[0].e_cpos >= fe->i_clusters) {
+				LOG_TRACE_ARGS("Killing this data extent (%llu)\n", alloc_eb->h_blkno);
 				/* Boundary case - what if this guy is
 				 * the last DAT we should delete
 				 * (i.e., split no more ;) */
 				OCFS_BH_PUT_DATA(bh_stack[tos]);
-				AllocExtent = NULL;
-				status = ocfs_kill_this_tree(osb, bh_stack[tos], handle, inode);
+				alloc_eb = NULL;
+				el = NULL;
+				status = ocfs_kill_this_tree(osb,
+							     bh_stack[tos],
+							     handle,
+							     inode);
 				if (status < 0) {
 					LOG_ERROR_STATUS (status);
 					goto bail;
 				}
 				/* silly, but what to do? */
-				AllocExtent = OCFS_BH_GET_DATA_READ(bh_stack[tos]);
+				alloc_eb = OCFS_BH_GET_DATA_READ(bh_stack[tos]);
 			} else {
 				/* Alright, we know for sure that
 				 * we're splitting in this guy. */
-				LOG_TRACE_ARGS("Splitting this data extent (%llu)\n", AllocExtent->this_ext);
-				fe->last_ext_ptr = AllocExtent->this_ext;
-				AllocExtent->next_data_ext = 0;
+				LOG_TRACE_ARGS("Splitting this data extent (%llu)\n", alloc_eb->h_blkno);
+				fe->i_last_eb_blk = alloc_eb->h_blkno;
+				alloc_eb->h_next_leaf_blk = 0;
 				/* total_bytes is used below to know
 				 * how much total we've whacked off
 				 * this extent*/
@@ -1923,37 +2067,43 @@
 				/* there is a chance the split is at a
 				 * header boundary. this will catch
 				 * it: */
-				ext = &AllocExtent->extents[AllocExtent->next_free_ext - 1];
-				if ((ext->file_off + ext->num_bytes)==newsize){
+				rec = &el->l_recs[el->l_next_free_rec - 1];
+				if ((rec->e_cpos + rec->e_clusters) == fe->i_clusters) {
 					LOG_TRACE_STR("Ok, hit that boundary in the DAT");
 					goto fix_headers;
 				}
 
 				/* Either kill the data or resize it */
-				for(i = (AllocExtent->next_free_ext - 1); i>=0; i--) {
-					ext = &AllocExtent->extents[i];
+				for(i = (el->l_next_free_rec - 1); i >= 0; i--) {
+					rec = &el->l_recs[i];
 
 					/* changed this from > to >= */
 					/* Do we delete it completely? */
-					if (ext->file_off >= newsize) {
-						total_bytes+=ext->num_bytes;
+					if (rec->e_cpos >= fe->i_clusters) {
+						total_bytes += (u64)rec->e_clusters << osb->s_clustersize_bits;
 
-						num_sectors = (__u32) (ext->num_bytes >> csize);
-						bitmap_offset = (__u32) ((ext->disk_off - dstart) >> csize);
-						ext->file_off = 0;
-						ext->num_bytes = 0;
-						ext->disk_off = 0;
-					} else if ((ext->file_off + ext->num_bytes) > newsize) {
+						num_clusters = rec->e_clusters;
+						bitmap_offset = (u32)(((rec->e_blkno << osb->sb->s_blocksize_bits)) >> osb->s_clustersize_bits);
+						rec->e_cpos = 0;
+						rec->e_clusters = 0;
+						rec->e_blkno = 0;
+					} else if ((rec->e_cpos + rec->e_clusters) > fe->i_clusters) {
 						/* Do we shrink it? */
-						orig_bytes = ext->num_bytes;
-						doff = ext->disk_off; 
-						foff = ext->file_off;
-						bytes = ext->num_bytes = newsize - foff;
-						num_sectors = (__u32) ((orig_bytes - bytes) >> csize);
-						bitmap_offset = (__u32) (((doff + bytes) - dstart) >> csize);
+						/* FIXME
+						 * this is soooo a
+						 * hodge-podge of bytes
+						 * and clusters
+						 */
+						orig_bytes = (u64)rec->e_clusters << osb->s_clustersize_bits;
+						num_clusters = rec->e_clusters;
+						doff = rec->e_blkno << osb->sb->s_blocksize_bits; 
+						rec->e_clusters = fe->i_clusters - rec->e_cpos;
+						bytes = (u64)rec->e_clusters << osb->s_clustersize_bits;
+						num_clusters -= rec->e_clusters;
+						bitmap_offset = (u32)((doff + bytes) >> osb->s_clustersize_bits);
 						/* we want to exit the
 						 * for loop now */
-						total_bytes+= (orig_bytes - bytes);
+						total_bytes += (orig_bytes - bytes);
 						done = 1;
 					} else {
 						/* if we get here,
@@ -1961,11 +2111,11 @@
 						 * to actually delete
 						 * *anything* from
 						 * this extent. */
-						LOG_TRACE_ARGS("Not deleting extent %d, disk_off = %llu, num_bytes = %llu, file_off = %llu\n", i, ext->disk_off, ext->num_bytes, ext->file_off);
+						LOG_TRACE_ARGS("Not deleting extent %d, e_blkno = %llu, e_clusters = %u, e_cpos = %u\n", i, rec->e_blkno, rec->e_clusters, rec->e_cpos);
 						done = 1;
 						goto skip_bitmap_add;
 					}
-					status = ocfs_handle_add_commit_bits(handle, num_sectors, bitmap_offset, -1, DISK_ALLOC_VOLUME);
+					status = ocfs_handle_add_commit_bits(handle, num_clusters, bitmap_offset, -1, DISK_ALLOC_VOLUME);
 					if (status < 0) {
 						LOG_ERROR_STATUS (status);
 						goto bail;
@@ -1973,16 +2123,19 @@
 
 skip_bitmap_add:
 					if (done) {
-						AllocExtent->next_free_ext=i+1;
+						el->l_next_free_rec =
+							i + 1;
 						break;
 					}
 				} /* For loop */
 
 				/* Either way, we need to write this back out*/
 				OCFS_BH_PUT_DATA(bh_stack[tos]);
-				AllocExtent = NULL;
+				alloc_eb = NULL;
+				el = NULL;
 
-				status = ocfs_journal_dirty(handle, bh_stack[tos]);
+				status = ocfs_journal_dirty(handle,
+							    bh_stack[tos]);
 				if (status < 0) {
 					LOG_ERROR_STATUS(status);
 					goto bail;
@@ -2000,23 +2153,26 @@
 						goto bail;
 					}
 
-					AllocExtent = OCFS_BH_GET_DATA_WRITE(bh_stack[tos]);
-					victim = AllocExtent->next_free_ext;
-					AllocExtent->next_free_ext++;
+					alloc_eb = OCFS_BH_GET_DATA_WRITE(bh_stack[tos]);
+					el = &alloc_eb->h_list;
+					victim = el->l_next_free_rec;
+					el->l_next_free_rec++;
 					/* need to also update
 					 * numbytes on these guys */
-					ext = &AllocExtent->extents[victim];
-					ext->num_bytes-= total_bytes;
+					rec = &el->l_recs[victim];
+					rec->e_clusters -=
+						(u32)(total_bytes >> osb->s_clustersize_bits);
 					OCFS_BH_PUT_DATA(bh_stack[tos]);
-					AllocExtent = NULL;
-					status = ocfs_fix_extent_group(osb, 
+					alloc_eb = NULL;
+					el = NULL;
+					status = ocfs_fix_extent_block(osb, 
 								bh_stack[tos], inode);
 					if (status < 0) {
 						LOG_ERROR_STATUS(status);
 						goto bail;
 					}
 					status = ocfs_journal_dirty(handle, 
-								bh_stack[tos]);
+								    bh_stack[tos]);
 					if (status < 0) {
 						LOG_ERROR_STATUS(status);
 						goto bail;
@@ -2031,38 +2187,40 @@
 
 			/* Did we already kill all his children, or
 			 * are they already dead? */
-			if (AllocExtent->next_free_ext == 0) {
+			if (el->l_next_free_rec == 0) {
 				/*Ok, we're done with this guy, pop the stack*/
 				LOG_TRACE_ARGS("Popping this header (%llu)\n",
-					       AllocExtent->this_ext);
+					       alloc_eb->h_blkno);
 
-				status = ocfs_handle_add_commit_bits(handle, 1, AllocExtent->alloc_file_off, AllocExtent->alloc_node, DISK_ALLOC_EXTENT_NODE);
+				status = ocfs_handle_add_commit_bits(handle, 1, alloc_eb->h_suballoc_blkno << osb->sb->s_blocksize_bits, alloc_eb->h_suballoc_node, DISK_ALLOC_EXTENT_NODE);
 				if (status < 0) {
 					LOG_ERROR_STATUS (status);
 					goto bail;
 				}
 				OCFS_BH_PUT_DATA(bh_stack[tos]);
 				brelse(bh_stack[tos]);
-				AllocExtent = NULL;
+				alloc_eb = NULL;
+				el = NULL;
 				bh_stack[tos] = NULL;
 				tos--;
 				continue;
 			}
 			/* changed this from > to >= */
 			/* Do we just delete this whole part of the tree? */
-			if (AllocExtent->extents[0].file_off >= newsize) {
+			if (el->l_recs[0].e_cpos >= fe->i_clusters) {
 				LOG_TRACE_ARGS("whacking this tree: (%llu)\n",
-					       AllocExtent->this_ext);
+					       alloc_eb->h_blkno);
 
-				if (AllocExtent->extents[0].file_off ==newsize)
+				if (el->l_recs[0].e_cpos == fe->i_clusters)
 					done = 1;
 
 				OCFS_BH_PUT_DATA(bh_stack[tos]);
-				AllocExtent = NULL;
+				alloc_eb = NULL;
 				ocfs_kill_this_tree(osb, bh_stack[tos], 
 						    handle, inode);
 				brelse(bh_stack[tos]);
-				AllocExtent = NULL;
+				alloc_eb = NULL;
+				el = NULL;
 				bh_stack[tos] = NULL;
 				tos--;
 				if (tos < 0) {
@@ -2071,7 +2229,7 @@
 				}
 				/* I just have to fix my parent,
 				 * right? Yes, but only because our
-				 * max granularity is 2. if it were
+				 * max tree_depth is 2. if it were
 				 * more, we'd have to fix his
 				 * parents parent. */
 				status = ocfs_journal_access(handle, bh_stack[tos], OCFS_JOURNAL_ACCESS_WRITE);
@@ -2080,15 +2238,17 @@
 					goto bail;
 				}
 
-				AllocExtent = OCFS_BH_GET_DATA_WRITE(bh_stack[tos]);
+				alloc_eb = OCFS_BH_GET_DATA_WRITE(bh_stack[tos]);
+				el = &alloc_eb->h_list;
 
-				victim = AllocExtent->next_free_ext;
-				AllocExtent->extents[victim].file_off = 0;
-				AllocExtent->extents[victim].num_bytes = 0;
-				AllocExtent->extents[victim].disk_off = 0;
-				AllocExtent->next_free_ext--;
+				victim = el->l_next_free_rec;
+				el->l_recs[victim].e_cpos = 0;
+				el->l_recs[victim].e_clusters = 0;
+				el->l_recs[victim].e_blkno = 0;
+				el->l_next_free_rec--;
 				OCFS_BH_PUT_DATA(bh_stack[tos]);
-				AllocExtent = NULL;
+				alloc_eb = NULL;
+				el = NULL;
 				/* Here's an interesting boundary
 				 * case. What if we're truncating on a
 				 * boundary between two headers and
@@ -2096,12 +2256,12 @@
 				 * that case we're done, but need to 
 				 * write the parent out before we leave
 				 * again, this bit of code depends on 
-				 * granularity of 2. */
+				 * tree_depth of 2. */
 				if (done) {
 					LOG_TRACE_STR("Found a boundary "     \
 						      "header, almost done "  \
 						      " (gonna quit)");
-					status = ocfs_fix_extent_group(osb, 
+					status = ocfs_fix_extent_block(osb, 
 								bh_stack[tos], inode);
 					if (status < 0) {
 						LOG_ERROR_STATUS(status);
@@ -2135,14 +2295,16 @@
 				continue;
 			}
 
-			AllocExtent->next_free_ext--;
-			victim = AllocExtent->next_free_ext;
-			ext = &AllocExtent->extents[victim];
-			tmp_off = ext->disk_off;
+			el->l_next_free_rec--;
+			victim = el->l_next_free_rec;
+			rec = &el->l_recs[victim];
+			tmp_off = rec->e_blkno << osb->sb->s_blocksize_bits;
 			OCFS_BH_PUT_DATA(bh_stack[tos]);
-			AllocExtent = NULL;
+			alloc_eb = NULL;
+			el = NULL;
 
-			status = ocfs_journal_dirty(handle, bh_stack[tos]);
+			status = ocfs_journal_dirty(handle,
+						    bh_stack[tos]);
 			if (status < 0) {
 				LOG_ERROR_STATUS (status);
 				goto bail;
@@ -2155,8 +2317,10 @@
 				LOG_ERROR_STR("uhoh, not brelsing a buffer " \
 					      "on our stack!\n");
 
-			status = ocfs_read_bh(osb, tmp_off, &bh_stack[tos], 
-					      OCFS_BH_COND_CACHED, inode);
+			status = ocfs_read_bh(osb, tmp_off,
+					      &bh_stack[tos], 
+					      OCFS_BH_COND_CACHED,
+					      inode);
 			if (status < 0) {
 				LOG_ERROR_STATUS (status);
 				goto bail;
@@ -2165,11 +2329,12 @@
 			/* We only want to free on our way up the tree */
 			continue;
 		}
-		if (AllocExtent)
+		if (alloc_eb)
 			OCFS_BH_PUT_DATA(bh_stack[tos]);
 		brelse(bh_stack[tos]);
 		bh_stack[tos] = NULL;
-		AllocExtent = NULL;
+		alloc_eb = NULL;
+		el = NULL;
 		tos--;
 	} while (tos >= 0);
 	
@@ -2181,7 +2346,7 @@
 bail:
 	/* brelse the stack. We only brelse the bottom of the stack if
 	 * we know for sure that it wasn't passed from the caller */
-	if (AllocExtent)
+	if (alloc_eb)
 		OCFS_BH_PUT_DATA(bh_stack[tos]);
 	if (needs_brelse)
 		brelse(bh_stack[0]);
@@ -2195,82 +2360,98 @@
 
 
 /*
- * ocfs_update_last_ext_ptr
+ * ocfs_update_last_eb_blk
  *
- *  Travel all the way to the rightmost DAT and set fe->last_ext_ptr
+ *  Travel all the way to the rightmost DAT and set fe->i_last_eb_blk
  *  to it.  
  *
  *  We do cached reads here because we ought to have already read the
  *  various ext headers and dats off the system previously in the
  *  truncate path.
  */
-static int ocfs_update_last_ext_ptr(ocfs_super *osb, ocfs_file_entry *fe, struct inode *inode) 
+static int ocfs_update_last_eb_blk(ocfs_super *osb,
+				   ocfs2_dinode *fe,
+				   struct inode *inode) 
 {
 	int status = -EFAIL;
-	ocfs_extent_group *extent = NULL;
-	struct buffer_head *extent_bh = NULL;
-	__u64 next_ext;
+	struct buffer_head *eb_bh = NULL;
+	ocfs2_extent_block *eb = NULL;
+	ocfs2_extent_list *el, *fel;
+	u64 next_blk;
 	int victim;
 
 	LOG_ENTRY ();
 
-	if (fe->next_free_ext == 0) {
+	fel = &fe->id2.i_list;
+	if (fel->l_next_free_rec == 0) {
 		LOG_TRACE_STR("setting to zero as there isn't any used extents");
-		fe->last_ext_ptr = 0;
+		fe->i_last_eb_blk = 0;
 		status = 0;
 		goto bail;
 	}
 
-	victim = fe->next_free_ext - 1;
-	status = ocfs_read_bh(osb, fe->extents[victim].disk_off, &extent_bh, 
+	/* Can't be called with local extents */
+	if (fel->l_tree_depth < 0)
+		BUG();
+
+	/* Ugly magic -1 */
+	victim = fel->l_next_free_rec - 1;
+	status = ocfs_read_bh(osb,
+			      fel->l_recs[victim].e_blkno << osb->sb->s_blocksize_bits,
+			      &eb_bh, 
 			      OCFS_BH_CACHED, inode);
 	if (status < 0) {
 		LOG_ERROR_STATUS(status);
 		goto bail;
 	}
 
-	extent = OCFS_BH_GET_DATA_READ(extent_bh);
+	eb = OCFS_BH_GET_DATA_READ(eb_bh);
 
-	if (!IS_VALID_EXTENT_DATA(extent) && 
-	    !IS_VALID_EXTENT_HEADER(extent)) {
-		LOG_ERROR_STR("Invalid extent group!");
+	if (!IS_VALID_EXTENT_BLOCK(eb)) { 
+		LOG_ERROR_STR("Invalid extent block!");
 		goto bail;
 	}
 
-	while (!IS_VALID_EXTENT_DATA(extent)) {
-		if (!IS_VALID_EXTENT_HEADER(extent)) {
-			LOG_ERROR_STR("Invalid extent group!");
+	el = &eb->h_list;
+
+	while (el->l_tree_depth >= 0) {
+		if (!IS_VALID_EXTENT_BLOCK(eb)) {
+			LOG_ERROR_STR("Invalid extent block!");
 			goto bail;
 		}
 
-		next_ext = extent->extents[extent->next_free_ext - 1].disk_off;
+		el = &eb->h_list;
 
-		OCFS_BH_PUT_DATA(extent_bh);
-		brelse(extent_bh);
-		extent = NULL;
-		extent_bh = NULL;
+		next_blk = el->l_recs[el->l_next_free_rec - 1].e_blkno;
 
-		status = ocfs_read_bh(osb, next_ext, &extent_bh, 
+		OCFS_BH_PUT_DATA(eb_bh);
+		brelse(eb_bh);
+		eb = NULL;
+		eb_bh = NULL;
+
+		status = ocfs_read_bh(osb,
+				      next_blk << osb->sb->s_blocksize_bits,
+				      &eb_bh, 
 				      OCFS_BH_CACHED, inode);
 		if (status < 0) {
 			LOG_ERROR_STATUS(status);
 			goto bail;
 		}
-		extent = OCFS_BH_GET_DATA_READ(extent_bh);
+		eb = OCFS_BH_GET_DATA_READ(eb_bh);
 	}
 	
-	fe->last_ext_ptr = extent->this_ext;
+	fe->i_last_eb_blk = eb->h_blkno;
 	status = 0;
 bail:
-	if (extent_bh) {
-		if (extent)
-			OCFS_BH_PUT_DATA(extent_bh);
-		brelse(extent_bh);
+	if (eb_bh) {
+		if (eb)
+			OCFS_BH_PUT_DATA(eb_bh);
+		brelse(eb_bh);
 	}
 
 	LOG_EXIT_STATUS(status);
 	return(status);
-}  /* ocfs_update_last_ext_ptr */
+}  /* ocfs_update_last_eb_blk */
 
 /*
  * ocfs_free_extents_for_truncate()
@@ -2279,25 +2460,28 @@
  * before leaving this function, but that's pretty much up to the
  * caller!
  */
-int ocfs_free_extents_for_truncate (ocfs_super * osb, ocfs_file_entry * FileEntry, ocfs_journal_handle *handle, struct inode *inode)
+int ocfs_free_extents_for_truncate(ocfs_super *osb,
+				   ocfs2_dinode *fe,
+				   ocfs_journal_handle *handle,
+				   struct inode *inode)
 {
 	int status = 0;
 	struct buffer_head *extent_bh = NULL;
-	__u64 alloc_size;
 	int i, j;
-	int updated_lep; /* used to mark whether fe->last_ext_ptr has
+	ocfs2_extent_list *fel;
+	int updated_leb; /* used to mark whether fe->i_last_eb_blk has
 			   * been updated */
 
 	LOG_ENTRY ();
 
-	alloc_size = FileEntry->alloc_size;
+	fel = &fe->id2.i_list;
 
 	/* local extents */
-	if (FileEntry->local_ext) {
-		status = _squish_extent_entries(osb, FileEntry->extents, 
-						&FileEntry->next_free_ext, 
-						handle, alloc_size, 0, 
-						inode);
+	if (fel->l_tree_depth < 0) {
+		status = _squish_extent_entries(osb, fel->l_recs, 
+						&fel->l_next_free_rec, 
+						handle, fe->i_clusters,
+						0, inode);
 		if (status < 0) {
 			LOG_ERROR_STATUS (status);
 			goto finally;
@@ -2305,39 +2489,42 @@
 		goto finally;
 	}
 
-	LOG_TRACE_ARGS("non-local extents. taking that code path, truncating to alloc_size of (%llu)\n", alloc_size);
+	LOG_TRACE_ARGS("non-local extents. taking that code path, truncating to i_clusters of (%u)\n", fe->i_clusters);
 	/* non-local extents */
 
-	updated_lep = 0;
+	updated_leb = 0;
 
-	/* Loop backwards through only the used free extent headers here */
-	for (i = (FileEntry->next_free_ext - 1); i >= 0; i--) {
+	/* Loop backwards through only the used extent records here */
+	for (i = (fel->l_next_free_rec - 1); i >= 0; i--) {
 		LOG_TRACE_ARGS("at top of loop, i = %d\n", i);
 		/* Go ahead and read that bit of the tree - we'll need it. */
-		status = ocfs_read_bh(osb, FileEntry->extents[i].disk_off,
-				      &extent_bh, OCFS_BH_CACHED, inode);
+		status = ocfs_read_bh(osb,
+				      fel->l_recs[i].e_blkno << osb->sb->s_blocksize_bits,
+				      &extent_bh, OCFS_BH_CACHED,
+				      inode);
 		if (status < 0) {
 			LOG_ERROR_STATUS(status);
 			goto finally;
 		}
 		/* Figure out, do we want to kill this whole tree? */
-		if (FileEntry->extents[i].file_off >= alloc_size) {
+		if (fel->l_recs[i].e_cpos >= fe->i_clusters) {
 			LOG_TRACE_ARGS("Found an entire tree to delete!\n");
 			
-			status = ocfs_kill_this_tree(osb, extent_bh, handle, inode);
+			status = ocfs_kill_this_tree(osb, extent_bh,
+						     handle, inode);
 			if (status < 0) {
 				LOG_ERROR_STATUS(status);
 				goto finally;
 			}
-			/* Ok, update the FileEntry */
-			FileEntry->extents[i].file_off = 0;
-			FileEntry->extents[i].disk_off = 0;
-			FileEntry->extents[i].num_bytes = 0;
-			FileEntry->next_free_ext = i;
+			/* Ok, update the fe */
+			fel->l_recs[i].e_cpos = 0;
+			fel->l_recs[i].e_blkno = 0;
+			fel->l_recs[i].e_clusters = 0;
+			fel->l_next_free_rec = i;
 		} else { /* Ok, we only want part of it. */
 			LOG_TRACE_ARGS("Splitting this tree!\n");
 			status = ocfs_split_this_tree(osb, extent_bh, 
-						      handle, FileEntry, 
+						      handle, fe, 
 						      inode);
 			if (status < 0) {
 				LOG_ERROR_STATUS(status);
@@ -2345,15 +2532,19 @@
 			}
 
 			/* Ok, update the FileEntry */
-			LOG_TRACE_ARGS("Alright. num_bytes = (%llu), alloc_size = (%llu) file_off = (%llu)\n", FileEntry->extents[i].num_bytes, alloc_size, FileEntry->extents[i].file_off);
-			FileEntry->extents[i].num_bytes = alloc_size;
+			LOG_TRACE_ARGS("Alright. e_clusters = (%u), i_clusters = (%u) e_cpos = (%u)\n",
+				       fel->l_recs[i].e_clusters,
+				       fe->i_clusters,
+				       fel->l_recs[i].e_cpos);
+			fel->l_recs[i].e_clusters = fe->i_clusters;
 			for (j=0; j < i; j++) 
-				FileEntry->extents[i].num_bytes += FileEntry->extents[j].num_bytes;
+				fel->l_recs[i].e_clusters +=
+					fel->l_recs[j].e_clusters;
 
-			FileEntry->next_free_ext = i + 1;
+			fel->l_next_free_rec = i + 1;
 			/* We're done - we can't split more than one
 			 * parts of the tree. */
-			updated_lep = 1;
+			updated_leb = 1;
 			break;
 		}
 		brelse(extent_bh);
@@ -2361,15 +2552,14 @@
 	}
 
 	/* Ok, trunc to zero is a special case, doofus */
-	if (alloc_size == 0) {
-		FileEntry->last_ext_ptr = 0;
-		FileEntry->granularity = -1;
-		FileEntry->local_ext = 1;
-		updated_lep = 1;
+	if (fe->i_clusters == 0) {
+		fe->i_last_eb_blk = 0;
+		fel->l_tree_depth = -1;
+		updated_leb = 1;
 	}
 
-	if (!updated_lep) {
-		status = ocfs_update_last_ext_ptr(osb, FileEntry, inode);
+	if (!updated_leb) {
+		status = ocfs_update_last_eb_blk(osb, fe, inode);
 		if (status < 0) {
 			LOG_ERROR_STATUS(status);
 			goto finally;
@@ -2395,23 +2585,24 @@
  * decoded and updated in the extent map.
  *
  */
-int ocfs_lookup_file_allocation (ocfs_super * osb, __s64 Vbo, __s64 * Lbo, 
-				 __u32 sectors, u32 *sector_count, 
-				 struct inode *inode, int locked)
+int ocfs_lookup_file_allocation(ocfs_super *osb, __s64 Vbo,
+				__s64 *Lbo, __u32 blocks,
+				__s64 *contig_bytes,
+				struct inode *inode, int locked)
 {
 	int status = -EFAIL;
-	ocfs_file_entry *fe = NULL;
+	ocfs2_dinode *fe = NULL;
 	struct buffer_head *fe_bh = NULL;
-	ocfs_extent_group *OcfsExtent = NULL;
-	struct buffer_head *ext_bh = NULL;
-	__u64 next_data_ext;
+	ocfs2_extent_block *eb = NULL;
+	struct buffer_head *eb_bh = NULL;
+	u64 next_leaf;
 	__u64 remainingLength = 0;
 	__s64 localVbo;
 	__u64 cnt;
 	__u32 NumIndex;
 	int have_io_sem = 0;
 
-	LOG_ENTRY_ARGS("(vbo=%llu, sectors=%u, inode=%llu)\n", Vbo, sectors, 
+	LOG_ENTRY_ARGS("(vbo=%llu, blocks=%u, inode=%llu)\n", Vbo, blocks, 
 		       GET_INODE_FEOFF(inode));
 
 	OCFS_ASSERT (osb);
@@ -2443,10 +2634,11 @@
 	if (!locked)
 		up(&(OCFS_I(inode)->priv_sem));
 
-	if (status && cnt >= sectors) {
+	if (status &&
+	    (cnt >= (u64)blocks << osb->sb->s_blocksize_bits)) {
 		/* Found a what we were looking for. */
 		status = 0;
-		goto finally;
+		goto success;
 	}
 
 	/* Ok, we didn't find it in the extent map (or we need to
@@ -2470,11 +2662,13 @@
 	if (!locked)
 		have_io_sem = 1;
 
-	remainingLength = sectors;
+	remainingLength = blocks << osb->sb->s_blocksize_bits;
 	localVbo = Vbo;
 
-	/*  We are looking for a Vbo, but it is not in the Map or not Valid. */
-	/*  Thus we have to go to the disk, and update the Map */
+	/*
+	 * We are looking for a Vbo, but it is not in the Map or not
+	 * Valid.  Thus we have to go to the disk, and update the Map
+	 */
 
 	/* Read the file Entry corresponding to this */
 	status = ocfs_read_bh(osb, GET_INODE_FEOFF(inode), &fe_bh, 
@@ -2491,19 +2685,20 @@
 		goto finally;
 	}
 
-	if (Vbo >= (__s64) fe->alloc_size) {
-		LOG_ERROR_ARGS ("vbo=%llu, fe->alloc_sz=%llu alloc_size=%llu", 
-				Vbo, fe->alloc_size,
+	if (Vbo >= (__s64)((u64)fe->i_clusters << osb->s_clustersize_bits)) {
+		LOG_ERROR_ARGS ("vbo=%llu, fe->i_clusters=%u alloc_size=%llu", 
+				Vbo, fe->i_clusters,
 				OCFS_I(inode)->alloc_size);
 		status = -EFAIL;
 		goto finally;
 	}
 
-	if (fe->local_ext) {
+	if (fe->id2.i_list.l_tree_depth < 0) {
 		if (!locked)
 			down(&(OCFS_I(inode)->priv_sem));
-		status = ocfs_update_extent_map (osb, &OCFS_I(inode)->map, fe,
-						 NULL, NULL, LOCAL_EXT);
+		status = ocfs_update_extent_map(osb,
+						&OCFS_I(inode)->map, fe,
+						NULL, NULL, LOCAL_EXT);
 		if (!locked)
 			up(&(OCFS_I(inode)->priv_sem));
 		if (status < 0) {
@@ -2514,23 +2709,22 @@
 		/* Extents are branched and we are no longer using
 		 * Local Extents for this File Entry. */
 
-		status = ocfs_get_leaf_extent(osb, fe, localVbo, &ext_bh, 
-					      inode);
+		status = ocfs_get_leaf_extent(osb, fe, localVbo,
+					      &eb_bh, inode);
 		if (status < 0) {
 			LOG_ERROR_STATUS (status);
 			goto finally;
 		}
 
-		OcfsExtent = OCFS_BH_GET_DATA_READ(ext_bh);
+		eb = OCFS_BH_GET_DATA_READ(eb_bh);
 		while (1) {
 			if (!locked)
 				down(&(OCFS_I(inode)->priv_sem));
-			status = ocfs_update_extent_map (osb, 
-							 &OCFS_I(inode)->map, 
-							 OcfsExtent,
-							 &localVbo, 
-							 &remainingLength, 
-							 NONLOCAL_EXT);
+			status = ocfs_update_extent_map(osb,
+							&OCFS_I(inode)->map,
+							eb, &localVbo,
+							&remainingLength,
+							NONLOCAL_EXT);
 			if (!locked)
 				up(&(OCFS_I(inode)->priv_sem));
 			if (status < 0) {
@@ -2539,34 +2733,36 @@
 			}
 
 			if (remainingLength > 0) {
-				if (!OcfsExtent->next_data_ext) {
+				if (!eb->h_next_leaf_blk) {
 					LOG_ERROR_ARGS ("localVbo=%llu, "
 						"alloc_size=%llu, "
 						" thisext=%llu",
 						localVbo, 
 						OCFS_I(inode)->alloc_size,
-						OcfsExtent->this_ext);
+						eb->h_blkno);
 					status = -EFAIL;
 					goto finally;
 				}
 
-				next_data_ext = OcfsExtent->next_data_ext;
+				next_leaf = eb->h_next_leaf_blk;
 
-				OCFS_BH_PUT_DATA(ext_bh);
-				brelse(ext_bh);
-				ext_bh = NULL;
-				OcfsExtent = NULL;
+				OCFS_BH_PUT_DATA(eb_bh);
+				brelse(eb_bh);
+				eb_bh = NULL;
+				eb = NULL;
 
-				status = ocfs_read_bh(osb, next_data_ext, 
-						      &ext_bh, 
+				status = ocfs_read_bh(osb,
+						      next_leaf << osb->sb->s_blocksize_bits, 
+						      &eb_bh, 
 						      OCFS_BH_COND_CACHED, 
 						      inode);
 				if (status < 0) {
 					LOG_ERROR_STATUS(status);
 					goto finally;
 				}
-				OcfsExtent = OCFS_BH_GET_DATA_READ(ext_bh);
-				if (!IS_VALID_EXTENT_DATA(OcfsExtent)) {
+				eb = OCFS_BH_GET_DATA_READ(eb_bh);
+				if (!IS_VALID_EXTENT_BLOCK(eb) ||
+				    (eb->h_list.l_tree_depth != -1)) {
 					LOG_ERROR_STATUS (status = -EINVAL);
 					goto finally;
 				}
@@ -2578,18 +2774,24 @@
 
 	if (!locked)
 		down(&(OCFS_I(inode)->priv_sem));
-	if (ocfs_lookup_extent_map_entry (osb, &(OCFS_I(inode)->map), Vbo, Lbo,
-					  &cnt, &NumIndex) && cnt >= sectors) {
+	if (ocfs_lookup_extent_map_entry(osb, &(OCFS_I(inode)->map),
+					 Vbo, Lbo, &cnt, &NumIndex) &&
+	    (cnt >= (u64)blocks << osb->sb->s_blocksize_bits)) {
 		status = 0;
-	} else
+	} else {
 		status = -EFAIL;
+		printk("failed to lookup extmap... vbo=%llu mapcount=%d\n", Vbo, OCFS_I(inode)->map.count);
+	}
 	if (!locked)
 		up(&(OCFS_I(inode)->priv_sem));
 
+success:
 	/* want to return cnt only if asked for it */
-	if (sector_count)
-		*sector_count = (u32) cnt;
+	if (contig_bytes)
+		*contig_bytes = cnt;
 
+	LOG_TRACE_ARGS("returning contig_bytes=%lld, lbo=%lld\n", cnt, *Lbo);
+
 finally:
 	if (have_io_sem)
 		up_read(&OCFS_I(inode)->ip_io_sem);
@@ -2600,10 +2802,10 @@
 		brelse(fe_bh);
 	}
 
-	if (ext_bh) {
-		if (OcfsExtent)
-			OCFS_BH_PUT_DATA(ext_bh);
-		brelse(ext_bh);
+	if (eb_bh) {
+		if (eb)
+			OCFS_BH_PUT_DATA(eb_bh);
+		brelse(eb_bh);
 	}
 	LOG_EXIT_STATUS (status);
 	return (status);
@@ -2612,21 +2814,25 @@
 /* ocfs_get_leaf_extent()
  * '*data_exent_bh' should be NULL.
  */
-int ocfs_get_leaf_extent (ocfs_super * osb, ocfs_file_entry * FileEntry, __s64 Vbo, struct buffer_head **data_extent_bh, struct inode *inode)
+int ocfs_get_leaf_extent(ocfs_super *osb, ocfs2_dinode *fe,
+			 __s64 Vbo, struct buffer_head **data_extent_bh,
+			 struct inode *inode)
 {
 	int status = 0, tempstat;
 	__u32 i, j;
-	ocfs_extent_group *ExtentHeader = NULL;
 	struct buffer_head *ext_bh = NULL;
-	ocfs_extent_group *tmp = NULL;
+	ocfs2_extent_block *eb = NULL;
+	ocfs2_extent_list *el, *fel;
+	ocfs2_extent_block *tmp = NULL;
 	__u64 childDiskOffset = 0;
 
 	LOG_ENTRY ();
 
-	for (i = 0; i < FileEntry->next_free_ext; i++) {
-		if ((__s64) (FileEntry->extents[i].file_off +
-			   FileEntry->extents[i].num_bytes) > Vbo) {
-			childDiskOffset = FileEntry->extents[i].disk_off;
+	fel = &fe->id2.i_list;
+
+	for (i = 0; i < fel->l_next_free_rec; i++) {
+		if ((__s64)((u64)(fel->l_recs[i].e_cpos + fel->l_recs[i].e_clusters) << osb->s_clustersize_bits) > Vbo) {
+			childDiskOffset = fel->l_recs[i].e_blkno << osb->sb->s_blocksize_bits;
 			break;
 		}
 	}
@@ -2636,25 +2842,26 @@
 		goto finally;
 	}
 
-	for (i = 0; i < FileEntry->granularity; i++) {
-		tempstat = ocfs_read_bh(osb, childDiskOffset, &ext_bh, OCFS_BH_COND_CACHED, inode);
+	for (i = 0; i < fel->l_tree_depth; i++) {
+		tempstat = ocfs_read_bh(osb, childDiskOffset, &ext_bh,
+					OCFS_BH_COND_CACHED, inode);
 		if (tempstat < 0) {
 			LOG_ERROR_STATUS (status = tempstat);
 			goto finally;
 		}
 
-		ExtentHeader = OCFS_BH_GET_DATA_READ(ext_bh);
-		if (!IS_VALID_EXTENT_HEADER(ExtentHeader)) {
+		eb = OCFS_BH_GET_DATA_READ(ext_bh);
+		if (!IS_VALID_EXTENT_BLOCK(eb)) {
 			LOG_ERROR_STATUS (status = -EINVAL);
 			goto finally;
 		}
+		el = &eb->h_list;
 
-		for (j = 0; j < ExtentHeader->next_free_ext; j++) {
-			if ((__s64) (ExtentHeader->extents[j].file_off +
-				   ExtentHeader->extents[j].num_bytes) > Vbo)
+		for (j = 0; j < el->l_next_free_rec; j++) {
+			if ((__s64)((u64)(el->l_recs[j].e_cpos + el->l_recs[j].e_clusters) << osb->s_clustersize_bits) > Vbo)
 			{
 				childDiskOffset =
-				    ExtentHeader->extents[j].disk_off;
+				    el->l_recs[j].e_blkno << osb->sb->s_blocksize_bits;
 				break;
 			}
 		}
@@ -2662,7 +2869,7 @@
 		OCFS_BH_PUT_DATA(ext_bh);
 		brelse(ext_bh);
 		ext_bh = NULL;
-		ExtentHeader = NULL;
+		eb = NULL;
 	}
 
 	if (*data_extent_bh) {
@@ -2678,7 +2885,8 @@
 	}
 
 	tmp = OCFS_BH_GET_DATA_READ(*data_extent_bh);
-	if (!IS_VALID_EXTENT_DATA(tmp)) {
+	if (!IS_VALID_EXTENT_BLOCK(tmp) ||
+	    (tmp->h_list.l_tree_depth != -1)) {
 		LOG_ERROR_STATUS (status = -EINVAL);
 		OCFS_BH_PUT_DATA(*data_extent_bh);
 		brelse(*data_extent_bh);
@@ -2689,7 +2897,7 @@
 	OCFS_BH_PUT_DATA(*data_extent_bh);
 finally:
 	if (ext_bh) {
-		if (ExtentHeader)
+		if (eb)
 			OCFS_BH_PUT_DATA(ext_bh);
 		brelse(ext_bh);
 	}
@@ -2714,7 +2922,14 @@
  * Pass in 'lock_bh' and bitmap_inode only if you've already taken the 
  * vol_alloc semaphore, and you've done the acquire_lock on the bitmap.
  */
-int ocfs_find_contiguous_space_from_bitmap (ocfs_super * osb, ocfs_journal_handle *handle, __u64 file_size, __u64 * cluster_off, __u64 * cluster_count, int sysfile, struct buffer_head *lock_bh, struct inode *bitmap_inode)
+int ocfs_find_contiguous_space_from_bitmap(ocfs_super *osb,
+					   ocfs_journal_handle *handle,
+					   __u64 file_size,
+					   __u64 *cluster_off,
+					   __u64 *cluster_count,
+					   int sysfile,
+					   struct buffer_head *lock_bh,
+					   struct inode *bitmap_inode)
 {
 	int status = 0, startbh, numblocks;
 	__u32 bitoffset = 0, ClusterCount = 0;
@@ -2723,11 +2938,11 @@
 	static __u32 LargeAllocOffset = 0;
 	static __u32 SmallAllocOffset = 0;
 	struct buffer_head *bh = NULL;
-	ocfs_file_entry *bm_lock = NULL;
+	ocfs2_dinode *bm_lock = NULL;
 	__u32 bitmapblocks; /* we only care about the valid blocks */
 	int local_lock = 0;
 	int local_inode = 0;
-	__u32 five_percent;
+	__u32 five_percent, free_bits;
 
 	LOG_ENTRY ();
 
@@ -2737,7 +2952,7 @@
 		BUG();
 
 	if (!bitmap_inode) {
-		bitmap_inode = igrab(osb->system_inodes[GLOBAL_BITMAP_SYSTEM_INODE]);
+		bitmap_inode = ocfs_get_system_file_inode(osb, GLOBAL_BITMAP_SYSTEM_INODE, -1);
 		if (!bitmap_inode) {
 			status = -EINVAL;
 			LOG_ERROR_STR("Could not get bitmap inode!");
@@ -2754,8 +2969,8 @@
 		ocfs_handle_add_inode(handle, bitmap_inode);
 
 		/* Get the allocation lock here */
-		status = ocfs_acquire_lock (osb, OCFS_DLM_EXCLUSIVE_LOCK, 0, 
-					    &bh, bitmap_inode);
+		status = ocfs_acquire_lock(osb, OCFS_DLM_EXCLUSIVE_LOCK,
+					   0, &bh, bitmap_inode);
 		if (status < 0) {
 			if (status != -EINTR)
 				LOG_ERROR_STATUS (status);
@@ -2773,24 +2988,27 @@
 
 	bm_lock = OCFS_BH_GET_DATA_WRITE(bh);
 
-	ClusterCount = (__u32) ((__u64) (file_size + (osb->vol_layout.cluster_size-1)) >> 
-				osb->cluster_size_bits);
+	ClusterCount = (__u32) ((__u64) (file_size + (osb->s_clustersize-1)) >> 
+				osb->s_clustersize_bits);
 	if (ClusterCount == 0) {
 		LOG_ERROR_STR ("DISK_FULL?: ClusterCount==0");
 		status = 0;
 		goto leave;
 	}
-	ByteCount = ClusterCount << osb->cluster_size_bits;
+	ByteCount = (u64)ClusterCount << osb->s_clustersize_bits;
 	if (ByteCount == 0) {
 		LOG_ERROR_STR ("DISK_FULL?: Bytecount==0");
 		status = 0;
 		goto leave;
 	}
 
-	if (sysfile ? (ClusterCount > osb->vol_layout.num_clusters) :
-	    (ClusterCount > (osb->vol_layout.num_clusters - 
-			     ((8 * ONE_MEGA_BYTE) >> osb->cluster_size_bits)))){
-		LOG_ERROR_STR ("Disk Full");
+	free_bits = le32_to_cpu(bm_lock->id1.bitmap1.i_total) - 
+		le32_to_cpu(bm_lock->id1.bitmap1.i_used);
+	if ( (sysfile && ClusterCount > free_bits) || 
+	     (!sysfile && ClusterCount > 
+	         (free_bits - ((8 * ONE_MEGA_BYTE) >> osb->s_clustersize_bits))) ){
+		LOG_ERROR_ARGS("Disk Full: ClusterCount=%u, free_bits=%u, sysfile=%s\n",
+			       ClusterCount, free_bits, sysfile?"yes":"no");
 		status = -ENOSPC;
 		goto leave;
 	}
@@ -2804,19 +3022,19 @@
 	/* allocation at 5%, so that small files stay in the beginning
 	 * as much as possible */
 
-	// ok because num_clusters should not even be a __u64
-	five_percent = ((__u32)osb->vol_layout.num_clusters) / 20;
+	five_percent = le32_to_cpu(bm_lock->id1.bitmap1.i_total) / 20;
 	if (ClusterCount > five_percent) {
 		LargeAlloc = 1;
 		LargeAllocOffset = five_percent;
 	}
 
-	bitmapblocks = (OCFS_ALIGN(osb->cluster_bitmap.validbits, 
-				   OCFS_BITS_IN_CHUNK) / OCFS_BITS_IN_CHUNK);
+	bitmapblocks =
+		ocfs_blocks_for_bits(osb->sb,
+ 				     osb->cluster_bitmap.validbits);
 	
 	/* Ok, somewhat lame, but we submit the whole bitmap for reading here*/
-	if (ocfs_read_bhs(osb, osb->vol_layout.bitmap_off, 
-			  bitmapblocks * osb->sect_size, 
+	if (ocfs_read_bhs(osb, osb->bitmap_blkno << osb->sb->s_blocksize_bits, 
+			  bitmapblocks << osb->sb->s_blocksize_bits,
 			  osb->cluster_bitmap.chunk, 0, bitmap_inode)) {
 		LOG_ERROR_STATUS(-EIO);
 		goto leave;
@@ -2827,7 +3045,7 @@
 					  LargeAlloc ? LargeAllocOffset :
 					  SmallAllocOffset, sysfile ? 0 :
 					  ((8 * ONE_MEGA_BYTE) >>
-					   osb->cluster_size_bits));
+					   osb->s_clustersize_bits));
 
 	/* if fails we should try again from the beginning of the disk. */
 	/* in the end we pass # of bits we want to keep for system
@@ -2841,8 +3059,8 @@
 		bitoffset = ocfs_find_clear_bits (osb, &osb->cluster_bitmap,
 						  ClusterCount, 0,
 						  sysfile ? 0 :
-						  ((8 * ONE_MEGA_BYTE) /
-						   osb->vol_layout.cluster_size));
+						  ((8 * ONE_MEGA_BYTE) >>
+						   osb->s_clustersize_bits));
 	}
 
 	/* It returns -1 on failure, otherwise bitoffset points at the */
@@ -2857,20 +3075,24 @@
 		goto leave;
 	}
 
-	LOG_TRACE_ARGS ("setting at bit offset=%u\n", bitoffset);
+	LOG_TRACE_ARGS ("setting %u bits at bit offset=%u\n", ClusterCount, bitoffset);
 
-	ocfs_set_bits (handle, &osb->cluster_bitmap, bitoffset, ClusterCount);
+	ocfs_set_bits(osb->sb, handle, &osb->cluster_bitmap, bitoffset,
+		      ClusterCount);
 
 	/* Ok, write out the bitmap now. We optimize only by writing
 	 * out the bitmap blocks which have changed, and not all of
 	 * them like before. */
-	startbh = OCFS_GLOBAL_OFF_TO_CHUNK(bitoffset);
-	numblocks = OCFS_GLOBAL_OFF_TO_CHUNK(bitoffset + ClusterCount - 1) - startbh + 1;
+	numblocks = ocfs_bitmap_blocks_affected(osb->sb,
+						bitoffset,
+						ClusterCount,
+						&startbh);
 
 	LOG_TRACE_ARGS("bitoffset = %u, ClusterCount = %u, startbh = %u, numblocks = %u\n", bitoffset, ClusterCount, startbh, numblocks);
 
 	/* write the bitmap size info to the lock sector */
-	bm_lock->u.bitinfo.used_bits = ocfs_count_bits(&osb->cluster_bitmap);
+	bm_lock->id1.bitmap1.i_used =
+		ocfs_count_bits(osb->sb, &osb->cluster_bitmap);
 	OCFS_BH_PUT_DATA(bh);
 	bm_lock = NULL;
 
@@ -2903,12 +3125,14 @@
  *
  * You need to be holding node_alloc_sem!
  */
-int ocfs_alloc_node_block (ocfs_super * osb, __u64 FileSize, __u64 * DiskOffset, __u64 * file_off, __u32 NodeNum, __u32 Type, ocfs_journal_handle *handle)
+int ocfs_alloc_node_block(ocfs_super *osb, __u64 bytes_wanted,
+			  __u64 *DiskOffset, __u64 *file_off,
+			  __u32 NodeNum, __u32 Type,
+			  ocfs_journal_handle *handle)
 {
 	int status = 0;
 	int startbh, numblocks;
 	__u64 fileSize = 0;
-	__u64 offset = 0;
 	__u64 numBytes = 0;
 	__u64 allocSize = 0;
 	__u64 prevFileSize = 0;
@@ -2919,31 +3143,32 @@
 	__u32 numBits = 0;
 	__u32 foundBit = -1;
 	__u32 blockSize = 0, blockSizeBits = 0;
-	__u32 bm_file = 0;
-	__u32 alloc_file = 0;
+	int bm_file = 0;
+	int alloc_file = 0;
 	struct buffer_head *bh = NULL;
-	ocfs_file_entry *fe = NULL;
+	struct buffer_head *alloc_bh = NULL;
+	ocfs2_dinode *fe = NULL;
+	ocfs2_dinode *alloc_fe = NULL;
 	int needs_uninit = 0;
 	int delay_lockrel = 0;
 	struct inode *inode = NULL; /* alloc bitmap file inode */
+	struct inode *alloc_inode = NULL; /* alloc file inode */
 
-	LOG_ENTRY_ARGS("(FileSize = (%llu), Type=%d)\n", FileSize,Type);
+	LOG_ENTRY_ARGS("(bytes_wanted = (%llu), Type=%d)\n", bytes_wanted,Type);
 
 	switch (Type) {
 		case DISK_ALLOC_EXTENT_NODE:
-			bm_file = OCFS_FILE_FILE_ALLOC_BITMAP + NodeNum;
-			alloc_file = OCFS_FILE_FILE_ALLOC + NodeNum;
-			blockSize = (__u32) osb->vol_layout.file_node_size;
-			blockSizeBits = osb->file_alloc_bits;
+			bm_file = EXTENT_ALLOC_BITMAP_SYSTEM_INODE;
+			alloc_file = EXTENT_ALLOC_SYSTEM_INODE;
+			blockSize = osb->sb->s_blocksize;
+			blockSizeBits = osb->sb->s_blocksize_bits;
 			atomic_inc(&osb->alloc_stats.ext_allocs);
-			inode = igrab(osb->system_inodes[FILE_ALLOC_BITMAP_SYSTEM_INODE]);
 			break;
 		case DISK_ALLOC_INODE:
-			bm_file = OCFS_INODE_BITMAP + NodeNum;
-			alloc_file = OCFS_INODE_FILE + NodeNum;
-			blockSize = osb->inode_size;
-			blockSizeBits = osb->inode_alloc_bits;
-			inode = igrab(osb->system_inodes[INODE_ALLOC_BITMAP_SYSTEM_INODE]);
+			bm_file = INODE_ALLOC_BITMAP_SYSTEM_INODE;
+			alloc_file = INODE_ALLOC_SYSTEM_INODE;
+			blockSize = osb->sb->s_blocksize;
+			blockSizeBits = osb->sb->s_blocksize_bits;
 			break;
 		default:
 			status = -EINVAL;
@@ -2951,11 +3176,16 @@
 			goto leave;
 	}
 
+	inode = ocfs_get_system_file_inode(osb, bm_file, NodeNum);
 	if (!inode) {
-		status = -EINVAL;
-		LOG_ERROR_STATUS(status);
+		LOG_ERROR_STATUS(status=-EINVAL);
 		goto leave;
 	}
+	alloc_inode = ocfs_get_system_file_inode(osb, alloc_file, NodeNum);
+	if (!alloc_inode) {
+		LOG_ERROR_STATUS(status=-EINVAL);
+		goto leave;
+	}
 
 	/* Allocate a block of size blocksize from the relevant file/bitmap */
 	OCFS_ASSERT (blockSize);
@@ -2968,38 +3198,35 @@
 	}
 
 	ocfs_handle_add_lock(handle, OCFS_DLM_EXCLUSIVE_LOCK, 
-			      FLAG_FILE_CREATE, 
-			      bh, inode, 1);
+			     FLAG_FILE_CREATE, 
+			     bh, inode, 1);
 
-	numBits = ((FileSize + (blockSize-1)) >> blockSizeBits);
-	numBytes = numBits << blockSizeBits;
+	numBits = ((bytes_wanted + (blockSize-1)) >> blockSizeBits);
+	numBytes = (u64)numBits << blockSizeBits;
 
-	/* Read in the bitmap file for the dir alloc and look for the
+	/* Read in the bitmap file for the alloc and look for the
 	 * required space, if found */
 	fe = OCFS_BH_GET_DATA_READ(bh);
-	fileSize = fe->file_size;
-	allocSize = fe->alloc_size;
+	prevFileSize = fileSize = fe->i_size;
+	allocSize = (u64)fe->i_clusters << osb->s_clustersize_bits;
 	OCFS_BH_PUT_DATA(bh);
-	
-	prevFileSize = fileSize;
 
+
+
 	if ((fileSize != 0) && (allocSize != 0)) {
-		/* Round this off to dirnodesize */
-		ocfs_initialize_bitmap (&bitmap, (__u32) fileSize * 8, (__u32) allocSize * 8);
+		ocfs_initialize_bitmap(osb->sb, &bitmap,
+				       (__u32)fileSize * 8,
+				       (__u32)allocSize * 8);
 		needs_uninit = 1;
 
-		status = ocfs_read_system_file (osb, bm_file, bitmap.chunk, 
-						allocSize, offset);
+		status = ocfs_read_system_file(osb, bm_file, NodeNum,
+					       bitmap.chunk, allocSize);
 		if (status < 0) {
 			LOG_ERROR_STATUS (status);
 			goto leave;
 		}
 
-		/* Find the requisite number of bits... */
-
-		/* This function will check for clear bits in the Bitmap for */
-		/* consective clear bits equal to ClusterCount */
-		foundBit = ocfs_find_clear_bits (osb, &bitmap, numBits, 0, 0);
+		foundBit = ocfs_find_clear_bits(osb, &bitmap, numBits, 0, 0);
 	}
 
 	/* It returns -1 on failure , otherwise ByteOffset points at the */
@@ -3008,24 +3235,45 @@
 
 	if (foundBit == -1) {
 		/* if not found add more allocation to the file and try again. */
+		//extent = ONE_MEGA_BYTE;
 		extent = ( ((numBits * blockSize) + (ONE_MEGA_BYTE-1)) >> 20 ) << 20;
-		
-		status = ocfs_get_system_file_size (osb, alloc_file,
-						    &newFileSize, &allocSize);
+
+#warning maybe take this out and put a verifyupdateinode in here
+		status = ocfs_read_bh(osb, GET_INODE_FEOFF(alloc_inode), 
+			&alloc_bh, OCFS_BH_CACHED, alloc_inode);
 		if (status < 0) {
 			LOG_ERROR_STATUS (status);
 			goto leave;
 		}
-		
+
+		alloc_fe = OCFS_BH_GET_DATA_READ(alloc_bh);
+		newFileSize = alloc_fe->i_size;
+		allocSize = (u64)alloc_fe->i_clusters << osb->s_clustersize_bits;
+		if (newFileSize != alloc_inode->i_size ||
+		    allocSize != OCFS_I(alloc_inode)->alloc_size) {
+			LOG_ERROR_ARGS("aha! alloc inode was out of date! "
+			       		"newFileSize=%llu, i_size=%llu, "
+			       		"allocSize=%llu, alloc_size=%llu\n",
+			       		newFileSize, alloc_inode->i_size,
+			       		allocSize, OCFS_I(alloc_inode)->alloc_size);
+		}
+		OCFS_BH_PUT_DATA(alloc_bh);
+
+		//newFileSize = alloc_inode->i_size;
+		//allocSize = OCFS_I(alloc_inode)->alloc_size;
+
 		/* This is for OUI optimzation to allocate more disk
 		 * space for directory allocations */
 		
 		if (allocSize > 0)
 			extent *= 2;
 		
-		status = ocfs_extend_system_file (osb, alloc_file,
-						  newFileSize + extent, NULL, 
-						  handle, 0);
+		LOG_TRACE_ARGS("extending the alloc file to %llu\n",
+		       newFileSize + extent);
+		status = ocfs_extend_file(osb, newFileSize + extent, 
+					  GET_INODE_FEOFF(alloc_inode),
+					  handle, alloc_inode, NULL, 1,
+					  NULL);
 		if (status < 0) {
 			LOG_ERROR_STATUS (status);
 			goto leave;
@@ -3037,9 +3285,10 @@
 		/* Does this need the buffer_head? if so, we need to
 		 * do a put_data first! */
 		/* Calculate the new bitmap size */
-
-		status = ocfs_extend_system_file (osb, bm_file, bitMapSize, bh,
-						  handle, 1);
+		
+		LOG_TRACE_ARGS("extending the bitmap file to %llu\n", bitMapSize);
+		status = ocfs_extend_file (osb, bitMapSize, GET_INODE_FEOFF(inode), 
+					   handle, inode, NULL, 1, bh);
 		if (status < 0) {
 			LOG_ERROR_STATUS (status);
 			goto leave;
@@ -3047,46 +3296,56 @@
 		fe = OCFS_BH_GET_DATA_READ(bh);
 		/* we wrote it back out in ocfs_extend_system_file so
 		 * we can trust the sizes here */
-		fileSize = fe->file_size;
-		allocSize = fe->alloc_size;
+		fileSize = fe->i_size;
+		allocSize = (u64)fe->i_clusters << osb->s_clustersize_bits;
+		LOG_TRACE_ARGS("fileSize=%llu, allocSize=%llu\n",
+		       fileSize, allocSize);
 		OCFS_BH_PUT_DATA(bh);
 
 		if (needs_uninit)
-			ocfs_reinitialize_bitmap(&bitmap, fileSize * 8, 
+			ocfs_reinitialize_bitmap(osb->sb, &bitmap,
+						 fileSize * 8, 
 						 allocSize * 8);
 		else
-			ocfs_initialize_bitmap(&bitmap, fileSize * 8, 
+			ocfs_initialize_bitmap(osb->sb, &bitmap,
+					       fileSize * 8, 
 					       allocSize * 8);
 		needs_uninit = 1;
 
-		status = ocfs_read_system_file (osb, bm_file, bitmap.chunk, 
-						allocSize, offset);
+		status = ocfs_read_system_file(osb, bm_file, NodeNum, 
+					       bitmap.chunk, allocSize);
 		if (status < 0) {
 			LOG_ERROR_STATUS (status);
 			goto leave;
 		}
 		
-		foundBit = prevFileSize * 8;
+		foundBit = ocfs_find_clear_bits(osb, &bitmap, numBits, 0, 0);
 
 		delay_lockrel = 1;
 		if (Type == DISK_ALLOC_EXTENT_NODE)
 			atomic_inc(&osb->alloc_stats.ext_extends);
 	}
 
-	LOG_TRACE_ARGS ("byte offset=%d\n", foundBit);
+	LOG_TRACE_ARGS ("bit offset=%d, num=%d\n", foundBit, numBits);
 
-	ocfs_set_bits (handle, &bitmap, foundBit, numBits);
+	ocfs_set_bits(osb->sb, handle, &bitmap, foundBit, numBits);
 
 	/* only write out what has changed... */
-	startbh = OCFS_GLOBAL_OFF_TO_CHUNK(foundBit);
-	numblocks = OCFS_GLOBAL_OFF_TO_CHUNK(foundBit + numBits - 1) - startbh + 1;
+	numblocks = ocfs_bitmap_blocks_affected(osb->sb,
+						foundBit,
+						numBits,
+						&startbh);
 
 	LOG_TRACE_ARGS ("offset=%u, type=%x, blksz=%u, foundbit=%u, fileid=%u\n",
 			foundBit * blockSize, Type, blockSize, foundBit, alloc_file);
-	*DiskOffset = ocfs_file_to_disk_off (osb, (alloc_file),
-					(foundBit * blockSize));
-	if (*DiskOffset == 0) {
-		LOG_ERROR_STATUS(status = -EFAIL);
+
+	status = ocfs_lookup_file_allocation(osb, foundBit * blockSize, 
+					     DiskOffset, 1, NULL,
+					     alloc_inode, 1);
+	if (status < 0 || *DiskOffset == 0) {
+		if (!status)
+			status = -EINVAL;
+		LOG_ERROR_STATUS(status);
 		goto leave;
 	}
 
@@ -3104,9 +3363,13 @@
 
 	if (inode)
 		iput(inode);
+	if (alloc_inode)
+		iput(alloc_inode);
 
 	if (bh != NULL)
 		brelse(bh);
+	if (alloc_bh != NULL)
+		brelse(alloc_bh);
 
 	LOG_EXIT_STATUS (status);
 	return status;
@@ -3116,22 +3379,24 @@
  * ocfs_free_file_extents()
  *
  */
-int ocfs_free_file_extents (ocfs_super * osb, struct buffer_head *fe_bh,
-			    ocfs_journal_handle *handle, struct inode *inode)
+int ocfs_free_file_extents(ocfs_super *osb, struct buffer_head *fe_bh,
+			   ocfs_journal_handle *handle,
+			   struct inode *inode)
 {
 	int status = 0;
 	__u32 i;
 	__u32 numBitsAllocated = 0, bitmapOffset = 0;
-	ocfs_extent_group *extent = NULL;
+	ocfs2_extent_block *extent = NULL;
 	struct buffer_head *extent_bh = NULL;
-	ocfs_file_entry *fe = NULL;
+	ocfs2_extent_list *fel;
+	ocfs2_dinode *fe = NULL;
 	__u64 offset;
 
 	LOG_ENTRY ();
 
 	offset = GET_INODE_FEOFF(inode);
 	fe = OCFS_BH_GET_DATA_READ(fe_bh);
-	if (offset != fe->this_sector)
+	if (offset != (fe->i_blkno << osb->sb->s_blocksize_bits))
 		BUG();
 	OCFS_BH_PUT_DATA(fe_bh);
 
@@ -3139,31 +3404,34 @@
 		SET_BH_SEQNUM(inode, fe_bh);
 
 	fe = OCFS_BH_GET_DATA_READ(fe_bh);
+	fel = &fe->id2.i_list;
 
-	if (fe->local_ext) {
-		for (i = 0; i < fe->next_free_ext; i++) {
-			numBitsAllocated = (__u32) (fe->extents[i].num_bytes >>
-						  osb->cluster_size_bits);
+	if (fel->l_tree_depth < 0) {
+		for (i = 0; i < fel->l_next_free_rec; i++) {
+			numBitsAllocated = fel->l_recs[i].e_clusters;
 
 			bitmapOffset =
-			    (__u32) ((fe->extents[i].disk_off -
-				    osb->vol_layout.data_start_off) >>
-				   osb->cluster_size_bits);
+			    (__u32)(((fel->l_recs[i].e_blkno << osb->sb->s_blocksize_bits)) >>
+				   osb->s_clustersize_bits);
 
 			ocfs_handle_add_commit_bits(handle, numBitsAllocated, 
 						    bitmapOffset, -1, 
 						    DISK_ALLOC_VOLUME);
 		}
 	} else {
-		for (i = 0; i < fe->next_free_ext; i++) {
-			status = ocfs_read_bh(osb, fe->extents[i].disk_off, 
-					      &extent_bh, OCFS_BH_COND_CACHED, inode);
+		for (i = 0; i < fel->l_next_free_rec; i++) {
+			status = ocfs_read_bh(osb,
+					      fel->l_recs[i].e_blkno << osb->sb->s_blocksize_bits, 
+					      &extent_bh,
+					      OCFS_BH_COND_CACHED,
+					      inode);
 			if (status < 0) {
 				LOG_ERROR_STATUS (status);
 				goto leave;
 			}
 			extent = OCFS_BH_GET_DATA_READ(extent_bh);
-			if ((fe->granularity && (!IS_VALID_EXTENT_HEADER(extent))) && !IS_VALID_EXTENT_DATA(extent)) {
+			if (fel->l_tree_depth &&
+			    !IS_VALID_EXTENT_BLOCK(extent)) {
 				status = -EINVAL;
 				LOG_ERROR_STATUS(status);
 				goto leave;
@@ -3216,7 +3484,7 @@
 {
 	int numbits;
 
-	switch (osb->vol_layout.cluster_size) {
+	switch (osb->s_clustersize) {
 	case (4*1024):
 		numbits = 1024;
 		break;
@@ -3239,7 +3507,7 @@
 /*
  * ocfs_alloc_count_bits
  */
-static __u32 ocfs_alloc_count_bits(ocfs_local_alloc *alloc)
+static __u32 ocfs_alloc_count_bits(ocfs2_dinode *alloc)
 {
 	int i;
 	__u8 tmp;
@@ -3248,8 +3516,8 @@
 
 	LOG_ENTRY();
 
-	buffer = &(alloc->bitmap[0]);
-	for (i = 0; i < 256; i++) {
+	buffer = LOCAL_ALLOC(alloc)->la_bitmap;
+	for (i = 0; i < LOCAL_ALLOC(alloc)->la_size; i++) {
 		memcpy(&tmp, buffer, 1);
 		count+= BITCOUNT(tmp);
 		buffer++;
@@ -3262,16 +3530,16 @@
 /*
  * ocfs_clear_local_alloc
  */
-static void ocfs_clear_local_alloc(ocfs_local_alloc *alloc) 
+static void ocfs_clear_local_alloc(ocfs2_dinode *alloc) 
 {
 	int i;
 	LOG_ENTRY();
 
-	alloc->alloc_size = 0;
-	alloc->num_used = 0;
-	alloc->bitmap_start = 0;
-	for(i = 0; i < 256; i++)
-		alloc->bitmap[i] = 0;
+	LOCAL_ALLOC(alloc)->la_bm_bits = 0;
+	LOCAL_ALLOC(alloc)->la_bits_set = 0;
+	LOCAL_ALLOC(alloc)->la_bm_off = 0;
+	for(i = 0; i < LOCAL_ALLOC(alloc)->la_size; i++)
+		LOCAL_ALLOC(alloc)->la_bitmap[i] = 0;
 
 	LOG_EXIT();
 	return;
@@ -3287,7 +3555,7 @@
  */
 static int ocfs_sync_local_to_main(ocfs_super *osb, 
 				   ocfs_journal_handle *handle, 
-				   ocfs_local_alloc *alloc,
+				   ocfs2_dinode *alloc,
 				   struct inode *main_bm_inode)
 {
 	int status = 0;
@@ -3295,41 +3563,45 @@
 	void *bitmap;
 	unsigned int start, numblocks, bitmapblocks;
 
-	LOG_ENTRY_ARGS("alloc->alloc_size = %u, COUNT = %u, num_used = %u\n", 
-		       alloc->alloc_size, ocfs_alloc_count_bits(alloc), 
-		       alloc->num_used);
+	LOG_ENTRY_ARGS("alloc->la_bm_bits = %u, COUNT = %u, la_bits_set = %u\n", 
+		       LOCAL_ALLOC(alloc)->la_bm_bits,
+		       ocfs_alloc_count_bits(alloc), 
+		       LOCAL_ALLOC(alloc)->la_bits_set);
 
-	if (alloc->alloc_size == 0) {
+	if (LOCAL_ALLOC(alloc)->la_bm_bits == 0) {
 		LOG_TRACE_STR("nothing to sync!");
 		goto bail;
 	}
 
-	bitmapblocks = (OCFS_ALIGN(osb->cluster_bitmap.validbits, 
-				   OCFS_BITS_IN_CHUNK) / OCFS_BITS_IN_CHUNK);
+	bitmapblocks =
+		ocfs_blocks_for_bits(osb->sb,
+ 				     osb->cluster_bitmap.validbits);
+
 	/* figure out which block in the bitmap to start on and the
 	 * maximum number of blocks we can span over -- we don't need
 	 * to read any more as that's the most we'll be touching... */
-	start = OCFS_GLOBAL_OFF_TO_CHUNK(alloc->bitmap_start);
-	numblocks = 2 + OCFS_GLOBAL_OFF_TO_CHUNK(alloc->bitmap_start + alloc->alloc_size) - start;
+	numblocks = ocfs_bitmap_blocks_affected(osb->sb,
+						LOCAL_ALLOC(alloc)->la_bm_off,
+						LOCAL_ALLOC(alloc)->la_bits_set,
+						&start);
 
-	if ((start + numblocks) > bitmapblocks)
-		numblocks--;
 	if ((start + numblocks) > bitmapblocks) {
 		printk("uhoh, bitmap calculation is bad!\n");
-		printk("alloc->alloc_size = %u, COUNT = %u, num_used = %u"
-		       "start=%u, bitmap_start = %u, numblocks=%u, "
+		printk("alloc->la_bm_bits = %u, COUNT = %u, alloc->la_bits_set = %u"
+		       "start=%u, alloc->la_bm_off = %u, numblocks=%u, "
 		       "bitmapblocks = %u\n",
-		       alloc->alloc_size, ocfs_alloc_count_bits(alloc), 
-		       alloc->num_used, start, alloc->bitmap_start, numblocks,
+		       LOCAL_ALLOC(alloc)->la_bm_bits, ocfs_alloc_count_bits(alloc), 
+		       LOCAL_ALLOC(alloc)->la_bits_set, start, LOCAL_ALLOC(alloc)->la_bm_off, numblocks,
 		       bitmapblocks);
 
 		BUG();
 	}
 
-	LOG_TRACE_ARGS("start=%u, bitmap_start = %u, numblocks=%u\n", start, 
-		       alloc->bitmap_start, numblocks);
-	status = ocfs_read_bhs(osb, osb->vol_layout.bitmap_off,
-			       numblocks * osb->sect_size,
+	LOG_TRACE_ARGS("start=%u, alloc->la_bm_off = %u, numblocks=%u\n", start, 
+		       LOCAL_ALLOC(alloc)->la_bm_off, numblocks);
+	status = ocfs_read_bhs(osb,
+			       (osb->bitmap_blkno + start) << osb->sb->s_blocksize_bits,
+			       numblocks << osb->sb->s_blocksize_bits,
 			       &osb->cluster_bitmap.chunk[start], 0, 
 			       main_bm_inode);
 	if (status < 0) {
@@ -3337,10 +3609,10 @@
 		goto bail;
 	}
 
-	bitmap = alloc->bitmap;
+	bitmap = LOCAL_ALLOC(alloc)->la_bitmap;
 	/* any unset bits in local alloc need to be unset in bitmap. */
 	bit_off = 0;
-	left = alloc->alloc_size;
+	left = LOCAL_ALLOC(alloc)->la_bm_bits;
 	while ((bit_off = find_next_zero_bit(bitmap, left, bit_off)) 
 	       != -1) {
 		if (bit_off >= left) {
@@ -3349,9 +3621,10 @@
 		}
 
 		LOG_TRACE_ARGS("Clearing bit %u in main bitmap\n", 
-			       bit_off + alloc->bitmap_start);
-		ocfs_clear_bits(handle, &osb->cluster_bitmap, 
-				bit_off + alloc->bitmap_start, 1);
+			       bit_off + LOCAL_ALLOC(alloc)->la_bm_off);
+		ocfs_clear_bits(osb->sb, handle, &osb->cluster_bitmap, 
+				bit_off + LOCAL_ALLOC(alloc)->la_bm_off,
+				1);
 		bit_off++;
 	}
 
@@ -3366,13 +3639,13 @@
  * without a journal handle -- used during shutdown and recovery.
  */
 static int ocfs_sync_local_from_shutdown(ocfs_super *osb, 
-					   ocfs_bitmap_free_head **f, 
-					   struct buffer_head *local_alloc_bh, 
-					   int in_recovery)
+					 ocfs_bitmap_free_head **f, 
+					 struct buffer_head *local_alloc_bh, 
+					 int in_recovery)
 {
 	int status = 0;
 	int bit_off, left;
-	ocfs_local_alloc *alloc = NULL;
+	ocfs2_dinode *alloc = NULL;
 	void *bitmap;
 
 	LOG_ENTRY();
@@ -3381,7 +3654,7 @@
 		BUG();
 
 	alloc = OCFS_BH_GET_DATA_READ(local_alloc_bh);
-	if (alloc->alloc_size == 0) {
+	if (LOCAL_ALLOC(alloc)->la_bm_bits == 0) {
 		OCFS_BH_PUT_DATA(local_alloc_bh);
 		LOG_TRACE_STR("nothing to sync!");
 		goto bail;
@@ -3398,15 +3671,16 @@
 
 	alloc = OCFS_BH_GET_DATA_READ(local_alloc_bh);
 
-	LOG_TRACE_ARGS("alloc->alloc_size = %u, COUNT = %u, num_used = %u\n", 
-		       alloc->alloc_size, ocfs_alloc_count_bits(alloc), 
-		       alloc->num_used);
+	LOG_TRACE_ARGS("alloc->la_bm_bits = %u, COUNT = %u, la_bits_set = %u\n", 
+		       LOCAL_ALLOC(alloc)->la_bm_bits,
+		       ocfs_alloc_count_bits(alloc), 
+		       LOCAL_ALLOC(alloc)->la_bits_set);
 
-	bitmap = alloc->bitmap;
+	bitmap = LOCAL_ALLOC(alloc)->la_bitmap;
 
 	/* any unset bits in local alloc need to be unset in bitmap. */
 	bit_off = 0;
-	left = alloc->alloc_size;
+	left = LOCAL_ALLOC(alloc)->la_bm_bits;
 	while ((bit_off = find_next_zero_bit(bitmap, left, bit_off)) 
 	       != -1) {
 		if (bit_off >= left) {
@@ -3416,7 +3690,7 @@
 	     /* LOG_TRACE_ARGS("Clearing bit %u in main bitmap\n", bit_off);*/
 		status = ocfs_add_to_bitmap_free_head(osb, 
 						 *f, 1, 
-						 bit_off + alloc->bitmap_start,
+						 bit_off + LOCAL_ALLOC(alloc)->la_bm_off,
 						 -1, DISK_ALLOC_VOLUME);
 		if (status < 0) {
 			ocfs_free_bitmap_free_head(*f);
@@ -3444,18 +3718,18 @@
 {
 	int status = 0;
 	__u64 alloc_bytes, cluster_off, cluster_count;
-	ocfs_local_alloc *alloc = NULL;
+	ocfs2_dinode *alloc = NULL;
 
 	LOG_ENTRY();
 
 	alloc = OCFS_BH_GET_DATA_READ(osb->local_alloc_bh);
-	if (alloc->alloc_size != 0)
+	if (LOCAL_ALLOC(alloc)->la_bm_bits != 0)
 		LOG_TRACE_STR("asking me to alloc a new window over a"
 			      " non-empty one");
 	OCFS_BH_PUT_DATA(osb->local_alloc_bh);
 
 	/* we try to use find_contig_space_from_bitmap here for now. */
-	alloc_bytes = ocfs_local_alloc_window_bits(osb) << osb->cluster_size_bits;
+	alloc_bytes = (u64)ocfs_local_alloc_window_bits(osb) << osb->s_clustersize_bits;
 	LOG_TRACE_ARGS("Allocating %llu bytes (%u clusters) for a "
 		       "new window.\n", alloc_bytes, 
 		       ocfs_local_alloc_window_bits(osb));
@@ -3473,18 +3747,21 @@
 
 	alloc = OCFS_BH_GET_DATA_WRITE(osb->local_alloc_bh);
 
-	alloc->bitmap_start = cluster_off;
-	alloc->alloc_size = cluster_count;
+	LOCAL_ALLOC(alloc)->la_bm_off = cluster_off;
+	LOCAL_ALLOC(alloc)->la_bm_bits = cluster_count;
 	/* just in case... In the future when we find space ourselves,
 	 * we don't have to get all contiguous -- but we'll have to
 	 * set all previously used bits in bitmap and update
-	 * num_used before setting the bits in the main bitmap. */
-	alloc->num_used = 0;
-	memset(alloc->bitmap, 0, 256);
+	 * la_bits_set before setting the bits in the main bitmap. */
+	LOCAL_ALLOC(alloc)->la_bits_set = 0;
+	memset(LOCAL_ALLOC(alloc)->la_bitmap, 0,
+	       LOCAL_ALLOC(alloc)->la_size);
 
 	LOG_TRACE_STR("New window allocated:");
-	LOG_TRACE_ARGS("window bitmap_start = %u\n", alloc->bitmap_start);
-	LOG_TRACE_ARGS("window alloc_size = %u\n", alloc->alloc_size);
+	LOG_TRACE_ARGS("window la_bm_off = %u\n",
+		       LOCAL_ALLOC(alloc)->la_bm_off);
+	LOG_TRACE_ARGS("window la_bm_bits = %u\n",
+		       LOCAL_ALLOC(alloc)->la_bm_bits);
 
 	OCFS_BH_PUT_DATA(osb->local_alloc_bh);
 
@@ -3496,7 +3773,8 @@
 /*
  * ocfs_local_find_clear_bits
  */
-static int ocfs_local_find_clear_bits(ocfs_super *osb, ocfs_local_alloc *alloc,
+static int ocfs_local_find_clear_bits(ocfs_super *osb,
+				      ocfs2_dinode *alloc,
 				      __u32 numbits)
 {
 	int numfound, bitoff, left, startoff, lastzero;
@@ -3504,11 +3782,11 @@
 
 	LOG_ENTRY_ARGS("(numbits wanted = %u)\n", numbits);
 
-	bitmap = alloc->bitmap;
+	bitmap = LOCAL_ALLOC(alloc)->la_bitmap;
 
 	numfound = bitoff = startoff = 0;
 	lastzero = -1;
-	left = alloc->alloc_size;
+	left = LOCAL_ALLOC(alloc)->la_bm_bits;
 	while ((bitoff = find_next_zero_bit(bitmap, left, startoff)) != -1) {
 		if (bitoff == left) {
 			/* LOG_TRACE_ARGS("bitoff (%d) == left", bitoff); */
@@ -3554,7 +3832,7 @@
 				      __u64 * bitoff, __u64 * bitcount, 
 				      ocfs_journal_handle *handle)
 {
-	ocfs_local_alloc *alloc = NULL;
+	ocfs2_dinode *alloc = NULL;
 	int status = 0, tmpstat;
 	int startoff, tmpoff;
 	__u32 tmpwanted;
@@ -3584,7 +3862,7 @@
 	alloc = OCFS_BH_GET_DATA_WRITE(osb->local_alloc_bh);
 tryagain:
 	/* If we need to initialize a new window, do so now. */
-	if (alloc->alloc_size == 0) {
+	if (LOCAL_ALLOC(alloc)->la_bm_bits == 0) {
 		OCFS_BH_PUT_DATA(osb->local_alloc_bh);
 		alloc = NULL;
 		LOG_TRACE_STR("Allocating a new window...");
@@ -3627,7 +3905,7 @@
 		alloc = NULL;
 
 		if (!main_bm_inode)
-			main_bm_inode = igrab(osb->system_inodes[GLOBAL_BITMAP_SYSTEM_INODE]);
+			main_bm_inode = ocfs_get_system_file_inode(osb, GLOBAL_BITMAP_SYSTEM_INODE, -1);
 
 		if (!main_bm_inode) {
 			status = -EINVAL;
@@ -3639,9 +3917,9 @@
 		ocfs_handle_add_inode(handle, main_bm_inode);
 
 		/* Get the allocation lock here */
-		status = ocfs_acquire_lock (osb, OCFS_DLM_EXCLUSIVE_LOCK, 0, 
-					    &main_bm_bh, 
-					    main_bm_inode);
+		status = ocfs_acquire_lock(osb, OCFS_DLM_EXCLUSIVE_LOCK,
+					   0, &main_bm_bh,
+					   main_bm_inode);
 		if (status < 0) {
 			main_bm_bh = NULL;
 			if (status != -EINTR)
@@ -3676,16 +3954,16 @@
 
 	/* Ok, if we've got this far then the search suceeded and we
 	 * can mark the bitmap. */
-	bitmap = alloc->bitmap;
+	bitmap = LOCAL_ALLOC(alloc)->la_bitmap;
 	tmpoff = startoff;
 	tmpwanted = bitswanted;
 	while(tmpwanted--) {
 		/* LOG_TRACE_ARGS("setting bit %d\n", tmpoff); */
 		set_bit(tmpoff++, bitmap);
 	}
-	alloc->num_used += bitswanted;
+	LOCAL_ALLOC(alloc)->la_bits_set += bitswanted;
 
-	*bitoff = alloc->bitmap_start + startoff;
+	*bitoff = LOCAL_ALLOC(alloc)->la_bm_off + startoff;
 	*bitcount = bitswanted;
 
 	OCFS_BH_PUT_DATA(osb->local_alloc_bh);
@@ -3746,10 +4024,14 @@
 	 * We *always* use global bitmap for clustersize > 128k,
 	 * file_size > 2mb, so force it under these conditions. */
 	if ( (handle && osb->have_local_alloc)
-	     && (osb->vol_layout.cluster_size <= OCFS_LOCAL_ALLOC_MAX_CSIZE)
+	     && (osb->s_clustersize <= OCFS_LOCAL_ALLOC_MAX_CSIZE)
 	     && (file_size <= OCFS_LOCAL_ALLOC_MAX_ALLOC) )
 		use_global = 0;
 
+	/* TODO: let's find a way to use local alloc for system files someday ;-) */
+	if (sysfile)
+		use_global = 1;
+
 	if (!use_global) {
 		if (handle->flags & OCFS_HANDLE_LOCAL_ALLOC) {
 			printk("whoa, I already have local alloc sem!?!\n");
@@ -3800,17 +4082,20 @@
 int ocfs_load_local_alloc(ocfs_super *osb)
 {
 	int status = 0;
-	ocfs_local_alloc *alloc = NULL;
+	ocfs2_dinode *alloc = NULL;
 	struct buffer_head *alloc_bh = NULL;
-	__u64 offset;
 	__u32 num_used;
+	struct inode *inode = NULL;
 
 	LOG_ENTRY();
 
 	/* read the alloc off disk */
-	offset = ((OCFS_LOCAL_ALLOC_FILE + osb->node_num) * osb->sect_size) + 
-		osb->vol_layout.root_int_off;
-	status = ocfs_read_bh(osb, offset, &alloc_bh, 0, NULL);
+	inode = ocfs_get_system_file_inode(osb, LOCAL_ALLOC_SYSTEM_INODE, osb->node_num);
+	if (!inode) {
+		LOG_ERROR_STATUS(status=-EINVAL);
+		goto bail;
+	}
+	status = ocfs_read_bh(osb, GET_INODE_FEOFF(inode), &alloc_bh, 0, inode);
 	if (status < 0) {
 		LOG_ERROR_STATUS(status);
 		goto bail;
@@ -3821,9 +4106,10 @@
 	/* do a little verification. */
 	num_used = ocfs_alloc_count_bits(alloc);
 
-	if (num_used != alloc->num_used) {
-		LOG_TRACE_ARGS("num_used = %u, alloc->num_used = %u!\n", 
-			       num_used, alloc->num_used);
+	if (num_used != LOCAL_ALLOC(alloc)->la_bits_set) {
+		LOG_TRACE_ARGS("num_used = %u, alloc->la_bits_set = %u!\n", 
+			       num_used,
+			       LOCAL_ALLOC(alloc)->la_bits_set);
 		OCFS_BH_PUT_DATA(alloc_bh);
 		status = -EFAIL;
 		goto bail;
@@ -3837,6 +4123,8 @@
 	if (status < 0)
 		if (alloc_bh)
 			brelse(alloc_bh);
+	if (inode)
+		iput(inode);
 
 	LOG_EXIT_STATUS(status);
 	return(status);
@@ -3857,7 +4145,7 @@
 			       int in_recovery)
 {
 	int status;
-	ocfs_local_alloc *alloc = NULL;
+	ocfs2_dinode *alloc = NULL;
 	ocfs_bitmap_free_head *f = NULL;
 	struct buffer_head *bh = NULL;
 
@@ -3914,13 +4202,17 @@
 {
 	int status = 0;
 	struct buffer_head *alloc_bh = NULL;
-	__u64 offset;
+	struct inode *inode = NULL;
 
 	LOG_ENTRY_ARGS("(node_num = %d)\n", node_num);
 
-	offset = ((OCFS_LOCAL_ALLOC_FILE + node_num) * osb->sect_size) + 
-		osb->vol_layout.root_int_off;
-	status = ocfs_read_bh(osb, offset, &alloc_bh, 0, NULL);
+	inode = ocfs_get_system_file_inode(osb, LOCAL_ALLOC_SYSTEM_INODE, node_num);
+	if (!inode) {
+		LOG_ERROR_STATUS(status=-EINVAL);
+		goto bail;
+	}
+
+	status = ocfs_read_bh(osb, GET_INODE_FEOFF(inode), &alloc_bh, 0, inode);
 	if (status < 0) {
 		LOG_ERROR_STATUS(status);
 		goto bail;
@@ -3932,6 +4224,9 @@
 	if (alloc_bh)
 		brelse(alloc_bh);
 
+	if (inode)
+		iput(inode);
+
 	LOG_EXIT_STATUS(status);
 	return(status);
 } /* ocfs_recover_local_alloc */

Added: trunk/src/alloc.h
===================================================================
--- trunk/src/alloc.h	2004-06-17 16:59:59 UTC (rev 1127)
+++ trunk/src/alloc.h	2004-06-17 21:06:33 UTC (rev 1128)
@@ -0,0 +1,79 @@
+/* -*- mode: c; c-basic-offset: 8; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
+ * alloc.h
+ *
+ * Function prototypes
+ *
+ * Copyright (C) 2002, 2004 Oracle.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ * 
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ *
+ * Authors: Kurt Hackel, Mark Fasheh, Sunil Mushran, Wim Coekaerts,
+ *	    Manish Singh, Neeraj Goyal, Suchit Kaura
+ */
+
+#ifndef OCFS2_ALLOC_H
+#define OCFS2_ALLOC_H
+
+int ocfs_add_to_bitmap_free_head(ocfs_super *osb,
+				 ocfs_bitmap_free_head *f,
+				 __u32 len, __u32 fileoff,
+				 __u32 nodenum, __u32 type);
+int ocfs_allocate_extent(ocfs_super *osb, struct buffer_head *fe_bh,
+			 ocfs_journal_handle *handle,
+			 u64 blkno, u32 new_clusters,
+			 struct inode *inode);
+int ocfs_alloc_node_block(ocfs_super *osb, __u64 FileSize,
+			  __u64 *DiskOffset, __u64 *file_off,
+			  __u32 NodeNum, __u32 Type,
+			  ocfs_journal_handle *handle);
+int ocfs_find_contiguous_space_from_bitmap(ocfs_super *osb,
+					   ocfs_journal_handle *handle,
+					   __u64 file_size,
+					   __u64 *cluster_off,
+					   __u64 *cluster_count,
+					   int sysfile,
+					   struct buffer_head *lock_bh,
+					   struct inode *bitmap_inode);
+int ocfs_find_space(ocfs_super *osb, __u64 file_size,
+		    __u64 *cluster_off, __u64 *cluster_count,
+		    int sysfile, ocfs_journal_handle *handle);
+int ocfs_free_extents_for_truncate(ocfs_super *osb,
+				   ocfs2_dinode *fe,
+				   ocfs_journal_handle *handle,
+				   struct inode *inode);
+int ocfs_free_file_extents(ocfs_super *osb, struct buffer_head *fe_bh,
+			   ocfs_journal_handle *handle,
+			   struct inode *inode);
+int ocfs_get_leaf_extent(ocfs_super *osb, ocfs2_dinode *fe,
+			 __s64 Vbo, struct buffer_head **data_extent_bh,
+			 struct inode *inode);
+int ocfs_load_local_alloc(ocfs_super *osb);
+int ocfs_lookup_file_allocation(ocfs_super *osb, __s64 Vbo,
+				__s64 *Lbo, __u32 blocks,
+				__s64 *contig_bytes,
+				struct inode *inode,
+				int locked);
+int ocfs_process_bitmap_free_head(ocfs_super *osb,
+				  ocfs_bitmap_free_head *f);
+int ocfs_recover_local_alloc(ocfs_super *osb, int node_num);
+void ocfs_shutdown_local_alloc(ocfs_super *osb, 
+			       struct buffer_head **local_alloc_bh, 
+ 			       int sync,
+ 			       int in_recovery);
+
+#endif /* OCFS2_ALLOC_H */

Modified: trunk/src/bitmap.c
===================================================================
--- trunk/src/bitmap.c	2004-06-17 16:59:59 UTC (rev 1127)
+++ trunk/src/bitmap.c	2004-06-17 21:06:33 UTC (rev 1128)
@@ -1,4 +1,6 @@
-/*
+/* -*- mode: c; c-basic-offset: 8; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
  * bitmap.c
  *
  * Bitmap infrastructure code
@@ -24,17 +26,23 @@
  *	    Manish Singh, Neeraj Goyal, Suchit Kaura
  */
 
-#if defined(__KERNEL__)
+#include "ocfs_compat.h"
+
+#include <linux/fs.h>
 #include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/highmem.h>
 
-#include "inc/ocfs_log.h"
-#include "inc/ocfs.h"
-#include "inc/ocfs_journal.h"
-#else
-#include  <asm/bitops.h>
-#include  <dummy.h>
-#endif
+#include "ocfs_log.h"
+#include "ocfs.h"
 
+#include "alloc.h"
+#include "bitmap.h"
+#include "util.h"
+
+#include "ocfs_journal.h"
+#include "ocfs_buffer_head.h"
+
 /* Tracing */
 #define OCFS_DEBUG_CONTEXT  OCFS_DEBUG_CONTEXT_BITMAP
 
@@ -42,19 +50,19 @@
  * ocfs_initialize_bitmap()
  * 
  */
-void ocfs_initialize_bitmap (ocfs_alloc_bm * bitmap, __u32 validbits, __u32 allocbits)
+void ocfs_initialize_bitmap(struct super_block *sb,
+			    ocfs_alloc_bm *bitmap, __u32 validbits,
+			    __u32 allocbits)
 {
-	__u32 tmp;
+	LOG_ENTRY_ARGS ("(0x%p, 0x%p, %u, %u)\n", sb, bitmap, validbits,
+			allocbits);
 
-	LOG_ENTRY_ARGS ("(0x%p, %u, %u)\n", bitmap, validbits, allocbits);
-
 	bitmap->validbits = validbits;
 	bitmap->allocbits = allocbits;
 	bitmap->failed = 0;
 	bitmap->ok_retries = 0;
 
-	tmp = OCFS_ALIGN(allocbits, OCFS_BITS_IN_CHUNK);
-	bitmap->numbh = (tmp / OCFS_BITS_IN_CHUNK);
+	bitmap->numbh = ocfs_blocks_for_bits(sb, bitmap->allocbits);
 	bitmap->chunk = ocfs_malloc(bitmap->numbh * sizeof(struct buffer_head *));
 	if (!bitmap->chunk)
 		BUG();
@@ -75,7 +83,7 @@
 		if (bitmap->chunk[i])
 			brelse(bitmap->chunk[i]);
 
-	ocfs_free(bitmap->chunk);
+	kfree(bitmap->chunk);
 	bitmap->chunk = NULL;
 	bitmap->numbh = 0;
 	bitmap->validbits = 0;
@@ -88,9 +96,10 @@
 }
 
 /* we only handle if the bitmap has grown, not shrunk. */
-void ocfs_reinitialize_bitmap(ocfs_alloc_bm *bitmap, __u32 validbits, __u32 allocbits) 
+void ocfs_reinitialize_bitmap(struct super_block *sb,
+			      ocfs_alloc_bm *bitmap, __u32 validbits,
+			      __u32 allocbits) 
 {
-	__u32 tmp;
 	__u32 newnumbh;
 	struct buffer_head **newchunk;
 
@@ -105,8 +114,7 @@
 	if (allocbits == bitmap->allocbits)
 		goto skipbhs;
 
-	tmp = OCFS_ALIGN(allocbits, OCFS_BITS_IN_CHUNK);
-	newnumbh = (tmp / OCFS_BITS_IN_CHUNK);
+	newnumbh = ocfs_blocks_for_bits(sb, allocbits);
 	newchunk = ocfs_malloc(bitmap->numbh * sizeof(struct buffer_head *));
 
 	if (!newchunk)
@@ -116,7 +124,7 @@
 
 	memcpy(newchunk, bitmap->chunk, (bitmap->numbh * sizeof(struct buffer_head *)));
 
-	ocfs_free(bitmap->chunk);
+	kfree(bitmap->chunk);
 
 	bitmap->numbh = newnumbh;
 	bitmap->chunk = newchunk;
@@ -161,9 +169,12 @@
  * in case we have a disk full.
  *
  */
-int ocfs_find_clear_bits (ocfs_super *osb, ocfs_alloc_bm * bitmap, __u32 numBits, __u32 offset, __u32 sysonly)
+int ocfs_find_clear_bits(ocfs_super *osb, ocfs_alloc_bm * bitmap,
+			 __u32 numBits, __u32 offset, __u32 sysonly)
 {
-	__u32 globalsize, globaloff, localstart, lastbh, size, bitoff, count;
+	__u32 globalsize, globaloff, localstart, lastbh;
+	__u32 size = OCFS_BITS_IN_CHUNK(osb->sb);
+	__u32 bitoff = 0, count = 0;
 	void *buffer;
 	int c;
 	struct buffer_head *currbh = NULL;
@@ -172,19 +183,14 @@
 			offset, sysonly);
 
 	globalsize = bitmap->validbits - sysonly;
-	bitoff = count = 0;
-	lastbh = (OCFS_ALIGN(globalsize, OCFS_BITS_IN_CHUNK) / 
-		  OCFS_BITS_IN_CHUNK) - 1;
+	lastbh = ocfs_blocks_for_bits(osb->sb, globalsize) - 1;
 
 	globaloff = offset;
-	localstart = OCFS_GLOBAL_OFF_TO_LOCAL(globaloff);
+	ocfs_bitmap_block_for_off(osb->sb, globaloff, &c, &localstart);
 
 	if (lastbh == 0)
 		size = globalsize;
-	else
-		size = OCFS_BITS_IN_CHUNK;
 
-	c = OCFS_GLOBAL_OFF_TO_CHUNK(localstart);
 	currbh = bitmap->chunk[c];
 
 	/*LOG_TRACE_ARGS("globalsize=%u, bitmap->validbits=%u, sysonly=%u\n", 
@@ -193,7 +199,8 @@
 	  "localstart=%u\n", c, lastbh, size, localstart);*/
 	buffer = OCFS_BH_GET_DATA_READ(currbh);
 
-	while ((bitoff = find_next_zero_bit(buffer, OCFS_BITS_IN_CHUNK, 
+	while ((bitoff = find_next_zero_bit(buffer,
+					    OCFS_BITS_IN_CHUNK(osb->sb),
 					    localstart)) != -1) {
 		/*LOG_TRACE_ARGS("c=%u, globaloff=%u, bitoff=%u, "
 			       "localstart=%u\n", c, globaloff, bitoff, 
@@ -223,8 +230,10 @@
 			currbh = bitmap->chunk[c];
 			buffer = OCFS_BH_GET_DATA_READ(currbh);
 			if (c == lastbh)
-				size = globalsize-(OCFS_BITS_IN_CHUNK*lastbh);
-			globaloff = c * OCFS_BITS_IN_CHUNK;
+				size = globalsize -
+					(OCFS_BITS_IN_CHUNK(osb->sb)
+					 * lastbh);
+			globaloff = c * OCFS_BITS_IN_CHUNK(osb->sb);
 			continue;
 		}
 
@@ -237,7 +246,9 @@
 			if (localstart >= size)
 				goto nextbh;
 
-			globaloff = OCFS_CHUNK_TO_GLOBAL_OFF(c, bitoff) + 1;
+			globaloff =
+				ocfs_bitmap_off_for_block(osb->sb, c,
+					 		  bitoff) + 1;
 		} else if (bitoff == localstart) {
 			/*LOG_TRACE_ARGS("bitoff == localstart (%u)\n", 
 			  bitoff);*/
@@ -250,7 +261,9 @@
 			  bitoff, localstart);*/
 			/* we had to skip over some ones */
 			count = 1;
-			globaloff = OCFS_CHUNK_TO_GLOBAL_OFF(c, bitoff) + 1;
+			globaloff =
+				ocfs_bitmap_off_for_block(osb->sb, c,
+				 			  bitoff) + 1;
 			localstart = bitoff + 1;
 		}
 
@@ -276,7 +289,7 @@
  * ocfs_count_bits()
  *
  */
-int ocfs_count_bits (ocfs_alloc_bm * bitmap)
+int ocfs_count_bits(struct super_block *sb, ocfs_alloc_bm *bitmap)
 {
 	__u32 count = 0, i, j;
 	struct buffer_head *currbh;
@@ -287,14 +300,17 @@
 	LOG_ENTRY ();
 
 	size = (bitmap->validbits >> 3);
+	validbytes = size;
+	if (validbytes > sb->s_blocksize)
+		validbytes = sb->s_blocksize;
 	
-	for (i = 0, validbytes = (size >= OCFS_BITMAP_CHUNK ? OCFS_BITMAP_CHUNK : size);
-	     size > 0; size -= validbytes, i++) {
+	for (i = 0; size > 0; size -= validbytes, i++) {
 		currbh = bitmap->chunk[i];
 		buffer = OCFS_BH_GET_DATA_READ(currbh);
 		for (j = 0; j < validbytes; j++) {
-			memcpy (&tmp, buffer, 1);
-			count += BITCOUNT (tmp);
+			/* U-U-U-GLY */
+			memcpy(&tmp, buffer, 1);
+			count += BITCOUNT(tmp);
 			buffer++;
 		}
 		OCFS_BH_PUT_DATA(currbh);
@@ -312,15 +328,16 @@
  * ocfs_set_bits()
  *
  */
-void ocfs_set_bits (ocfs_journal_handle *handle, ocfs_alloc_bm * bitmap, 
-		    __u32 start, __u32 num)
+void ocfs_set_bits(struct super_block *sb, ocfs_journal_handle *handle,
+		   ocfs_alloc_bm * bitmap, __u32 start, __u32 num)
 {
 	struct buffer_head *currbh = NULL;
 	void *buff;
 	int i, local;
 	int status;
 
-	LOG_ENTRY_ARGS ("(0x%p, %u, %u)\n", bitmap, start, num);
+	LOG_ENTRY_ARGS ("(0x%p, 0x%p, 0x%p, %u, %u)\n", sb, handle,
+			bitmap, start, num);
 
 	if ((start + num) > bitmap->validbits) {
 		LOG_ERROR_ARGS("bitmap->validbits = %u but start = %u and "  \
@@ -328,8 +345,7 @@
 		goto bail;
 	}
 
-	i = OCFS_GLOBAL_OFF_TO_CHUNK(start);
-	local = OCFS_GLOBAL_OFF_TO_LOCAL(start);
+	ocfs_bitmap_block_for_off(sb, start, &i, &local);
 	currbh = bitmap->chunk[i];
 
 	status = ocfs_journal_access(handle, currbh, OCFS_JOURNAL_ACCESS_UNDO);
@@ -342,7 +358,7 @@
 
 	while (num--) {
 		set_bit (local++, buff);
-		if (local >= OCFS_BITS_IN_CHUNK && num != 0) {
+		if ((local >= OCFS_BITS_IN_CHUNK(sb)) && (num != 0)) {
 			local = 0;
 			OCFS_BH_PUT_DATA(currbh);
 			status = ocfs_journal_dirty(handle, currbh);
@@ -381,7 +397,8 @@
  * ocfs_clear_bits()
  *
  */
-void ocfs_clear_bits (ocfs_journal_handle *handle, ocfs_alloc_bm * bitmap, 
+void ocfs_clear_bits(struct super_block *sb,
+		     ocfs_journal_handle *handle, ocfs_alloc_bm *bitmap,
 		      __u32 start, __u32 num)
 {
 	struct buffer_head *currbh = NULL;
@@ -389,7 +406,8 @@
 	int i, local;
 	int status;
 
-	LOG_ENTRY_ARGS ("(0x%p, %u, %u)\n", bitmap, start, num);
+	LOG_ENTRY_ARGS ("(0x%p, 0x%p, 0x%p, %u, %u)\n", sb, handle,
+			bitmap, start, num);
 
 	if ((start + num) > bitmap->validbits) {
 		LOG_ERROR_ARGS("bitmap->validbits = %u but start = %u and "  \
@@ -397,8 +415,7 @@
 		goto bail;
 	}
 
-	i = OCFS_GLOBAL_OFF_TO_CHUNK(start);
-	local = OCFS_GLOBAL_OFF_TO_LOCAL(start);
+	ocfs_bitmap_block_for_off(sb, start, &i, &local);
 	currbh = bitmap->chunk[i];
 
 	status = ocfs_journal_access(handle, currbh, OCFS_JOURNAL_ACCESS_UNDO);
@@ -415,7 +432,7 @@
 			(unsigned long *)bh2jh(currbh)->b_committed_data);
 		local++;
 
-		if (local >= OCFS_BITS_IN_CHUNK && num != 0) {
+		if ((local >= OCFS_BITS_IN_CHUNK(sb)) && (num != 0)) {
 			local = 0;
 			OCFS_BH_PUT_DATA(currbh);
 

Added: trunk/src/bitmap.h
===================================================================
--- trunk/src/bitmap.h	2004-06-17 16:59:59 UTC (rev 1127)
+++ trunk/src/bitmap.h	2004-06-17 21:06:33 UTC (rev 1128)
@@ -0,0 +1,48 @@
+/* -*- mode: c; c-basic-offset: 8; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
+ * bitmap.h
+ *
+ * Function prototypes
+ *
+ * Copyright (C) 2002, 2004 Oracle.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ * 
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ *
+ * Authors: Kurt Hackel, Mark Fasheh, Sunil Mushran, Wim Coekaerts,
+ *	    Manish Singh, Neeraj Goyal, Suchit Kaura
+ */
+
+#ifndef OCFS2_BITMAP_H
+#define OCFS2_BITMAP_H
+
+int ocfs_count_bits(struct super_block *sb, ocfs_alloc_bm *bitmap);
+void ocfs_clear_bits(struct super_block *sb,
+		     ocfs_journal_handle *handle, ocfs_alloc_bm *bitmap,
+		     __u32 start, __u32 num);
+int ocfs_find_clear_bits(ocfs_super *osb, ocfs_alloc_bm *bitmap,
+			 __u32 numBits, __u32 offset, __u32 sysonly);
+void ocfs_initialize_bitmap(struct super_block *sb,
+			    ocfs_alloc_bm *bitmap, __u32 validbits,
+			    __u32 allocbits);
+void ocfs_reinitialize_bitmap(struct super_block *sb,
+			      ocfs_alloc_bm *bitmap, __u32 validbits,
+			      __u32 allocbits);
+void ocfs_set_bits(struct super_block *sb, ocfs_journal_handle *handle,
+		   ocfs_alloc_bm *bitmap, __u32 start, __u32 num);
+void ocfs_uninitialize_bitmap(ocfs_alloc_bm *bitmap);
+
+#endif /* OCFS2_BITMAP_H */

Added: trunk/src/buffer_head_io.c
===================================================================
--- trunk/src/buffer_head_io.c	2004-06-17 16:59:59 UTC (rev 1127)
+++ trunk/src/buffer_head_io.c	2004-06-17 21:06:33 UTC (rev 1128)
@@ -0,0 +1,328 @@
+/* -*- mode: c; c-basic-offset: 8; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
+ * buffer_head_io.c
+ *
+ * Buffer cache handling
+ *
+ * Copyright (C) 2002, 2004 Oracle.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ * 
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ *
+ * Authors: Kurt Hackel, Mark Fasheh, Sunil Mushran, Wim Coekaerts,
+ *	    Manish Singh, Neeraj Goyal, Suchit Kaura
+ */
+
+#include "ocfs_compat.h"
+
+#include <linux/fs.h>
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/highmem.h>
+
+#include "ocfs_log.h"
+#include "ocfs.h"
+
+#include "alloc.h"
+#include "util.h"
+
+#include "ocfs_journal.h"
+#include "ocfs_buffer_head.h"
+
+#define OCFS_DEBUG_CONTEXT    OCFS_DEBUG_CONTEXT_IO
+
+/*
+ * ocfs_end_buffer_io_sync()
+ *
+ * b_end_io handler installed by ocfs_write_bhs() and ocfs_read_bhs().
+ * Records the outcome of the I/O in the buffer's uptodate flag, logging
+ * -EIO on failure, and then unlocks the buffer.
+ */
+static void ocfs_end_buffer_io_sync(struct buffer_head *bh, int uptodate)
+{
+	if (uptodate) {
+		set_buffer_uptodate(bh);
+	} else {
+		LOG_ERROR_STATUS(-EIO);
+		clear_buffer_uptodate(bh);
+	}
+
+	unlock_buffer(bh);
+}
+
+
+/*
+ * ocfs_write_bhs()
+ *
+ * Synchronously write the nr buffers in bhs[] to disk.  Buffers for which
+ * buffer_jbd() is true are skipped unless OCFS_BH_IGNORE_JBD is set.
+ * After the wait each buffer is tagged with SET_BH_SEQNUM(inode, bh), or
+ * with CLEAR_BH_SEQNUM(bh) when inode is NULL.
+ *
+ * Returns 0 on success, -EINVAL for bad arguments or a cached-write
+ * request, -EIO for a NULL buffer or a check_block_zero_write() failure.
+ */
+int ocfs_write_bhs (ocfs_super * osb, struct buffer_head *bhs[], 
+				  int nr, int flags, struct inode *inode)
+{
+	int status = 0;
+	int i;
+	struct super_block *sb;
+	struct buffer_head *bh;
+
+#ifdef OCFS_DBG_TIMING
+	my_timing_t begin, end; 
+#endif
+	/* bhs is only validated below, so guard the bhs[0] dereference. */
+	LOG_ENTRY_ARGS("(bh[0]->b_blocknr = %llu, nr=%d, flags=%u, inode=%p)\n", 
+		       (bhs && nr > 0 && bhs[0]) ?
+		       (unsigned long long)bhs[0]->b_blocknr : 0ULL,
+		       nr, flags, inode);
+#ifdef OCFS_DBG_TIMING
+	rdtsc (begin.lohi[0], begin.lohi[1]);
+#endif
+
+	if (osb == NULL || osb->sb == NULL || bhs == NULL) {
+		LOG_TRACE_STR("osb == NULL || osb->sb == NULL || bhs == "
+		       "NULL");
+		status = -EINVAL;
+		LOG_ERROR_STATUS(status);
+		goto bail;
+	}
+
+	if (nr > 256)
+		LOG_TRACE_ARGS ("Getting write for %d blocks\n", nr);
+
+	sb = osb->sb;
+
+	/* we don't ever want cached writes -- those should go to the
+	 * journal so we can control when they actually hit disk and
+	 * so we can make sure they never get overwritten by a
+	 * subsequent read. */
+	if ((flags & OCFS_BH_CACHED) || (flags & OCFS_BH_COND_CACHED)) {
+		LOG_TRACE_STR("asking for a cached write!");
+		status = -EINVAL;
+		LOG_ERROR_STATUS(status);
+		goto bail;
+	}
+
+	for (i = 0 ; i < nr ; i++) {
+		bh = bhs[i];
+		if (bh == NULL) {
+			LOG_TRACE_STR("bh == NULL");
+			status = -EIO;
+			LOG_ERROR_STATUS(status);
+			goto bail;
+		}
+
+		if (check_block_zero_write(bh) < 0) {
+			status = -EIO;	
+			LOG_ERROR_STATUS(status);
+			goto bail;
+		}
+
+		if (!(flags & OCFS_BH_IGNORE_JBD) && buffer_jbd(bh)) {
+#ifdef VERBOSE_BH_JBD_TRACE
+			LOG_TRACE_ARGS("trying to write a jbd managed bh "
+				       "(blocknr = %llu), nr=%d\n", 
+				       (unsigned long long)bh->b_blocknr, nr);
+#endif
+			continue;
+		}
+
+		lock_buffer(bh);
+		set_buffer_uptodate(bh);
+		/* Remove from the dirty list before I/O.  2.6.x kernels lack
+		 * mark_buffer_clean(); use clear_buffer_dirty() like reiserfs. */
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)		
+		clear_buffer_dirty(bh);
+#else
+		mark_buffer_clean(bh);
+#endif
+		bh->b_end_io = ocfs_end_buffer_io_sync;
+		submit_bh(WRITE, bh);
+	}
+
+	for (i = (nr-1) ; i >= 0; i--) {
+		bh = bhs[i];
+		wait_on_buffer(bh);
+
+		if (inode)
+			SET_BH_SEQNUM(inode, bh);
+		else
+			CLEAR_BH_SEQNUM(bh);
+	}
+
+bail:
+#ifdef OCFS_DBG_TIMING
+	IO_FUNC_TIMING_PRINT("ocfs_write_bhs", status);
+#endif
+
+	LOG_EXIT_STATUS(status);
+	return status;
+}
+
+/*
+ * ocfs_read_bhs()
+ *
+ * Read len bytes (which must be a multiple of the block size) starting at
+ * byte offset off into bhs[], calling sb_getblk() for every NULL slot.
+ * A buffer is read from disk unless OCFS_BH_CACHED is in effect and the
+ * buffer is uptodate and passes TEST_BH_SEQNUM() (checked only when inode
+ * is set); buffer_jbd() and dirty buffers are never read from disk here.
+ *
+ * Returns 0, -EINVAL for bad arguments, or -EIO if sb_getblk() fails.
+ */
+int ocfs_read_bhs(ocfs_super *osb, __u64 off, __u64 len, 
+		  struct buffer_head *bhs[], int flags, 
+	 	  struct inode *inode)
+{
+	int status = 0;
+	struct super_block *sb;
+	int nr, i, ignore_cache = 0;
+	__u64 blocknum;
+	struct buffer_head *bh;
+
+#ifdef OCFS_DBG_TIMING
+	my_timing_t begin, end; 
+#endif
+	LOG_ENTRY_ARGS("(off=(%llu), len=(%llu), flags=%d, inode=%p)\n", off, 
+		       len, flags, inode);
+#ifdef OCFS_DBG_TIMING
+	rdtsc (begin.lohi[0], begin.lohi[1]);
+#endif
+
+	/* Validate the pointers before osb->sb is dereferenced below. */
+	if (osb == NULL || osb->sb == NULL || bhs == NULL) {
+		LOG_TRACE_STR("osb == NULL || osb->sb == NULL || bhs == NULL "
+			      "|| num == NULL");
+		status = -EINVAL;
+		LOG_ERROR_STATUS(status);
+		goto bail;
+	}
+
+	if (len & (u64)(osb->sb->s_blocksize - 1)) {
+		LOG_TRACE_ARGS("len %% blocksize (len=%llu)\n", len);
+		status = -EINVAL;
+		LOG_ERROR_STATUS(status);
+		goto bail;
+	}
+
+	if ((flags & OCFS_BH_COND_CACHED) && !IS_SYSTEM_FILE_INODE(inode))
+		flags |= OCFS_BH_CACHED;
+
+	if ((flags & OCFS_BH_CACHED) && IS_SYSTEM_FILE_INODE(inode)) {
+		LOG_TRACE_STR("hey bozo you are trying to write "
+			      "a system thingy cached!");
+		flags &= ~OCFS_BH_CACHED;
+	}
+
+	sb = osb->sb;
+	blocknum = off >> sb->s_blocksize_bits;
+	nr = (len + sb->s_blocksize - 1) >> sb->s_blocksize_bits;
+	if (nr == 0) {
+		LOG_TRACE_STR("No buffers will be read!!!");
+		LOG_TRACE_ARGS("Len=%llu Off=%llu numbuffers=%u "
+			       "blocknum=%llu\n", len, off, 
+			       nr, blocknum);
+		goto bail;
+	}
+
+	for (i = 0 ; i < nr ; i++) {
+		if (bhs[i] == NULL) {
+			bhs[i] = sb_getblk(sb, blocknum++);
+			if (bhs[i] == NULL) {
+				LOG_TRACE_STR("bh == NULL");
+				status = -EIO;
+				LOG_ERROR_STATUS(status);
+				goto bail;
+			}
+		}
+		bh = bhs[i];
+		ignore_cache = 0;
+
+		/* Lock everyone else out of this bh */
+		OCFS_BH_GET_DATA_READ(bh);
+
+		if (flags & OCFS_BH_CACHED && inode && 
+		    !TEST_BH_SEQNUM(inode, bh)) {
+#ifdef VERBOSE_BH_SEQNUM_TRACE
+			LOG_TRACE_ARGS("(read) bh (%llu) seqnum (%lu) does not "
+				       "match inode (%u)\n",
+				       (unsigned long long)bh->b_blocknr, 
+				       (bh->b_state & STATE_BIT_MASK) >> 19,
+				       atomic_read(GET_INODE_CLEAN_SEQ(inode)));
+#endif
+			ignore_cache = 1;
+		}
+		if ((flags & OCFS_BH_CACHED) && (!buffer_uptodate(bh)))
+			ignore_cache = 1;
+
+		if (buffer_jbd(bh)) {
+#ifdef VERBOSE_BH_JBD_TRACE
+			if (!(flags & OCFS_BH_CACHED) || ignore_cache)
+				LOG_TRACE_ARGS("trying to sync read a jbd "
+					       "managed bh (blocknr = %llu)\n",
+					       (unsigned long long)bh->b_blocknr);
+#endif
+			continue;
+		}
+
+		if (!(flags & OCFS_BH_CACHED) || ignore_cache) {
+			if (buffer_dirty(bh)) {
+				/* This should probably be a BUG, or
+				 * at least return an error. */
+				LOG_TRACE_ARGS("asking me to sync read a "
+					      "dirty buffer! (blocknr = %llu)\n",
+					      (unsigned long long)bh->b_blocknr);
+				continue;
+			}
+
+			lock_buffer(bh);
+			clear_buffer_uptodate(bh);
+			bh->b_end_io = ocfs_end_buffer_io_sync;
+			if (flags & OCFS_BH_READAHEAD)
+				submit_bh(READA, bh);
+			else
+				submit_bh(READ, bh);
+		}
+	}
+
+	for (i = (nr-1); i >= 0; i--) {
+		bh = bhs[i];
+		wait_on_buffer(bh);
+
+		if (inode)
+			SET_BH_SEQNUM(inode, bh);
+		else
+			CLEAR_BH_SEQNUM(bh);
+
+		OCFS_BH_PUT_DATA(bh);
+	}
+	LOG_TRACE_ARGS("off=(%llu), len=(%llu), cached=%s\n", off, len, 
+		       (!(flags & OCFS_BH_CACHED) || ignore_cache) ? "no" : "yes");
+
+bail:
+#ifdef OCFS_DBG_TIMING
+	IO_FUNC_TIMING_PRINT("ocfs_read_bhs", status);
+#endif
+
+	LOG_EXIT_STATUS(status);
+	return status;
+}
+
+
+

Modified: trunk/src/dcache.c
===================================================================
--- trunk/src/dcache.c	2004-06-17 16:59:59 UTC (rev 1127)
+++ trunk/src/dcache.c	2004-06-17 21:06:33 UTC (rev 1128)
@@ -1,4 +1,6 @@
-/*
+/* -*- mode: c; c-basic-offset: 8; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
  * dcache.c
  *
  * dentry cache handling code
@@ -24,32 +26,29 @@
  *	    Manish Singh, Neeraj Goyal, Suchit Kaura
  */
 
+#include "ocfs_compat.h"
+
+#include <linux/fs.h>
 #include <linux/types.h>
+#include <linux/slab.h>
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
+#include <linux/namei.h>
+#endif
 
-#include "inc/ocfs_log.h"
-#include "inc/ocfs.h"
+#include "ocfs_log.h"
+#include "ocfs.h"
 
+#include "dcache.h"
+#include "vote.h"
+
 #define OCFS_DEBUG_CONTEXT    OCFS_DEBUG_CONTEXT_DCACHE
 
 
-static int ocfs_empty_func(struct dentry *dentry, void *ignore);
-
-/*
- * ocfs_dentry_revalidate()
- *
- */
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
-int ocfs_dentry_revalidate (struct dentry *dentry, struct nameidata *nd)
-#else
-int ocfs_dentry_revalidate (struct dentry *dentry, int flags)
-#endif
+static int ocfs_dentry_revalidate24(struct dentry *dentry, int flags)
 {
-	int ret = 0;    /* if all else fails, just return false */
 	struct inode *inode = dentry->d_inode;
+	int ret = 0;    /* if all else fails, just return false */
 	ocfs_super *osb;
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
-	int flags = nd ? nd->flags : 0;
-#endif
 
 	LOG_ENTRY_ARGS ("(0x%p, %d, '%*s')\n", dentry, flags,
 			dentry->d_name.len, dentry->d_name.name);
@@ -73,7 +72,7 @@
 		goto bail;
 	}
 
-	if (osb->publ_map == (1 << osb->node_num)) {
+	if (ocfs_node_map_is_only(osb, &osb->publ_map, osb->node_num)) {
 		LOG_TRACE_STR ("Only node alive.  revalidate=true.");
 		ret = 1;
 		goto bail;
@@ -94,24 +93,21 @@
 
 	LOG_EXIT_INT (ret);
 	return ret;
-}				/* ocfs_dentry_revalidate */
-
-
-/* exits if it finds any hashed dentries */
-static int ocfs_empty_func(struct dentry *dentry, void *ignore)
-{
-	return 1;
 }
 
-
-/*
- * ocfs_empty()
- *
- */
-int ocfs_empty (struct dentry *dentry)
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
+static int ocfs_dentry_revalidate(struct dentry *dentry, struct nameidata *nd)
 {
-	return ocfs_foreach_child (dentry, ocfs_empty_func, NULL);
+	return ocfs_dentry_revalidate24(dentry, nd ? nd->flags : 0);
 }
+struct dentry_operations ocfs_dentry_ops = {
+	.d_revalidate		= ocfs_dentry_revalidate,
+};
+#else
+struct dentry_operations ocfs_dentry_ops = {
+	.d_revalidate		= ocfs_dentry_revalidate24,
+};
+#endif
 
 /*
  * ocfs_foreach_child()
@@ -145,6 +141,4 @@
 
 	LOG_EXIT_INT (ret);
 	return ret;
-}				/* ocfs_foreach_child */
-
-
+}

Added: trunk/src/dcache.h
===================================================================
--- trunk/src/dcache.h	2004-06-17 16:59:59 UTC (rev 1127)
+++ trunk/src/dcache.h	2004-06-17 21:06:33 UTC (rev 1128)
@@ -0,0 +1,38 @@
+/* -*- mode: c; c-basic-offset: 8; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
+ * dcache.h
+ *
+ * Function prototypes
+ *
+ * Copyright (C) 2002, 2004 Oracle.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ * 
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ *
+ * Authors: Kurt Hackel, Mark Fasheh, Sunil Mushran, Wim Coekaerts,
+ *	    Manish Singh, Neeraj Goyal, Suchit Kaura
+ */
+
+#ifndef OCFS2_DCACHE_H
+#define OCFS2_DCACHE_H
+
+extern struct dentry_operations ocfs_dentry_ops;
+
+int ocfs_foreach_child(struct dentry *dentry,
+		       int (*func)(struct dentry *, void *),
+		       void *data);
+
+#endif /* OCFS2_DCACHE_H */

Modified: trunk/src/dir.c
===================================================================
--- trunk/src/dir.c	2004-06-17 16:59:59 UTC (rev 1127)
+++ trunk/src/dir.c	2004-06-17 21:06:33 UTC (rev 1128)
@@ -1,10 +1,25 @@
-/*
+/* -*- mode: c; c-basic-offset: 8; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
  * dir.c
  *
  * Creates, reads, walks and deletes directory-nodes
  *
  * Copyright (C) 2002, 2004 Oracle.  All rights reserved.
  *
+ *  Portions of this code from linux/fs/ext3/dir.c
+ *  
+ *  Copyright (C) 1992, 1993, 1994, 1995
+ *  Remy Card (card at masi.ibp.fr)
+ *  Laboratoire MASI - Institut Blaise Pascal
+ *  Universite Pierre et Marie Curie (Paris VI)
+ *  
+ *   from
+ *
+ *   linux/fs/minix/dir.c
+ *
+ *   Copyright (C) 1991, 1992 Linus Torvalds
+ *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public
  * License as published by the Free Software Foundation; either
@@ -24,11 +39,24 @@
  *	    Manish Singh, Neeraj Goyal, Suchit Kaura
  */
 
+#include "ocfs_compat.h"
+
+#include <linux/fs.h>
 #include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/highmem.h>
 
-#include "inc/ocfs_log.h"
-#include "inc/ocfs.h"
+#include "ocfs_log.h"
+#include "ocfs.h"
 
+#include "dir.h"
+#include "dlm.h"
+#include "inode.h"
+#include "namei.h"
+#include "util.h"
+
+#include "ocfs_buffer_head.h"
+
 #define OCFS_DEBUG_CONTEXT    OCFS_DEBUG_CONTEXT_DIR
 
 static unsigned char ocfs_filetype_table[] = {
@@ -58,7 +86,7 @@
 	offset = filp->f_pos & (sb->s_blocksize - 1);
 
 	while (!error && !stored && filp->f_pos < inode->i_size) {
-		blk = (filp->f_pos) >> OCFS_SB(sb)->sect_size_bits;
+		blk = (filp->f_pos) >> sb->s_blocksize_bits;
 		bh = ocfs_bread (0, inode, blk, 0, &err, 0);
 		if (!bh) {
 			LOG_ERROR_ARGS ("directory #%llu contains a hole at offset %lu\n",
@@ -71,7 +99,7 @@
 		 * Do the readahead (8k)
 		 */
 		if (!offset) {
-			for (i = 16 >> (OCFS_SB(sb)->sect_size_bits - 9), num = 0;
+			for (i = 16 >> (sb->s_blocksize_bits - 9), num = 0;
 			     i > 0; i--) {
 				tmp = ocfs_bread (NULL, inode, ++blk, 0, &err, 1);
 				brelse (tmp);
@@ -100,7 +128,7 @@
 				 * failure will be detected in the
 				 * dirent test below. */
 				if (le16_to_cpu(de->rec_len) <
-				    OCFS_DIR_REC_LEN(1))
+				    OCFS2_DIR_REC_LEN(1))
 					break;
 				i += le16_to_cpu(de->rec_len);
 			}
@@ -137,7 +165,7 @@
 				unsigned long version = filp->f_version;
 				unsigned char d_type = DT_UNKNOWN;
 				
-				if (de->file_type < OCFS_FT_MAX)
+				if (de->file_type < OCFS2_FT_MAX)
 					d_type = ocfs_filetype_table[de->file_type];
 				error = filldir(dirent, de->name,
 						de->name_len,
@@ -170,7 +198,12 @@
 /* ocfs_find_files_on_disk()
  * NOTE: this should always be called with parent dir ip_io_sem taken!
  */
-int ocfs_find_files_on_disk (ocfs_super * osb, const char *name, int namelen, __u64 *fe_off, struct inode *inode, int take_lock, struct buffer_head **dirent_bh, struct ocfs2_dir_entry **dirent)
+/* parent off changed to file entry offset of parent! */
+int ocfs_find_files_on_disk(ocfs_super *osb, const char *name,
+			    int namelen, __u64 *fe_off,
+			    struct inode *inode, int take_lock,
+			    struct buffer_head **dirent_bh,
+			    struct ocfs2_dir_entry **dirent)
 {
 	int status = -ENOENT;
 	int tmpstat;
@@ -241,14 +274,15 @@
 	int err;
 
 	sb = inode->i_sb;
-	if (inode->i_size < OCFS_DIR_REC_LEN(1) + OCFS_DIR_REC_LEN(2) ||
+	if ((inode->i_size <
+	     (OCFS2_DIR_REC_LEN(1) + OCFS2_DIR_REC_LEN(2))) ||
 	    !(bh = ocfs_bread (NULL, inode, 0, 0, &err, 0))) {
 	    	LOG_ERROR_ARGS ("bad directory (dir #%llu) - no data block\n", 
 				GET_INODE_FEOFF(inode));
 		return 1;
 	}
 
-	de = OCFS_BH_GET_DATA_READ(bh);
+	de = (struct ocfs2_dir_entry *) OCFS_BH_GET_DATA_READ(bh);
 	de1 = (struct ocfs2_dir_entry *)
 			((char *) de + le16_to_cpu(de->rec_len));
 	if (le64_to_cpu(de->inode) != GET_INODE_FEOFF(inode) || 
@@ -264,10 +298,11 @@
 	offset = le16_to_cpu(de->rec_len) + le16_to_cpu(de1->rec_len);
 	de = (struct ocfs2_dir_entry *) ((char *) de1 + le16_to_cpu(de1->rec_len));
 	while (offset < inode->i_size ) {
-		if (!bh || (void *) de >= (void *) (bh->b_data+sb->s_blocksize)) {
+		if (!bh || (void *) de >= (void *) (bh->b_data + sb->s_blocksize)) {
 			OCFS_BH_PUT_DATA(bh);
 			brelse (bh);
-			bh = ocfs_bread (NULL, inode, offset >> sb->s_blocksize_bits, 0, &err, 0);
+			bh = ocfs_bread(NULL, inode,
+				      	offset >> sb->s_blocksize_bits, 0, &err, 0);
 			if (!bh) {
 				LOG_ERROR_ARGS ("directory #%llu contains a hole at offset %lu\n",
 					GET_INODE_FEOFF(inode), offset);

Added: trunk/src/dir.h
===================================================================
--- trunk/src/dir.h	2004-06-17 16:59:59 UTC (rev 1127)
+++ trunk/src/dir.h	2004-06-17 21:06:33 UTC (rev 1128)
@@ -0,0 +1,40 @@
+/* -*- mode: c; c-basic-offset: 8; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
+ * dir.h
+ *
+ * Function prototypes
+ *
+ * Copyright (C) 2002, 2004 Oracle.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ * 
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ *
+ * Authors: Kurt Hackel, Mark Fasheh, Sunil Mushran, Wim Coekaerts,
+ *	    Manish Singh, Neeraj Goyal, Suchit Kaura
+ */
+
+#ifndef OCFS2_DIR_H
+#define OCFS2_DIR_H
+
+int empty_dir(struct inode *inode);  /* FIXME: to namei.c */
+int ocfs_find_files_on_disk(ocfs_super *osb, const char *name,
+			    int namelen, __u64 *fe_off,
+			    struct inode *inode, int take_lock,
+			    struct buffer_head **dirent_bh,
+			    struct ocfs2_dir_entry **dirent);
+int ocfs_readdir(struct file *filp, void *dirent, filldir_t filldir);
+
+#endif /* OCFS2_DIR_H */

Modified: trunk/src/dlm.c
===================================================================
--- trunk/src/dlm.c	2004-06-17 16:59:59 UTC (rev 1127)
+++ trunk/src/dlm.c	2004-06-17 21:06:33 UTC (rev 1128)
@@ -1,4 +1,6 @@
-/*
+/* -*- mode: c; c-basic-offset: 8; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
  * dlm.c
  *
  * Distributed lock manager. Requests and processes lock votes.
@@ -24,12 +26,28 @@
  *	    Manish Singh, Neeraj Goyal, Suchit Kaura
  */
 
+#include "ocfs_compat.h"
+
+#include <linux/fs.h>
 #include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/highmem.h>
+#include <linux/random.h>
 
-#include "inc/ocfs_log.h"
-#include "inc/ocfs.h"
-#include "inc/ocfs_journal.h"
+#include "ocfs_log.h"
+#include "ocfs.h"
 
+#include "alloc.h"
+#include "dcache.h"
+#include "dlm.h"
+#include "lockres.h"
+#include "nm.h"
+#include "util.h"
+#include "vote.h"
+
+#include "ocfs_journal.h"
+#include "ocfs_buffer_head.h"
+
 extern __u32 comm_voting;
 
 #define WAIT_FOR_VOTE_INCREMENT  200
@@ -41,16 +59,46 @@
 
 static inline int ocfs_wait_for_readonly_drop(ocfs_super *osb, struct inode *inode);
 
-static int ocfs_disk_request_vote (ocfs_super * osb, __u64 lock_id, __u32 lock_type, __u32 flags, __u64 * vote_map, __u64 * lock_seq_num, __u32 time_to_wait, struct inode *inode, __u64 *open_map);
-static int ocfs_wait_for_vote (ocfs_super * osb, __u64 lock_id, __u32 lock_type, __u32 flags, __u64 *vote_map, __u32 time_to_wait, __u64 lock_seq_num, __u64 *open_map);
+static int ocfs_disk_request_vote (ocfs_super * osb, __u64 lock_id, __u32 lock_type, __u32 flags, ocfs_node_map * vote_map, __u64 * lock_seq_num, __u32 time_to_wait, struct inode *inode, ocfs_node_map *open_map);
+static int ocfs_wait_for_vote (ocfs_super * osb, __u64 lock_id, __u32 lock_type, __u32 flags, ocfs_node_map *vote_map, __u32 time_to_wait, __u64 lock_seq_num, ocfs_node_map *open_map);
 static int ocfs_reset_voting (ocfs_super * osb);
-static int ocfs_get_vote_on_disk (ocfs_super * osb, __u64 lock_id, __u32 lock_type, __u32 flags, __u64 * got_vote_map, __u64 vote_map, __u64 lock_seq_num, __u64 * oin_open_map);
+static int ocfs_get_vote_on_disk (ocfs_super * osb, __u64 lock_id, __u32 lock_type, __u32 flags, ocfs_node_map * got_vote_map, ocfs_node_map * vote_map, __u64 lock_seq_num, ocfs_node_map * oin_open_map);
 int ocfs_disk_release_lock (ocfs_super * osb, __u64 lock_id, __u32 lock_type, __u32 flags, struct buffer_head *bh, struct inode *inode);
 static int ocfs_zap_child_buffers_func(struct dentry *dentry, void *data);
 
 
+#warning change these later
+void ocfs_set_publish_vote_map(ocfs_super *osb, ocfs_publish *publish, ocfs_node_map *vote_map)
+{
+	// publish->vote_map; // this can only hold 64 bit
+	// publish->reserved1;  // this can hold __u8[32] = 256, our current max ;-)
+	ocfs_node_map_set_to_disk(publish->reserved1, vote_map);
+}
+
+void ocfs_get_publish_vote_map(ocfs_super *osb, ocfs_publish *publish, ocfs_node_map *vote_map)
+{
+	ocfs_node_map_init(osb, vote_map);
+	ocfs_node_map_set_from_disk(vote_map, publish->reserved1);
+}
+
+void ocfs_set_disk_lock_open_map(ocfs_super *osb,
+				 ocfs2_disk_lock *lock,
+				 ocfs_node_map *open_map)
+{
+	ocfs_node_map_set_to_disk(lock->dl_node_map, open_map);
+}
+
+void ocfs_get_disk_lock_open_map(ocfs_super *osb,
+				 ocfs2_disk_lock *lock,
+				 ocfs_node_map *open_map)
+{
+	ocfs_node_map_init(osb, open_map);
+	ocfs_node_map_set_from_disk(open_map, lock->dl_node_map);
+}
+	
+
 /* inode is definitely non NULL */
-static int ocfs_disk_request_vote (ocfs_super * osb, __u64 lock_id, __u32 lock_type, __u32 flags, __u64 * vote_map, __u64 * lock_seq_num, __u32 time_to_wait, struct inode *inode, __u64 *open_map)
+static int ocfs_disk_request_vote (ocfs_super * osb, __u64 lock_id, __u32 lock_type, __u32 flags, ocfs_node_map * vote_map, __u64 * lock_seq_num, __u32 time_to_wait, struct inode *inode, ocfs_node_map *open_map)
 {
 	int status = 0;
 	__u64 offset = 0;
@@ -60,21 +108,18 @@
 	__u32 i;
 	ocfs_publish *pubsect = NULL;
 	__u64 largestseqno = 0;
-	__u64 pubmap = 0;
 	__u32 wait;
 	int publish_flag = 0;
 	struct buffer_head **bhs = NULL;
 
-	LOG_ENTRY_ARGS ("(osb=0x%p, id=%llu, ty=%u, fl=%u, vm=0x%08x)\n",
-			osb, lock_id, lock_type, flags,
-			(__u32)(*vote_map & 0xFFFFFFFFULL));
+	LOG_ENTRY_ARGS ("(osb=0x%p, id=%llu, ty=%u, fl=%u)\n",
+			osb, lock_id, lock_type, flags);
 
-	*vote_map &= ~(1 << osb->node_num);
+	ocfs_node_map_clear_bit(vote_map, osb->node_num);
 
-	pubmap = osb->publ_map;
-	offset = osb->vol_layout.publ_sect_off;
-	numnodes = OCFS_MAXIMUM_NODES;
-	size = (numnodes * osb->sect_size);
+	offset = osb->publish_blkno << osb->sb->s_blocksize_bits;
+	numnodes = osb->max_nodes;
+	size = (numnodes << osb->sb->s_blocksize_bits);
 
 	/* take lock to prevent overwrites by vote_reset and nm thread */
 	down_with_flag (&(osb->publish_lock), publish_flag);
@@ -101,8 +146,8 @@
 		largestseqno = pubsect->publ_seq_num;
 		if (pubsect->dirty) {
 			OCFS_BH_PUT_DATA(bhs[i]);
-			if (!IS_NODE_ALIVE (pubmap, i, numnodes) || 
-			    TEST_NODE_IN_RECOVERY(osb, i)) {
+			if (!ocfs_node_is_alive(&osb->publ_map, i) || 
+			    ocfs_node_is_recovering(osb, i)) {
 				LOG_TRACE_STR("Node is in recovery, trying"
 					      " again.");
 			} else {
@@ -130,16 +175,16 @@
 	pubsect->publ_seq_num = largestseqno;
 	pubsect->dirty = 1;
 	pubsect->vote = FLAG_VOTE_NODE;
-	pubsect->vote_map = *vote_map;
+	ocfs_set_publish_vote_map(osb, pubsect, vote_map);
 	pubsect->vote_type = flags;
-	pubsect->dir_ent = lock_id;
+	pubsect->lock_id = lock_id;
 
 	OCFS_BH_PUT_DATA(bhs[osb->node_num]);
 #ifdef VERBOSE_LOCKING_TRACE
 	printk("ocfs_disk_request_vote: lockid = %llu\n",lock_id);
 #endif
 
-	pub_off = osb->vol_layout.publ_sect_off + (osb->node_num * osb->sect_size);
+	pub_off = (osb->publish_blkno + osb->node_num) << osb->sb->s_blocksize_bits;
 
 	status = ocfs_write_bh (osb, bhs[osb->node_num], 0, NULL);
 	if (status < 0) {
@@ -183,11 +228,11 @@
 	int status = -ETIMEDOUT;
 	int tmpstat = -ETIMEDOUT;
 	__u32 timewaited = 0;
-	ocfs_file_entry *fe = NULL;
+	ocfs2_dinode *fe = NULL;
 	struct buffer_head *bh = NULL;
 	__u32 curr_master;
 	__u8 lock_level;
-	int is_dir = 0, disk_vote = 0;
+	int disk_vote = 0;
 	ocfs_lock_res *lockres = GET_INODE_LOCKRES(inode);
 
 	LOG_ENTRY_ARGS ("(0x%p, %llu, %u, 0x%p, %u)\n",
@@ -204,9 +249,8 @@
 			goto finally;
 		}
 		fe = OCFS_BH_GET_DATA_READ(bh);
-		curr_master = DISK_LOCK_CURRENT_MASTER (fe);
-		lock_level = DISK_LOCK_FILE_LOCK (fe);
-		is_dir = (fe->attribs & OCFS_ATTRIB_DIRECTORY);
+		curr_master = DISK_LOCK(fe)->dl_master;
+		lock_level = DISK_LOCK(fe)->dl_level;
 		OCFS_BH_PUT_DATA(bh);
 
 		if ((curr_master == OCFS_INVALID_NODE_NUM) ||
@@ -214,13 +258,13 @@
 			goto got_it;
 		}
 		
-		if ((!IS_NODE_ALIVE (osb->publ_map, curr_master, OCFS_MAXIMUM_NODES)) && 
-		    (!TEST_NODE_IN_RECOVERY(osb, curr_master))) {
+		if ((!ocfs_node_is_alive(&osb->publ_map, curr_master)) && 
+		    (!ocfs_node_is_recovering(osb, curr_master))) {
 			/* Reset the lock as not owned and return success?? */
 			/* This needs to be under some sort of cluster wide lock, */
 			fe = OCFS_BH_GET_DATA_WRITE(bh);
-			DISK_LOCK_CURRENT_MASTER (fe) = OCFS_INVALID_NODE_NUM;
-			DISK_LOCK_FILE_LOCK (fe) = OCFS_DLM_NO_LOCK;
+			DISK_LOCK(fe)->dl_master = OCFS_INVALID_NODE_NUM;
+			DISK_LOCK(fe)->dl_level = OCFS_DLM_NO_LOCK;
 			OCFS_BH_PUT_DATA(bh);
 			tmpstat = ocfs_write_bh (osb, bh, 0, NULL);
 			if (tmpstat < 0) {
@@ -235,7 +279,7 @@
 			lockres->lock_type = lock_level;
 			lockres->master_node_num = curr_master;
 				
-			if (is_dir) {
+			if (S_ISDIR(fe->i_mode)) {
 				if (lockres->readonly_node != OCFS_INVALID_NODE_NUM) {
 					if (lockres->readonly_node == curr_master) {
 						// readonly cachelock already on this dir
@@ -298,7 +342,7 @@
 				}
 				LOG_TRACE_ARGS("broke cache lock, setting to NO_LOCK\n");
 				fe = OCFS_BH_GET_DATA_WRITE(bh);
-				DISK_LOCK_FILE_LOCK (fe) = OCFS_DLM_NO_LOCK;
+				DISK_LOCK(fe)->dl_level = OCFS_DLM_NO_LOCK;
 				lock_level = OCFS_DLM_NO_LOCK;
 				OCFS_BH_PUT_DATA(bh);
 				tmpstat = ocfs_write_bh (osb, bh, 0, inode);
@@ -341,7 +385,7 @@
  * ocfs_get_vote_on_disk()
  *
  */
-static int ocfs_get_vote_on_disk (ocfs_super * osb, __u64 lock_id, __u32 lock_type, __u32 flags, __u64 * got_vote_map, __u64 vote_map, __u64 lock_seq_num, __u64 * oin_open_map)
+static int ocfs_get_vote_on_disk (ocfs_super * osb, __u64 lock_id, __u32 lock_type, __u32 flags, ocfs_node_map * got_vote_map, ocfs_node_map * vote_map, __u64 lock_seq_num, ocfs_node_map * oin_open_map)
 {
 	int status = 0;
 	__u32 length = 0;
@@ -350,29 +394,28 @@
 	ocfs_vote *vote;
 	struct buffer_head **bhs = NULL;
 
-	LOG_ENTRY_ARGS ("(lockid=%llu, locktype=%u, votemap=0x%08x)\n",
-			lock_id, lock_type,
-			(__u32)(vote_map & 0xFFFFFFFFULL));
+	LOG_ENTRY_ARGS ("(lockid=%llu, locktype=%u)\n", lock_id, lock_type);
 
-	numnodes = OCFS_MAXIMUM_NODES;
+	numnodes = osb->max_nodes;
 
 	/* Read the vote sectors of all the nodes */
-	length = numnodes * osb->sect_size;
+	length = numnodes << osb->sb->s_blocksize_bits;
 	bhs = ocfs_malloc(numnodes * sizeof(struct buffer_head *));
 	if (bhs == NULL) {
 		LOG_ERROR_STATUS (status = -ENOMEM);
 		goto finally;
 	}
 	memset(bhs, 0, numnodes * sizeof(struct buffer_head *));
-	status = ocfs_read_bhs (osb, osb->vol_layout.vote_sect_off, length, bhs, 0, NULL);
+	status = ocfs_read_bhs (osb, osb->vote_blkno << osb->sb->s_blocksize_bits, 
+				length, bhs, 0, NULL);
 	if (status < 0) {
 		LOG_ERROR_STATUS (status);
 		goto finally;
 	}
 
 	for (i = 0; i < numnodes; i++) {
-		int node_alive = (IS_NODE_ALIVE (osb->publ_map, i, numnodes));
-		int node_in_map = (IS_NODE_ALIVE (vote_map, i, numnodes));
+		int node_alive = ocfs_node_is_alive(&osb->publ_map, i);
+		int node_in_map = ocfs_node_is_alive(vote_map, i);
 
 		status = 0;
 		vote = OCFS_BH_GET_DATA_READ(bhs[i]);
@@ -381,12 +424,12 @@
 		if (!node_alive) {
 			if (node_in_map) {
 				if (flags & FLAG_FILE_UPDATE_OIN)
-					(*got_vote_map) |= 1 << i;
+					ocfs_node_map_set_bit(got_vote_map, i);
 				else
 					status = -EAGAIN;
 			}
 		} else if (node_in_map && vote->vote_seq_num == lock_seq_num &&
-			   vote->dir_ent == lock_id) {
+			   vote->lock_id == lock_id) {
 			ocfs_vote_reply_ctxt ctxt;
 			ctxt.reply_method = DISK_VOTE;
 			ctxt.got_vote_map = got_vote_map;
@@ -394,6 +437,9 @@
 			ctxt.status = &status;
 			ctxt.flags = flags;
 			ctxt.u.vote = vote;
+			if (vote->node != osb->node_num)
+				LOG_ERROR_ARGS("vote->node(%u) != this(%u)\n",
+					       vote->node, osb->node_num);
 			ocfs_process_one_vote_reply(osb, &ctxt, i);
 		}
 
@@ -417,18 +463,19 @@
  * ocfs_wait_for_vote()
  *
  */
-static int ocfs_wait_for_vote (ocfs_super * osb, __u64 lock_id, __u32 lock_type, __u32 flags, __u64 *vote_map, __u32 time_to_wait, __u64 lock_seq_num, __u64 *open_map)
+static int ocfs_wait_for_vote (ocfs_super * osb, __u64 lock_id, __u32 lock_type, __u32 flags, ocfs_node_map *vote_map, __u32 time_to_wait, __u64 lock_seq_num, ocfs_node_map *open_map)
 {
 	int status = -EAGAIN;
 	__u32 timewaited = 0;
-	__u64 gotvotemap = 0;
+	ocfs_node_map gotvotemap;
 
-	LOG_ENTRY_ARGS ("(osb=0x%p, id=%llu, ty=%u, fl=%u, vm=0x%08x, "
-		"sq:%llu)\n", osb, lock_id, lock_type, flags,
-		(__u32)(*vote_map & 0xFFFFFFFFULL), lock_seq_num);
+	LOG_ENTRY_ARGS ("(osb=0x%p, id=%llu, ty=%u, fl=%u, sq:%llu)\n", 
+			osb, lock_id, lock_type, flags, lock_seq_num);
 
-	*vote_map &= ~(1 << osb->node_num);
+	ocfs_node_map_init(osb, &gotvotemap);
 
+	ocfs_node_map_clear_bit(vote_map, osb->node_num);
+
 	while (time_to_wait > timewaited) {
 		ocfs_sleep (WAIT_FOR_VOTE_INCREMENT);
 
@@ -439,15 +486,14 @@
 		}
 
 		status = ocfs_get_vote_on_disk (osb, lock_id, lock_type, flags,
-				&gotvotemap, *vote_map, lock_seq_num, open_map);
+				&gotvotemap, vote_map, lock_seq_num, open_map);
 		if (status < 0) {
 			if (status != -EAGAIN)
 				LOG_ERROR_STATUS (status);
 			goto bail;
 		}
-
-		*vote_map &= ~gotvotemap;
-		if (*vote_map == 0ULL) {
+		ocfs_node_map_clear_bits(vote_map, &gotvotemap);
+		if (ocfs_node_map_is_empty(vote_map)) {
 			status = 0;
 			goto bail;
 		}
@@ -478,6 +524,7 @@
 	ocfs_publish *pubsect = NULL;
 	__u64 offset = 0;
 	struct buffer_head *bh = NULL;
+	ocfs_node_map vote_map;
 
 	LOG_ENTRY ();
 
@@ -485,7 +532,7 @@
 	down (&(osb->publish_lock));
 
 	/* Read node's publish sector */
-	offset = osb->vol_layout.publ_sect_off + (osb->node_num * osb->sect_size);
+	offset = (osb->publish_blkno + osb->node_num) << osb->sb->s_blocksize_bits;
 
 	status = ocfs_read_bh (osb, offset, &bh, 0, NULL);
 	if (status < 0) {
@@ -497,8 +544,10 @@
 	pubsect->dirty = 0;
 	pubsect->vote = 0;
 	pubsect->vote_type = 0;
-	pubsect->vote_map = 0;
-	pubsect->dir_ent = 0;
+	pubsect->lock_id = 0;
+	/* clear vote map */
+	ocfs_node_map_init(osb, &vote_map);
+	ocfs_set_publish_vote_map(osb, pubsect, &vote_map);
 
 	/* Write it back */
 	OCFS_BH_PUT_DATA(bh);
@@ -531,7 +580,7 @@
 {
 	int status = 0;
 	__u64 offset = 0;
-	ocfs_file_entry *fe = NULL;
+	ocfs2_dinode *fe = NULL;
 	int changed = 0;
 	ocfs_lock_res *lockres = GET_INODE_LOCKRES(inode);
 	
@@ -556,46 +605,52 @@
 	fe = OCFS_BH_GET_DATA_READ(*bh);
 
 	if (flags & DLOCK_FLAG_MASTER && 
-	    DISK_LOCK_CURRENT_MASTER (fe) != lockres->master_node_num) {
+	    DISK_LOCK(fe)->dl_master != lockres->master_node_num) {
 		if (!changed) {
 			OCFS_BH_PUT_DATA(*bh);
 			fe = OCFS_BH_GET_DATA_WRITE(*bh);
 		}
 		changed = 1;
-		DISK_LOCK_CURRENT_MASTER (fe) = lockres->master_node_num;
+		DISK_LOCK(fe)->dl_master = lockres->master_node_num;
 	}
 
 	if (flags & DLOCK_FLAG_LOCK &&
-	    DISK_LOCK_FILE_LOCK (fe) != lockres->lock_type) {
+	    DISK_LOCK(fe)->dl_level != lockres->lock_type) {
 		if (!changed) {
 			OCFS_BH_PUT_DATA(*bh);
 			fe = OCFS_BH_GET_DATA_WRITE(*bh);
 		}
 		changed = 1;
-		DISK_LOCK_FILE_LOCK (fe) = lockres->lock_type;
+		DISK_LOCK(fe)->dl_level = lockres->lock_type;
 	}
 
 	if (flags & DLOCK_FLAG_OPEN_MAP) {
-	       if (flags & DLOCK_FLAG_ADD_SELF)
-		       lockres->oin_openmap |= (1ULL << osb->node_num);
-	       if (DISK_LOCK_OIN_MAP (fe) != lockres->oin_openmap) {
+		ocfs_node_map disk_map;
+		
+		ocfs_node_map_init(osb, &disk_map);
+		ocfs_get_disk_lock_open_map(osb, DISK_LOCK(fe), &disk_map);
+		
+		if (flags & DLOCK_FLAG_ADD_SELF)
+			ocfs_node_map_set_bit(&lockres->oin_openmap, osb->node_num);
+
+		if (!ocfs_node_map_is_equal(&disk_map, &lockres->oin_openmap)) {
 			if (!changed) {
 				OCFS_BH_PUT_DATA(*bh);
 				fe = OCFS_BH_GET_DATA_WRITE(*bh);
 			}
 			changed = 1;
-			DISK_LOCK_OIN_MAP (fe) = lockres->oin_openmap;
+			ocfs_set_disk_lock_open_map(osb, DISK_LOCK(fe), &lockres->oin_openmap);
 	       }
 	}
 
 	if (flags & DLOCK_FLAG_SEQ_NUM &&
-	    DISK_LOCK_SEQNUM (fe) != lockres->last_upd_seq_num) {
+	    DISK_LOCK(fe)->dl_seq_num != lockres->last_upd_seq_num) {
 		if (!changed) {
 			OCFS_BH_PUT_DATA(*bh);
 			fe = OCFS_BH_GET_DATA_WRITE(*bh);
 		}
 		changed = 1;
-		DISK_LOCK_SEQNUM (fe) = lockres->last_upd_seq_num;
+		DISK_LOCK(fe)->dl_seq_num = lockres->last_upd_seq_num;
 	}
 
 	OCFS_BH_PUT_DATA(*bh);
@@ -634,14 +689,15 @@
 	/* no need to alert master for these cases */
 	if (S_ISDIR (inode->i_mode) ||
 	    lockres->master_node_num == OCFS_INVALID_NODE_NUM ||
-	    IS_NODE_ALIVE (lockres->oin_openmap, osb->node_num, OCFS_MAXIMUM_NODES)) {
+	     ocfs_node_is_alive(&lockres->oin_openmap,
+				osb->node_num)) {
 		status = 0;
 		goto bail;
 	}
 
 	while (status == -EAGAIN) {
-		if (!IS_NODE_ALIVE (osb->publ_map, lockres->master_node_num,
-				    OCFS_MAXIMUM_NODES)) {
+		if (!ocfs_node_is_alive(&osb->publ_map,
+					lockres->master_node_num)) {
 			LOG_TRACE_ARGS ("Master (%u) dead, lockid %llu\n",
 				lockres->master_node_num,
 				GET_INODE_FEOFF(inode));
@@ -652,11 +708,7 @@
 		ocfs_acquire_lockres(lockres, 0); // ocfs_file_open ocfs_symlink
 
 		if (lockres->master_node_num == osb->node_num) {
-			LOG_TRACE_ARGS ("Added node to map 0x%08x, lockid %llu\n",
-			     (__u32)(lockres->oin_openmap & 0xFFFFFFFFULL),
-			     GET_INODE_FEOFF(inode));
-
-			lockres->oin_openmap |= (1 << osb->node_num);
+			ocfs_node_map_set_bit(&lockres->oin_openmap, osb->node_num);
 			status = ocfs_update_disk_lock (osb, DLOCK_FLAG_OPEN_MAP, 
 							&bh, inode, handle);
 			if (bh)
@@ -725,7 +777,7 @@
 		   __u32 flags, struct buffer_head **bh, struct inode *inode)
 {
 	int status = -EFAIL;
-	ocfs_file_entry *disklock = NULL;
+	ocfs2_dinode *disklock = NULL;
 	__u32 updated = 0;
 	ocfs_lock_res *lockres = GET_INODE_LOCKRES(inode);
 	int k = 0;
@@ -781,10 +833,11 @@
 			BUG();
 
 		if (lockres->master_node_num != osb->node_num || 
-		    lockres->master_node_num != DISK_LOCK_CURRENT_MASTER (disklock)) {
-			lockres->master_node_num = DISK_LOCK_CURRENT_MASTER (disklock);
-			lockres->lock_type = DISK_LOCK_FILE_LOCK (disklock);
-			lockres->oin_openmap = DISK_LOCK_OIN_MAP (disklock);	
+		    lockres->master_node_num != DISK_LOCK(disklock)->dl_master) {
+			lockres->master_node_num = DISK_LOCK(disklock)->dl_master;
+			lockres->lock_type = DISK_LOCK(disklock)->dl_level;
+			ocfs_get_disk_lock_open_map(osb, DISK_LOCK(disklock), 
+						    &lockres->oin_openmap);
 		}
 		updated = 1;
 		OCFS_BH_PUT_DATA(*b);
@@ -794,7 +847,7 @@
 	no_owner = (lockres->master_node_num == OCFS_INVALID_NODE_NUM);
 
 	/* master node is an invalid node */
-	if (unlikely(lockres->master_node_num >= OCFS_MAXIMUM_NODES && !no_owner)) {
+	if (unlikely(lockres->master_node_num >= osb->max_nodes && !no_owner)) {
 		LOG_ERROR_ARGS("lockres: master_node=%d, owner=%s, lockid=%llu\n",
 			       lockres->master_node_num, no_owner?"no":"yes",
 			       GET_INODE_FEOFF(inode));
@@ -803,9 +856,11 @@
 		goto finally;
 	}
 
-	wait_on_recovery = TEST_NODE_IN_RECOVERY(osb, lockres->master_node_num);
-	owner_dead = !(no_owner || IS_NODE_ALIVE(osb->publ_map, 
-			 lockres->master_node_num, OCFS_MAXIMUM_NODES));
+	wait_on_recovery =
+		ocfs_node_is_recovering(osb, lockres->master_node_num);
+	owner_dead = !(no_owner ||
+		       ocfs_node_is_alive(&osb->publ_map, 
+				 	  lockres->master_node_num));
 	if ((owner_dead || wait_on_recovery) && 
 	    lockres->readonly_node == lockres->master_node_num) {
 		// if owner is dead or in recovery and the lockres 
@@ -890,9 +945,9 @@
 		       lockres->master_node_num, lockres->lock_type, flags|extra_lock_flags);
 	
 #ifdef VERBOSE_LOCKING_TRACE
-	printk("lockres: lockid=%llu, this=%d, master=%d, locktype=%d, flags=%08x, ronode=%d, romap=%08x\n",
+	printk("lockres: lockid=%llu, this=%d, master=%d, locktype=%d, flags=%08x, ronode=%d\n",
 		       GET_INODE_FEOFF(inode), osb->node_num, lockres->master_node_num, lockres->lock_type, 
-		       flags|extra_lock_flags, lockres->readonly_node, (__u32)(lockres->readonly_map & 0xFFFFFFFFULL));
+		       flags|extra_lock_flags, lockres->readonly_node);
 #endif	
 	if (wait_on_recovery && !((flags|extra_lock_flags) & FLAG_FILE_RECOVERY)) {
 		int waitcnt = 0;
@@ -900,7 +955,7 @@
 			       	lockres->master_node_num);
 		while (1) {
 			LOG_TRACE_ARGS("waitcnt = %d\n", waitcnt);
-			if (!TEST_NODE_IN_RECOVERY(osb, lockres->master_node_num))
+			if (!ocfs_node_is_recovering(osb, lockres->master_node_num))
 				break;
 			ocfs_sleep(500);
 		}
@@ -947,23 +1002,33 @@
  */
 int ocfs_disk_release_lock (ocfs_super * osb, __u64 lock_id, __u32 lock_type, __u32 flags, struct buffer_head *bh, struct inode *inode)
 {
-	__u64 votemap = 0ULL;
-	__u32 i;
+	ocfs_node_map votemap;
 	int status = 0, vote_status = 0;
 	int tmpstat;
 	__u64 lockseqno;
 	int cachelock = 0;
 	int disk_vote = 0;
-	ocfs_file_entry *fe = NULL;
+	ocfs2_dinode *fe = NULL;
 	struct buffer_head *tmpbh = NULL, **b = NULL;
-	__u64 oin_node_map;
 	__u32 curr_master;
-	int lockflags = (lock_id >= osb->vol_layout.bitmap_off ? OCFS_BH_CACHED : 0);
+	int lockflags = (IS_SYSTEM_FILE_INODE(inode) ? 0 : OCFS_BH_CACHED);
 	int clear_tmp = 0;
 	ocfs_lock_res *lockres = GET_INODE_LOCKRES(inode);
 
+	/* TODO: the ifdefed out code here used to be used
+	 *       right now, we *broadcast* on lock releases
+	 *       because of the issues surrounding lookup_file_allocation
+	 *       need to restrict this to only the openmap
+	 *       at some later date */
+#if 0
+	ocfs_node_map openmap;
+#endif
+
 	LOG_ENTRY_ARGS ("(0x%p, %llu, %u, %u, 0x%p)\n",
-			osb, lock_id, lock_type, flags, lockres);
+			osb, lock_id, lock_type, flags, lockres);	
+	
+	/* TODO: figure out how to properly handle inode updates w/no oin */
+	ocfs_node_map_dup(osb, &votemap, &osb->publ_map);  // temporary hack, forces broadcast
 
 	if (bh != NULL)
 		b = &bh;
@@ -980,8 +1045,11 @@
 		}
 	}
 	fe = OCFS_BH_GET_DATA_READ(*b);
-	oin_node_map = DISK_LOCK_OIN_MAP (fe);
-	curr_master = DISK_LOCK_CURRENT_MASTER (fe);
+#if 0
+	ocfs_node_map_init(osb, &openmap);
+	ocfs_get_disk_lock_open_map(osb, DISK_LOCK(fe), &openmap);
+#endif
+	curr_master = DISK_LOCK(fe)->dl_master;
 	OCFS_BH_PUT_DATA(*b);
 
 	if (!IS_VALID_NODE_NUM (curr_master)) {
@@ -995,26 +1063,16 @@
 		goto finito;
 	}
 
-	for (i = 0; i < OCFS_MAXIMUM_NODES; i++) {
-		if (((1 << i) & oin_node_map) &&
-		    IS_NODE_ALIVE (osb->publ_map, i, OCFS_MAXIMUM_NODES)) {
-			votemap |= (1ULL << i);
-		}
-	}
+#if 0
+	/* publish_map & openmap for operations that dont need broadcast */
+	if (!(flags & (FLAG_FILE_RENAME | FLAG_FILE_DELETE | 
+		       FLAG_FILE_RELEASE_DENTRY | FLAG_FILE_TRUNCATE |
+		       FLAG_FILE_EXTEND)))
+		ocfs_node_map_and(&votemap, &openmap);
+#endif
 
-	/* Send an update to all nodes alive, can be optimized later TODO */
-	if ((flags & FLAG_FILE_RENAME) || (flags & FLAG_FILE_DELETE)
-	    || (flags & FLAG_RELEASE_DENTRY) || (flags & FLAG_FILE_TRUNCATE) 
-	    || (flags & FLAG_FILE_EXTEND))
-		votemap = osb->publ_map;
-
-	/* TODO: figure out how to properly handle inode updates w/no oin */
-	votemap = osb->publ_map;	// temporary hack, forces broadcast
-
-	/* remove current node from the votemap */
-	votemap &= ~(1ULL << osb->node_num);
-
-	if (votemap == 0ULL)
+	ocfs_node_map_clear_bit(&votemap, osb->node_num);
+	if (ocfs_node_map_is_empty(&votemap))
 		goto finally;
 
 	if (!(flags & FLAG_FILE_UPDATE_OIN))
@@ -1045,9 +1103,9 @@
 
 		LOG_ERROR_ARGS("DISKVOTE!!: lock_type=%u, flags=%08x, offset=%llu, inode=%llu\n",
 		       lock_type, flags, lock_id, inode?GET_INODE_FEOFF(inode):0ULL);
-		LOG_ERROR_ARGS("DISKVOTE!!: this=%d, master=%d, locktype=%d, ronode=%d, romap=%08x\n",
+		LOG_ERROR_ARGS("DISKVOTE!!: this=%d, master=%d, locktype=%d, ronode=%d\n",
 		       osb->node_num, lockres->master_node_num, lockres->lock_type, 
-		       lockres->readonly_node, (__u32)(lockres->readonly_map & 0xFFFFFFFFULL));
+		       lockres->readonly_node);
 
 		ocfs_show_trace(NULL);
 
@@ -1078,20 +1136,20 @@
 
 	LOG_TRACE_ARGS("writing lock now... releasemaster: %s, level: %d, master: %d\n",
 		       flags & FLAG_FILE_RELEASE_MASTER ? "yes" : "no", 
-		       DISK_LOCK_FILE_LOCK (fe),
-		       DISK_LOCK_CURRENT_MASTER (fe));
+		       DISK_LOCK(fe)->dl_level,
+		       DISK_LOCK(fe)->dl_master);
 	if (flags & FLAG_FILE_RELEASE_MASTER)
-		DISK_LOCK_CURRENT_MASTER (fe) = OCFS_INVALID_NODE_NUM;
+		DISK_LOCK(fe)->dl_master = OCFS_INVALID_NODE_NUM;
 
-	if ((DISK_LOCK_FILE_LOCK (fe) == OCFS_DLM_ENABLE_CACHE_LOCK) &&
-	    (DISK_LOCK_CURRENT_MASTER (fe) == osb->node_num)) {
+	if ((DISK_LOCK(fe)->dl_level == OCFS_DLM_ENABLE_CACHE_LOCK) &&
+	    (DISK_LOCK(fe)->dl_master == osb->node_num)) {
 		lockres->lock_type = OCFS_DLM_ENABLE_CACHE_LOCK; 
 		cachelock = 1;
 		LOG_TRACE_STR("keeping at CACHE_LOCK");
 	}
 	else {
 		LOG_TRACE_STR("setting to NO_LOCK");
-		DISK_LOCK_FILE_LOCK (fe) = OCFS_DLM_NO_LOCK;
+		DISK_LOCK(fe)->dl_level = OCFS_DLM_NO_LOCK;
 	}
 
 	OCFS_BH_PUT_DATA(*b);
@@ -1201,10 +1259,11 @@
 /* inode is definitely non NULL */
 int new_lock_function(ocfs_super * osb, __u32 requested_lock, __u32 flags, struct buffer_head *bh, int *disk_vote, struct inode *inode)	
 {
-	__u64 vote_map = 0ULL, open_map = 0ULL;
+	ocfs_node_map vote_map;
+	ocfs_node_map open_map;
 	__u64 lockseqnum = 0;
 	int tmpstat;
-	ocfs_file_entry *fe = NULL;
+	ocfs2_dinode *fe = NULL;
 	__u64 lock_id;
 	__u32 lock_write_flags = DLOCK_FLAG_MASTER | DLOCK_FLAG_LOCK | DLOCK_FLAG_OPEN_MAP;
 	__u32 lock_type = requested_lock;
@@ -1244,33 +1303,37 @@
 		if (bh) {
 			fe = OCFS_BH_GET_DATA_READ(bh);
 			/* may not need to rewrite the lock later if we already have a cachelock */
-			need_lock_write = (DISK_LOCK_CURRENT_MASTER (fe) != osb->node_num ||
-					DISK_LOCK_FILE_LOCK (fe) != OCFS_DLM_ENABLE_CACHE_LOCK);
+			need_lock_write = (DISK_LOCK(fe)->dl_master != osb->node_num ||
+					DISK_LOCK(fe)->dl_level != OCFS_DLM_ENABLE_CACHE_LOCK);
 			OCFS_BH_PUT_DATA(bh);
 		} else
 			need_lock_write = 0;
 		need_to_zap_buffers = 0; 
 	}
 
+	ocfs_node_map_init(osb, &open_map);
+
 	/* that's why it's called fast path */
 	if (flags & FLAG_FAST_PATH_LOCK)
 		goto vote_success;
 
 
-	
+
 	/* figure out who to vote with */
-	if (flags & (FLAG_REMASTER | FLAG_FILE_DELETE | FLAG_FILE_RENAME 
-		     | FLAG_RELEASE_DENTRY))
-		vote_map = osb->publ_map; /* broadcast */
-	else if (flags & FLAG_DROP_READONLY) {
-		/* all nodes that see this is readonly */
-		vote_map = osb->publ_map & lockres->readonly_map;
+	if (flags & (FLAG_REMASTER | FLAG_FILE_DELETE |
+		     FLAG_FILE_RENAME | FLAG_DROP_READONLY |
+		     FLAG_RELEASE_DENTRY)) {
+		ocfs_node_map_dup(osb, &vote_map, &osb->publ_map); /* broadcast */
+		/* only nodes that see this as readonly */
+		if (flags & FLAG_DROP_READONLY)
+			ocfs_node_map_and(&vote_map, &lockres->readonly_map);
 	} else {
-		vote_map = (1 << lockres->master_node_num);  /* just owner */
+		ocfs_node_map_init(osb, &vote_map);
+		ocfs_node_map_set_bit(&vote_map, lockres->master_node_num); /* just owner */
 		lock_type = lockres->lock_type;
 	}
-	vote_map &= ~(1 << osb->node_num); // remove this node
-	if (vote_map == 0) {
+	ocfs_node_map_clear_bit(&vote_map, osb->node_num);
+	if (ocfs_node_map_is_empty(&vote_map)) {
 		/* As this is the only node alive, make it master of the lock */
 		/* no need to update open map */
 		lock_write_flags &= ~(DLOCK_FLAG_OPEN_MAP);
@@ -1300,9 +1363,9 @@
 
 	LOG_ERROR_ARGS("DISKVOTE!!: req_lock=%u, flags=%08x, offset=%llu, inode=%llu\n",
 		       requested_lock, flags, lock_id, inode?GET_INODE_FEOFF(inode):0ULL);
-	LOG_ERROR_ARGS("DISKVOTE!!: this=%d, master=%d, locktype=%d, ronode=%d, romap=%08x\n",
+	LOG_ERROR_ARGS("DISKVOTE!!: this=%d, master=%d, locktype=%d, ronode=%d\n",
 		       osb->node_num, lockres->master_node_num, lockres->lock_type, 
-		       lockres->readonly_node, (__u32)(lockres->readonly_map & 0xFFFFFFFFULL));
+		       lockres->readonly_node);
 
 	ocfs_show_trace(NULL);
 
@@ -1318,7 +1381,7 @@
 
 vote_success:
 	if (status >= 0 && flags & (FLAG_FILE_EXTEND | FLAG_FILE_UPDATE))
-		lockres->oin_openmap = open_map;
+		ocfs_node_map_set(&lockres->oin_openmap, &open_map);
 
 	if (need_to_zap_buffers)
 		ocfs_break_cache_lock_zap_buffers(osb, inode);
@@ -1329,7 +1392,7 @@
 
 	/* drop readonly should remove anyone who has responded */
        	if (flags & FLAG_DROP_READONLY) {
-		lockres->readonly_map = vote_map;
+		ocfs_node_map_set(&lockres->readonly_map, &vote_map);
 		goto bail;
 	}
 
@@ -1354,7 +1417,8 @@
 	/* pick up any change to the open map */
 	if (bh) {
 		fe = OCFS_BH_GET_DATA_READ(bh);
-		lockres->oin_openmap = DISK_LOCK_OIN_MAP (fe);
+		ocfs_get_disk_lock_open_map(osb, DISK_LOCK(fe), 
+					    &lockres->oin_openmap);
 		OCFS_BH_PUT_DATA(bh);
 	}
 bail:
@@ -1387,7 +1451,7 @@
 	int status = 0;
 	ocfs_lock_res *lockres = GET_INODE_LOCKRES(inode);
 
-	if (lockres->readonly_map == 0ULL)
+	if (ocfs_node_map_is_empty(&lockres->readonly_map))
 		return status;
 	status = ocfs_drop_readonly_cache_lock(osb, inode, 0);
 	return status;	

Added: trunk/src/dlm.h
===================================================================
--- trunk/src/dlm.h	2004-06-17 16:59:59 UTC (rev 1127)
+++ trunk/src/dlm.h	2004-06-17 21:06:33 UTC (rev 1128)
@@ -0,0 +1,62 @@
+/* -*- mode: c; c-basic-offset: 8; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
+ * dlm.h
+ *
+ * Function prototypes
+ *
+ * Copyright (C) 2002, 2004 Oracle.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ * 
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ *
+ * Authors: Kurt Hackel, Mark Fasheh, Sunil Mushran, Wim Coekaerts,
+ *	    Manish Singh, Neeraj Goyal, Suchit Kaura
+ */
+
+#ifndef OCFS2_DLM_H
+#define OCFS2_DLM_H
+
+int new_lock_function(ocfs_super *osb, __u32 requested_lock,
+		      __u32 flags, struct buffer_head *bh,
+		      int *disk_vote, struct inode *inode);
+int ocfs_acquire_lock(ocfs_super *osb, __u32 lock_type,
+		      __u32 flags, struct buffer_head **bh,
+		      struct inode *inode);
+int ocfs_break_cache_lock_zap_buffers(ocfs_super *osb,
+				      struct inode *inode);
+void ocfs_compute_dlm_stats(int status, int vote_status,
+			    ocfs_dlm_stats *stats);
+int ocfs_release_lock(ocfs_super *osb, __u32 lock_type,
+		      __u32 flags, struct buffer_head *bh,
+		      struct inode *inode);
+int ocfs_update_disk_lock(ocfs_super *osb, __u32 flags,
+			  struct buffer_head **bh, struct inode *inode,
+			  ocfs_journal_handle *handle);
+int ocfs_update_master_on_open(ocfs_super *osb, struct inode *inode,
+			       ocfs_journal_handle *handle);
+int ocfs_wait_for_lock_release(ocfs_super *osb, __u64 offset,
+			       __u32 time_to_wait, __u32 lock_type,
+			       struct inode *inode);
+void ocfs_set_publish_vote_map(ocfs_super *osb, ocfs_publish *publish,
+			       ocfs_node_map *vote_map);
+void ocfs_get_publish_vote_map(ocfs_super *osb, ocfs_publish *publish,
+			       ocfs_node_map *vote_map);
+void ocfs_set_disk_lock_open_map(ocfs_super *osb, ocfs2_disk_lock *lock,
+				 ocfs_node_map *open_map);
+void ocfs_get_disk_lock_open_map(ocfs_super *osb, ocfs2_disk_lock *lock,
+				 ocfs_node_map *open_map);
+
+#endif /* OCFS2_DLM_H */

Modified: trunk/src/extmap.c
===================================================================
--- trunk/src/extmap.c	2004-06-17 16:59:59 UTC (rev 1127)
+++ trunk/src/extmap.c	2004-06-17 21:06:33 UTC (rev 1128)
@@ -1,4 +1,6 @@
-/*
+/* -*- mode: c; c-basic-offset: 8; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
  * extmap.c
  *
  * Creates, adds, coalesces, deletes extent maps
@@ -24,11 +26,17 @@
  *	    Manish Singh, Neeraj Goyal, Suchit Kaura
  */
 
+#include "ocfs_compat.h"
+
+#include <linux/fs.h>
 #include <linux/types.h>
+#include <linux/slab.h>
 
-#include "inc/ocfs_log.h"
-#include "inc/ocfs.h"
+#include "ocfs_log.h"
+#include "ocfs.h"
 
+#include "extmap.h"
+
 #define OCFS_DEBUG_CONTEXT    OCFS_DEBUG_CONTEXT_EXTMAP
 
 /* Crazy wacky extent map stuff */
@@ -449,11 +457,11 @@
 
 	if ((ByteCount) && (ByteCount != 0xFFFFFFFF)) {
 		ByteCount--;
-		ByteCount >>= osb->sect_size_bits;
+		ByteCount >>= osb->sb->s_blocksize_bits;
 		ByteCount++;
 	}
 
-	Vbo >>= osb->sect_size_bits;
+	Vbo >>= osb->sb->s_blocksize_bits;
 
 	ocfs_extent_map_remove (Map, Vbo, (__s64) ByteCount);
 
@@ -510,15 +518,15 @@
 	Results = ocfs_extent_map_next_entry (Map, RunIndex, Vbo, Lbo,
 					  &LiSectorCount);
 
-	(*Vbo) <<= osb->sect_size_bits;
-	(*Lbo) <<= osb->sect_size_bits;
+	(*Vbo) <<= osb->sb->s_blocksize_bits;
+	(*Lbo) <<= osb->sb->s_blocksize_bits;
 
 	if (LiSectorCount > 0x00000000ffffffffLL) {
 		/* overflow */
 		Results = 0;
 		*SectorCount = 0xffffffffUL;
 	} else {
-		*SectorCount = ((__u32)(LiSectorCount & 0xFFFFFFFFULL)) << osb->sect_size_bits;
+		*SectorCount = ((__u32)(LiSectorCount & 0xFFFFFFFFULL)) << osb->sb->s_blocksize_bits;
 	}
 		
 	LOG_EXIT_INT (Results);
@@ -542,12 +550,12 @@
 	LOG_ENTRY ();
 
 	/* Sector Align the vbo */
-	remainder = Vbo & (osb->sect_size - 1);
+	remainder = Vbo & (osb->sb->s_blocksize - 1);
 
-	ret = ocfs_extent_map_lookup (Map, (Vbo >> osb->sect_size_bits), &LiLbo,
+	ret = ocfs_extent_map_lookup (Map, (Vbo >> osb->sb->s_blocksize_bits), &LiLbo,
 				   &LiSectorCount, Index);
 	if ((__u32) LiLbo != -1) {
-		*Lbo = (((__s64) LiLbo) << (__s64) osb->sect_size_bits);
+		*Lbo = (((__s64) LiLbo) << (__s64) osb->sb->s_blocksize_bits);
 		if (ret) {
 			*Lbo += remainder;
 		}
@@ -558,7 +566,7 @@
 
 	*SectorCount = LiSectorCount;
 	if (*SectorCount) {
-		*SectorCount <<= (__s64) osb->sect_size_bits;
+		*SectorCount <<= (__s64) osb->sb->s_blocksize_bits;
 		if (*SectorCount == 0) {
 			*SectorCount = (__u32) - 1;
 		}
@@ -567,6 +575,7 @@
 			*SectorCount -= remainder;
 		}
 	}
+	
 
 	LOG_EXIT_INT (ret);
 	return ret;
@@ -576,32 +585,31 @@
 /* ocfs_update_extent_map()
  *
  */
-int ocfs_update_extent_map (ocfs_super * osb, ocfs_extent_map * Map, void *Buffer, __s64 * localVbo, __u64 * remainingLength, ocfs_ext_flag Flag)
+int ocfs_update_extent_map(ocfs_super *osb, ocfs_extent_map *Map,
+			   void *Buffer, __s64 *localVbo,
+			   __u64 *remainingLength, ocfs_ext_flag Flag)
 {
 	int ret = -EFAIL;
-	ocfs_file_entry *FileEntry;
-	ocfs_extent_group *OcfsExtent;
+	ocfs2_dinode *fe;
+	ocfs2_extent_block *eb;
+	ocfs2_extent_list *el;
 	__s64 tempVbo;
-	__s64 tempLbo;
 	__u64 tempSize;
 	int j;
 
 	LOG_ENTRY ();
 
 	if (Flag == LOCAL_EXT) {
-		FileEntry = Buffer;
+		fe = Buffer;
+		el = &fe->id2.i_list;
 
-		OCFS_ASSERT (FileEntry->local_ext);
+		OCFS_ASSERT(el->l_tree_depth < 0);
 
-		for (j = 0; j < FileEntry->next_free_ext; j++) {
-			tempVbo = FileEntry->extents[j].file_off;
-			tempLbo = FileEntry->extents[j].disk_off;
-			tempSize = FileEntry->extents[j].num_bytes;
-
+		for (j = 0; j < el->l_next_free_rec; j++) {
 			/* Add the Extent to extent map list */
-			ret =
-			    ocfs_add_extent_map_entry (osb, Map, tempVbo, tempLbo,
-						   tempSize);
+			ret = ocfs_add_extent_map_entry_from_rec(osb->sb,
+								 Map,
+								 &el->l_recs[j]);
 			if (!ret) {
 				LOG_ERROR_STATUS (ret = -ENOMEM);
 				goto bail;
@@ -611,21 +619,18 @@
 	} else {
 		__u64 localLength = 0;
 
-		OcfsExtent = Buffer;
+		eb = Buffer;
+		el = &eb->h_list;
 
-		for (j = 0; j < OcfsExtent->next_free_ext; j++) {
-			if ((__s64) (OcfsExtent->extents[j].file_off +
-				   OcfsExtent->extents[j].num_bytes) >
-			    (*localVbo)) {
-				tempVbo = OcfsExtent->extents[j].file_off;
-				tempLbo = OcfsExtent->extents[j].disk_off;
-				tempSize = OcfsExtent->extents[j].num_bytes;
+		for (j = 0; j < el->l_next_free_rec; j++) {
+			if ((__s64)((u64)(el->l_recs[j].e_cpos + el->l_recs[j].e_clusters) << osb->s_clustersize_bits) > (*localVbo)) {
+				tempVbo = (u64)el->l_recs[j].e_cpos << osb->s_clustersize_bits;
+				tempSize = (u64)el->l_recs[j].e_clusters << osb->s_clustersize_bits;
 
 				/* Add the Extent to extent map list */
-				ret =
-				    ocfs_add_extent_map_entry (osb, Map, tempVbo,
-							   tempLbo,
-							   (__u32) tempSize);
+				ret = ocfs_add_extent_map_entry_from_rec(osb->sb,
+								Map,
+								&el->l_recs[j]);
 				if (!ret) {
 					LOG_ERROR_STATUS (ret = -ENOMEM);
 					goto bail;
@@ -647,10 +652,10 @@
 			}
 		}
 
-		if ((OcfsExtent->next_free_ext != OCFS_MAX_DATA_EXTENTS) &&
+		if ((el->l_next_free_rec != el->l_count) &&
 		    (*remainingLength)) {
-			LOG_ERROR_ARGS ("next_free_extent=%d, rem_len=%llu",
-				OcfsExtent->next_free_ext, *remainingLength);
+			LOG_ERROR_ARGS ("l_next_free_rec=%d, rem_len=%llu",
+				el->l_next_free_rec, *remainingLength);
 		} else
 			ret = 0;
 	}
@@ -660,7 +665,29 @@
 	return ret;
 }				/* ocfs_update_extent_map */
 
+
 /*
+ * ocfs_add_extent_map_entry_from_rec()
+ *
+ * Add an entry using the fields of an ocfs2_extent_rec.
+ *
+ * FIXME This should be coalesced with ocfs_add_extent_map_entry
+ * once all callers use this function.
+ */
+int ocfs_add_extent_map_entry_from_rec(struct super_block *sb,
+				       ocfs_extent_map *extmap,
+				       ocfs2_extent_rec *rec)
+{
+	u64 vbo = (u64)rec->e_cpos << OCFS_SB(sb)->s_clustersize_bits;
+	u64 lbo = rec->e_blkno << sb->s_blocksize_bits;
+	u64 bytes =
+		(u64)rec->e_clusters << OCFS_SB(sb)->s_clustersize_bits;
+	return ocfs_add_extent_map_entry(OCFS_SB(sb), extmap,
+					 vbo, lbo, bytes);
+}
+
+
+/*
  * ocfs_add_extent_map_entry()
  *
  * adds an entry to an extent map
@@ -669,6 +696,7 @@
  */
 int ocfs_add_extent_map_entry (ocfs_super * osb, ocfs_extent_map * Map, __s64 Vbo, __s64 Lbo, __u64 ByteCount)
 {
+	/* FIXME take ocfs2_extent_rec */
 	int ret;
 
 	LOG_ENTRY ();
@@ -677,13 +705,13 @@
 	if (ByteCount) {
 		/* Round up sectors */
 		ByteCount--;
-		ByteCount >>= osb->sect_size_bits;
+		ByteCount >>= osb->sb->s_blocksize_bits;
 		ByteCount++;
 	}
 
 	/* Make the ByteOffsets in to Sector numbers. */
-	Vbo >>= osb->sect_size_bits;
-	Lbo >>= osb->sect_size_bits;
+	Vbo >>= osb->sb->s_blocksize_bits;
+	Lbo >>= osb->sb->s_blocksize_bits;
 
 	ret = ocfs_extent_map_add (Map, ((__s64) Vbo), ((__s64) Lbo), ((__s64) ByteCount));
 	if (!ret)

Added: trunk/src/extmap.h
===================================================================
--- trunk/src/extmap.h	2004-06-17 16:59:59 UTC (rev 1127)
+++ trunk/src/extmap.h	2004-06-17 21:06:33 UTC (rev 1128)
@@ -0,0 +1,51 @@
+/* -*- mode: c; c-basic-offset: 8; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
+ * extmap.h
+ *
+ * Function prototypes
+ *
+ * Copyright (C) 2002, 2004 Oracle.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ * 
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ *
+ * Authors: Kurt Hackel, Mark Fasheh, Sunil Mushran, Wim Coekaerts,
+ *	    Manish Singh, Neeraj Goyal, Suchit Kaura
+ */
+
+#ifndef OCFS2_EXTMAP_H
+#define OCFS2_EXTMAP_H
+
+int ocfs_add_extent_map_entry_from_rec(struct super_block *sb,
+				       ocfs_extent_map *extmap,
+				       ocfs2_extent_rec *rec);
+int ocfs_add_extent_map_entry(ocfs_super *osb, ocfs_extent_map *Map,
+			      __s64 Vbo, __s64 Lbo, __u64 ByteCount);
+void ocfs_delete_all_extent_maps(ocfs_super *osb, struct inode *oin);
+void ocfs_extent_map_destroy(ocfs_extent_map *map);
+void ocfs_extent_map_init(ocfs_extent_map *map);
+int ocfs_get_next_extent_map_entry(ocfs_super *osb,
+				   ocfs_extent_map *Map, __u32 RunIndex,
+				   __s64 *Vbo, __s64 *Lbo,
+				   __u32 *SectorCount);
+int ocfs_lookup_extent_map_entry(ocfs_super *osb, ocfs_extent_map *Map,
+				 __s64 Vbo, __s64 *Lbo,
+				 __u64 *SectorCount, __u32 *Index);
+int ocfs_update_extent_map(ocfs_super *osb, ocfs_extent_map *Map,
+			   void *Buffer, __s64 *localVbo,
+			   __u64 *remainingLength, ocfs_ext_flag Flag);
+
+#endif /* OCFS2_EXTMAP_H */

Modified: trunk/src/file.c
===================================================================
--- trunk/src/file.c	2004-06-17 16:59:59 UTC (rev 1127)
+++ trunk/src/file.c	2004-06-17 21:06:33 UTC (rev 1128)
@@ -1,4 +1,6 @@
-/*
+/* -*- mode: c; c-basic-offset: 8; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
  * file.c
  *
  * File open, close, extend, truncate
@@ -24,12 +26,27 @@
  *	    Manish Singh, Neeraj Goyal, Suchit Kaura
  */
 
+#include "ocfs_compat.h"
+
+#include <linux/fs.h>
 #include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/highmem.h>
 
-#include "inc/ocfs_log.h"
-#include "inc/ocfs.h"
-#include "inc/ocfs_journal.h"
+#include "ocfs_log.h"
+#include "ocfs.h"
 
+#include "alloc.h"
+#include "dlm.h"
+#include "extmap.h"
+#include "file.h"
+#include "sysfile.h"
+#include "inode.h"
+#include "util.h"
+
+#include "ocfs_journal.h"
+#include "ocfs_buffer_head.h"
+
 #define OCFS_DEBUG_CONTEXT    OCFS_DEBUG_CONTEXT_FILE
 
 extern struct semaphore recovery_list_sem;
@@ -38,20 +55,23 @@
 static int ocfs_change_file_attrib (ocfs_super * osb, __u64 file_off, struct iattr *attr, struct inode *inode);
 static int ocfs_truncate_file (ocfs_super * osb, __u64 file_off, __u64 file_size, struct inode *inode);
 
-static void ocfs_fe_set_attributes(ocfs_file_entry *fe, struct iattr *attr)
+
+static void ocfs_fe_set_attributes(ocfs2_dinode *fe, struct iattr *attr)
 {
 	if (attr->ia_valid & ATTR_SIZE)
-		fe->file_size = attr->ia_size;
+		fe->i_size = attr->ia_size;
 	if (attr->ia_valid & ATTR_UID)
-		fe->uid = attr->ia_uid;
+		fe->i_uid = attr->ia_uid;
 	if (attr->ia_valid & ATTR_GID)
-		fe->gid = attr->ia_gid;
+		fe->i_gid = attr->ia_gid;
 	if (attr->ia_valid & ATTR_MODE)
-		fe->prot_bits = attr->ia_mode & 0007777;
+		fe->i_mode = attr->ia_mode;
 	if (attr->ia_valid & ATTR_CTIME)
-		fe->create_time = ocfs_get_seconds(attr->ia_ctime);
+		fe->i_ctime = ocfs_get_seconds(attr->ia_ctime);
+	if (attr->ia_valid & ATTR_ATIME)
+		fe->i_atime = ocfs_get_seconds(attr->ia_atime);
 	if (attr->ia_valid & ATTR_MTIME)
-		fe->modify_time = ocfs_get_seconds(attr->ia_mtime);
+		fe->i_mtime = ocfs_get_seconds(attr->ia_mtime);
 }
 
 static inline int ocfs_sync_inode(struct inode *inode)
@@ -82,7 +102,7 @@
 			   ocfs_journal_handle *handle, struct inode *inode)
 {
 	int status = 0;
-	ocfs_file_entry *fe = NULL;
+	ocfs2_dinode *fe = NULL;
 	int local_handle = 1;
 
 	LOG_ENTRY ();
@@ -99,9 +119,10 @@
 	}
 
 	/* why do we update these here? */
-	OCFS_I(inode)->alloc_size = fe->alloc_size;
-	OCFS_I(inode)->chng_seq_num = DISK_LOCK_SEQNUM (fe);
-	if (fe->this_sector == 0)
+	OCFS_I(inode)->alloc_size =
+		(u64)fe->i_clusters << osb->s_clustersize_bits;
+	OCFS_I(inode)->chng_seq_num = DISK_LOCK(fe)->dl_seq_num;
+	if (fe->i_blkno == 0)
 		LOG_ERROR_STR ("this_sector=0");
 
 	OCFS_BH_PUT_DATA(fe_bh);
@@ -383,7 +404,6 @@
 	}
 
 bail:
-
 	if (dec)
 		iput(inode);
 
@@ -395,17 +415,16 @@
  * ocfs_inode_fill_ext_map()
  *
  */
-int ocfs_inode_fill_ext_map(ocfs_super * osb, struct buffer_head * fe_bh, struct inode *inode)
+int ocfs_inode_fill_ext_map(ocfs_super *osb, struct buffer_head *fe_bh,
+			    struct inode *inode)
 {
 	int ret = 0;
 	int j;
-	__s64 tempVbo;
-	__s64 tempLbo;
-	__u64 tempSize;
 	__u64 tempoff;
-	ocfs_extent_group *extent = NULL;
-	struct buffer_head *extent_bh = NULL;
-	ocfs_file_entry *fe = NULL;
+	ocfs2_extent_block *eb = NULL;
+	ocfs2_extent_list *el;
+	struct buffer_head *eb_bh = NULL;
+	ocfs2_dinode *fe = NULL;
 	void *tmp;
 
 	LOG_ENTRY ();
@@ -413,68 +432,63 @@
 
 	/* get a copy of fe, used readonly in this path and */
 	/* ocfs_create_new_oin will deadlock if fe_bh is locked */
-	fe = ocfs_allocate_file_entry();
+#warning markflar, fix the fe copy
+	fe = kmalloc(osb->sb->s_blocksize, GFP_NOFS);
 	if (fe == NULL) {
 		LOG_ERROR_STATUS(ret = -ENOMEM);
 		goto leave;
 	}
 	tmp = OCFS_BH_GET_DATA_READ(fe_bh);
-	memcpy(fe, tmp, sizeof(ocfs_file_entry));
+	memcpy(fe, tmp, osb->sb->s_blocksize);
 	OCFS_BH_PUT_DATA(fe_bh);
-	tempoff = fe->this_sector;
+	tempoff = fe->i_blkno << osb->sb->s_blocksize_bits;
 
+#warning I think we DO want extent map stuff for dirs now
+#if 0
 	/* we don't want to do the extent map stuff for a directory. */
-	if (fe->attribs & OCFS_ATTRIB_DIRECTORY)
+	if (S_ISDIR(fe->i_mode))
 		goto leave;
+#endif
 
-	if (fe->local_ext) {
-		for (j = 0; j < fe->next_free_ext; j++) {
-			tempVbo = fe->extents[j].file_off;
-			tempLbo = fe->extents[j].disk_off;
-			tempSize = fe->extents[j].num_bytes;
-			
+	el = &fe->id2.i_list;
+	if (el->l_tree_depth < 0) {
+		for (j = 0; j < el->l_next_free_rec; j++) {
 			/* Add the Extent to extent map */
-			ret = ocfs_add_extent_map_entry (osb, 
-							 &OCFS_I(inode)->map,
-							  tempVbo, 
-							  tempLbo, 
-							  tempSize);
+			ret = ocfs_add_extent_map_entry_from_rec(osb->sb, 
+					       		&OCFS_I(inode)->map,
+				      			&el->l_recs[j]);
 			if (!ret) {
 				LOG_ERROR_STATUS (ret = -ENOMEM);
 				goto leave;
 			}
 		}
 	} else {
-		__u64 next_data_ext;
+		u64 next_leaf;
 		
 		/* Extents are branched and we are no longer using */
 		/* Local Extents for this File Entry. */
 		
-		ret = ocfs_get_leaf_extent (osb, fe, 0, &extent_bh, inode);
+		ret = ocfs_get_leaf_extent (osb, fe, 0, &eb_bh, inode);
 		if (ret < 0) {
 			LOG_ERROR_STATUS (ret);
 			goto leave;
 		}
 		
 		while (1) {
-			extent = OCFS_BH_GET_DATA_READ(extent_bh);
+			eb = OCFS_BH_GET_DATA_READ(eb_bh);
+			el = &eb->h_list;
 			
-			if (!IS_VALID_EXTENT_DATA (extent)) {
+			if (!IS_VALID_EXTENT_BLOCK(eb) ||
+			    (el->l_tree_depth != -1)) {
 				LOG_ERROR_STATUS(ret = -EFAIL);
 				goto leave;
 			}
 			
-			for (j = 0; j < extent->next_free_ext; j++) {
-				tempVbo = extent->extents[j].file_off;
-				tempLbo = extent->extents[j].disk_off;
-				tempSize = extent->extents[j].num_bytes;
-				
+			for (j = 0; j < el->l_next_free_rec; j++) {
 				/* Add the Extent to extent map */
-				ret = ocfs_add_extent_map_entry (osb,
-								 &OCFS_I(inode)->map,
-								 tempVbo,
-								 tempLbo,
-								 tempSize);
+				ret = ocfs_add_extent_map_entry_from_rec(osb->sb,
+						       		&OCFS_I(inode)->map,
+								&el->l_recs[j]);
 				if (!ret) {
 					LOG_ERROR_STATUS (ret =
 							  -ENOMEM);
@@ -482,19 +496,19 @@
 				}
 			}
 			
-			if (extent->next_data_ext > 0) {
-				if (!extent->next_data_ext) {
+			if (eb->h_next_leaf_blk > 0) {
+				if (!eb->h_next_leaf_blk) {
 					LOG_ERROR_STATUS (ret = -EFAIL);
 					goto leave;
 				}
-				next_data_ext = extent->next_data_ext;
-				OCFS_BH_PUT_DATA(extent_bh);
-				extent = NULL;
-				extent_bh = NULL;
+				next_leaf = eb->h_next_leaf_blk;
+				OCFS_BH_PUT_DATA(eb_bh);
+				eb = NULL;
+				eb_bh = NULL;
 
 				ret = ocfs_read_bh(osb, 
-						   next_data_ext, 
-						   &extent_bh, 
+						   next_leaf << osb->sb->s_blocksize_bits, 
+						   &eb_bh, 
 						   OCFS_BH_COND_CACHED, inode);
 				if (ret < 0) {
 					LOG_ERROR_STATUS(ret);
@@ -508,12 +522,12 @@
 leave:
 	/* this fe was a copy */
 	if (fe)
-		ocfs_release_file_entry (fe);
+		kfree(fe);
 
-	if (extent_bh) {
-		if (extent)
-			OCFS_BH_PUT_DATA(extent_bh);
-		brelse(extent_bh);
+	if (eb_bh) {
+		if (eb)
+			OCFS_BH_PUT_DATA(eb_bh);
+		brelse(eb_bh);
 	}
 
 	LOG_EXIT_STATUS (ret);
@@ -569,12 +583,11 @@
 {
 	int status = 0;
 	int tmpstat;
-	ocfs_file_entry *fileEntry = NULL;
+	ocfs2_dinode *fileEntry = NULL;
 	int have_disk_lock = 0;
 	__u32 lockFlags = 0, locktype = 0;
 	struct buffer_head *bh = NULL;
 	ocfs_journal_handle *handle = NULL;
-	int flags = 0;
 
 	LOG_ENTRY ();
 
@@ -593,8 +606,8 @@
 		goto leave;
 	}
 
-	if ((DISK_LOCK_FILE_LOCK (fileEntry) == OCFS_DLM_ENABLE_CACHE_LOCK) &&
-	    (DISK_LOCK_CURRENT_MASTER (fileEntry) == osb->node_num))
+	if ((DISK_LOCK(fileEntry)->dl_level == OCFS_DLM_ENABLE_CACHE_LOCK) &&
+	    (DISK_LOCK(fileEntry)->dl_master == osb->node_num))
 		locktype = OCFS_DLM_ENABLE_CACHE_LOCK;
 	else
 		locktype = OCFS_DLM_EXCLUSIVE_LOCK;
@@ -630,15 +643,14 @@
 
 	fileEntry = OCFS_BH_GET_DATA_WRITE(bh);
 
-	fileEntry->modify_time = OCFS_CURRENT_TIME;
+	fileEntry->i_mtime = OCFS_CURRENT_TIME;
 
-	DISK_LOCK_SEQNUM (fileEntry) = 0;
+	DISK_LOCK(fileEntry)->dl_seq_num = 0;
 	ocfs_fe_set_attributes(fileEntry, attr);
 
 	/* Set the valid bit here */
-	SET_VALID_BIT (fileEntry->sync_flags);
+	fileEntry->i_flags |= OCFS2_VALID_FL;
 
-	flags = OCFS_FE_CACHE_FLAGS(osb, fileEntry);
 	OCFS_BH_PUT_DATA(bh);
 
 	status = ocfs_journal_dirty(handle, bh);
@@ -795,7 +807,8 @@
 		    ("Will need more allocation: have=%llu, need=%llu\n",
 		     OCFS_I(inode)->alloc_size, newsize);
 		down_write(&OCFS_I(inode)->ip_io_sem);
-		status = ocfs_extend_file (osb, newsize, GET_INODE_FEOFF(inode), NULL, inode, NULL);
+		status = ocfs_extend_file (osb, newsize, GET_INODE_FEOFF(inode), NULL, inode, 
+					   NULL, 0, NULL);
 		up_write(&OCFS_I(inode)->ip_io_sem);
 		if (status < 0) {
 			if (status != -EINTR && status != -ENOSPC) {
@@ -827,7 +840,7 @@
 		LOG_TRACE_STR
 		    ("Generic_file_write ok, asking for OIN update now");
 		inode->i_size = newsize;
-		inode->i_blocks = (newsize + sb->s_blocksize) >> sb->s_blocksize_bits;
+		inode->i_blocks = (newsize + sb->s_blocksize - 1) >> sb->s_blocksize_bits;
 		up(&osb->extend_sem);
 	}
 
@@ -918,12 +931,11 @@
 static int ocfs_truncate_file (ocfs_super * osb, __u64 file_off, __u64 file_size, struct inode *inode)
 {
 	int status = 0, tmpstat;
-	ocfs_file_entry *fe = NULL;
+	ocfs2_dinode *fe = NULL;
 	__u32 lockFlags = 0, locktype = 0;
 	int have_disk_lock = 0;
 	__u64 new_alloc_size;
 	struct buffer_head *bh = NULL;
-	int flags = 0;
 	ocfs_journal_handle *handle = NULL;
 
 	LOG_ENTRY_ARGS ("(file_off = %llu, file_size = %llu\n", 
@@ -957,8 +969,8 @@
 
 	lockFlags = FLAG_FILE_TRUNCATE;
 
-	if ((DISK_LOCK_FILE_LOCK (fe) == OCFS_DLM_ENABLE_CACHE_LOCK)
-	    && (DISK_LOCK_CURRENT_MASTER (fe) == osb->node_num))
+	if ((DISK_LOCK(fe)->dl_level == OCFS_DLM_ENABLE_CACHE_LOCK)
+	    && (DISK_LOCK(fe)->dl_master == osb->node_num))
 		locktype = OCFS_DLM_ENABLE_CACHE_LOCK;
 	else 
 		locktype = OCFS_DLM_EXCLUSIVE_LOCK;
@@ -990,9 +1002,9 @@
 	 * waiting on the lock). make sure the size is still a valid
 	 * one. This really ought to check for other things too, like
 	 * a valid bit, etc. */
-	if (file_size > fe->file_size) {
+	if (file_size > fe->i_size) {
 		LOG_TRACE_ARGS("asked to truncate file with size (%llu) "
-			       "to size (%llu)!\n", fe->file_size, 
+			       "to size (%llu)!\n", fe->i_size, 
 			       file_size);
 		OCFS_BH_PUT_DATA(bh);
 		up(&OCFS_I(inode)->priv_sem);
@@ -1001,8 +1013,8 @@
 		goto leave;
 	}
 
-	fe->file_size = file_size;
-	fe->alloc_size = new_alloc_size;
+	fe->i_size = file_size;
+	fe->i_clusters = (u32)(new_alloc_size >> osb->s_clustersize_bits);
 
 	status = ocfs_free_extents_for_truncate (osb, fe, handle, inode);
 	if (status < 0) {
@@ -1012,11 +1024,10 @@
 		goto leave;
 	}
 
-	DISK_LOCK_SEQNUM (fe) = 0;
-	SET_VALID_BIT (fe->sync_flags);
-	fe->modify_time = OCFS_CURRENT_TIME;
+	DISK_LOCK(fe)->dl_seq_num = 0;
+	fe->i_flags |= OCFS2_VALID_FL;
+	fe->i_mtime = OCFS_CURRENT_TIME;
 	
-	flags = OCFS_FE_CACHE_FLAGS(osb, fe);
 	OCFS_BH_PUT_DATA(bh);
 
 	status = ocfs_journal_dirty(handle, bh);
@@ -1060,14 +1071,17 @@
 /* ocfs_extend_file()
  *
  */
-int ocfs_extend_file (ocfs_super * osb, __u64 file_size, __u64 file_off, ocfs_journal_handle *passed_handle, struct inode *inode, struct iattr *attr)
+int ocfs_extend_file(ocfs_super *osb, __u64 file_size, __u64 file_off,
+		     ocfs_journal_handle *passed_handle,
+		     struct inode *inode, struct iattr *attr,
+		     int system_file, struct buffer_head *fe_bh)
 {
 	int status = 0;
 	int tmpstat;
-	ocfs_file_entry *fileEntry = NULL;
+	ocfs2_dinode *fe = NULL;
 	__u64 tempOffset = 0;
-	__u64 allocSize = 0;
-	__u32 size;
+	__u64 current_alloc;
+	__u64 alloc_size = 0;
 	__u64 bitmapOffset = 0;
 	__u64 numClustersAlloc = 0;
 	__u32 lockFlags = 0;
@@ -1076,12 +1090,12 @@
 	__u64 actualDiskOffset = 0;
 	__u64 actualLength = 0;
 	struct buffer_head *bh = NULL;
-	int flags = 0;
 	ocfs_journal_handle *handle = NULL;
 	int credits;
 	struct inode *ext_alloc_inode = NULL;
 
-	LOG_ENTRY ();
+	LOG_ENTRY_ARGS("(off=%llu, file_size=%llu, system=%s)\n",
+		       file_off, file_size, system_file?"yes":"no");
 
 	if (!inode)
 		BUG();
@@ -1089,29 +1103,36 @@
 	if (file_size == 0)
 		goto leave;
 
-	status = ocfs_read_bh (osb, file_off, &bh, OCFS_BH_CACHED, inode);
-	if (status < 0) {
-		LOG_ERROR_STATUS (status);
-		goto leave;
+	if (fe_bh)
+		bh = fe_bh;
+	else {
+		status = ocfs_read_bh (osb, file_off, &bh, OCFS_BH_CACHED, inode);
+		if (status < 0) {
+			LOG_ERROR_STATUS (status);
+			goto leave;
+		}
 	}
 
-	fileEntry = OCFS_BH_GET_DATA_READ(bh);
+	fe = OCFS_BH_GET_DATA_READ(bh);
 
-	if (!IS_VALID_FILE_ENTRY(fileEntry)) {
-		printk("fe->signature=%8s\n", fileEntry->signature);
+	if (!IS_VALID_FILE_ENTRY(fe)) {
+		printk("fe->signature=%8s\n", fe->i_signature);
 		LOG_ERROR_ARGS ("Invalid fe at offset %llu", file_off);
 		status = -EFAIL;
 		OCFS_BH_PUT_DATA(bh);
 		goto leave;
 	}
 
-	allocSize = file_size - fileEntry->alloc_size;
+	current_alloc = (u64)fe->i_clusters << osb->s_clustersize_bits;
+	alloc_size = file_size - current_alloc;
 	OCFS_BH_PUT_DATA(bh);
-	fileEntry = NULL;
+	fe = NULL;
 
+	LOG_TRACE_ARGS("current_alloc=%llu, alloc_size=%llu\n",
+		       current_alloc, alloc_size);
 	if (passed_handle == NULL) {
-		credits = ocfs_calc_extend_credits(((__u32) allocSize), 
-						   osb->vol_layout.cluster_size);
+		credits = ocfs_calc_extend_credits(osb->sb,
+						   (__u32) alloc_size); 
 
 		/* cannot call start_trans with a locked buffer head. */
 		handle = ocfs_start_trans(osb, credits);
@@ -1126,7 +1147,7 @@
 
 		/* Grab a lock on the entry found if we have more than
 		 * 1 extents and also make this node the master*/
-		fileEntry = OCFS_BH_GET_DATA_READ(bh);
+		fe = OCFS_BH_GET_DATA_READ(bh);
 
 		/* now we always take an EXTEND lock */
 		lockFlags = FLAG_FILE_EXTEND;
@@ -1152,37 +1173,40 @@
 		goto leave;
 	}
 
-	fileEntry = OCFS_BH_GET_DATA_WRITE(bh);
+	fe = OCFS_BH_GET_DATA_WRITE(bh);
 
-	if (file_size > (__s64) fileEntry->alloc_size) {
-		allocSize = file_size - fileEntry->alloc_size;
+	if (file_size > (__s64)current_alloc) {
+		alloc_size = file_size - current_alloc;
 
 		/* TODO: We can add something here so that after 2-3 allocations, */
-		/* we give a lot more disk space to the file than the allocSize so */
+		/* we give a lot more disk space to the file than the alloc_size so */
 		/* in order to try to use the Extents of File Entry only and ofcourse */
 		/* the file will have more contigous disk space. */
-		{
+		if (!system_file) {
 			int one_percentish_bits = 7;
-			__u64 tempSize = fileEntry->alloc_size;
+			__u64 tempSize = current_alloc;
 
 			if (tempSize > ONE_MEGA_BYTE)
 				tempSize = ONE_MEGA_BYTE;
-			allocSize += (tempSize * 2);
+			alloc_size += (tempSize * 2);
 			
-			if (allocSize < fileEntry->alloc_size >> one_percentish_bits) {
-				allocSize = fileEntry->alloc_size >> one_percentish_bits;
-				tempSize = allocSize;
+			if (alloc_size <
+			    (current_alloc >> one_percentish_bits)) {
+				alloc_size = current_alloc >> one_percentish_bits;
+				tempSize = alloc_size;
 				// avoid using 64 bit mod
 				while (tempSize > (10*ONE_MEGA_BYTE))
 					tempSize -= (10*ONE_MEGA_BYTE);
 				tempSize = (10*ONE_MEGA_BYTE) - tempSize;
-				allocSize += tempSize;					
+				alloc_size += tempSize;					
 			}
 
 		}
 
-		status = ocfs_find_space(osb, allocSize, &bitmapOffset,
-					 &numClustersAlloc, 0, handle);
+		status = ocfs_find_space(osb, alloc_size, &bitmapOffset,
+					 &numClustersAlloc, system_file, handle);
+		LOG_TRACE_ARGS("find_space: alloc_size=%llu, returned off=%llu, num=%llu\n",
+			       alloc_size, bitmapOffset, numClustersAlloc);
 		if (status < 0) {
 			OCFS_BH_PUT_DATA(bh);
 			if (status != -ENOSPC && status != -EINTR)
@@ -1190,30 +1214,67 @@
 			goto leave;
 		}
 
-		actualDiskOffset =
-		    (bitmapOffset << osb->cluster_size_bits) +
-		    osb->vol_layout.data_start_off;
-		actualLength = numClustersAlloc << osb->cluster_size_bits;
+		actualDiskOffset = bitmapOffset << osb->s_clustersize_bits;
+		actualLength = numClustersAlloc << osb->s_clustersize_bits;
 
 		OCFS_BH_PUT_DATA(bh);
-		fileEntry = NULL;
+		fe = NULL;
 
-		{
+		if (system_file) {
+			struct buffer_head **bhs = NULL;
+			int numbhs = actualLength >> osb->sb->s_blocksize_bits;
+			int i;
+			char *data;
+			
+			bhs = kmalloc(numbhs*sizeof(struct buffer_head *), GFP_KERNEL);
+			if (!bhs) {
+				status = -ENOMEM;
+				LOG_ERROR_STATUS(status);
+				goto leave;
+			}
+			memset(bhs, 0, numbhs * sizeof(struct buffer_head *));
+			
+			status = ocfs_read_bhs(osb, actualDiskOffset,
+						actualLength, bhs, 0, NULL);
+			if (status < 0) {
+				kfree(bhs);
+				LOG_ERROR_STATUS(status);
+				goto leave;
+			}
+			
+			for(i = 0; i < numbhs; i++) {
+				data = OCFS_BH_GET_DATA_WRITE(bhs[i]);
+				memset(data, 0, osb->sb->s_blocksize);
+				OCFS_BH_PUT_DATA(bhs[i]);
+			}
+			
+			status = ocfs_write_bhs(osb, bhs, numbhs, 0, NULL);
+			for(i = 0; i < numbhs; i++)
+				brelse(bhs[i]);
+			kfree(bhs);
+			if (status < 0) {
+				LOG_ERROR_STATUS(status);
+				goto leave;
+			}
+		} else {
 			struct buffer_head *alloc_bh;
 			unsigned long block;
 			struct super_block *sb = osb->sb;
 
-			for (block = actualDiskOffset >> osb->sect_size_bits; 
-			     block < (actualDiskOffset+actualLength) >> osb->sect_size_bits;
+			for (block = (unsigned long)(actualDiskOffset >> sb->s_blocksize_bits); 
+			     block < (unsigned long)((actualDiskOffset+actualLength) >> sb->s_blocksize_bits);
 			     block++) {
+				alloc_bh = sb_getblk(sb, block);
+				if (!alloc_bh) {
+					LOG_ERROR_STATUS(status=-EIO);
+					goto leave;
+				}
 				LOG_TRACE_ARGS("setting block %lu as new!\n", block);
-				alloc_bh = getblk(OCFS_GET_BLOCKDEV(sb), block, sb->s_blocksize);
 				alloc_bh->b_state |= (1UL << BH_New);
 				brelse(alloc_bh);
 			}
 		}
-
-		ext_alloc_inode = igrab(osb->system_inodes[FILE_ALLOC_BITMAP_SYSTEM_INODE]);
+		ext_alloc_inode = ocfs_get_system_file_inode(osb, EXTENT_ALLOC_BITMAP_SYSTEM_INODE, osb->node_num);
 		if (!ext_alloc_inode) {
 			status = -EFAIL;
 			LOG_ERROR_STATUS(status);
@@ -1221,40 +1282,53 @@
 		}
 
 		ocfs_handle_add_inode(handle, ext_alloc_inode);
-		status = ocfs_allocate_extent (osb, bh, handle,
-					actualDiskOffset, actualLength, inode);
+		status = ocfs_allocate_extent(osb, bh, handle,
+					      actualDiskOffset >> osb->sb->s_blocksize_bits,
+					      actualLength >> osb->s_clustersize_bits,
+					      inode);
 		if (status < 0) {
 			LOG_ERROR_STATUS (status);
 			goto leave;
 		}
-		fileEntry = OCFS_BH_GET_DATA_WRITE(bh);
+		fe = OCFS_BH_GET_DATA_WRITE(bh);
 
 		/* update the total allocation size here */
-		fileEntry->alloc_size += actualLength;
+		fe->i_clusters += 
+			(u32)(actualLength >> osb->s_clustersize_bits);
 
 		down (&(OCFS_I(inode)->priv_sem));
-		OCFS_I(inode)->alloc_size = fileEntry->alloc_size;
+		OCFS_I(inode)->alloc_size =
+			(u64)fe->i_clusters << osb->s_clustersize_bits;
 		up (&(OCFS_I(inode)->priv_sem));
 
 		/* no need to do OCFS_SECTOR_ALIGN once the allocation size is correct. */
-		DISK_LOCK_SEQNUM (fileEntry) = 0;
+		DISK_LOCK(fe)->dl_seq_num = 0;
 	}
 
 	/* Update tha file size and add the new one to old one. */
-	fileEntry->file_size = file_size;
-	LOG_TRACE_ARGS("fileEntry->alloc_size = %llu\n", fileEntry->alloc_size);
+	fe->i_size = file_size;
+	LOG_TRACE_ARGS("fe: i_clusters = %u, i_size=%llu\n", 
+		       fe->i_clusters, fe->i_size);
 
+	/* NOTE: this is a bit of a hack; unlike regular files, 
+	 * system files do not have another opportunity to update
+	 * the inode/i_private fields */
+	if (system_file) {
+		OCFS_I(inode)->alloc_size = (u64)fe->i_clusters << osb->s_clustersize_bits;
+		inode->i_size = fe->i_size;
+		inode->i_blocks = (inode->i_size + osb->sb->s_blocksize - 1) >> osb->sb->s_blocksize_bits;
+	}
+	LOG_TRACE_ARGS("inode: alloc_size=%llu, i_size=%llu\n",
+		       OCFS_I(inode)->alloc_size, inode->i_size);
+
 	if (attr)
-		ocfs_fe_set_attributes(fileEntry, attr);
+		ocfs_fe_set_attributes(fe, attr);
 	/* Set the Valid bit and reset the change bit here... TODO */
-	SET_VALID_BIT (fileEntry->sync_flags);
-	fileEntry->modify_time = OCFS_CURRENT_TIME;
+	fe->i_flags |= OCFS2_VALID_FL;
+	fe->i_mtime = OCFS_CURRENT_TIME;
 
-	tempOffset = fileEntry->this_sector;
-	size = osb->sect_size;
+	tempOffset = fe->i_blkno << osb->sb->s_blocksize_bits;
 
-	flags = OCFS_FE_CACHE_FLAGS(osb, fileEntry);
-
 	OCFS_BH_PUT_DATA(bh);
 
 	status = ocfs_journal_dirty(handle, bh);
@@ -1292,7 +1366,7 @@
 	if (bh != NULL)
 		LOG_TRACE_ARGS("bh->b_count = %d\n", 
 			       atomic_read(&(bh->b_count)));
-	if (bh != NULL)
+	if (bh != NULL && fe_bh == NULL)
 		brelse(bh);
 
 	if (ext_alloc_inode)
@@ -1389,7 +1463,7 @@
 						    inode);
 		else {
 			status = ocfs_extend_file(osb, newsize, fileOff, NULL, 
-						  inode, attr);
+						  inode, attr, 0, NULL);
 			extended = 1;
 		}
 		if (status < 0) {
@@ -1404,7 +1478,7 @@
 			ocfs_delete_all_extent_maps(osb, inode);
 		}
 		inode->i_size = newsize;
-		inode->i_blocks = (newsize + sb->s_blocksize) >> sb->s_blocksize_bits;
+		inode->i_blocks = (newsize + sb->s_blocksize - 1) >> sb->s_blocksize_bits;
 		up (&(OCFS_I(inode)->priv_sem));
 	}
 
@@ -1451,7 +1525,7 @@
 	generic_fillattr(inode, stat);
 
 	/* We set the blksize from the cluster size for performance */
-	stat->blksize = osb->vol_layout.cluster_size;
+	stat->blksize = osb->s_clustersize;
 
 bail:
 	LOG_EXIT_INT (err);

Added: trunk/src/file.h
===================================================================
--- trunk/src/file.h	2004-06-17 16:59:59 UTC (rev 1127)
+++ trunk/src/file.h	2004-06-17 21:06:33 UTC (rev 1128)
@@ -0,0 +1,57 @@
+/* -*- mode: c; c-basic-offset: 8; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
+ * file.h
+ *
+ * Function prototypes
+ *
+ * Copyright (C) 2002, 2004 Oracle.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ * 
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ *
+ * Authors: Kurt Hackel, Mark Fasheh, Sunil Mushran, Wim Coekaerts,
+ *	    Manish Singh, Neeraj Goyal, Suchit Kaura
+ */
+
+#ifndef OCFS2_FILE_H
+#define OCFS2_FILE_H
+
+int ocfs_extend_file (ocfs_super * osb, __u64 file_size, __u64 file_off, 
+		      ocfs_journal_handle *passed_handle, 
+		      struct inode *inode, struct iattr *attr, 
+		      int system_file, struct buffer_head *fe_bh);
+int ocfs_file_open(struct inode *inode, struct file *file);
+ssize_t ocfs_file_read(struct file *filp, char *buf, size_t count,
+		       loff_t * ppos);
+int ocfs_file_release(struct inode *inode, struct file *file);
+ssize_t ocfs_file_write(struct file *filp, const char *buf,
+			size_t count, loff_t *ppos);
+int ocfs_flush(struct file *file);
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
+int ocfs_getattr(struct vfsmount *mnt, struct dentry *dentry,
+		 struct kstat *stat);
+#else
+int ocfs_getattr(struct dentry *dentry, struct iattr *attr);
+#endif
+int ocfs_inode_fill_ext_map(ocfs_super *osb, struct buffer_head *fe_bh,
+			    struct inode *inode);
+int ocfs_inode_notify_open(ocfs_super *osb, struct buffer_head *fe_bh, 
+			   ocfs_journal_handle *handle,
+			   struct inode *inode);
+int ocfs_setattr(struct dentry *dentry, struct iattr *attr);
+int ocfs_sync_file(struct file *file, struct dentry *dentry, int datasync);
+
+#endif /* OCFS2_FILE_H */

Modified: trunk/src/heartbeat.c
===================================================================
--- trunk/src/heartbeat.c	2004-06-17 16:59:59 UTC (rev 1127)
+++ trunk/src/heartbeat.c	2004-06-17 21:06:33 UTC (rev 1128)
@@ -1,4 +1,6 @@
-/*
+/* -*- mode: c; c-basic-offset: 8; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
  * heartbeat.c
  *
  * Keeps track of alive nodes in the cluster.
@@ -24,18 +26,24 @@
  *	    Manish Singh, Neeraj Goyal, Suchit Kaura
  */
 
-#if !defined(USERSPACE_TOOL)
+#include "ocfs_compat.h"
+
+#include <linux/fs.h>
 #include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/highmem.h>
 
-#include "inc/ocfs_log.h"
-#include "inc/ocfs.h"
-#include "inc/ocfs_journal.h"
-#endif
+#include "ocfs_log.h"
+#include "ocfs.h"
 
-#if defined(USERSPACE_TOOL)
-#include <format.h>
-#endif
+#include "alloc.h"
+#include "heartbeat.h"
+#include "util.h"
+#include "vote.h"
 
+#include "ocfs_journal.h"
+#include "ocfs_buffer_head.h"
+
 /* Tracing */
 #define OCFS_DEBUG_CONTEXT      OCFS_DEBUG_CONTEXT_HEARTBEAT
 
@@ -57,8 +65,8 @@
 	int status = 0;
 	ocfs_publish *publish = NULL;
 	int publish_idx = OCFS_VOLCFG_NEWCFG_SECTORS + osb->node_num;
-	struct buffer_head **pub_bh = &osb->cfg_bhs[publish_idx];
-	__u64 node_publ_off = osb->vol_layout.publ_sect_off + (osb->node_num * osb->sect_size);
+	struct buffer_head **pub_bh = &osb->autoconfig_bhs[publish_idx];
+	__u64 node_publ_off = (osb->publish_blkno + osb->node_num) << osb->sb->s_blocksize_bits;
 
 	LOG_ENTRY_ARGS ("(0x%p, %u, %s)\n", osb, flag,
 			read_publish ? "true" : "false");
@@ -81,8 +89,10 @@
 			publish->dirty = 0; 
 		}
 		/* Write the current time in local node's publish sector */
-		publish->time = ocfs_get_publish_time();
-		publish->hbm[osb->node_num] = osb->hbm;
+		publish->time = jiffies;
+		/* Disallow 0 */
+		if (!publish->time)
+			publish->time = 1;
 		spin_lock (&OcfsGlobalCtxt.comm_seq_lock);
 		publish->comm_seq_num = OcfsGlobalCtxt.comm_seq_num;
 		spin_unlock (&OcfsGlobalCtxt.comm_seq_lock);
@@ -129,8 +139,8 @@
 
 	LOG_ENTRY_ARGS ("(0x%p, 0x%p, %u)\n", osb, bhs, first_time);
 
-	num_nodes = OCFS_MAXIMUM_NODES;
-	node_map = &(osb->vol_node_map);
+	num_nodes = osb->max_nodes;
+	node_map = osb->vol_node_map;
 
 	/* First time thru, update buffer with timestamps for all nodes */
 	if (first_time) {
@@ -145,17 +155,15 @@
 		/* Refresh local buffers */
 		for (i = 0;  i < num_nodes; i++) {
 			publish = OCFS_BH_GET_DATA_READ(bhs[i]);
-			node_map->time[i] = publish->time;
+			node_map[i].time = publish->time;
 			if (publish->mounted && i != osb->node_num) {
 				printk("ocfs2: Adding %s (node %d) to "
 				       "clustered device (%u,%u)\n",
 				       osb->node_cfg_info[i]->node_name, i,
 				       MAJOR(osb->sb->s_dev), 
 				       MINOR(osb->sb->s_dev));
-				node_map->miss_cnt[i] = 0;
-				UPDATE_PUBLISH_MAP(osb->publ_map, i, 
-						   OCFS_PUBLISH_SET, 
-						   num_nodes);
+				node_map[i].miss_cnt = 0;
+				ocfs_publish_map_set(&osb->publ_map, i);
 			}
 			OCFS_BH_PUT_DATA(bhs[i]);
 		}
@@ -173,18 +181,16 @@
 
 		/* Check if the node is hung or not by comparing the disk */
 		/* and memory timestamp values */
-		if (node_map->time[i] == publish->time) {
-			if (IS_NODE_ALIVE(osb->publ_map, i, num_nodes)) {
-				if (atomic_read (&(node_map->dismount[i]))) {
-					node_map->miss_cnt[i] = MISS_COUNT_VALUE;
-					atomic_set (&(node_map->dismount[i]), 0);
-					UPDATE_PUBLISH_MAP (osb->publ_map, i,
-							    OCFS_PUBLISH_CLEAR,
-							    num_nodes);
+		if (node_map[i].time == publish->time) {
+			if (ocfs_node_is_alive(&osb->publ_map, i)) {
+				if (atomic_read (&(node_map[i].dismount))) {
+					node_map[i].miss_cnt = MISS_COUNT_VALUE;
+					atomic_set (&(node_map[i].dismount), 0);
+					ocfs_publish_map_clear(&osb->publ_map, i);
 				} else
-					(node_map->miss_cnt[i])++;
+					(node_map[i].miss_cnt)++;
 
-				if (node_map->miss_cnt[i] > MISS_COUNT_VALUE) {
+				if (node_map[i].miss_cnt > MISS_COUNT_VALUE) {
 #if !defined(USERSPACE_TOOL)
 					printk ("ocfs2: Removing %s (node %d) "
 						"from clustered device (%u,%u)\n",
@@ -193,9 +199,8 @@
 						MINOR(osb->sb->s_dev));
 #endif
 
-					SET_NODE_IN_RECOVERY(osb, i);
-					UPDATE_PUBLISH_MAP (osb->publ_map, i,
-					    OCFS_PUBLISH_CLEAR, num_nodes);
+					ocfs_recovery_map_set(osb, i);
+					ocfs_publish_map_clear(&osb->publ_map, i);
 
 					/* Ok, we'd better recover him now...*/
 					ocfs_recovery_thread(osb, i);
@@ -203,34 +208,16 @@
 			}
 		} else {
 #if !defined(USERSPACE_TOOL)
-			if (!IS_NODE_ALIVE(osb->publ_map, i, num_nodes) && osb->node_num != i)
+			if (!ocfs_node_is_alive(&osb->publ_map, i) &&
+			    (osb->node_num != i))
 				printk ("ocfs2: Adding %s (node %d) to clustered device (%u,%u)\n",
 					osb->node_cfg_info[i]->node_name, i,
 					MAJOR(osb->sb->s_dev), MINOR(osb->sb->s_dev));
 #endif
-			node_map->miss_cnt[i] = 0;
-			node_map->time[i] = publish->time;
-			UPDATE_PUBLISH_MAP (osb->publ_map, i, OCFS_PUBLISH_SET,
-					    num_nodes);
+			node_map[i].miss_cnt = 0;
+			node_map[i].time = publish->time;
+			ocfs_publish_map_set(&osb->publ_map, i);
 
-#if !defined(USERSPACE_TOOL)
-			/* Update the multiple the other node wants us to beat */
-			if ((publish->hbm[osb->node_num] != DISK_HBEAT_INVALID)
-			    && (osb->hbm > publish->hbm[osb->node_num])) {
-				/* Go to the lowest multiplier any of the nodes */
-				/* alive want us to heartbeat with. */
-				osb->hbm = publish->hbm[osb->node_num];
-
-				if (osb->hbm == 0)
-					osb->hbm = DISK_HBEAT_NO_COMM;
-
-				if (OcfsGlobalCtxt.hbm > osb->hbm)
-						OcfsGlobalCtxt.hbm = osb->hbm;
-
-				if (OcfsGlobalCtxt.hbm == 0)
-					OcfsGlobalCtxt.hbm = DISK_HBEAT_NO_COMM;
-			}
-#endif
 		}
 		OCFS_BH_PUT_DATA(bhs[i]);
 	}

Added: trunk/src/heartbeat.h
===================================================================
--- trunk/src/heartbeat.h	2004-06-17 16:59:59 UTC (rev 1127)
+++ trunk/src/heartbeat.h	2004-06-17 21:06:33 UTC (rev 1128)
@@ -0,0 +1,36 @@
+/* -*- mode: c; c-basic-offset: 8; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
+ * heartbeat.h
+ *
+ * Function prototypes
+ *
+ * Copyright (C) 2002, 2004 Oracle.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ * 
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ *
+ * Authors: Kurt Hackel, Mark Fasheh, Sunil Mushran, Wim Coekaerts,
+ *	    Manish Singh, Neeraj Goyal, Suchit Kaura
+ */
+
+#ifndef OCFS2_HEARTBEAT_H
+#define OCFS2_HEARTBEAT_H
+
+int ocfs_nm_heart_beat(ocfs_super *osb, __u32 flag, int read_publish);
+void ocfs_update_publish_map(ocfs_super *osb, struct buffer_head *bhs[],
+			     int first_time);
+
+#endif /* OCFS2_HEARTBEAT_H */

Modified: trunk/src/inode.c
===================================================================
--- trunk/src/inode.c	2004-06-17 16:59:59 UTC (rev 1127)
+++ trunk/src/inode.c	2004-06-17 21:06:33 UTC (rev 1128)
@@ -1,4 +1,6 @@
-/*
+/* -*- mode: c; c-basic-offset: 8; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
  * inode.c
  *
  * vfs' aops, fops, dops and iops
@@ -24,14 +26,49 @@
  *	    Manish Singh, Neeraj Goyal, Suchit Kaura
  */
 
+#include "ocfs_compat.h"
+
+#include <linux/fs.h>
 #include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/highmem.h>
+#include <linux/pagemap.h>
+#include <linux/smp_lock.h>
 
-#include "inc/ocfs_log.h"
-#include "inc/ocfs.h"
-#include "inc/ocfs_journal.h"
+#include <asm/byteorder.h>
 
+#include "ocfs_log.h"
+#include "ocfs.h"
+
+#include "alloc.h"
+#include "dir.h"
+#include "dlm.h"
+#include "extmap.h"
+#include "file.h"
+#include "inode.h"
+#include "ioctl.h"
+#include "lockres.h"
+#include "namei.h"
+#include "super.h"
+#include "symlink.h"
+#include "util.h"
+#include "vote.h"
+
+#include "ocfs_journal.h"
+#include "ocfs_buffer_head.h"
+
 #define OCFS_DEBUG_CONTEXT    OCFS_DEBUG_CONTEXT_INODE
 
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)
+# include <linux/iobuf.h>
+# if LINUX_VERSION_CODE >= KERNEL_VERSION(2,4,18)
+#  define free_kiovec_sz(nr, buf, bh)     free_kiovec(nr, buf)
+#  define alloc_kiovec_sz(nr, buf, bh)    alloc_kiovec(nr, buf)
+# endif
+#endif /* for 2.6 - no more kiovec, kiobuf structures - vfs handles
+	* this for us (direct i/o) */
+
+
 extern struct semaphore recovery_list_sem;
 
 typedef struct _ocfs_find_inode_args
@@ -167,34 +204,6 @@
 
 	LOG_ENTRY_ARGS("(feoff = %llu)\n", feoff);
 
-	/* Shortcut: if they ask for the root dirnode, just return
-	 * it. */
-	if (feoff == OCFS_ROOT_INODE_FE_OFF(osb)) {
-		LOG_TRACE_ARGS("Asked for root dirnode (%llu)\n",
-			       feoff);
-
-		if (osb->root_inode) {
-			inode = osb->root_inode;
-
-			/* should we iget it or not? i suppose if you're in
-			 * here and you've asked for the root inode you don't
-			 * know what it is and will prolly iput it later... */
-			if (inode) {
-				if (ocfs_inc_icount(inode) < 0)
-					BUG();
-			}
-			goto bail;
-		}
-		/* If we fall through here, then it's mount time! */
-	}
-
-	/* This shouldn't happen anymore. */
-	if (osb->vol_layout.root_start_off == 0) {
-		LOG_ERROR_ARGS("root_start_off = 0! Skipping inode create for "
-		       "%llu\n", feoff);
-		goto bail;
-	}
-
 	/* Ok. By now we've either got the offsets passed to us by the
 	 * caller, or we just pulled them off the bh. Lets do some
 	 * sanity checks to make sure they're OK. */
@@ -203,10 +212,6 @@
 		goto bail;
 	}
 
-	/* try to detect whether this is a system file. */
-	if (feoff < osb->vol_layout.root_start_off)
-		flags |= OCFS_FIND_INODE_FLAG_SYSFILE;
-
 	args.feoff = feoff;
 	args.flags = flags;
 	args.ino = ino_from_off(sb, feoff);
@@ -297,7 +302,7 @@
  */
 int ocfs_inode_init_private(struct inode *inode)
 {
-	ocfs_inode_private *i = NULL;
+	ocfs_inode_private *i;
 
 	if (!inode)
 		BUG();
@@ -337,18 +342,19 @@
  * ocfs_populate_inode()
  *
  */
-void ocfs_populate_inode (struct inode *inode, ocfs_file_entry *fe, umode_t mode, int create_ino)
+void ocfs_populate_inode(struct inode *inode, ocfs2_dinode *fe,
+		     	 int create_ino)
 {
 	struct super_block *sb;
 	ocfs_super *osb;
-	__u64 offset;
 
-	LOG_ENTRY_ARGS ("(0x%p, %u, size:%llu)\n", inode, mode, fe->file_size);
+	LOG_ENTRY_ARGS ("(0x%p, size:%llu)\n", inode, fe->i_size);
 
 	sb = inode->i_sb;
 	osb = OCFS_SB(sb);
-	offset = fe->this_sector;
 
+	// this means that read_inode cannot create a superblock 
+	// inode today.  change if needed.
 	if (!IS_VALID_FILE_ENTRY(fe)) {
 		printk("ocfs2: invalid file entry!\n");
 		BUG();
@@ -362,65 +368,89 @@
 	}
 
 	inode->i_version = 1;
-	inode->i_rdev = MKDEV (fe->dev_major, fe->dev_minor);
-	inode->i_mode = mode;
-	inode->i_uid = fe->uid;
-	inode->i_gid = fe->gid;
-	inode->i_blksize = (__u32) osb->vol_layout.cluster_size;	// sb->s_blocksize;
-	inode->i_blocks = (fe->file_size + sb->s_blocksize) >> sb->s_blocksize_bits;
+	inode->i_generation = le32_to_cpu(fe->i_generation);
+	inode->i_rdev = huge_decode_dev(le64_to_cpu(fe->id1.dev1.i_rdev));
+	inode->i_mode = fe->i_mode;
+	inode->i_uid = fe->i_uid;
+	inode->i_gid = fe->i_gid;
+	inode->i_blksize = (u32)osb->s_clustersize;	// sb->s_blocksize;
+	inode->i_blocks = (fe->i_size + sb->s_blocksize - 1) >> sb->s_blocksize_bits;
 	inode->i_mapping->a_ops = &ocfs_aops;
 #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)
 	inode->i_attr_flags |= ATTR_FLAG_NOATIME;
 #endif
 	inode->i_flags |= S_NOATIME;
-	OCFS_SET_INODE_TIME(inode, i_atime, fe->modify_time);
-	OCFS_SET_INODE_TIME(inode, i_mtime, fe->modify_time);
-	OCFS_SET_INODE_TIME(inode, i_ctime, fe->create_time);
+	OCFS_SET_INODE_TIME(inode, i_atime, fe->i_atime);
+	OCFS_SET_INODE_TIME(inode, i_mtime, fe->i_mtime);
+	OCFS_SET_INODE_TIME(inode, i_ctime, fe->i_ctime);
 
-	SET_INODE_FEOFF(inode, offset);
+	SET_INODE_FEOFF(inode,
+			fe->i_blkno << osb->sb->s_blocksize_bits);
 
-	OCFS_I(inode)->alloc_size = fe->alloc_size;
+	OCFS_I(inode)->alloc_size =
+		(u64)fe->i_clusters << osb->s_clustersize_bits;
 	OCFS_I(inode)->inode = inode;
-	OCFS_I(inode)->chng_seq_num = DISK_LOCK_SEQNUM (fe);
-	OCFS_I(inode)->u.fe_private = fe->u.fe_private;
+	OCFS_I(inode)->chng_seq_num = DISK_LOCK(fe)->dl_seq_num;
+	OCFS_I(inode)->u.fe_private = fe->id1.i_pad1;
 
-	if (fe->attribs & OCFS_ATTRIB_DIRECTORY)
+	if (S_ISDIR(fe->i_mode))
 		OCFS_I(inode)->oin_flags |= OCFS_OIN_DIRECTORY;
 
 	if (create_ino)
-		inode->i_ino = ino_from_off(inode->i_sb, offset);
+		inode->i_ino =
+			ino_from_off(inode->i_sb,
+				     fe->i_blkno << osb->sb->s_blocksize_bits);
 
-	LOG_TRACE_ARGS("offset = %llu, ino = %lu, create_ino = %s\n",
-		       offset, inode->i_ino,
+	LOG_TRACE_ARGS("blkno = %llu, ino = %lu, create_ino = %s\n",
+		       fe->i_blkno, inode->i_ino,
 		       create_ino ? "true" : "false");
 
-	inode->i_nlink = fe->link_cnt;
+	inode->i_nlink = fe->i_links_count;
+
+	if (le32_to_cpu(fe->i_flags) & OCFS2_LOCAL_ALLOC_FL) {
+		LOG_TRACE_ARGS("local alloc inode: i_ino=%lu\n", inode->i_ino);
+	} else if (le32_to_cpu(fe->i_flags) & OCFS2_BITMAP_FL) {
+		OCFS_I(inode)->u.ip_bitinfo.used_bits = 
+			le32_to_cpu(fe->id1.bitmap1.i_used);
+		OCFS_I(inode)->u.ip_bitinfo.total_bits = 
+			le32_to_cpu(fe->id1.bitmap1.i_total);
+		LOG_TRACE_ARGS("bitmap inode: i_ino=%lu, used=%u, total=%u\n",
+			       inode->i_ino, OCFS_I(inode)->u.ip_bitinfo.used_bits,
+			       OCFS_I(inode)->u.ip_bitinfo.total_bits);
+	} else if (le32_to_cpu(fe->i_flags) & OCFS2_SUPER_BLOCK_FL) {
+		LOG_TRACE_ARGS("superblock inode: i_ino=%lu\n", inode->i_ino);
+		// we can't actually hit this as read_inode can't handle
+		// superblocks today ;-)
+		BUG();
+	}
+	
 	switch (inode->i_mode & S_IFMT) {
 	    case S_IFREG:
 		    atomic_set(GET_INODE_CLEAN_SEQ(inode), atomic_read(&osb->clean_buffer_seq));
 		    inode->i_fop = &ocfs_fops;
 		    inode->i_op = &ocfs_file_iops;
-		    inode->i_size = fe->file_size;
+		    inode->i_size = fe->i_size;
 		    break;
 	    case S_IFDIR:
 		    if (inode->i_nlink < 2) {
 			    LOG_ERROR_ARGS("inlink=%d for %llu\n", inode->i_nlink, 
-					   offset);
+					   fe->i_blkno);
 			    inode->i_nlink = 2;
 		    }
 		    atomic_set(GET_INODE_CLEAN_SEQ(inode), atomic_read(&osb->clean_buffer_seq));
 		    inode->i_op = &ocfs_dir_iops;
 		    inode->i_fop = &ocfs_dops;
-		    inode->i_size = fe->file_size;
+		    inode->i_size = fe->i_size;
 		    break;
 	    case S_IFLNK:
 		    atomic_set(GET_INODE_CLEAN_SEQ(inode), atomic_read(&osb->clean_buffer_seq));
 		    inode->i_op = &ocfs_symlink_inode_operations;
 		    //inode->i_fop = &ocfs_fops;
-		    inode->i_size = fe->file_size;
+		    inode->i_size = fe->i_size;
 		    break;
 	    default:
-		    init_special_inode (inode, mode, inode->i_rdev);
+		    init_special_inode(inode, inode->i_mode,
+				       inode->i_rdev);
 		    break;
 	}
 
@@ -470,9 +500,8 @@
 {
 	struct super_block *sb;
 	ocfs_super *osb;
-	umode_t mode;
 	__u64 feoff = 0ULL;
-	ocfs_file_entry *fe = NULL;
+	ocfs2_dinode *fe = NULL;
 	struct buffer_head *bh = NULL;
 	int status;
 	int sysfile = 0;
@@ -493,7 +522,6 @@
 	}
 
 	feoff = args->feoff;
-	sysfile = (args->flags & OCFS_FIND_INODE_FLAG_SYSFILE);
 
 	/* Read the FE off disk. This is safe because the kernel only
 	 * does one read_inode2 for a new inode, and if it doesn't
@@ -506,39 +534,14 @@
 	}
 
 	fe = OCFS_BH_GET_DATA_READ(bh);
+	sysfile = le32_to_cpu(fe->i_flags) & OCFS2_SYSTEM_FL;
+	if (S_ISCHR(fe->i_mode) || S_ISBLK(fe->i_mode))
+    		inode->i_rdev = huge_decode_dev(le64_to_cpu(fe->id1.dev1.i_rdev));
 
-	mode = fe->prot_bits;
+	ocfs_populate_inode (inode, fe, 0);
 
-	switch (fe->attribs) {
-	    case OCFS_ATTRIB_DIRECTORY:
-		    mode |= S_IFDIR;
-		    break;
-	    case OCFS_ATTRIB_CHAR:
-		    inode->i_rdev = MKDEV (fe->dev_major, fe->dev_minor);
-		    mode |= S_IFCHR;
-		    break;
-	    case OCFS_ATTRIB_BLOCK:
-		    inode->i_rdev = MKDEV (fe->dev_major, fe->dev_minor);
-		    mode |= S_IFBLK;
-		    break;
-	    case OCFS_ATTRIB_FIFO:
-		    mode |= S_IFIFO;
-		    break;
-	    case OCFS_ATTRIB_SYMLINK:
-		    mode |= S_IFLNK;
-		    break;
-	    case OCFS_ATTRIB_SOCKET:
-		    mode |= S_IFSOCK;
-		    break;
-	    case OCFS_ATTRIB_REG:
-	    default:
-		    mode |= S_IFREG;
-		    break;
-	}
-	ocfs_populate_inode (inode, fe, mode, 0);
-
 	/* eventually this case has to GO! */
-	if (feoff != fe->this_sector)
+	if (feoff != (fe->i_blkno << osb->sb->s_blocksize_bits))
 		BUG();
 
 	if (sysfile)
@@ -581,10 +584,6 @@
 		goto bail;
 
 	args = opaque;
-	if (args->ino == OCFS_ROOT_INODE_NUMBER) {
-		printk("find_actor: asked about root inode\n");
-		goto bail;
-	}
 
 	if (GET_INODE_FEOFF(inode) != args->feoff) {
 		LOG_ERROR_STATUS(-EINVAL);
@@ -656,7 +655,7 @@
 	struct buffer_head *orphan_dir_bh = NULL;
 	struct buffer_head *fe_bh = NULL;
 	__u32 lock_flags = FLAG_FILE_DELETE;
-	ocfs_file_entry *fe;
+	ocfs2_dinode *fe;
 
 	LOG_ENTRY_ARGS("(inode->i_ino = %lu)\n", inode->i_ino);
 
@@ -665,7 +664,7 @@
 		goto clear_inode;
 	}
 
-	if (inode->i_ino == OCFS_ROOT_INODE_NUMBER) {
+	if (inode == osb->root_inode) {
 		LOG_TRACE_STR("Skipping root inode delete.");
 		goto clear_inode;
 	}
@@ -733,7 +732,7 @@
 
 	/* check OCFS_SYNC_FLAG_ORPHANED */
 	fe = OCFS_BH_GET_DATA_READ(fe_bh);
-	if (!(fe->sync_flags & OCFS_SYNC_FLAG_ORPHANED)) {
+	if (!(fe->i_flags & OCFS2_ORPHANED_FL)) {
 		OCFS_BH_PUT_DATA(fe_bh);
 		/* for lack of a better error? */
 		status = -EEXIST;
@@ -742,7 +741,7 @@
 	}
 
 	/* has someone already deleted us?! baaad... */
-	if (fe->u.i_dtime) {
+	if (fe->i_dtime) {
 		OCFS_BH_PUT_DATA(fe_bh);
 
 		status = -EEXIST;
@@ -762,9 +761,8 @@
 	}
 
 	fe = OCFS_BH_GET_DATA_WRITE(fe_bh);
-	fe->u.i_dtime = OCFS_CURRENT_TIME;
-	fe->sync_flags &= (~OCFS_SYNC_FLAG_VALID);
-	fe->sync_flags &= (~OCFS_SYNC_FLAG_ORPHANED);
+	fe->i_dtime = OCFS_CURRENT_TIME;
+	fe->i_flags &= (~(OCFS2_VALID_FL | OCFS2_ORPHANED_FL));
 	OCFS_BH_PUT_DATA(fe_bh);
 
 	status = ocfs_journal_dirty(handle, fe_bh);
@@ -928,11 +926,7 @@
 	memcpy (kaddr, symname, len - 1);
 	mapping->a_ops->commit_write (NULL, page, 0, len - 1);
 	err = mapping->a_ops->readpage (NULL, page);
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
 	wait_on_page_locked (page);
-#else
-	wait_on_page (page);
-#endif
 	page_cache_release (page);
 	if (err < 0)
 		goto fail;
@@ -940,11 +934,7 @@
 	return 0;
 
 fail_map:
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
 	unlock_page (page);
-#else
-	UnlockPage (page);
-#endif
 	page_cache_release (page);
 fail:
 	return err;
@@ -957,9 +947,8 @@
 static int ocfs_symlink_get_block (struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create)
 {
 	int err = -EIO;
-	ocfs_super *osb;
 	int status;
-	ocfs_file_entry *fe = NULL;
+	ocfs2_dinode *fe = NULL;
 	__u64 entryOffset;
 	struct buffer_head *bh = NULL;
 
@@ -971,37 +960,38 @@
 		goto bail;
 	}
 
-	osb = OCFS_SB(inode->i_sb);
-
-	if ((iblock << 9) > PATH_MAX + 1) {
-		LOG_ERROR_ARGS ("file offset > PATH_MAX: %llu",
-				(unsigned long long)iblock << 9);
+	if ((iblock << inode->i_sb->s_blocksize_bits) > PATH_MAX + 1) {
+		LOG_ERROR_ARGS ("block offset > PATH_MAX: %llu",
+				(unsigned long long)iblock);
 		goto bail;
 	}
 
 	entryOffset = GET_INODE_FEOFF(inode);
 
-	status = ocfs_read_bh (osb, entryOffset, &bh, OCFS_BH_CACHED, inode);
+	status = ocfs_read_bh(OCFS_SB(inode->i_sb), entryOffset, &bh,
+			      OCFS_BH_CACHED, inode);
 	if (status < 0) {
 		LOG_ERROR_STATUS (status);
 		goto bail;
 	}
 	fe = OCFS_BH_GET_DATA_READ(bh);
 
-	if (!IS_VALID_FILE_ENTRY (fe)) {
+	if (!IS_VALID_FILE_ENTRY(fe)) {
 		OCFS_BH_PUT_DATA(bh);
 		LOG_ERROR_ARGS ("Invalid fe at offset %llu", entryOffset);
 		goto bail;
 	}
 
-	if ((iblock << 9) >= (__s64)fe->alloc_size) {
+	if ((u64)iblock >= ocfs_clusters_to_blocks(inode->i_sb,
+					      fe->i_clusters)) {
 		OCFS_BH_PUT_DATA(bh);
-		LOG_ERROR_ARGS ("file offset is outside the allocated size: %llu",
-		     (unsigned long long)iblock << 9);
+		LOG_ERROR_ARGS ("block offset is outside the allocated size: %llu",
+		     (unsigned long long)iblock);
 		goto bail;
 	}
 
-	map_bh(bh_result, inode->i_sb, (fe->extents[0].disk_off >> osb->sect_size_bits) + iblock);
+	map_bh(bh_result, inode->i_sb,
+	       fe->id2.i_list.l_recs[0].e_blkno + iblock);
 	OCFS_BH_PUT_DATA(bh);
 
 	err = 0;
@@ -1020,10 +1010,11 @@
 			       int block, int create, int *err, int reada)
 {
 	struct buffer_head * bh = NULL;
-	int fatal = 0, tmperr = 0, new = 0;
-	ocfs_super *osb = NULL;
-	__s64 vbo = 0LL, lbo = 0LL;
+	int fatal = 0, tmperr, new = 0;
+	ocfs_super *osb;
+	__s64 vbo, lbo;
 	int readflags = OCFS_BH_CACHED;
+	char *buf;
 
 	osb = OCFS_SB(inode->i_sb);
 	vbo = (__s64) block << inode->i_sb->s_blocksize_bits;
@@ -1044,27 +1035,42 @@
 		new = 1;
 	}
 
-	// ???: do we need priv_sem?  should have i_sem i think
+	/* ???: do we need priv_sem?  should have i_sem i think */
 	if (vbo >= OCFS_I(inode)->alloc_size) {
-		*err = ocfs_extend_file(osb, vbo + 512LL, GET_INODE_FEOFF(inode),
-				       handle, inode, NULL);
+		int vbo_pad;
+		
+		vbo_pad = inode->i_sb->s_blocksize;
+		vbo_pad -= vbo & (s64)(inode->i_sb->s_blocksize - 1);
+
+		*err = ocfs_extend_file(osb, 
+					vbo + vbo_pad, 
+					GET_INODE_FEOFF(inode),
+				        handle, inode, NULL, 0, NULL);
 		if (*err < 0) {
 			*err = -ENOSPC;
 			return NULL;
 		}
-		// fe->file_size will be vbo+512 here, and alloc_size will be whatever
-		// i_size will be changed by caller (ocfs_add_entry) if we return !NULL
+		/*
+		 * fe->i_size will be vbo + padding to blocksize here,
+		 * and i_blocks will be whatever is actually allocated.
+		 * i_size will be changed by caller (ocfs_add_entry) if
+		 * we return !NULL.
+		 */
 	}
 
-	// do we need extend sem?  no extend dlm message for dirs
-	tmperr = ocfs_lookup_file_allocation(osb, vbo, &lbo, 1, NULL, inode,1);
+	/* do we need extend sem?  no extend dlm message for dirs */
+	/*
+	 * UGLY: last argument to lookup_file_allocation() (locked) is
+	 * forced to '1' here, even though we don't have the lock.  This
+	 * is to force fast, unlocked operation.  Get A Real DLM.
+	 */
+	tmperr = ocfs_lookup_file_allocation(osb, vbo, &lbo, 1, NULL,
+					     inode, 1);
 	if (tmperr < 0)
 		goto fail;
 
 	if (new) {
-		bh = getblk(OCFS_GET_BLOCKDEV(osb->sb), 
-			    lbo >> osb->sb->s_blocksize_bits, 
-			    osb->sb->s_blocksize);
+		bh = sb_getblk(osb->sb, lbo >> osb->sb->s_blocksize_bits);
 		if (!bh) {
 			tmperr = -EIO;
 			goto fail;
@@ -1077,8 +1083,8 @@
 		if (fatal)
 			goto fail;
 
-		char *buf = OCFS_BH_GET_DATA_WRITE(bh);
-		memset(buf, 0, osb->sect_size);
+		buf = OCFS_BH_GET_DATA_WRITE(bh);
+		memset(buf, 0, osb->sb->s_blocksize);
 		OCFS_BH_PUT_DATA(bh);
 		fatal = ocfs_journal_dirty(handle, bh);	
 		if (fatal)
@@ -1139,13 +1145,18 @@
 	vbo = (__s64) iblock << inode->i_sb->s_blocksize_bits;
 
 	if (!INODE_JOURNAL(inode) && vbo >= OCFS_I(inode)->alloc_size) {
+		int vbo_pad;
+		
+		vbo_pad = inode->i_sb->s_blocksize;
+		vbo_pad -= vbo & (s64)(inode->i_sb->s_blocksize - 1);
+
 		LOG_TRACE_STR("Extending allocation");
 		LOG_ERROR_ARGS("extending inode %lu in get_block!!\n", 
 			       inode->i_ino);
 		down_write(&OCFS_I(inode)->ip_io_sem);
-		err = ocfs_extend_file(osb, vbo + osb->sect_size, 
-				       GET_INODE_FEOFF(inode), NULL, inode, 
-				       NULL);
+		err = ocfs_extend_file(osb, vbo + vbo_pad, 
+				       GET_INODE_FEOFF(inode),
+				       NULL, inode, NULL, 0, NULL);
 		up_write(&OCFS_I(inode)->ip_io_sem);
 		if (err < 0) {
 			err = -ENOSPC;
@@ -1357,7 +1368,7 @@
 	__s64 lbo; /* logical (disk) offset */
 	__s64 vbo_max; /* file offset, max_blocks from iblock */
 	int set_new = 0; /* flag */
-	u32 new_size; /* In sectors, the size of the contiguous block */
+	__u64 new_size; /* In bytes, the size of the contiguous block */
 	unsigned char blocksize_bits;
 	int needs_trunc = 0;
 
@@ -1394,7 +1405,7 @@
 	if (create && vbo_max > OCFS_I(inode)->alloc_size) {
 		/* WARNING: How much do we really want to extend the file? */
 		status = ocfs_extend_file(osb, vbo_max, GET_INODE_FEOFF(inode),
-					  NULL, inode, NULL);
+					  NULL, inode, NULL, 0, NULL);
 		if (status < 0) {
 			status = -ENOSPC;
 			LOG_ERROR_STR("ocfs_direct_IO_get_blocks: failed to extend the file!");
@@ -1422,9 +1433,9 @@
 
 	/* make sure we don't map more than max_blocks blocks here as
 	   that's all the kernel will handle at this point. */
-	if (new_size > max_blocks)
-		new_size = max_blocks;
-	bh_result->b_size = new_size << blocksize_bits;
+	if (new_size > (__u64)max_blocks << blocksize_bits)
+		new_size = (__u64)max_blocks << blocksize_bits;
+	bh_result->b_size = new_size;
 
 	ret = 0;
 bail:
@@ -1516,7 +1527,7 @@
 	struct inode *inode = filp->f_dentry->d_inode;
 	int max_sectors;
 	int nbhs;
-	int sector_size, sector_bits, sector_mask;
+	int sector_size, sector_bits, sector_mask, sectors_per_page;
 	int ret = 0;
 	int large_io = 0;
 	int inuse = 0;
@@ -1533,9 +1544,10 @@
 	saved_size = size;
 	
 	/* FIXME: Need to differentiate between sectors and blocksize */
-	sector_size = 512;
-	sector_bits = 9;
-	sector_mask = 511;
+	sector_bits = OCFS_SB(inode->i_sb)->s_sectsize_bits;
+	sector_size = 1 << OCFS_SB(inode->i_sb)->s_sectsize_bits;
+	sector_mask = sector_size - 1;
+	sectors_per_page = PAGE_SIZE / sector_size;
 	/* max sectors is 1024 in 2.4.9
 	 * max data is 512kb  
 	 */
@@ -1553,7 +1565,7 @@
 	      size = inode->i_size - *offp;
 	}
 
-	/* make sure we are aligned to either 4kb or 512 byte IO */
+	/* make sure aligned to either PAGE_SIZE or sect_size IO */
 #ifndef LARGEIOS
 	if ((*offp & sector_mask) || (size & sector_mask))
 	   /* if not, then fail, we need either to do dio */
@@ -1563,19 +1575,19 @@
 	large_io = 0;
 #endif
 #ifdef LARGEIOS	
-	if ((*offp & 4095) || (size & 4095)) {
-		/* if it's not 4kb, then 512 */
+	if ((*offp & ~PAGE_MASK) || (size & ~PAGE_MASK)) {
+		/* if it's not PAGE_SIZE, then sect_size */
 		 if ((*offp & sector_mask) || (size & sector_mask))
 			 /* if not, then fail, we need either to do dio */
 			 return err;
 		 max_sectors = KIO_MAX_SECTORS; /* for 2.4.9 - 1024 */
-	} /* ok we 're 4kb aligned, lets see if the buffer is */
+	} /* ok we 're PAGE_SIZE aligned, lets see if the buffer is */
 	else {
-		if (!((unsigned long) buf & 4095)) {
-			/* yippie we are .. we can do 4kb size io's */
+		if (!((unsigned long) buf & ~PAGE_MASK)) {
+			/* yippie we are .. we can do PAGE_SIZE size io's */
 			large_io = 1;
 			/* for 2.4.9 */
-			max_sectors = KIO_MAX_SECTORS / 8;
+			max_sectors = KIO_MAX_SECTORS / sectors_per_page;
 		} else {
 			max_sectors = KIO_MAX_SECTORS;
 			large_io = 0;
@@ -1637,7 +1649,7 @@
 doio:
 		size = totalioblocks << sector_bits;
 		if (large_io)
-			nbhs = (size >> 12);
+			nbhs = (size >> PAGE_SHIFT);
 		else
 			nbhs = (size >> sector_bits);
 		if (nbhs > max_sectors)
@@ -1666,10 +1678,10 @@
 		totalioblocks = 0;
 		while (size > 0) {
 			if (large_io) {
-				blocks = size >> 12;
+				blocks = size >> PAGE_SHIFT;
 				if (blocks > max_sectors)
 					blocks = max_sectors;
-				iosize = blocks << 12;
+				iosize = blocks << PAGE_SHIFT;
 			} else {
 				blocks = size >> sector_bits;
 				if (blocks > max_sectors)
@@ -1686,22 +1698,22 @@
 			OCFS_KIO_BLOCKS(iobuf)[0] = firstphys + totalioblocks;
 
 			if (large_io) {
-				blocknr+=8;
-				OCFS_KIO_BLOCKS(iobuf)[0] = OCFS_KIO_BLOCKS(iobuf)[0] / 8;
+				blocknr += sectors_per_page;
+				OCFS_KIO_BLOCKS(iobuf)[0] = OCFS_KIO_BLOCKS(iobuf)[0] / sectors_per_page;
 			} else {
 				blocknr++;
 			}
 
 			for (i = 1; i < blocks; i++) {
 				if (large_io) {
-					blocknr+=8;
+					blocknr += sectors_per_page;
 				} else {
 					blocknr++;
 				}
 				OCFS_KIO_BLOCKS(iobuf)[i] = OCFS_KIO_BLOCKS(iobuf)[0] + i;
 			}
 			err = brw_kiovec (rw, 1, &iobuf, inode->i_dev, OCFS_KIO_BLOCKS(iobuf),
-					large_io ? 4096 : sector_size);
+					large_io ? PAGE_SIZE : sector_size);
 #ifdef SUSE
 			if (rw == READ &&  err > 0)
 				mark_dirty_kiobuf(iobuf, err);
@@ -1711,7 +1723,8 @@
 				size -= err;
 				buf += err;
 				if (large_io) {
-					totalioblocks += (blocks * 8);
+					totalioblocks +=
+					    (blocks * sectors_per_page);
 				} else {
 					totalioblocks += blocks;
 				}
@@ -1770,7 +1783,7 @@
 
 int ocfs_kvec_rw(struct file *filp, int rw, kvec_cb_t cb, size_t size, loff_t pos) 
 {
-	int             err = 0;
+	int	     err = 0;
 	int max_sectors = 25000;
 	struct inode *inode = filp->f_dentry->d_inode;
 	unsigned long blocknr, blocks, iosize,myiosize;
@@ -1779,9 +1792,9 @@
 	unsigned long blocks_end_cluster = 0;
  
 	/* FIXME: Need to differentiate betwen sectors and blocksize */
-	int sector_bits = 9;
-	int sector_size = 512;
-	int sector_mask = 511;
+	int sector_bits = OCFS_SB(inode->i_sb)->s_sectsize_bits;
+	int sector_size = 1 << OCFS_SB(inode->i_sb)->s_sectsize_bits;
+	int sector_mask = sector_size - 1;
 
 	int ret;
 	unsigned long firstlogic;
@@ -1817,7 +1830,7 @@
 	iosize = blocks << sector_bits;
 	clustersize = inode->i_blksize >> sector_bits;
 	blocks_end_cluster = clustersize - (blocknr % clustersize);
-	myiosize = size >> 9;
+	myiosize = size >> sector_bits;
 	firstlogic = blocknr;
 	totalioblocks = 0;
 
@@ -1897,7 +1910,7 @@
 	}
 	up (&(OCFS_I(inode)->priv_sem));
 
-	if (osb->publ_map == (1 << osb->node_num)) {
+	if (ocfs_node_map_is_only(osb, &osb->publ_map, osb->node_num)) {
 		LOG_TRACE_STR ("Only node alive.");
 		goto bail;
 	}
@@ -1937,7 +1950,8 @@
 {
 	int status = 0;
 	struct buffer_head *fe_bh = NULL;
-	ocfs_file_entry *fe = NULL;
+	ocfs2_dinode *fe = NULL;
+	ocfs2_extent_list *fel;
 	__u64 offset;
 	int drop_priv_sem = 0;
 
@@ -1984,10 +1998,10 @@
 		goto leave;
 
 	/* Add checks as needed */
-	if ((fe->u.i_dtime) || (!(fe->sync_flags & OCFS_SYNC_FLAG_VALID))) {
-		if (fe->u.i_dtime)
+	if ((fe->i_dtime) || (!(fe->i_flags & OCFS2_VALID_FL))) {
+		if (fe->i_dtime)
 			LOG_ERROR_ARGS("Inode %lu has dtime = %llu\n", 
-				       inode->i_ino, fe->u.i_dtime);
+				       inode->i_ino, fe->i_dtime);
 		else
 			LOG_TRACE_STR ("File Entry is invalid");
 
@@ -1995,90 +2009,73 @@
 		goto leave;
 	}
 
-	if ((OCFS_I(inode)->alloc_size != (__s64) fe->alloc_size) ||
-	    (inode->i_size != (__s64) fe->file_size) ||
-	    (OCFS_I(inode)->chng_seq_num != DISK_LOCK_SEQNUM (fe)) ||
-	    inode->i_uid != fe->uid ||
-	    inode->i_gid != fe->gid || 
-	    inode->i_mode != fe->prot_bits ||
-	    inode->i_nlink != fe->link_cnt){
+	if ((OCFS_I(inode)->alloc_size !=
+	     (__s64)((u64)fe->i_clusters << osb->s_clustersize_bits)) ||
+	    (inode->i_size != (__s64) fe->i_size) ||
+	    (OCFS_I(inode)->chng_seq_num != DISK_LOCK(fe)->dl_seq_num) ||
+	    inode->i_uid != fe->i_uid ||
+	    inode->i_gid != fe->i_gid || 
+	    inode->i_mode != fe->i_mode ||
+	    inode->i_nlink != fe->i_links_count){
 
-		if (OCFS_I(inode)->alloc_size > (__s64)fe->alloc_size){
+		if (OCFS_I(inode)->alloc_size >
+		    (__s64)((u64)fe->i_clusters << osb->s_clustersize_bits)) {
 			ocfs_extent_map_destroy (&OCFS_I(inode)->map);
 			ocfs_extent_map_init (&OCFS_I(inode)->map);
 		}
 
+		if (le32_to_cpu(fe->i_flags) & OCFS2_BITMAP_FL) {
+			OCFS_I(inode)->u.ip_bitinfo.used_bits = 
+				le32_to_cpu(fe->id1.bitmap1.i_used);
+			OCFS_I(inode)->u.ip_bitinfo.total_bits = 
+				le32_to_cpu(fe->id1.bitmap1.i_total);
+			LOG_TRACE_ARGS("updated bitmap inode: i_ino=%lu, used=%u, total=%u\n",
+			       inode->i_ino, OCFS_I(inode)->u.ip_bitinfo.used_bits,
+			       OCFS_I(inode)->u.ip_bitinfo.total_bits);
+		}
+
 		LOG_TRACE_STR("Allocsize, filesize or seq no did not match");
-		OCFS_I(inode)->alloc_size = fe->alloc_size;
-		inode->i_size = fe->file_size;
-printk("verifyupdate: setting nlink from %d to %d for %llu\n", inode->i_nlink, fe->link_cnt, GET_INODE_FEOFF(inode));
-		inode->i_nlink = fe->link_cnt;
-		OCFS_I(inode)->chng_seq_num = DISK_LOCK_SEQNUM (fe);
-		inode->i_blocks = (inode->i_size + osb->sect_size) >> osb->sect_size_bits;
-		inode->i_uid = fe->uid;
-		inode->i_gid = fe->gid;
-		inode->i_mode = fe->prot_bits;
-		inode->i_blksize = (__u32) osb->vol_layout.cluster_size;
-		OCFS_SET_INODE_TIME(inode, i_ctime, fe->create_time);
-		OCFS_SET_INODE_TIME(inode, i_atime, fe->modify_time);
-		OCFS_SET_INODE_TIME(inode, i_mtime, fe->modify_time);
+		OCFS_I(inode)->alloc_size = 
+			(u64)fe->i_clusters << osb->s_clustersize_bits;
+		inode->i_size = fe->i_size;
+		LOG_TRACE_ARGS("verifyupdate: setting nlink from %d to %d for %llu\n", 
+			       inode->i_nlink, fe->i_links_count, GET_INODE_FEOFF(inode));
+		inode->i_nlink = fe->i_links_count;
+		OCFS_I(inode)->chng_seq_num = DISK_LOCK(fe)->dl_seq_num;
+		inode->i_blocks = (inode->i_size + osb->sb->s_blocksize - 1) >> osb->sb->s_blocksize_bits;
+		inode->i_uid = fe->i_uid;
+		inode->i_gid = fe->i_gid;
+		inode->i_mode = fe->i_mode;
+		inode->i_blksize = (__u32) osb->s_clustersize;
+		OCFS_SET_INODE_TIME(inode, i_ctime, fe->i_ctime);
+		OCFS_SET_INODE_TIME(inode, i_atime, fe->i_atime);
+		OCFS_SET_INODE_TIME(inode, i_mtime, fe->i_mtime);
 		if (!S_ISDIR (inode->i_mode) &&
-		    (OCFS_I(inode)->alloc_size != (__s64) fe->alloc_size ||
-		     inode->i_size != (__s64) fe->file_size ||
-		     OCFS_I(inode)->chng_seq_num != DISK_LOCK_SEQNUM (fe))) {
+		    (OCFS_I(inode)->alloc_size !=
+		     (__s64)((u64)fe->i_clusters << osb->s_clustersize_bits) ||
+		     inode->i_size != (__s64) fe->i_size ||
+		     OCFS_I(inode)->chng_seq_num != DISK_LOCK(fe)->dl_seq_num)) {
 			*needs_trunc = 1;
 		}
 
-		switch (fe->attribs) {
-		case OCFS_ATTRIB_DIRECTORY:
-			inode->i_mode |= S_IFDIR;
-			break;
-		case OCFS_ATTRIB_SYMLINK:
-			inode->i_mode |= S_IFLNK;
-			break;
-		case OCFS_ATTRIB_REG:
-			inode->i_mode |= S_IFREG;
-			break;
-		case OCFS_ATTRIB_CHAR:
-		case OCFS_ATTRIB_BLOCK:
-		case OCFS_ATTRIB_FIFO:
-		case OCFS_ATTRIB_SOCKET:
-			if (fe->attribs == OCFS_ATTRIB_CHAR)
-				inode->i_mode |= S_IFCHR;
-			else if (fe->attribs == OCFS_ATTRIB_BLOCK)
-				inode->i_mode |= S_IFBLK;
-			else if (fe->attribs == OCFS_ATTRIB_FIFO)
-				inode->i_mode |= S_IFIFO;
-			else if (fe->attribs == OCFS_ATTRIB_SOCKET)
-				inode->i_mode |= S_IFSOCK;
-
+		if (S_ISCHR(fe->i_mode) ||
+	       	    S_ISBLK(fe->i_mode) ||
+	      	    S_ISFIFO(fe->i_mode) ||
+	     	    S_ISSOCK(fe->i_mode)) {
 			inode->i_rdev = OCFS_NODEV;
-			init_special_inode (inode, inode->i_mode,
-					    MKDEV(fe->dev_major,
-						  fe->dev_minor));
-			break;
-		default:
-			LOG_ERROR_ARGS ("attribs=%d", fe->attribs);
-			inode->i_mode |= S_IFREG;
-			break;
+			init_special_inode(inode, inode->i_mode,
+					   huge_decode_dev(le64_to_cpu(fe->id1.dev1.i_rdev)));
 		}
 
-		if (fe->local_ext) {
-			__s64 tempVbo;
-			__s64 tempLbo;
-			__u64 tempSize;
+		fel = &fe->id2.i_list;
+		if (fel->l_tree_depth < 0) {
 			__u32 j;
 
 			/* Add the Extents to extent map */
-			for (j = 0; j < fe->next_free_ext; j++) {
-				tempVbo = fe->extents[j].file_off;
-				tempLbo = fe->extents[j].disk_off;
-				tempSize = fe->extents[j].num_bytes;
-
-				if (!ocfs_add_extent_map_entry (osb, 
-								&OCFS_I(inode)->map,
-								tempVbo, tempLbo,
-								tempSize))
+			for (j = 0; j < fel->l_next_free_rec; j++) {
+				if (!ocfs_add_extent_map_entry_from_rec(osb->sb, 
+									&OCFS_I(inode)->map,
+									&fel->l_recs[j]))
 					goto leave;
 			}
 		}

Added: trunk/src/inode.h
===================================================================
--- trunk/src/inode.h	2004-06-17 16:59:59 UTC (rev 1127)
+++ trunk/src/inode.h	2004-06-17 21:06:33 UTC (rev 1128)
@@ -0,0 +1,53 @@
+/* -*- mode: c; c-basic-offset: 8; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
+ * inode.h
+ *
+ * Function prototypes
+ *
+ * Copyright (C) 2002, 2004 Oracle.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ * 
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ *
+ * Authors: Kurt Hackel, Mark Fasheh, Sunil Mushran, Wim Coekaerts,
+ *	    Manish Singh, Neeraj Goyal, Suchit Kaura
+ */
+
+#ifndef OCFS2_INODE_H
+#define OCFS2_INODE_H
+
+int ocfs_block_symlink(struct inode *inode, const char *symname,
+		       int len);
+struct buffer_head *ocfs_bread(ocfs_journal_handle *handle,
+			       struct inode * inode, int block,
+			       int create, int *err, int reada);
+void ocfs_clear_inode(struct inode *inode);
+void ocfs_delete_inode(struct inode *inode);
+struct inode *ocfs_iget(ocfs_super *osb, __u64 feoff);
+int ocfs_inode_init_private(struct inode *inode);
+int ocfs_inode_revalidate(struct dentry *dentry);
+void ocfs_populate_inode(struct inode *inode, ocfs2_dinode *fe,
+			 int create_ino);
+void ocfs_put_inode(struct inode *inode);
+void ocfs_read_inode(struct inode *inode);
+void ocfs_read_inode2(struct inode *inode, void *opaque);
+ssize_t ocfs_rw_direct(int rw, struct file *filp, char *buf,
+		       size_t size, loff_t *offp);
+void ocfs_sync_blockdev(struct super_block *sb);
+int ocfs_verify_update_inode(ocfs_super *osb, struct inode *inode,
+			     int *needs_trunc, int lockres_locked);
+
+#endif /* OCFS2_INODE_H */

Deleted: trunk/src/io.c
===================================================================
--- trunk/src/io.c	2004-06-17 16:59:59 UTC (rev 1127)
+++ trunk/src/io.c	2004-06-17 21:06:33 UTC (rev 1128)
@@ -1,322 +0,0 @@
-/*
- * io.c
- *
- * Buffer cache handling
- *
- * Copyright (C) 2002, 2004 Oracle.  All rights reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- * 
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- * 
- * You should have received a copy of the GNU General Public
- * License along with this program; if not, write to the
- * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- * Boston, MA 021110-1307, USA.
- *
- * Authors: Kurt Hackel, Mark Fasheh, Sunil Mushran, Wim Coekaerts,
- *	    Manish Singh, Neeraj Goyal, Suchit Kaura
- */
-
-#include <linux/types.h>
-
-#include "inc/ocfs_log.h"
-#include "inc/ocfs.h"
-#include "inc/ocfs_journal.h"
-
-#define OCFS_DEBUG_CONTEXT    OCFS_DEBUG_CONTEXT_IO
-
-static void ocfs_end_buffer_io_sync(struct buffer_head *bh,
-                                    int uptodate)
-{
-//	LOG_ENTRY_ARGS("(bh->b_blocknr = %u, uptodate = %d)\n", bh->b_blocknr,
-//		       uptodate);
-
-	if (!uptodate)
-		LOG_ERROR_STATUS(-EIO);
-
-	if (uptodate)
-		set_buffer_uptodate(bh);
-	else
-		clear_buffer_uptodate(bh);
-	unlock_buffer(bh);
-
-//	LOG_EXIT();
-	return;
-}
-
-
-int ocfs_write_bhs (ocfs_super * osb, struct buffer_head *bhs[], 
-				  int nr, int flags, struct inode *inode)
-{
-	int status = 0;
-	int i;
-	struct super_block *sb;
-	ocfs_blockdev dev;
-	struct buffer_head *bh;
-
-#ifdef OCFS_DBG_TIMING
-	my_timing_t begin, end; 
-#endif
-	
-	LOG_ENTRY_ARGS("(bh[0]->b_blocknr = %llu, nr=%d, flags=%u, inode=%p)\n", 
-		       (unsigned long long)bhs[0]->b_blocknr, nr, flags, inode);
-#ifdef OCFS_DBG_TIMING
-	rdtsc (begin.lohi[0], begin.lohi[1]);
-#endif
-
-	if (osb == NULL || osb->sb == NULL || bhs == NULL) {
-		LOG_TRACE_STR("osb == NULL || osb->sb == NULL || bhs == "
-		       "NULL");
-		status = -EINVAL;
-		LOG_ERROR_STATUS(status);
-		goto bail;
-	}
-
-	if (nr > 256)
-		LOG_TRACE_ARGS ("Getting write for %d blocks\n", nr);
-
-	sb = osb->sb;
-	dev = OCFS_GET_BLOCKDEV(sb);
-
-	/* we don't ever want cached writes -- those should go to the
-	 * journal so we can control when they actually hit disk and
-	 * so we can make sure they never get overwritten by a
-	 * subsequent read. */
-	if ((flags & OCFS_BH_CACHED) || (flags & OCFS_BH_COND_CACHED)) {
-		LOG_TRACE_STR("asking for a cached write!");
-		status = -EINVAL;
-		LOG_ERROR_STATUS(status);
-		goto bail;
-	}
-
-	for (i = 0 ; i < nr ; i++) {
-		bh = bhs[i];
-		if (bh == NULL) {
-			LOG_TRACE_STR("bh == NULL");
-			status = -EIO;
-			LOG_ERROR_STATUS(status);
-			goto bail;
-		}
-
-		if (check_block_zero_write(bh) < 0) {
-			status = -EIO;	
-			LOG_ERROR_STATUS(status);
-			goto bail;
-		}
-
-		if (flags & OCFS_BH_CONCURRENT_WRITE)
-			goto skip_modified_check;
-
-skip_modified_check:
-		if (!(flags & OCFS_BH_IGNORE_JBD) && buffer_jbd(bh)) {
-#ifdef VERBOSE_BH_JBD_TRACE
-			LOG_TRACE_ARGS("trying to write a jbd managed bh "
-				       "(blocknr = %llu), nr=%d\n", 
-				       (unsigned long long)bh->b_blocknr, nr);
-#endif
-			continue;
-		}
-
-		lock_buffer(bh);
-
-		set_buffer_uptodate(bh);
-		/* remove from dirty list before I/O. */
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)		
-		/*
-		 * mark_buffer_clean() doesn't exist in 2.6.x kernels.
-		 * Not many places actually used mark_buffer_clean, but
-		 * at least reiserfs uses clear_buffer_dirty() as
-		 * a replacment.
-		 */
-		clear_buffer_dirty(bh);
-#else
-		mark_buffer_clean(bh);
-#endif
-
-		bh->b_end_io = ocfs_end_buffer_io_sync;
-		submit_bh(WRITE, bh);
-	}
-
-	for (i = (nr-1) ; i >= 0; i--) {
-		bh = bhs[i];
-
-		wait_on_buffer(bh);
-
-		if (inode)
-			SET_BH_SEQNUM(inode, bh);
-		else
-			CLEAR_BH_SEQNUM(bh);
-	}
-	
-bail:
-
-#ifdef OCFS_DBG_TIMING
-	IO_FUNC_TIMING_PRINT("ocfs_write_bhs", status);
-#endif
-
-	LOG_EXIT_STATUS(status);
-	return status;
-}
-
-/*
- * ocfs_read_bhs()
- *
- */
-int ocfs_read_bhs (ocfs_super * osb, __u64 off, __u64 len, 
-				 struct buffer_head *bhs[], int flags, 
-				 struct inode *inode)
-{
-	int status = 0;
-	struct super_block *sb;
-	int nr, i, ignore_cache = 0;
-	__u64 blocknum;
-	ocfs_blockdev dev;
-	struct buffer_head *bh;
-
-#ifdef OCFS_DBG_TIMING
-	my_timing_t begin, end; 
-#endif
-	LOG_ENTRY_ARGS("(off=(%llu), len=(%llu), flags=%d, inode=%p)\n", off, 
-		       len, flags, inode);
-#ifdef OCFS_DBG_TIMING
-	rdtsc (begin.lohi[0], begin.lohi[1]);
-#endif
-
-	if (len % 512) {
-		LOG_TRACE_ARGS("len %% 512 (len=%llu)\n", len);
-		status = -EINVAL;
-		LOG_ERROR_STATUS(status);
-		goto bail;
-	}
-
-	if (osb == NULL || osb->sb == NULL || bhs == NULL) {
-		LOG_TRACE_STR("osb == NULL || osb->sb == NULL || bhs == NULL "
-			      "|| num == NULL");
-		status = -EINVAL;
-		LOG_ERROR_STATUS(status);
-		goto bail;
-	}
-
-	if ((flags & OCFS_BH_COND_CACHED) && 
-	    (off >= osb->vol_layout.bitmap_off))
-			flags |= OCFS_BH_CACHED;
-
-	if (OCFS_NONCACHED(osb, off)) {
-		if (flags & OCFS_BH_CACHED)
-			LOG_TRACE_STR("hey bozo you are trying to write "
-				      "a system thingy cached!");
-		flags &= ~OCFS_BH_CACHED;
-	}
-
-	sb = osb->sb;
-	dev = OCFS_GET_BLOCKDEV(sb);
-	blocknum = off >> sb->s_blocksize_bits;
-
-	nr = (len + 511) >> 9;
-	if (nr == 0) {
-		LOG_TRACE_STR("No buffers will be read!!!");
-		LOG_TRACE_ARGS("Len=%llu Off=%llu numbuffers=%u "
-			       "blocknum=%llu\n", len, off, 
-			       nr, blocknum);
-		status = 0;
-		goto bail;
-	}
-
-	for (i = 0 ; i < nr ; i++) {
-		if (bhs[i] == NULL) {
-			bhs[i] = getblk (dev, blocknum++, sb->s_blocksize);
-			if (bhs[i] == NULL) {
-				LOG_TRACE_STR("bh == NULL");
-				status = -EIO;
-				LOG_ERROR_STATUS(status);
-				goto bail;
-			}
-		}
-		bh = bhs[i];
-		ignore_cache = 0;
-
-		/* Lock everyone else out of this bh */
-		OCFS_BH_GET_DATA_READ(bh);
-
-		if (flags & OCFS_BH_CACHED && inode && 
-		    !TEST_BH_SEQNUM(inode, bh)) {
-#ifdef VERBOSE_BH_SEQNUM_TRACE
-			LOG_TRACE_ARGS("(read) bh (%llu) seqnum (%lu) does not "
-				       "match inode (%u)\n",
-				       (unsigned long long)bh->b_blocknr, 
-				       (bh->b_state & STATE_BIT_MASK) >> 19,
-				       atomic_read(GET_INODE_CLEAN_SEQ(inode)));
-#endif
-			ignore_cache = 1;
-		}
-
-		if ((flags & OCFS_BH_CACHED) && (!buffer_uptodate(bh)))
-			ignore_cache = 1;
-
-		if (buffer_jbd(bh)) {
-#ifdef VERBOSE_BH_JBD_TRACE
-			if (!(flags & OCFS_BH_CACHED) || ignore_cache)
-				LOG_TRACE_ARGS("trying to sync read a jbd "
-					       "managed bh (blocknr = %llu)\n",
-					       (unsigned long long)bh->b_blocknr);
-#endif
-			continue;
-		}
-
-		if (!(flags & OCFS_BH_CACHED) || ignore_cache) {
-			if (buffer_dirty(bh)) {
-				/* This should probably be a BUG, or
-				 * at least return an error. */
-				LOG_TRACE_ARGS("asking me to sync read a "
-					      "dirty buffer! (blocknr = %llu)\n",
-					      (unsigned long long)bh->b_blocknr);
-				continue;
-			}
-
-			lock_buffer(bh);
-			clear_buffer_uptodate(bh);
-			bh->b_end_io = ocfs_end_buffer_io_sync;
-			if (flags & OCFS_BH_READAHEAD)
-				submit_bh(READA, bh);
-			else
-				submit_bh(READ, bh);
-			continue;
-		}
-	}
-
-	status = 0;
-
-	for (i = (nr-1); i >= 0; i--) {
-		bh = bhs[i];
-
-		wait_on_buffer(bh);
-
-		if (inode)
-			SET_BH_SEQNUM(inode, bh);
-		else
-			CLEAR_BH_SEQNUM(bh);
-
-		OCFS_BH_PUT_DATA(bh);
-	}
-	LOG_TRACE_ARGS("off=(%llu), len=(%llu), cached=%s\n", off, len, 
-		       (!(flags & OCFS_BH_CACHED) || ignore_cache) ? "no" : "yes");
-
-bail:
-
-#ifdef OCFS_DBG_TIMING
-	IO_FUNC_TIMING_PRINT("ocfs_read_bhs", status);
-#endif
-
-	LOG_EXIT_STATUS(status);
-	return status;
-}
-
-
-

Modified: trunk/src/ioctl.c
===================================================================
--- trunk/src/ioctl.c	2004-06-17 16:59:59 UTC (rev 1127)
+++ trunk/src/ioctl.c	2004-06-17 21:06:33 UTC (rev 1128)
@@ -1,4 +1,6 @@
-/*
+/* -*- mode: c; c-basic-offset: 8; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
  * ioctl.c
  *
  * ioctl interface
@@ -24,11 +26,19 @@
  *	    Manish Singh, Neeraj Goyal, Suchit Kaura
  */
 
+#include "ocfs_compat.h"
+
+#include <linux/fs.h>
 #include <linux/types.h>
+#include <linux/slab.h>
 
-#include "inc/ocfs_log.h"
-#include "inc/ocfs.h"
+#include <asm/uaccess.h>
 
+#include "ocfs_log.h"
+#include "ocfs.h"
+
+#include "ioctl.h"
+
 /* Tracing */
 #define OCFS_DEBUG_CONTEXT      OCFS_DEBUG_CONTEXT_IOCTL
 

Added: trunk/src/ioctl.h
===================================================================
--- trunk/src/ioctl.h	2004-06-17 16:59:59 UTC (rev 1127)
+++ trunk/src/ioctl.h	2004-06-17 21:06:33 UTC (rev 1128)
@@ -0,0 +1,35 @@
+/* -*- mode: c; c-basic-offset: 8; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
+ * ioctl.h
+ *
+ * Function prototypes
+ *
+ * Copyright (C) 2002, 2004 Oracle.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ * 
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ *
+ * Authors: Kurt Hackel, Mark Fasheh, Sunil Mushran, Wim Coekaerts,
+ *	    Manish Singh, Neeraj Goyal, Suchit Kaura
+ */
+
+#ifndef OCFS2_IOCTL_H
+#define OCFS2_IOCTL_H
+
+int ocfs_ioctl(struct inode *inode, struct file *filp, unsigned int cmd,
+	       unsigned long arg);
+
+#endif /* OCFS2_IOCTL_H */

Modified: trunk/src/journal.c
===================================================================
--- trunk/src/journal.c	2004-06-17 16:59:59 UTC (rev 1127)
+++ trunk/src/journal.c	2004-06-17 21:06:33 UTC (rev 1128)
@@ -1,4 +1,6 @@
-/*
+/* -*- mode: c; c-basic-offset: 8; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
  * journal.c
  *
  * Defines functions of journalling api
@@ -24,12 +26,30 @@
  *	    Manish Singh, Joel Becker
  */
 
+#include "ocfs_compat.h"
+
+#include <linux/fs.h>
 #include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/highmem.h>
 
-#include "inc/ocfs_log.h"
-#include "inc/ocfs.h"
-#include "inc/ocfs_journal.h"
+#include "ocfs_log.h"
+#include "ocfs.h"
 
+#include "alloc.h"
+#include "dlm.h"
+#include "extmap.h"
+#include "inode.h"
+#include "journal.h"
+#include "nm.h"
+#include "super.h"
+#include "util.h"
+#include "vote.h"
+#include "sysfile.h"
+
+#include "ocfs_journal.h"
+#include "ocfs_buffer_head.h"
+
 #define OCFS_DEBUG_CONTEXT    OCFS_DEBUG_CONTEXT_JOURNAL
 
 typedef enum _release_locks_action {
@@ -62,6 +82,8 @@
  * #included.
  */
 #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)
+#include <linux/smp_lock.h>
+
 static inline handle_t *ocfs_journal_start(journal_t *journal, int nblocks)
 {
 	handle_t *h;
@@ -285,7 +307,7 @@
 		iput(lock->inode);
 		list_del(&(lock->lock_list));
 		handle->num_locks--;
-		ocfs_free(lock);
+		kfree(lock);
 	}
 
 	LOG_EXIT_STATUS(status);
@@ -301,21 +323,16 @@
 	
 	LOG_ENTRY_ARGS("(%llu)\n", lockid);
 
-	/* make sure that we have a root_start off     */
-	/* this can be called early in the first mount */
-	if (osb->vol_layout.root_start_off != 0 &&
-	    lockid >= osb->vol_layout.root_start_off) {
-		journal = osb->journal;
-		down(&journal->commit_sem);
-		list_for_each(p1, &(journal->commited)) {
-			handle = list_entry(p1, ocfs_journal_handle, h_list);
-			if (handle->new_file_lockid == lockid) {
-				status = 0;
-				break;
-			}
+	journal = osb->journal;
+	down(&journal->commit_sem);
+	list_for_each(p1, &(journal->commited)) {
+		handle = list_entry(p1, ocfs_journal_handle, h_list);
+		if (handle->new_file_lockid == lockid) {
+			status = 0;
+			break;
 		}
-		up(&journal->commit_sem);
 	}
+	up(&journal->commit_sem);
 
 	LOG_EXIT_STATUS(status);
 	return status;
@@ -430,7 +447,7 @@
 	}
 	handle->num_buffs = 0;
 	if (handle->buffs) {
-		ocfs_free(handle->buffs);
+		kfree(handle->buffs);
 		handle->buffs = NULL;
 	}
 
@@ -464,7 +481,7 @@
 	ocfs_free_bitmap_free_head(commit_head);
 
 	if (checkpoint)
-		ocfs_free(handle);
+		kfree(handle);
 
 	LOG_EXIT();
 
@@ -519,9 +536,9 @@
 			 * turned off later. */
 			LOG_ERROR_ARGS("block %lu was modified but never "
 				       "dirtied!\n", co->blocknr);
-			bh = getblk(OCFS_GET_BLOCKDEV(osb->sb), co->blocknr,
-				    osb->sect_size);
+			bh = sb_getblk(osb->sb, co->blocknr);
 			if (bh == NULL)
+#warning bingo, oops on oom
 				BUG();
 
 			dirtied = 0;
@@ -608,8 +625,8 @@
 	ocfs_free_bitmap_free_head(handle->commit_bits);
 
 	if (handle->buffs)
-		ocfs_free(handle->buffs);
-	ocfs_free(handle);
+		kfree(handle->buffs);
+	kfree(handle);
 
 	LOG_EXIT();
 	return;
@@ -744,7 +761,10 @@
 			(unsigned long long)bh->b_blocknr);
 
 	if (handle->num_buffs >= handle->max_buffs) {
-		LOG_ERROR_STR("Cannot add buffer to full transaction!");
+		LOG_ERROR_ARGS("Cannot add buffer to full transaction! "
+			       "num_buffs=%d, max_buffs=%d, block=%llu\n",
+			       handle->num_buffs, handle->max_buffs,
+			       (unsigned long long)bh->b_blocknr);
 		goto done;
 	}
 
@@ -843,7 +863,7 @@
 	int status = -1;
 	struct inode *inode = NULL; /* the journal inode */
 	journal_t * k_journal = NULL;
-	ocfs_file_entry *fe = NULL;
+	ocfs2_dinode *fe = NULL;
 	__u64 lock_id = 0;
 	struct buffer_head *bh = NULL;
 	__u64 alloc_size;
@@ -856,11 +876,8 @@
 	INIT_LIST_HEAD(&(osb->journal->commited));
 	init_MUTEX(&(osb->journal->commit_sem));
 
-	lock_id = ((OCFS_JOURNAL_FILE + osb->node_num) * osb->sect_size) +
-		  osb->vol_layout.root_int_off;
-
-	/* Ok, look up the inode for our journal */
-	inode = ocfs_iget(osb, lock_id);
+	/* already have the inode for our journal */
+	inode = ocfs_get_system_file_inode(osb, JOURNAL_SYSTEM_INODE, osb->node_num);
 	if (inode == NULL) {
 		LOG_ERROR_STR("access error");
 		status = -EACCES;
@@ -873,6 +890,7 @@
 		status = -EACCES;
 		goto done;
 	}
+	lock_id = GET_INODE_FEOFF(inode);
 
 	down_write(&OCFS_I(inode)->ip_io_sem);
 
@@ -892,10 +910,10 @@
 	fe = OCFS_BH_GET_DATA_READ(bh);
 
 	/* should be exactly eight megs.  need to run tuneocfs otherwise. */
-	if (fe->file_size != OCFS_JOURNAL_DEFAULT_SIZE) {
+	if (fe->i_size != OCFS_JOURNAL_DEFAULT_SIZE) {
 		LOG_ERROR_ARGS("Journal file size (%llu) is not the default (%u).  "
 			       "You must run tuneocfs to add a journal for this node.\n",
-			       fe->file_size, OCFS_JOURNAL_DEFAULT_SIZE);
+			       fe->i_size, OCFS_JOURNAL_DEFAULT_SIZE);
 		status = -EINVAL;
 	}
 
@@ -907,13 +925,13 @@
 		goto done;
 	}
 
-	LOG_TRACE_ARGS("fe->file_size = %llu\n", fe->file_size);
-	LOG_TRACE_ARGS("fe->alloc_size = %llu\n", fe->alloc_size);
+	LOG_TRACE_ARGS("fe->i_size = %llu\n", fe->i_size);
+	LOG_TRACE_ARGS("fe->i_clusters = %u\n", fe->i_clusters);
 
 	/* gonna need this later */
-	alloc_size = fe->alloc_size;
-	LOG_TRACE_ARGS("fe->this_sector = %llu\n", fe->this_sector);
-	inode->i_size = fe->file_size;
+	alloc_size = (u64)fe->i_clusters << osb->s_clustersize_bits;
+	LOG_TRACE_ARGS("fe->i_blkno = %llu\n", fe->i_blkno);
+	inode->i_size = fe->i_size;
 	OCFS_BH_PUT_DATA(bh);
 	fe = NULL;
 
@@ -1031,7 +1049,7 @@
 		send_sig (SIGINT, osb->commit->c_task, 0);
 		wait_for_completion(&osb->commit->c_complete);
 		osb->commit->c_task = NULL;
-		ocfs_free(osb->commit);
+		kfree(osb->commit);
 	}
 	
 	/* Shutdown the kernel journal system */
@@ -1134,7 +1152,7 @@
 	struct buffer_head **bhs = NULL;
 
 	LOG_ENTRY();
-	totalblks = size >> osb->sect_size_bits;
+	totalblks = size >> osb->sb->s_blocksize_bits;
 
 	/* Ok, allocate the maximum number of blocks we'll possibly
 	 * need. This is heavy, but it doesn't happen often. If need
@@ -1152,7 +1170,7 @@
 
 	LOG_TRACE_ARGS("Force reading %u blocks\n", totalblks);
 
-	status = ocfs_lookup_file_allocation(osb, vbo, &lbo, size, NULL, 
+	status = ocfs_lookup_file_allocation(osb, vbo, &lbo, totalblks, NULL, 
 					     inode, 1);
 	if (status < 0) {
 		LOG_ERROR_STATUS(status);
@@ -1179,7 +1197,7 @@
 			       "bytes\n", run, lbo, numbytes);
 
 		/* there are actually returned in bytes. need blocks. */
-		numblks = numbytes / osb->sect_size;
+		numblks = numbytes >> osb->sb->s_blocksize_bits;
 
 		status = ocfs_read_bhs(osb, lbo, numbytes, bhs, 0, NULL);
 		if (status < 0) {
@@ -1198,7 +1216,7 @@
 		for(i = 0; i < totalblks; i++)
 			if (bhs[i])
 				brelse(bhs[i]);
-		ocfs_free(bhs);
+		kfree(bhs);
 	}
 	LOG_EXIT_STATUS(status);
 	return(status);
@@ -1241,7 +1259,7 @@
 
 	LOG_EXIT_STATUS(status);
 
-	ocfs_free(arg);
+	kfree(arg);
 	return status;
 }
 
@@ -1281,7 +1299,7 @@
 {
 	int status = -1;
 	__u64 lock_id = 0;
-	ocfs_file_entry *fe;
+	ocfs2_dinode *fe;
 	struct inode *inode = NULL;
 	journal_t *k_journal = NULL;
 	struct buffer_head *bh = NULL;
@@ -1308,12 +1326,8 @@
 		goto done;
 	}
 
-	/* get the journal file fe and lock */
-	lock_id = ((OCFS_JOURNAL_FILE + node_num) * osb->sect_size) 
-		+ osb->vol_layout.root_int_off;
-
 	/* Ok, look up the inode for our journal */
-	inode = ocfs_iget(osb, lock_id);
+	inode = ocfs_get_system_file_inode(osb, JOURNAL_SYSTEM_INODE, node_num);
 	if (inode == NULL) {
 		LOG_ERROR_STR("access error");
 		status = -EACCES;
@@ -1326,6 +1340,7 @@
 		status = -EACCES;
 		goto done;
 	}
+	lock_id = GET_INODE_FEOFF(inode);
 
 	down_write(&OCFS_I(inode)->ip_io_sem);
 
@@ -1363,7 +1378,7 @@
 
 	fe = OCFS_BH_GET_DATA_READ(bh);
 	/* gonna need this later */
-	alloc_size = fe->alloc_size;
+	alloc_size = (u64)fe->i_clusters << osb->s_clustersize_bits;
 	OCFS_BH_PUT_DATA(bh);
 
 	OCFS_I(inode)->alloc_size = alloc_size;
@@ -1433,7 +1448,7 @@
 	status = 0;
 
 clear_node:
-	CLEAR_NODE_IN_RECOVERY(osb, node_num);
+	ocfs_recovery_map_clear(osb, node_num);
 	ocfs_recover_oin_locks(osb, node_num);
 done:
 	if (recovery_lock)
@@ -1482,8 +1497,7 @@
 	down (&(osb->publish_lock));
 
 	/* Read the publish sector */
-	node_publ_off = osb->vol_layout.publ_sect_off +
-			(node_num * osb->sect_size);
+	node_publ_off = (osb->publish_blkno + node_num) << osb->sb->s_blocksize_bits;
 	status = ocfs_read_bh(osb, node_publ_off, &publish_bh, 
 			      OCFS_BH_COND_CACHED, NULL);
 	if (status < 0) {
@@ -1554,26 +1568,14 @@
 				 * one last commit cache and then exit */
 				if (journal->state == OCFS_JOURNAL_IN_SHUTDOWN)
 					finish = 1;
+
+				/* ignore the actual signal */
 				if (signal_pending(current)) {
-					/* ignore the actual signal */
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
 					dequeue_signal_lock(current, 
 							    &current->blocked, 
 							    &info);
-#else
-#ifdef HAVE_NPTL
-					spin_lock_irq (&current->sighand->siglock);
-					dequeue_signal(&current->blocked, 
-						       &info);
-					spin_unlock_irq(&current->sighand->siglock);					
-#else 
-					spin_lock_irq(&current->sigmask_lock);
-					dequeue_signal(&current->blocked, 
-						       &info);
-					spin_unlock_irq(&current->sigmask_lock);
-#endif /* !HAVE_NPTL */
-#endif /* 2.4.x kernel */
 				}
+
 				LOG_TRACE_STR("FLUSH_EVENT: interrupted");
 				break;
 			case 0:
@@ -1685,7 +1687,7 @@
 		if (tmpstat < 0)
 			LOG_ERROR_STATUS((status = tmpstat));
 		list_del(&(handle->h_list));
-		ocfs_free(handle);
+		kfree(handle);
 
 		atomic_dec(&journal->num_trans);
 	}

Added: trunk/src/journal.h
===================================================================
--- trunk/src/journal.h	2004-06-17 16:59:59 UTC (rev 1127)
+++ trunk/src/journal.h	2004-06-17 21:06:33 UTC (rev 1128)
@@ -0,0 +1,34 @@
+/* -*- mode: c; c-basic-offset: 8; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
+ * journal.h
+ *
+ * Function prototypes
+ *
+ * Copyright (C) 2002, 2004 Oracle.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ * 
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ *
+ * Authors: Kurt Hackel, Mark Fasheh, Sunil Mushran, Wim Coekaerts,
+ *	    Manish Singh, Neeraj Goyal, Suchit Kaura
+ */
+
+#ifndef OCFS2_JOURNAL_H
+#define OCFS2_JOURNAL_H
+
+int ocfs_commit_thread(void *arg);
+
+#endif /* OCFS2_JOURNAL_H */

Modified: trunk/src/lockres.c
===================================================================
--- trunk/src/lockres.c	2004-06-17 16:59:59 UTC (rev 1127)
+++ trunk/src/lockres.c	2004-06-17 21:06:33 UTC (rev 1128)
@@ -1,4 +1,6 @@
-/*
+/* -*- mode: c; c-basic-offset: 8; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
  * lockres.c
  *
  * lock resource handling
@@ -24,11 +26,23 @@
  *	    Manish Singh, Neeraj Goyal, Suchit Kaura
  */
 
+#include "ocfs_compat.h"
+
+#include <linux/fs.h>
 #include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/highmem.h>
 
-#include "inc/ocfs_log.h"
-#include "inc/ocfs.h"
+#include "ocfs_log.h"
+#include "ocfs.h"
 
+#include "dlm.h"
+#include "lockres.h"
+#include "util.h"
+#include "vote.h"
+
+#include "ocfs_buffer_head.h"
+
 /* Tracing */
 #define OCFS_DEBUG_CONTEXT      OCFS_DEBUG_CONTEXT_LOCKRES
 
@@ -50,7 +64,7 @@
 {
 	int status = 0;
 	struct buffer_head *tmpbh = NULL, **b = NULL;
-	ocfs_file_entry *fe;
+	ocfs2_dinode *fe;
 	int flags;
 	ocfs_lock_res *lockres = GET_INODE_LOCKRES(inode);;
 
@@ -60,8 +74,8 @@
 	b = (bh == NULL) ? &tmpbh : bh;
 
 	if (reread) {
-		flags = OCFS_NONCACHED(osb, lock_id) ? 0 : 
-		  	lockres->master_node_num == osb->node_num ? 
+		flags = (inode && OCFS_I(inode)->flags & OCFS_INODE_SYSTEM_FILE) ?
+			0 : lockres->master_node_num == osb->node_num ? 
 		    	OCFS_BH_CACHED : 0;	
 		status = ocfs_read_bh(osb, lock_id, b, flags, inode);
 		if (status < 0) {
@@ -83,9 +97,9 @@
 		if (!IS_VALID_FILE_ENTRY(fe))
 			BUG();
 
-		lockres->lock_type = DISK_LOCK_FILE_LOCK (fe);
-		lockres->master_node_num = DISK_LOCK_CURRENT_MASTER (fe);
-		lockres->oin_openmap = DISK_LOCK_OIN_MAP (fe);
+		lockres->lock_type = DISK_LOCK(fe)->dl_level;
+		lockres->master_node_num = DISK_LOCK(fe)->dl_master;
+		ocfs_get_disk_lock_open_map(osb, DISK_LOCK(fe), &lockres->oin_openmap);
 
 		if (lockres->readonly_node != OCFS_INVALID_NODE_NUM &&
     	    	    lockres->readonly_node != lockres->master_node_num) {
@@ -191,13 +205,13 @@
 	lockres->lock_type = OCFS_DLM_NO_LOCK;
 	lockres->master_node_num = OCFS_INVALID_NODE_NUM;
 	lockres->last_upd_seq_num = 0;
-	lockres->oin_openmap = 0;
+	ocfs_node_map_init(osb, &lockres->oin_openmap);
 	lockres->lock_state = 0;
 
 	init_MUTEX(&lockres->lock_mutex);
 	atomic_set (&lockres->lr_ref_cnt, 0);
 
-	lockres->readonly_map = 0ULL;
+	ocfs_node_map_init(osb, &lockres->readonly_map);
 	lockres->readonly_node = OCFS_INVALID_NODE_NUM;
 
 	lockres->lock_holders = 0;

Added: trunk/src/lockres.h
===================================================================
--- trunk/src/lockres.h	2004-06-17 16:59:59 UTC (rev 1127)
+++ trunk/src/lockres.h	2004-06-17 21:06:33 UTC (rev 1128)
@@ -0,0 +1,41 @@
+/* -*- mode: c; c-basic-offset: 8; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
+ * lockres.h
+ *
+ * Function prototypes
+ *
+ * Copyright (C) 2002, 2004 Oracle.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ * 
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ *
+ * Authors: Kurt Hackel, Mark Fasheh, Sunil Mushran, Wim Coekaerts,
+ *	    Manish Singh, Neeraj Goyal, Suchit Kaura
+ */
+
+#ifndef OCFS2_LOCKRES_H
+#define OCFS2_LOCKRES_H
+
+/* lockres.c */
+int ocfs_acquire_lockres(ocfs_lock_res *lockres, __u32 timeout);
+void ocfs_init_lockres(ocfs_super *osb, struct inode *inode);
+void ocfs_release_lockres(ocfs_lock_res *lockres);
+int ocfs_update_lockres(ocfs_super *osb, __u64 lock_id,
+			struct buffer_head **bh, __u32 *updated,
+			__u32 timeout, struct inode *inode, int reread,
+			int locked);
+
+#endif /* OCFS2_LOCKRES_H */

Modified: trunk/src/namei.c
===================================================================
--- trunk/src/namei.c	2004-06-17 16:59:59 UTC (rev 1127)
+++ trunk/src/namei.c	2004-06-17 21:06:33 UTC (rev 1128)
@@ -1,10 +1,25 @@
-/*
+/* -*- mode: c; c-basic-offset: 8; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
  * namei.c
  *
  * Create and rename file, directory, symlinks
  *
  * Copyright (C) 2002, 2004 Oracle.  All rights reserved.
  *
+ *  Portions of this code from linux/fs/ext3/dir.c
+ *  
+ *  Copyright (C) 1992, 1993, 1994, 1995
+ *  Remy Card (card at masi.ibp.fr)
+ *  Laboratoire MASI - Institut Blaise Pascal
+ *  Universite Pierre et Marie Curie (Paris VI)
+ *  
+ *   from
+ *
+ *   linux/fs/minix/dir.c
+ *
+ *   Copyright (C) 1991, 1992 Linus Torvalds
+ *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public
  * License as published by the Free Software Foundation; either
@@ -24,34 +39,59 @@
  *	    Manish Singh, Neeraj Goyal, Suchit Kaura
  */
 
+#include "ocfs_compat.h"
+
+#include <linux/fs.h>
 #include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/highmem.h>
 
-#include "inc/ocfs_log.h"
-#include "inc/ocfs.h"
-#include "inc/ocfs_journal.h"
+#include "ocfs_log.h"
+#include "ocfs.h"
 
+#include "alloc.h"
+#include "dcache.h"
+#include "dir.h"
+#include "dlm.h"
+#include "file.h"
+#include "sysfile.h"
+#include "inode.h"
+#include "lockres.h"
+#include "namei.h"
+#include "util.h"
+#include "vote.h"
+
+#include "ocfs_journal.h"
+#include "ocfs_buffer_head.h"
+
 #define OCFS_DEBUG_CONTEXT    OCFS_DEBUG_CONTEXT_NAMEI
 
-static int inline search_dirblock(struct buffer_head * bh, struct inode *dir, 
+static int inline search_dirblock(struct buffer_head *bh,
+				  struct inode *dir, 
 				  const char *name, int namelen, 
 				  unsigned long offset, 
-				  struct ocfs2_dir_entry ** res_dir);
-static int ocfs_delete_entry (ocfs_journal_handle *handle, struct inode * dir, 
-			      struct ocfs2_dir_entry * de_del, 
-			      struct buffer_head * bh);
-static int __ocfs_add_entry (ocfs_journal_handle *handle, struct inode *dir,
-			     const char *name, int namelen, 
-			     struct inode *inode, __u64 inode_off, 
-			     struct buffer_head *parent_fe_bh);
-static inline int ocfs_match (int len, const char * const name, struct ocfs2_dir_entry * de);
+				  struct ocfs2_dir_entry **res_dir);
+static int ocfs_delete_entry(ocfs_journal_handle *handle,
+			     struct inode *dir, 
+			     struct ocfs2_dir_entry *de_del, 
+			     struct buffer_head *bh);
+static int __ocfs_add_entry(ocfs_journal_handle *handle,
+			    struct inode *dir,
+			    const char *name, int namelen, 
+			    struct inode *inode, __u64 inode_off, 
+			    struct buffer_head *parent_fe_bh);
+static inline int ocfs_match(int len, const char *const name,
+			     struct ocfs2_dir_entry *de);
 
 static int ocfs_mknod_locked(ocfs_super *osb, struct inode *dir, 
 			     struct dentry *dentry, int mode, 
-			     ocfs_dev dev, struct buffer_head **new_fe_bh, 
+			     ocfs_dev dev,
+			     struct buffer_head **new_fe_bh, 
 			     struct buffer_head *parent_fe_bh,
 			     ocfs_journal_handle *handle,
 			     struct inode *inode);
-static int ocfs_double_lock(ocfs_super *osb, ocfs_journal_handle *handle,
+static int ocfs_double_lock(ocfs_super *osb,
+			    ocfs_journal_handle *handle,
 			    __u32 type1, __u32 flags1, 
 			    struct buffer_head **bh1,
 		     	    struct inode *inode1,
@@ -59,13 +99,9 @@
 			    struct buffer_head **bh2,
 		     	    struct inode *inode2);
 
-static int ocfs_orphan_add(ocfs_super *osb, ocfs_journal_handle *handle, 
-			   struct inode *inode, ocfs_file_entry *fe);
+static int ocfs_orphan_add(ocfs_super *osb, ocfs_journal_handle *handle,
+			   struct inode *inode, ocfs2_dinode *fe);
 
-static struct dentry_operations ocfs_dentry_ops = {
-	.d_revalidate = ocfs_dentry_revalidate
-};
-
 static inline int ocfs_add_entry(ocfs_journal_handle *handle, 
 				 struct dentry *dentry, 
 				 struct inode *inode, __u64 inode_off, 
@@ -99,7 +135,7 @@
 	LOG_ENTRY_ARGS ("(0x%p, 0x%p, '%*s')\n", dir, dentry,
 			dentry->d_name.len, dentry->d_name.name);
 
-	if (dentry->d_name.len > OCFS_MAX_FILENAME_LENGTH) {
+	if (dentry->d_name.len > OCFS2_MAX_FILENAME_LENGTH) {
 		LOG_ERROR_STR ("name too long");
 		ret = ERR_PTR (-ENAMETOOLONG);
 		goto bail;
@@ -109,9 +145,9 @@
 		       dir);
 
 	down_read(&OCFS_I(dir)->ip_io_sem);
-	status = ocfs_find_files_on_disk(osb, dentry->d_name.name, 
-					 dentry->d_name.len, &fe_off, dir, 1, 
-					 &dirent_bh, &dirent);
+	status = ocfs_find_files_on_disk(osb, dentry->d_name.name,
+					 dentry->d_name.len, &fe_off,
+					 dir, 1, &dirent_bh, &dirent);
 	up_read(&OCFS_I(dir)->ip_io_sem);
 	if (status < 0)
 		goto bail_add;
@@ -144,7 +180,7 @@
 	__u64 file_off;
 	ocfs_journal_handle *handle = NULL;
 	ocfs_super *osb;
-	ocfs_file_entry *fe = NULL;
+	ocfs2_dinode *fe = NULL;
 	struct buffer_head *new_fe_bh = NULL;
 	struct inode *inode = NULL;
 
@@ -209,10 +245,11 @@
 
 	fe = OCFS_BH_GET_DATA_READ(new_fe_bh);
 
-	ocfs_populate_inode (inode, fe, mode, 1);
+	ocfs_populate_inode (inode, fe, 1);
 
-	file_off = fe->this_sector;
-	handle->new_file_lockid = fe->this_sector;
+	file_off = fe->i_blkno << osb->sb->s_blocksize_bits;
+	handle->new_file_lockid =
+    		fe->i_blkno << osb->sb->s_blocksize_bits;
 	OCFS_BH_PUT_DATA(new_fe_bh);
 	fe = NULL;
 
@@ -239,12 +276,14 @@
 		de = OCFS_BH_GET_DATA_WRITE(newdirbh);
 		de->inode = cpu_to_le64(file_off);
 		de->name_len = 1;
-		de->rec_len = cpu_to_le16(OCFS_DIR_REC_LEN(de->name_len));
+		de->rec_len =
+			cpu_to_le16(OCFS2_DIR_REC_LEN(de->name_len));
 		strcpy (de->name, ".");
 		ocfs_set_de_type(dir->i_sb, de, S_IFDIR);
 		de = (struct ocfs2_dir_entry *) ((char *) de + le16_to_cpu(de->rec_len));
 		de->inode = cpu_to_le64(GET_INODE_FEOFF(dir));
-		de->rec_len = cpu_to_le16(inode->i_sb->s_blocksize-OCFS_DIR_REC_LEN(1));
+		de->rec_len = cpu_to_le16(inode->i_sb->s_blocksize -
+					  OCFS2_DIR_REC_LEN(1));
 		de->name_len = 2;
 		strcpy (de->name, "..");
 		ocfs_set_de_type(dir->i_sb, de, S_IFDIR);
@@ -264,7 +303,7 @@
 			goto leave;
 		}
 		fe = OCFS_BH_GET_DATA_WRITE(parent_fe_bh);
-		fe->link_cnt++;
+		fe->i_links_count++;
 		OCFS_BH_PUT_DATA(parent_fe_bh);
 		fe = NULL;
 		status = ocfs_journal_dirty(handle, parent_fe_bh);
@@ -313,25 +352,30 @@
  */
 static int ocfs_mknod_locked(ocfs_super *osb, struct inode *dir, 
 			     struct dentry *dentry, int mode, 
-			     ocfs_dev dev, struct buffer_head **new_fe_bh, 
+			     ocfs_dev dev,
+			     struct buffer_head **new_fe_bh, 
 			     struct buffer_head *parent_fe_bh,
 			     ocfs_journal_handle *handle,
 			     struct inode *inode)
 {
 	int status = 0;
-	ocfs_file_entry *fe = NULL;
-	__u64 bitmapOffset = 0;
+	ocfs2_dinode *fe = NULL;
+	ocfs2_extent_list *fel;
+	__u64 disk_off = 0;
 	__u64 fileOffset = 0;
 	struct inode *inode_alloc_inode = NULL;
-	struct super_block *sb = osb->sb;
+	ocfs_node_map just_me;
 
 	LOG_ENTRY_ARGS ("(0x%p, 0x%p, %d, %d, '%*s')\n", dir, dentry, mode,
 			dev, dentry->d_name.len, dentry->d_name.name);
 
 	OCFS_ASSERT(new_fe_bh);
 	*new_fe_bh = NULL;
+	
+	ocfs_node_map_init(osb, &just_me);
+	ocfs_node_map_set_bit(&just_me, osb->node_num);
 
-	inode_alloc_inode = igrab(osb->system_inodes[INODE_ALLOC_BITMAP_SYSTEM_INODE]);
+	inode_alloc_inode = ocfs_get_system_file_inode(osb, INODE_ALLOC_BITMAP_SYSTEM_INODE, osb->node_num);
 	if (!inode_alloc_inode) {
 		status = -EFAIL;
 		LOG_ERROR_STATUS(status);
@@ -339,18 +383,16 @@
 	}
 
 	ocfs_handle_add_inode(handle, inode_alloc_inode);
-	status = ocfs_alloc_node_block (osb, osb->inode_size,
-					&bitmapOffset, &fileOffset, 
-					osb->node_num, DISK_ALLOC_INODE, 
-					handle);
+	status = ocfs_alloc_node_block(osb, osb->sb->s_blocksize,
+			      	       &disk_off, &fileOffset, 
+			       	       osb->node_num, DISK_ALLOC_INODE, 
+		       		       handle);
 	if (status < 0) {
 		LOG_ERROR_STATUS (status);
 		goto leave;
 	}
 
-	*new_fe_bh = getblk (OCFS_GET_BLOCKDEV(sb), 
-			     bitmapOffset >> sb->s_blocksize_bits, 
-			     sb->s_blocksize);
+	*new_fe_bh = sb_getblk(osb->sb, disk_off >> osb->sb->s_blocksize_bits);
 	if (!*new_fe_bh) {
 		status = -EIO;
 		LOG_ERROR_STATUS(status);
@@ -366,59 +408,46 @@
 	}
 
 	fe = OCFS_BH_GET_DATA_WRITE(*new_fe_bh);
-	memset (fe, 0, osb->sect_size);
-	inode->i_ino = ino_from_off(osb->sb, bitmapOffset);
-	fe->this_sector = bitmapOffset;
-	fe->alloc_file_off = fileOffset;
-	fe->alloc_node = osb->node_num;
-	fe->uid = current->fsuid;
-	fe->gid = current->fsgid;
-	fe->prot_bits = mode & 0007777;
-	if (S_ISCHR (mode) || S_ISBLK (mode)) {
-		fe->dev_major = MAJOR (dev);
-		fe->dev_minor = MINOR (dev);
-	} else {
-		fe->dev_major = MAJOR (dir->i_sb->s_dev);
-		fe->dev_minor = MINOR (dir->i_sb->s_dev);
-	}
+	memset(fe, 0, osb->sb->s_blocksize);
+	inode->i_ino = ino_from_off(osb->sb, disk_off);
+	/*
+	 * FIXME This needs to be
+	 * (node_num << 24) | (nodeconf->gen_counter++ & 0x00FFFFFF)
+	 */
+	inode->i_generation = 0;
+	fe->i_generation = cpu_to_le32(inode->i_generation);
+	fe->i_blkno = disk_off >> osb->sb->s_blocksize_bits;
+	fe->i_suballoc_blkno = fileOffset >> osb->sb->s_blocksize_bits;
+	fe->i_suballoc_node = osb->node_num;
+	fe->i_uid = current->fsuid;
+	fe->i_gid = current->fsgid;
+	fe->i_mode = mode;
+	if (S_ISCHR (mode) || S_ISBLK (mode))
+		fe->id1.dev1.i_rdev = huge_encode_dev(dev);
 
 	if (S_ISLNK (mode) || S_ISDIR (mode) || S_ISREG (mode)) 
 		atomic_set(GET_INODE_CLEAN_SEQ(inode), atomic_read(&osb->clean_buffer_seq));
 
-	if (S_ISLNK (mode))
-		fe->attribs |= OCFS_ATTRIB_SYMLINK;
-	else if (S_ISCHR (mode))
-		fe->attribs |= OCFS_ATTRIB_CHAR;
-	else if (S_ISBLK (mode))
-		fe->attribs |= OCFS_ATTRIB_BLOCK;
-	else if (S_ISFIFO (mode))
-		fe->attribs |= OCFS_ATTRIB_FIFO;
-	else if (S_ISSOCK (mode))
-		fe->attribs |= OCFS_ATTRIB_SOCKET;
-	else if (S_ISDIR (mode))
-		fe->attribs |= OCFS_ATTRIB_DIRECTORY;
-	else
-		fe->attribs |= OCFS_ATTRIB_REG;
-
 	if (S_ISDIR (mode))
-		fe->link_cnt = 2;
+		fe->i_links_count = 2;
 	else
-		fe->link_cnt = 1;
+		fe->i_links_count = 1;
 
-	fe->local_ext = 1;
-	fe->granularity = -1;
-	fe->next_free_ext = 0;
-	fe->last_ext_ptr = 0;
-	strcpy (fe->signature, OCFS_FILE_ENTRY_SIGNATURE);
-	SET_VALID_BIT (fe->sync_flags);
-	DISK_LOCK_SEQNUM (fe) = 0;
-	DISK_LOCK_CURRENT_MASTER (fe) = osb->node_num;
-	DISK_LOCK_FILE_LOCK (fe) = OCFS_DLM_ENABLE_CACHE_LOCK;
-	DISK_LOCK_READER_NODE (fe) = osb->node_num;
-	DISK_LOCK_WRITER_NODE (fe) = osb->node_num;
-	DISK_LOCK_OIN_MAP(fe) = (1 << osb->node_num);
-	fe->create_time = fe->modify_time = OCFS_CURRENT_TIME;
-	fe->dir_node_ptr = GET_INODE_FEOFF(dir);
+	fe->i_last_eb_blk = 0;
+	strcpy (fe->i_signature, OCFS2_FILE_ENTRY_SIGNATURE);
+	fe->i_flags |= OCFS2_VALID_FL;
+	DISK_LOCK(fe)->dl_seq_num = 0;
+	DISK_LOCK(fe)->dl_master = osb->node_num;
+	DISK_LOCK(fe)->dl_level = OCFS_DLM_ENABLE_CACHE_LOCK;
+	ocfs_set_disk_lock_open_map(osb, DISK_LOCK(fe), &just_me);
+	fe->i_atime = fe->i_ctime = fe->i_mtime = OCFS_CURRENT_TIME;
+	fe->i_dtime = 0;
+
+	fel = &fe->id2.i_list;
+	fel->l_tree_depth = -1;
+	fel->l_next_free_rec = 0;
+	fel->l_count = ocfs2_extent_recs_per_inode(osb->sb);
+
 	OCFS_BH_PUT_DATA(*new_fe_bh);
 	fe = NULL;
 
@@ -432,7 +461,8 @@
 	 * for add_entry. */
 	inode->i_mode = mode;
 
-	status = ocfs_add_entry (handle, dentry, inode, bitmapOffset, parent_fe_bh);
+	status = ocfs_add_entry(handle, dentry, inode, disk_off,
+				parent_fe_bh);
 	if (status < 0) {
 		LOG_ERROR_STATUS (status);
 		goto leave;
@@ -519,7 +549,7 @@
 	ocfs_super *osb = OCFS_SB(dir->i_sb);
 	__u64 fe_off = GET_INODE_FEOFF(inode);
 	struct inode *parentInode = dentry->d_parent->d_inode;
-	ocfs_file_entry *fe = NULL;
+	ocfs2_dinode *fe = NULL;
 	__u32 lockFlags = (S_ISDIR (inode->i_mode) ? (FLAG_RELEASE_DENTRY | FLAG_DIR) : FLAG_RELEASE_DENTRY);
 	struct buffer_head *fe_bh = NULL;
 	struct buffer_head *parent_node_bh = NULL; /* parent locknode */
@@ -568,10 +598,11 @@
 	got_parent = 1;
 
 	/* this will re-read the directory now with the EXCLUSIVE */
-	/* lock already held */
-	status = ocfs_find_files_on_disk (osb, dentry->d_name.name, 
-					  dentry->d_name.len, &fe_off, 
-					  parentInode, 0, &dirent_bh, &dirent);
+	/* lock already held; it will also return the fe_off to us */
+	status = ocfs_find_files_on_disk(osb, dentry->d_name.name,
+					 dentry->d_name.len, &fe_off,
+					 parentInode, 0, &dirent_bh,
+					 &dirent);
 	if (status < 0) {
 		LOG_ERROR_STATUS(status);
 		goto leave;
@@ -619,17 +650,17 @@
 	}
 
 	fe = OCFS_BH_GET_DATA_WRITE(fe_bh);
-	if (fe->link_cnt != inode->i_nlink) {
-		printk("ocfs_unlink: hmm, inode has nlink = %u, fe has link_cnt = %u. Setting inode from fe.\n", inode->i_nlink, fe->link_cnt);
-		inode->i_nlink = fe->link_cnt;
+	if (fe->i_links_count != inode->i_nlink) {
+		printk("ocfs_unlink: hmm, inode has nlink = %u, fe has link_cnt = %u. Setting inode from fe.\n",
+		       inode->i_nlink, fe->i_links_count);
+		inode->i_nlink = fe->i_links_count;
 	}
 
 	if (S_ISDIR (inode->i_mode))
-		fe->link_cnt = 0;
+		fe->i_links_count = 0;
 	else
-		fe->link_cnt--;
-
-	if (!fe->link_cnt) {
+		fe->i_links_count--;
+	if (!fe->i_links_count) {
 		status = ocfs_orphan_add(osb, handle, inode, fe);
 		if (status < 0) {
 			OCFS_BH_PUT_DATA(fe_bh);
@@ -653,7 +684,7 @@
 	}
 
 	if (S_ISDIR (inode->i_mode)) {
-		ocfs_file_entry *dirfe;
+		ocfs2_dinode *dirfe;
 		status = ocfs_journal_access(handle, parent_node_bh, 
 					     OCFS_JOURNAL_ACCESS_WRITE);
 		if (status < 0) {
@@ -661,7 +692,7 @@
 			goto leave;
 		}
 		dirfe = OCFS_BH_GET_DATA_WRITE(parent_node_bh);
-		dirfe->link_cnt--;
+		dirfe->i_links_count--;
 		OCFS_BH_PUT_DATA(parent_node_bh);
 		status = ocfs_journal_dirty(handle, fe_bh);
 		if (status < 0) {
@@ -855,10 +886,8 @@
 	int status = 0;
 	struct inode *old_inode = old_dentry->d_inode;
 	struct inode *new_inode = new_dentry->d_inode;
-	ocfs_file_entry *newfe = NULL, *oldfe = NULL;
-	struct buffer_head *oldfe_bh = NULL;
+	ocfs2_dinode *newfe = NULL;
 	struct buffer_head *newfe_bh = NULL;
-	struct buffer_head *insert_bh = NULL;
 	ocfs_super *osb = NULL;
 	__u64 oldfe_lockid;
 	__u64 newfe_lockid = 0;
@@ -970,14 +999,14 @@
 			goto finally;
 		status = -EMLINK;
 		if (!new_inode && new_dir!=old_dir &&
-		    new_dir->i_nlink >= OCFS_LINK_MAX)
+		    new_dir->i_nlink >= OCFS2_LINK_MAX)
 			goto finally;
 	}
 	
 	status = -ENOENT;
-	old_de_bh = ocfs_find_entry (old_dentry->d_name.name,
-				     old_dentry->d_name.len,
-				     old_dir, &old_de);
+	old_de_bh = ocfs_find_entry(old_dentry->d_name.name, 
+				    old_dentry->d_name.len,
+				    old_dir, &old_de);
 	if (!old_de_bh)
 		goto finally;
 
@@ -992,9 +1021,10 @@
 
 	/* check if the target already exists (in which case we need
 	 * to delete it */
-	status = ocfs_find_files_on_disk(osb, new_dentry->d_name.name, 
-					 new_dentry->d_name.len, &newfe_lockid,
-					 new_dir, 0, &new_de_bh, &new_de);
+	status = ocfs_find_files_on_disk(osb, new_dentry->d_name.name,
+					 new_dentry->d_name.len, 
+					 &newfe_lockid, new_dir, 0,
+					 &new_de_bh, &new_de);
 	/* The only error we allow here is -ENOENT because the new
 	 * file not existing is perfectly valid. */
 	if ((status < 0) && (status != -ENOENT)) {
@@ -1019,7 +1049,7 @@
 
 		if (S_ISDIR(new_inode->i_mode))
 			newfe_flags = FLAG_DIR;
-		status = ocfs_acquire_lock(osb, OCFS_DLM_EXCLUSIVE_LOCK, 
+		status = ocfs_acquire_lock(osb, OCFS_DLM_EXCLUSIVE_LOCK,
 					   newfe_flags, &newfe_bh, 
 					   new_inode);
 		if (status < 0) {
@@ -1057,12 +1087,12 @@
 
 		newfe = OCFS_BH_GET_DATA_WRITE(newfe_bh);
 		if (S_ISDIR (new_inode->i_mode))
-			newfe->link_cnt = 0;
+			newfe->i_links_count = 0;
 		else
-			newfe->link_cnt--;
+			newfe->i_links_count--;
 
-		if (!newfe->link_cnt) {
-			status = ocfs_orphan_add(osb, handle, new_inode, 
+		if (!newfe->i_links_count) {
+			status = ocfs_orphan_add(osb, handle, new_inode,
 						 newfe);
 			if (status < 0) {
 				OCFS_BH_PUT_DATA(newfe_bh);
@@ -1192,15 +1222,8 @@
 		iput(new_inode);
 	}
 
-	if (oldfe_bh) {
-		if (oldfe)
-			OCFS_BH_PUT_DATA(oldfe_bh);
-		brelse(oldfe_bh);
-	}
 	if (newfe_bh)
 		brelse(newfe_bh);
-	if (insert_bh)
-		brelse(insert_bh);
 	if (old_dir_bh)
 		brelse(old_dir_bh);
 	if (new_dir_bh)
@@ -1231,7 +1254,7 @@
 	int l;
 	struct buffer_head *new_fe_bh = NULL;
 	struct buffer_head *parent_fe_bh = NULL;
-	ocfs_file_entry *fe = NULL;
+	ocfs2_dinode *fe = NULL;
 	ocfs_journal_handle *handle = NULL;
 	int got_lock = 0;
 
@@ -1289,11 +1312,11 @@
 	}
 
 	fe = OCFS_BH_GET_DATA_READ(new_fe_bh);
-	file_off = fe->this_sector;
+	file_off = fe->i_blkno << osb->sb->s_blocksize_bits;
 	OCFS_BH_PUT_DATA(new_fe_bh);
 	fe = NULL;
 
-	status = ocfs_extend_file (osb, newsize, file_off, handle, inode, NULL);
+	status = ocfs_extend_file (osb, newsize, file_off, handle, inode, NULL, 0, new_fe_bh);
 	if (status < 0) {
 		if (status != -ENOSPC && status != -EINTR) {
 			LOG_ERROR_ARGS ("Failed to extend file to %llu", newsize);
@@ -1305,12 +1328,12 @@
 	inode->i_rdev = OCFS_NODEV;
 
 	fe = OCFS_BH_GET_DATA_READ(new_fe_bh);
-	ocfs_populate_inode (inode, fe, S_IFLNK | S_IRWXUGO, 1);
+	ocfs_populate_inode (inode, fe, 1);
 	OCFS_BH_PUT_DATA(new_fe_bh);
 	fe = NULL;
 
 	inode->i_size = newsize;
-	inode->i_blocks = (newsize + sb->s_blocksize) >> sb->s_blocksize_bits;
+	inode->i_blocks = (newsize + sb->s_blocksize - 1) >> sb->s_blocksize_bits;
 
 	ocfs_init_lockres (osb, inode);
 	status = ocfs_inode_notify_open(osb, new_fe_bh, handle, inode);
@@ -1325,6 +1348,7 @@
 		goto abort_trans;
 	}
 
+
 abort_trans:
 	if (status < 0) {
 		ocfs_abort_trans(handle);
@@ -1349,7 +1373,6 @@
 
 bail:
 	up_write(&OCFS_I(dir)->ip_io_sem);
-
 	if (new_fe_bh) {
 		if (fe)
 			OCFS_BH_PUT_DATA(new_fe_bh);
@@ -1369,11 +1392,11 @@
 	const char * error_msg = NULL;
 	const int rlen = le16_to_cpu(de->rec_len);
 	
-	if (rlen < OCFS_DIR_REC_LEN(1))
+	if (rlen < OCFS2_DIR_REC_LEN(1))
 		error_msg = "rec_len is smaller than minimal";
 	else if (rlen % 4 != 0)
 		error_msg = "rec_len % 4 != 0";
-	else if (rlen < OCFS_DIR_REC_LEN(de->name_len))
+	else if (rlen < OCFS2_DIR_REC_LEN(de->name_len))
 		error_msg = "rec_len is too small for name_len";
 	else if (((char *) de - bh->b_data) + rlen > dir->i_sb->s_blocksize)
 		error_msg = "directory entry across blocks";
@@ -1410,7 +1433,7 @@
 	struct super_block * sb;
 	int retval, status;
 	char *buf = NULL;
-	ocfs_file_entry *fe = NULL;
+	ocfs2_dinode *fe = NULL;
 
 	sb = dir->i_sb;
 
@@ -1419,7 +1442,7 @@
 	bh = ocfs_bread (handle, dir, 0, 0, &retval, 0);
 	if (!bh)
 		return retval;
-	rec_len = OCFS_DIR_REC_LEN(namelen);
+	rec_len = OCFS2_DIR_REC_LEN(namelen);
 	offset = 0;
 	de = (struct ocfs2_dir_entry *) bh->b_data;
 	while (1) {
@@ -1449,8 +1472,8 @@
 				/* update the parent file entry file size */
 				status = ocfs_journal_access(handle, parent_fe_bh, OCFS_JOURNAL_ACCESS_WRITE);
 				buf = OCFS_BH_GET_DATA_WRITE(parent_fe_bh);
-				fe = (ocfs_file_entry *)buf;
-				fe->file_size = dir->i_size;
+				fe = (ocfs2_dinode *)buf;
+				fe->i_size = dir->i_size;
 				OCFS_BH_PUT_DATA(parent_fe_bh);
 				buf = NULL;
 				status = ocfs_journal_dirty(handle, parent_fe_bh);
@@ -1472,8 +1495,10 @@
 			retval = -EEXIST;
 			goto bail;
 		}
-		if ((le64_to_cpu(de->inode) == 0 && le16_to_cpu(de->rec_len) >= rec_len) ||
-		    (le16_to_cpu(de->rec_len) >= OCFS_DIR_REC_LEN(de->name_len) + rec_len)) {
+		if (((le64_to_cpu(de->inode) == 0) &&
+		     (le16_to_cpu(de->rec_len) >= rec_len)) ||
+		    (le16_to_cpu(de->rec_len) >=
+		     (OCFS2_DIR_REC_LEN(de->name_len) + rec_len))) {
 			OCFS_BH_PUT_DATA(bh);
 			buf = NULL;
 			status = ocfs_journal_access(handle, bh, OCFS_JOURNAL_ACCESS_WRITE);
@@ -1482,14 +1507,14 @@
 			offset += le16_to_cpu(de->rec_len);
 			if (le64_to_cpu(de->inode)) {
 				de1 = (struct ocfs2_dir_entry *) ((char *) de +
-					OCFS_DIR_REC_LEN(de->name_len));
+					OCFS2_DIR_REC_LEN(de->name_len));
 				de1->rec_len =
 					cpu_to_le16(le16_to_cpu(de->rec_len) -
-					OCFS_DIR_REC_LEN(de->name_len));
-				de->rec_len = cpu_to_le16(OCFS_DIR_REC_LEN(de->name_len));
+					OCFS2_DIR_REC_LEN(de->name_len));
+				de->rec_len = cpu_to_le16(OCFS2_DIR_REC_LEN(de->name_len));
 				de = de1;
 			}
-			de->file_type = OCFS_FT_UNKNOWN;
+			de->file_type = OCFS2_FT_UNKNOWN;
 			if (inode_off) {
 				de->inode = cpu_to_le64(inode_off);
 				ocfs_set_de_type(dir->i_sb, de, inode->i_mode);
@@ -1532,7 +1557,7 @@
 
 	i = 0;
 	pde = NULL;
-	de = (struct ocfs2_dir_entry *) OCFS_BH_GET_DATA_READ(bh);
+	de = OCFS_BH_GET_DATA_READ(bh);
 	while (i < bh->b_size) {
 		if (!ocfs_check_dir_entry(dir, de, bh, i)) {
 			OCFS_BH_PUT_DATA(bh);
@@ -1613,9 +1638,9 @@
 
 
 
-struct buffer_head * ocfs_find_entry (const char *name, int namelen, 
-				      struct inode *dir, 
-				      struct ocfs2_dir_entry ** res_dir)
+struct buffer_head *ocfs_find_entry(const char *name, int namelen, 
+				    struct inode *dir, 
+				    struct ocfs2_dir_entry **res_dir)
 {
 	struct super_block * sb;
 	struct buffer_head * bh_use[NAMEI_RA_SIZE];
@@ -1631,7 +1656,7 @@
 	*res_dir = NULL;
 	sb = dir->i_sb;
 
-	nblocks = dir->i_size >> OCFS_SB(sb)->sect_size_bits;
+	nblocks = dir->i_size >> sb->s_blocksize_bits;
 	start = OCFS_I(dir)->i_dir_start_lookup;
 	if (start >= nblocks)
 		start = 0;
@@ -1675,7 +1700,7 @@
 			goto next;
 		}
 		i = search_dirblock(bh, dir, name, namelen,
-			    block << OCFS_SB(sb)->sect_size_bits, res_dir);
+				    block << sb->s_blocksize_bits, res_dir);
 		if (i == 1) {
 			OCFS_I(dir)->i_dir_start_lookup = block;
 			ret = bh;
@@ -1695,7 +1720,7 @@
 	 * search the last part of the directory before giving up.
 	 */
 	block = nblocks;
-	nblocks = dir->i_size >> OCFS_SB(sb)->sect_size_bits;
+	nblocks = dir->i_size >> sb->s_blocksize_bits;
 	if (block < nblocks) {
 		start = 0;
 		goto restart;
@@ -1712,22 +1737,27 @@
  * ocfs_orphan_add()
  *
  */
-static int ocfs_orphan_add(ocfs_super *osb, ocfs_journal_handle *handle, 
-			   struct inode *inode, ocfs_file_entry *fe)
+static int ocfs_orphan_add(ocfs_super *osb, ocfs_journal_handle *handle,
+			   struct inode *inode, ocfs2_dinode *fe)
 {
 	struct inode *orphan_dir_inode = NULL;
 	struct buffer_head *orphan_dir_bh = NULL;
 	int status = 0;
 	char *name = NULL;
 	int namelen;
-	ocfs_file_entry *orphan_fe;
+	ocfs2_dinode *orphan_fe;
 
 	LOG_ENTRY_ARGS("(inode->i_ino = %lu)\n", inode->i_ino);
 
 	/* create a unique name here. */
-	name = ocfs_malloc(OCFS_MAX_FILENAME_LENGTH+1);
+	name = kmalloc(OCFS2_MAX_FILENAME_LENGTH+1, GFP_KERNEL);
+	if (!name) {
+		status = -EFAIL;
+		LOG_ERROR_STATUS(status);
+		goto leave;
+	}
 
-	namelen = snprintf(name, OCFS_MAX_FILENAME_LENGTH+1, "%llu", 
+	namelen = snprintf(name, OCFS2_MAX_FILENAME_LENGTH+1, "%llu", 
 			   GET_INODE_FEOFF(inode));
 	if (namelen <= 0) {
 		if (namelen)
@@ -1780,8 +1810,8 @@
 	 * underneath us... */
 	orphan_fe = OCFS_BH_GET_DATA_WRITE(orphan_dir_bh);
 	if (S_ISDIR(inode->i_mode))
-		orphan_fe->link_cnt++;
-	orphan_dir_inode->i_nlink = orphan_fe->link_cnt;
+		orphan_fe->i_links_count++;
+	orphan_dir_inode->i_nlink = orphan_fe->i_links_count;
 	OCFS_BH_PUT_DATA(orphan_dir_bh);
 
 	status = ocfs_journal_dirty(handle, orphan_dir_bh);
@@ -1791,14 +1821,14 @@
 	}
 
 
-	OCFS_SET_FLAG (fe->sync_flags, OCFS_SYNC_FLAG_ORPHANED);
+	fe->i_flags |= OCFS2_ORPHANED_FL;
 
 leave:
 	if (orphan_dir_inode)
 		iput(orphan_dir_inode);
 
 	if (name)
-		ocfs_free(name);
+		kfree(name);
 
 	if (orphan_dir_bh)
 		brelse(orphan_dir_bh);
@@ -1814,14 +1844,19 @@
 {
 	char *name = NULL;
 	int namelen;
-	ocfs_file_entry *orphan_fe;
+	ocfs2_dinode *orphan_fe;
 	int status = 0;
 	struct buffer_head *target_de_bh;
 	struct ocfs2_dir_entry *target_de = NULL;
 
-	name = ocfs_malloc(OCFS_MAX_FILENAME_LENGTH+1);
+	name = kmalloc(OCFS2_MAX_FILENAME_LENGTH+1, GFP_KERNEL);
+	if (!name) {
+		status = -EFAIL;
+		LOG_ERROR_STATUS(status);
+		goto leave;
+	}
 
-	namelen = snprintf(name, OCFS_MAX_FILENAME_LENGTH+1, "%llu", 
+	namelen = snprintf(name, OCFS2_MAX_FILENAME_LENGTH+1, "%llu", 
 			   GET_INODE_FEOFF(inode));
 	if (namelen <= 0) {
 		if (namelen)
@@ -1862,8 +1897,8 @@
 	/* do the i_nlink dance! :) */
 	orphan_fe = OCFS_BH_GET_DATA_WRITE(orphan_dir_bh);
 	if (S_ISDIR(inode->i_mode))
-		orphan_fe->link_cnt--;
-	orphan_dir_inode->i_nlink = orphan_fe->link_cnt;
+		orphan_fe->i_links_count--;
+	orphan_dir_inode->i_nlink = orphan_fe->i_links_count;
 	OCFS_BH_PUT_DATA(orphan_dir_bh);
 
 	status = ocfs_journal_dirty(handle, orphan_dir_bh);
@@ -1874,7 +1909,7 @@
 
 leave:
 	if (name)
-		ocfs_free(name);
+		kfree(name);
 
 	return status;
 }
@@ -1890,12 +1925,12 @@
 	int tmpstat;
 	struct buffer_head *bh = NULL;
 	struct buffer_head *parent_fe_bh = NULL;
-	ocfs_file_entry *fe = NULL;
+	ocfs2_dinode *fe = NULL;
 
 	if (S_ISDIR(inode->i_mode))
 		return -EPERM;
 
-	if (inode->i_nlink >= OCFS_LINK_MAX)
+	if (inode->i_nlink >= OCFS2_LINK_MAX)
 		return -EMLINK;
 
 #define OCFS_DATA_TRANS_BLOCKS          (3 * 8 - 2)
@@ -1933,8 +1968,8 @@
 		goto bail;
 
 	fe = OCFS_BH_GET_DATA_WRITE(bh);
-	fe->link_cnt++;
-	inode->i_nlink = fe->link_cnt;
+	fe->i_links_count++;
+	inode->i_nlink = fe->i_links_count;
 	inode->i_ctime = CURRENT_TIME;
 	atomic_inc(&inode->i_count);
 	OCFS_BH_PUT_DATA(bh);

Added: trunk/src/namei.h
===================================================================
--- trunk/src/namei.h	2004-06-17 16:59:59 UTC (rev 1127)
+++ trunk/src/namei.h	2004-06-17 21:06:33 UTC (rev 1128)
@@ -0,0 +1,63 @@
+/* -*- mode: c; c-basic-offset: 8; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
+ * namei.h
+ *
+ * Function prototypes for directory entry lookup and name operations
+ *
+ * Copyright (C) 2002, 2004 Oracle.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ * 
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ *
+ * Authors: Kurt Hackel, Mark Fasheh, Sunil Mushran, Wim Coekaerts,
+ *	    Manish Singh, Neeraj Goyal, Suchit Kaura
+ */
+
+#ifndef OCFS2_NAMEI_H
+#define OCFS2_NAMEI_H
+
+int ocfs_check_dir_entry (struct inode *dir,
+			  struct ocfs2_dir_entry *de,
+			  struct buffer_head *bh, unsigned long offset);
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
+int ocfs_create (struct inode *dir, struct dentry *dentry, int mode, struct nameidata *nd);
+#else /* kernels older than 2.6.0: no nameidata argument */
+int ocfs_create (struct inode *dir, struct dentry *dentry, int mode);
+#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0) */
+struct buffer_head *ocfs_find_entry(const char *name, int namelen,
+				    struct inode *dir,
+				    struct ocfs2_dir_entry **res_dir);
+int ocfs_link(struct dentry *old_dentry, struct inode *dir,
+	      struct dentry *dentry);
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
+struct dentry *ocfs_lookup(struct inode *dir, struct dentry *dentry,
+			   struct nameidata *nd);
+#else /* kernels older than 2.6.0: no nameidata argument */
+struct dentry *ocfs_lookup(struct inode *dir, struct dentry *dentry);
+#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0) */
+int ocfs_mkdir(struct inode *dir, struct dentry *dentry, int mode);
+int ocfs_mknod(struct inode *dir, struct dentry *dentry, int mode,
+	       ocfs_dev dev);
+int ocfs_rename(struct inode *old_dir, struct dentry *old_dentry,
+		struct inode *new_dir, struct dentry *new_dentry);
+int ocfs_symlink(struct inode *dir, struct dentry *dentry,
+		 const char *symname);
+int ocfs_unlink(struct inode *dir, struct dentry *dentry);
+int ocfs_orphan_del(ocfs_super *osb, ocfs_journal_handle *handle,
+		    struct inode *orphan_dir_inode, struct inode *inode,
+		    struct buffer_head *orphan_dir_bh);
+
+#endif /* OCFS2_NAMEI_H */

Modified: trunk/src/nm.c
===================================================================
--- trunk/src/nm.c	2004-06-17 16:59:59 UTC (rev 1127)
+++ trunk/src/nm.c	2004-06-17 21:06:33 UTC (rev 1128)
@@ -1,4 +1,6 @@
-/*
+/* -*- mode: c; c-basic-offset: 8; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
  * nm.c
  *
  * net and disk process vote, nm thread, etc.
@@ -24,12 +26,31 @@
  *	    Manish Singh, Neeraj Goyal, Suchit Kaura
  */
 
+#include "ocfs_compat.h"
+
+#include <linux/fs.h>
 #include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/highmem.h>
+#include <linux/bitops.h>
+#include <linux/net.h>
 
-#include "inc/ocfs_log.h"
-#include "inc/ocfs.h"
-#include "inc/ocfs_journal.h"
+#include "ocfs_log.h"
+#include "ocfs.h"
 
+#include "alloc.h"
+#include "dlm.h"
+#include "heartbeat.h"
+#include "inode.h"
+#include "lockres.h"
+#include "nm.h"
+#include "util.h"
+#include "volcfg.h"
+#include "vote.h"
+
+#include "ocfs_journal.h"
+#include "ocfs_buffer_head.h"
+
 /* Tracing */
 #define OCFS_DEBUG_CONTEXT      OCFS_DEBUG_CONTEXT_NM
 
@@ -169,7 +190,7 @@
 	ocfs_process_vote(osb, &(sv->ctxt));
 
 	/* we should free this when done. */
-	ocfs_free(sv);
+	kfree(sv);
 
 	LOG_EXIT();
 	return;
@@ -211,7 +232,7 @@
 
 	ctxt = &sv->ctxt;
 	ctxt->u.publish = (ocfs_publish *) &sv->publish_sect;
-	memcpy(ctxt->u.publish, publish, osb->sect_size);
+	memcpy(ctxt->u.publish, publish, osb->sb->s_blocksize);
 	OCFS_BH_PUT_DATA(bh);
 
 	ctxt->request_method = DISK_VOTE;
@@ -231,8 +252,8 @@
 	/* if no error, then the workqueue should clear it? */
 	if ((status < 0) && ctxt) {
 		if (ctxt->u.publish)
-			ocfs_free(ctxt->u.publish);
-		ocfs_free(ctxt);
+			kfree(ctxt->u.publish);
+		kfree(ctxt);
 	}
 
 	LOG_EXIT_STATUS(status);
@@ -257,11 +278,10 @@
 	__u32 i;
 	unsigned long j;
 	__u32 highest_vote_node;
-	__u64 offset = 0;
+	__u64 offset = 0, len = 0;
 	__u32 num_nodes = 0;
 	__u32 vote_node;
 	ocfs_node_config_hdr *node_cfg_hdr = NULL;
-	__u64 curr_node_map;
 	__u64 cfg_seq_num;
 	int which;
 	struct buffer_head *bh = NULL;
@@ -286,9 +306,6 @@
 		vote_node = OCFS_INVALID_NODE_NUM;
 		highest_vote_node = 0;
 
-		if (OcfsGlobalCtxt.hbm == 0)
-			OcfsGlobalCtxt.hbm = DISK_HBEAT_NO_COMM;
-
 		if (!time_after (jiffies, (unsigned long) (osb->hbt)))
 			goto finally;
 
@@ -296,17 +313,18 @@
 		down (&(osb->publish_lock));
 
 		/* Get the Publish Sector start Offset */
-		offset = osb->vol_layout.new_cfg_off;
+		offset = osb->new_autoconfig_blkno << osb->sb->s_blocksize_bits;
+		len = osb->total_autoconfig_blocks << osb->sb->s_blocksize_bits;
 
-		/* Read disk for Publish Sectors of all nodes */
-		status = ocfs_read_bhs(osb, offset, osb->cfg_len, osb->cfg_bhs, 0, NULL);
+		/* Read disk for 4 autoconfig blocks + all nodes publish blocks */
+		status = ocfs_read_bhs(osb, offset, len, osb->autoconfig_bhs, 0, NULL);
 		if (status < 0) {
 			up (&(osb->publish_lock));
 			LOG_ERROR_STATUS (status);
 			goto finally;
 		}
 
-		bh = osb->cfg_bhs[OCFS_VOLCFG_NEWCFG_SECTORS + osb->node_num];
+		bh = osb->autoconfig_bhs[OCFS_VOLCFG_NEWCFG_SECTORS + osb->node_num];
 		publish = OCFS_BH_GET_DATA_READ(bh);
 		if ((osb->check_mounted) && (publish->mounted == 0)) {
 			printk("ocfs2: Heartbeat timed out, volume has been "
@@ -323,10 +341,10 @@
 		up (&(osb->publish_lock));
 
 		/* If another node was added to the config read and update the cfg */
-		node_cfg_hdr = OCFS_BH_GET_DATA_READ(osb->cfg_bhs[1]);
+		node_cfg_hdr = OCFS_BH_GET_DATA_READ(osb->autoconfig_bhs[1]);
 		num_nodes = node_cfg_hdr->num_nodes;
 		cfg_seq_num = node_cfg_hdr->cfg_seq_num;
-		OCFS_BH_PUT_DATA(osb->cfg_bhs[1]);
+		OCFS_BH_PUT_DATA(osb->autoconfig_bhs[1]);
 
 		if ((osb->cfg_seq_num != cfg_seq_num) ||
 		    (osb->num_cfg_nodes != num_nodes)) {
@@ -339,10 +357,10 @@
 			}
 		}
 
-		num_nodes = OCFS_MAXIMUM_NODES;
+		num_nodes = osb->max_nodes;
 
 		/* Refresh the publish map */
-		ocfs_update_publish_map (osb, &(osb->cfg_bhs[OCFS_VOLCFG_NEWCFG_SECTORS]), 0);
+		ocfs_update_publish_map (osb, &(osb->autoconfig_bhs[OCFS_VOLCFG_NEWCFG_SECTORS]), 0);
 
 		/* send signal to mount thread to continue */
 		if (atomic_read (&osb->nm_init) < OCFS_HEARTBEAT_INIT) {
@@ -352,21 +370,19 @@
 			atomic_inc (&osb->nm_init);
 		}
 
-		LOG_TRACE_ARGS ("Publish map: 0x%08X\n", (__u32)(osb->publ_map & 0xFFFFFFFFULL));
-
-		/* map of local node */
-		curr_node_map = (__u64) ((__u64)1 << osb->node_num);
-
 		/* Check for the highest node looking for a vote, if anybody is looking */
 		for (i = 0, which = OCFS_VOLCFG_NEWCFG_SECTORS; i < num_nodes; i++, which++) {
-			publish = OCFS_BH_GET_DATA_READ(osb->cfg_bhs[which]);
+			ocfs_node_map vote_map;
+			publish = OCFS_BH_GET_DATA_READ(osb->autoconfig_bhs[which]);
 
 			if (publish->time == (__u64) 0)
 				goto loop;
 
-			if (publish->vote != FLAG_VOTE_NODE ||
-			    !(publish->vote_map & curr_node_map))
+			if (publish->vote != FLAG_VOTE_NODE)
 				goto loop;
+			ocfs_get_publish_vote_map(osb, publish, &vote_map);
+			if (!ocfs_node_map_test_bit(&vote_map, osb->node_num))
+				goto loop;
 
 			LOG_TRACE_ARGS ("node(%u): vote=%d dirty=%d type=%u\n",
 					i, publish->vote, publish->dirty, 
@@ -375,11 +391,11 @@
 			highest_vote_node = i;
 
 			/* Check if the node is alive or not */
-			if (IS_NODE_ALIVE (osb->publ_map, highest_vote_node, 
-					   num_nodes)) {
+			if (ocfs_node_is_alive(&osb->publ_map,
+					       highest_vote_node)) {
 				vote_node = highest_vote_node;
 			} else {
-				OCFS_BH_PUT_DATA(osb->cfg_bhs[which]);
+				OCFS_BH_PUT_DATA(osb->autoconfig_bhs[which]);
 				publish = NULL;
 				/* I guess we don't even have to do
 				 * anything here... */
@@ -390,7 +406,7 @@
 			}
 loop:
 			publish = NULL;
-			OCFS_BH_PUT_DATA(osb->cfg_bhs[which]);
+			OCFS_BH_PUT_DATA(osb->autoconfig_bhs[which]);
 		}
 
 
@@ -399,7 +415,7 @@
 			__s32 voted;
 			LOG_TRACE_ARGS("vote_node = %d\n", vote_node);
 
-			bh = osb->cfg_bhs[OCFS_VOLCFG_NEWCFG_SECTORS 
+			bh = osb->autoconfig_bhs[OCFS_VOLCFG_NEWCFG_SECTORS 
 					  + osb->node_num];
 			down(&(osb->publish_lock));
 
@@ -422,7 +438,7 @@
 			up(&(osb->publish_lock));
 
 			which = vote_node + OCFS_VOLCFG_NEWCFG_SECTORS;
-			bh = osb->cfg_bhs[which];
+			bh = osb->autoconfig_bhs[which];
 
 			status= ocfs_schedule_process_vote(osb, bh, vote_node);
 			if (status < 0) {
@@ -444,23 +460,10 @@
 		}
 		set_current_state (TASK_INTERRUPTIBLE);
 		schedule_timeout (osb->hbt - j);
-		/* this should all just be in some common function as
-		 * it's totally duplicated. */
+
+		/* ignore the actual signal */
 		if (signal_pending(current)) {
-			/* ignore the actual signal */
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
 			dequeue_signal_lock(current, &current->blocked, &info);
-#else
-#ifdef HAVE_NPTL
-			spin_lock_irq (&current->sighand->siglock);
-			dequeue_signal(&current->blocked, &info);
-			spin_unlock_irq(&current->sighand->siglock);
-#else 
-			spin_lock_irq(&current->sigmask_lock);
-			dequeue_signal(&current->blocked, &info);
-			spin_unlock_irq(&current->sigmask_lock);
-#endif /* !HAVE_NPTL */
-#endif /* 2.4.x kernel */
 		}
 	}
 
@@ -500,9 +503,11 @@
 			return INVALID_REQUEST;
 		}
 	} else if (lockres) {
-		*master_alive = lockres->master_node_num != OCFS_INVALID_NODE_NUM &&
-			IS_NODE_ALIVE(osb->publ_map, 
-			 lockres->master_node_num, OCFS_MAXIMUM_NODES);
+		*master_alive =
+			(lockres->master_node_num !=
+			 OCFS_INVALID_NODE_NUM) &&
+			ocfs_node_is_alive(&osb->publ_map, 
+					   lockres->master_node_num);
 
 		// if an outstanding vote request is found on this lockid
 		// and this node number is higher, this node wins
@@ -719,9 +724,9 @@
 	int status = 0;
 	int tmpstat = 0;
 	ocfs_lock_res *lockres = NULL;
-	__u32 flags, num_nodes, i;
+	__u32 flags, num_nodes;
 	__u64 offset;
-	ocfs_file_entry *fe = NULL;
+	ocfs2_dinode *fe = NULL;
 	ocfs_vote *vote = NULL;
 	struct buffer_head *fe_bh = NULL, *vote_bh = NULL;
 	int vote_type = INVALID_REQUEST, vote_response = 0;
@@ -738,6 +743,7 @@
 	__u32 node_num = ctxt->node_num;
 	__u64 lock_id, seq_num;
 	int needs_trunc = 0;
+	ocfs_node_map disk_map;
 
 	LOG_ENTRY_ARGS ("(0x%p, 0x%p)\n", osb, ctxt);
 
@@ -751,10 +757,10 @@
 		return status;
 	}
 
-	num_nodes = OCFS_MAXIMUM_NODES;
+	num_nodes = osb->max_nodes;
 	if (disk_vote) {
 		flags = publish->vote_type;
-		lock_id = publish->dir_ent;
+		lock_id = publish->lock_id;
 		seq_num = publish->publ_seq_num;
 	} else {
 		ocfs_dlm_req_master *req_master = (ocfs_dlm_req_master *)dlm_msg->msg_buf;
@@ -762,13 +768,12 @@
 		lock_id = req_master->lock_id;
 		seq_num = req_master->lock_seq_num;
 	}
-	lockflags = (lock_id >= osb->vol_layout.bitmap_off ? OCFS_BH_CACHED : 0);
 	
 	LOG_TRACE_ARGS ("node=%u, id=%llu, seq=%llu\n", node_num,
 			lock_id, seq_num);
 
 	if (disk_vote) {
-		offset = osb->vol_layout.vote_sect_off + (osb->node_num * osb->sect_size);
+		offset = (osb->vote_blkno + osb->node_num) << osb->sb->s_blocksize_bits;
 		status = ocfs_read_bh(osb, offset, &vote_bh, 0, NULL);
 		if (status < 0) {
 			LOG_ERROR_STATUS (status);
@@ -819,12 +824,14 @@
 		}
 	}
 
+	lockflags = (IS_SYSTEM_FILE_INODE(inode) ? 0 : OCFS_BH_CACHED);
+
 	/* fail here if no inode, unless this is a delete/rename release */
 	vote_type = get_process_vote_action(osb, lockres, node_num, flags, 
 					    status, &master_alive, inode);
 
 #ifdef VERBOSE_PROCESS_VOTE
-	printk("(%u) ocfs_process_vote: %s request for lockid: %llu, action: (%u) %s, type: %s\n", ocfs_getpid(),
+	printk("(%u) ocfs_process_vote: %s request for lockid: %llu, action: (%u) %s, type: %s\n", current->pid,
 	       flags & FLAG_RELEASE_LOCK ? "RELEASE" : 
 	       (flags & FLAG_ACQUIRE_LOCK ? "ACQUIRE" : "MODIFY"), lock_id,
  	       vote_type, process_vote_strings[vote_type], disk_vote ? "disk vote" : "net vote" );
@@ -844,8 +851,8 @@
 		/* Zero out the vote for everybody, if any already set
 		 * and hung */
 		vote = OCFS_BH_GET_DATA_WRITE(vote_bh);
-		for (i = 0; i < num_nodes; i++)
-			vote->vote[i] = 0;
+		vote->node = (__u8)-1;
+		vote->type = 0;
 		OCFS_BH_PUT_DATA(vote_bh);
 		status = ocfs_write_bh(osb, vote_bh, 0, NULL);
 		if (status < 0) {
@@ -980,8 +987,8 @@
 				LOG_ERROR_STR("Wowzers, how'd I hit this code:"
 					      " master node is invalid!?!");
 			}
-			if (lockres->master_node_num == osb->node_num &&
-			    lockres->lock_type < OCFS_DLM_EXCLUSIVE_LOCK) {
+			if ((lockres->master_node_num == osb->node_num) &&
+			    (lockres->lock_type < OCFS_DLM_EXCLUSIVE_LOCK)) {
 				LOG_ERROR_STR("Wowzers, how'd I hit this "
 					      "code!?!");
 			}
@@ -1004,7 +1011,7 @@
 #endif
 #endif
 			}
-			lockres->readonly_map |= (1 << node_num);
+			ocfs_node_map_set_bit(&lockres->readonly_map, node_num);
 			lockres->readonly_node = osb->node_num;
 			vote_response = FLAG_VOTE_NODE;
 			status = 0;
@@ -1041,12 +1048,12 @@
 			 * map that lock will be changing to RW before we
 			 * continue.  RETRY this request while we spawn 
 			 * off a thread to collect up the communication */
-			if (lockres->readonly_map != 0ULL) {
+			if (!ocfs_node_map_is_empty(&lockres->readonly_map)) {
 				// assumption: node asking for vote has already dropped readonly_node
-				lockres->readonly_map &= ~(1 << node_num);
+				ocfs_node_map_clear_bit(&lockres->readonly_map, node_num);
 				// should not be in there, but...
-				lockres->readonly_map &= ~(1 << osb->node_num);
-				if (lockres->readonly_map != 0ULL) {
+				ocfs_node_map_clear_bit(&lockres->readonly_map, osb->node_num);
+				if (!ocfs_node_map_is_empty(&lockres->readonly_map)) {
 					OCFS_ASSERT(lockres->readonly_node == osb->node_num);
 					OCFS_ASSERT(inode);
 					status = ocfs_drop_readonly_cache_lock(osb, inode, 1);
@@ -1085,14 +1092,18 @@
 			}
 	
 			fe = OCFS_BH_GET_DATA_READ(fe_bh);
-			is_dir = fe->attribs & OCFS_ATTRIB_DIRECTORY;
-			is_locked = DISK_LOCK_FILE_LOCK (fe) > OCFS_DLM_NO_LOCK;
+			is_dir = S_ISDIR(fe->i_mode);
+			is_locked = DISK_LOCK(fe)->dl_level > OCFS_DLM_NO_LOCK;
 			if (vote_type == CHANGE_MASTER) {
 				OCFS_BH_PUT_DATA(fe_bh);
 				fe = OCFS_BH_GET_DATA_WRITE(fe_bh);
-				if (inode && OCFS_I(inode)->open_hndl_cnt)
-					DISK_LOCK_OIN_MAP (fe) |= (1 << osb->node_num);
-				DISK_LOCK_CURRENT_MASTER (fe) = node_num;
+				if (inode && OCFS_I(inode)->open_hndl_cnt) {
+					ocfs_node_map_init(osb, &disk_map);
+					ocfs_get_disk_lock_open_map(osb, DISK_LOCK(fe), &disk_map);
+					ocfs_node_map_set_bit(&disk_map, osb->node_num);
+					ocfs_set_disk_lock_open_map(osb, DISK_LOCK(fe), &disk_map);
+				}
+				DISK_LOCK(fe)->dl_master = node_num;
 			}
 			OCFS_BH_PUT_DATA(fe_bh);
 
@@ -1104,7 +1115,7 @@
 				if (vote_type == RELEASE_CACHE) {
 					LOG_TRACE_STR("release cache vote, setting to NO_LOCK");
 					fe = OCFS_BH_GET_DATA_WRITE(fe_bh);
-					DISK_LOCK_FILE_LOCK (fe) = OCFS_DLM_NO_LOCK;
+					DISK_LOCK(fe)->dl_level = OCFS_DLM_NO_LOCK;
 					OCFS_BH_PUT_DATA(fe_bh);
 				}
 				status = ocfs_write_bh(osb, fe_bh, 0, inode);
@@ -1134,16 +1145,16 @@
 			}
 			fe = OCFS_BH_GET_DATA_READ(fe_bh);
 
-			if (!(fe->sync_flags & OCFS_SYNC_FLAG_VALID)) {
+			if (!(fe->i_flags & OCFS2_VALID_FL)) {
 				vote_response = FLAG_VOTE_FILE_DEL;
 				OCFS_BH_PUT_DATA(fe_bh);
 			} else {
-				__u64 tmpmap;
-			
+				ocfs_node_map_init(osb, &disk_map);
 				OCFS_BH_PUT_DATA(fe_bh);
 				fe = OCFS_BH_GET_DATA_WRITE(fe_bh);
-				DISK_LOCK_OIN_MAP (fe) |= (1 << node_num);
-				tmpmap = DISK_LOCK_OIN_MAP (fe);
+				ocfs_get_disk_lock_open_map(osb, DISK_LOCK(fe), &disk_map);
+				ocfs_node_map_set_bit(&disk_map, node_num);
+				ocfs_set_disk_lock_open_map(osb, DISK_LOCK(fe), &disk_map);
 				OCFS_BH_PUT_DATA(fe_bh);
 
 				/* Write new map on the disk */
@@ -1155,7 +1166,7 @@
 				}
 
 				/* Add this node to the oin map on the file entry */
-				lockres->oin_openmap = tmpmap;
+				ocfs_node_map_set(&lockres->oin_openmap, &disk_map);
 				vote_response = FLAG_VOTE_NODE;
 			}
 			brelse(fe_bh);
@@ -1179,15 +1190,16 @@
 			 * even in the error case.  do nothing for error. */	
 			if (lockres->master_node_num != node_num ||
 			    lockres->lock_type != OCFS_DLM_ENABLE_CACHE_LOCK ||
-			    lockres->readonly_map != 0ULL)
-				LOG_ERROR_ARGS("(drop-ro) master=%d node_num=%d locktype=%d map=%16llX ronode=%d\n",
+			    !ocfs_node_map_is_empty(&lockres->readonly_map))
+				LOG_ERROR_ARGS("(drop-ro) master=%d node_num=%d locktype=%d ronode=%d\n",
 				       lockres->master_node_num, node_num, lockres->lock_type, 
-				       lockres->readonly_map, lockres->readonly_node);
+				       lockres->readonly_node);
 			else
 				lockres->readonly_node = OCFS_INVALID_NODE_NUM;
 
 			status = 0;
 			vote_response = FLAG_VOTE_NODE;
+			inc_inode_seq = 1;
 			break;
 			
 		case NOT_MASTER:
@@ -1244,10 +1256,11 @@
 
 	if (disk_vote) {
 		vote = OCFS_BH_GET_DATA_WRITE(vote_bh);
-		vote->dir_ent = lock_id;
+		vote->lock_id = lock_id;
 		vote->vote_seq_num = seq_num;
 		vote->open_handle = open_handle;
-		vote->vote[node_num] = vote_response;
+		vote->node = node_num;
+		vote->type = vote_response;
 		OCFS_BH_PUT_DATA(vote_bh);
 		status = ocfs_write_bh(osb, vote_bh, 0, NULL);
 		brelse(vote_bh);
@@ -1256,7 +1269,7 @@
 	}
 
 #ifdef VERBOSE_PROCESS_VOTE
-	printk("(%u) vote: lockid=%llu, node=%d, seqnum=%llu, response=%d, open_handle=%s\n",ocfs_getpid(), lock_id, node_num, seq_num, vote_response, open_handle?"yes":"no");
+	printk("(%u) vote: lockid=%llu, node=%d, seqnum=%llu, response=%d, open_handle=%s\n",current->pid, lock_id, node_num, seq_num, vote_response, open_handle?"yes":"no");
 #endif
 		       
 	if (status < 0)
@@ -1440,13 +1453,15 @@
 	}
 
 	lockres->lock_state |= FLAG_READONLY_DROPPING;
-	lockres->readonly_map &= ~(1 << osb->node_num);  /* remove this node */
+	/* remove this node */
+	ocfs_node_map_clear_bit(&lockres->readonly_map, osb->node_num);
 	
 	status = 0;
-	while (lockres->readonly_map != 0ULL) {
+	while (!ocfs_node_map_is_empty(&lockres->readonly_map)) {
 		int disk_vote = 0;
 
-		lockres->readonly_map &= osb->publ_map;      /* remove all dead nodes */
+		/* remove all dead nodes */
+		ocfs_node_map_and(&lockres->readonly_map, &osb->publ_map);
 		status = new_lock_function(osb, OCFS_DLM_ENABLE_CACHE_LOCK, FLAG_DROP_READONLY, 
 					   NULL, &disk_vote, inode);
 		if (status == -EAGAIN) {
@@ -1465,8 +1480,8 @@
 			LOG_ERROR_STATUS (status);
 		break;
 	}
-					
-	if (lockres->readonly_map == 0ULL && 
+			
+	if (ocfs_node_map_is_empty(&lockres->readonly_map) &&	
 	    lockres->readonly_node == osb->node_num)
 		lockres->readonly_node = OCFS_INVALID_NODE_NUM;
 
@@ -1495,23 +1510,9 @@
 	ocfs_daemonize (OCFS_DROP_RO_THREAD_NAME, strlen(OCFS_DROP_RO_THREAD_NAME));
 	status = _ocfs_drop_readonly_cache_lock(arg);
 
-	/* this should all just be in some common function as
-	 * it's totally duplicated. */
+	/* ignore the actual signal */
 	if (signal_pending(current)) {
-		/* ignore the actual signal */
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
 		dequeue_signal_lock(current, &current->blocked, &info);
-#else
-#ifdef HAVE_NPTL
-		spin_lock_irq (&current->sighand->siglock);
-		dequeue_signal(&current->blocked, &info);
-		spin_unlock_irq(&current->sighand->siglock);
-#else 
-		spin_lock_irq(&current->sigmask_lock);
-		dequeue_signal(&current->blocked, &info);
-		spin_unlock_irq(&current->sigmask_lock);
-#endif /* !HAVE_NPTL */
-#endif /* 2.4.x kernel */
 	}
 
 	/* Flush all scheduled tasks */
@@ -1522,5 +1523,12 @@
 #endif
 
 #warning   need a way to wait on all of these threads on dismount
+/*
+ * The way to do this is to create a wait queue on the osb.  When one of
+ * these threads starts, it bumps a counter.  When it ends, it decrements
+ * the counter and wake_up()s the wait queue.  The counter can probably
+ * be protected by a spinlock on the osb.  The dismount handling just
+ * waits on that wait queue until readonly_threads == 0.
+ */
 	return status;
 }

Added: trunk/src/nm.h
===================================================================
--- trunk/src/nm.h	2004-06-17 16:59:59 UTC (rev 1127)
+++ trunk/src/nm.h	2004-06-17 21:06:33 UTC (rev 1128)
@@ -0,0 +1,41 @@
+/* -*- mode: c; c-basic-offset: 8; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
+ * nm.h
+ *
+ * Function prototypes for nm.c (net and disk vote processing, nm thread)
+ *
+ * Copyright (C) 2002, 2004 Oracle.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ * 
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ *
+ * Authors: Kurt Hackel, Mark Fasheh, Sunil Mushran, Wim Coekaerts,
+ *	    Manish Singh, Neeraj Goyal, Suchit Kaura
+ */
+
+#ifndef OCFS2_NM_H
+#define OCFS2_NM_H
+
+int ocfs_drop_readonly_cache_lock(ocfs_super *osb, struct inode *inode,
+				  int yield);
+void ocfs_inc_inode_seq(ocfs_super *osb, struct inode *inode,
+			int sync_buffers);
+int ocfs_process_vote(ocfs_super *osb, ocfs_vote_request_ctxt *ctxt); /* handles one vote request, disk or net */
+int ocfs_recv_thread(void *unused);
+void ocfs_recover_oin_locks(ocfs_super *osb, __u32 node_num);
+int ocfs_volume_thread(void *arg);
+
+#endif /* OCFS2_NM_H */

Added: trunk/src/ocfs.h
===================================================================
--- trunk/src/ocfs.h	2004-06-17 16:59:59 UTC (rev 1127)
+++ trunk/src/ocfs.h	2004-06-17 21:06:33 UTC (rev 1128)
@@ -0,0 +1,1440 @@
+/* -*- mode: c; c-basic-offset: 8; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
+ * ocfs.h
+ *
+ * Defines macros and structures used in ocfs
+ *
+ * Copyright (C) 2002, 2004 Oracle.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ * 
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ *
+ * Authors: Kurt Hackel, Mark Fasheh, Sunil Mushran, Wim Coekaerts,
+ *	    Manish Singh, Neeraj Goyal, Suchit Kaura
+ */
+
+#ifndef OCFS_H
+#define OCFS_H
+
+/*
+** System header files
+*/
+#define   __KERNEL_SYSCALLS__
+
+#include <linux/spinlock.h>
+#include <linux/sched.h>
+#include <linux/wait.h>
+#include <linux/list.h>
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
+# include <linux/workqueue.h>
+#else
+# include <linux/tqueue.h>
+#endif
+
+
+#define OCFS_BITS_IN_CHUNK(sb)  ((sb)->s_blocksize * 8)
+
+typedef struct _ocfs_alloc_bm
+{
+	__u32 validbits; /* number of valid bits */
+	__u32 allocbits; /* number of allocated bits */
+	__u32 failed;
+	__u32 ok_retries;
+	/* 'numbh' is the number of buffer heads in chunk. We keep
+	 * around enough buffer heads to cover the entire alloc'd size
+	 * of the bitmap, even though we may only ever care about the
+	 * valid size */
+	__u32 numbh;
+	struct buffer_head **chunk;
+}
+ocfs_alloc_bm;
+
+
+
+#define OCFS_POINTER_SIZE   (sizeof(void *))
+
+enum
+{
+	OCFS_VOTE_REQUEST = 1,
+	OCFS_VOTE_REPLY,
+	OCFS_INFO_DISMOUNT
+};
+
+enum {
+	DISK_VOTE,
+	COMM_VOTE
+};
+
+enum {
+	INVALID_REQUEST,      // reply with a NO vote
+	UPDATE_OIN_INODE,     // update both oin and inode
+	DELETE_ACQUIRE,// delete or rename acquire request
+	DELETE_RELEASE,// delete or rename release request
+	RELEASE_CACHE,        // release a cache lock I hold
+	CHANGE_MASTER,        // request to change master to requestor
+	ADD_OIN_MAP,          // add requestor into oin map
+	NOT_MASTER,           // I am not master, retry
+	REMASTER_THIS,        // remaster lock to me
+	REMASTER_REQUESTOR,   // remaster lock to requestor
+	DROP_READONLY,	      // RO cachelock needs to convert to RW
+	READONLY,	      // a RW or RO cachelock, requesting RO
+	RELEASE_DENTRY
+};
+
+#define  OCFS_MAX_DLM_PKT_SIZE			256
+#define  OCFS_DLM_MAX_MSG_SIZE			256
+#define  OCFS_DLM_MSG_MAGIC			0x79677083
+
+enum {
+    OSB_DATA_LOCK,
+    OSB_MD_LOCK,
+    OSB_CFG_LOCK,
+    OSB_LOG_LOCK
+};
+
+
+/* convenience macro */
+
+#define OCFS_ASSERT(x)             do { if (!(x)) BUG(); } while (0)
+
+
+/*
+ * BITCOUNT(x): number of bits set in x, computed without a loop.
+ *
+ * BX_(x) turns every 4-bit nibble of x into the count of bits set in
+ * that nibble.  BITCOUNT() then adds each nibble to its neighbour,
+ * masks the sums down to one count per byte, and uses "% 255" to add
+ * the four byte counts together.
+ *
+ * The masks are 32 bits wide, so only 32-bit values are counted
+ * correctly.  x is expanded several times; do not pass an expression
+ * with side effects.
+ */
+#define BITCOUNT(x)     (((BX_(x)+(BX_(x)>>4)) & 0x0F0F0F0F) % 255)
+#define BX_(x)          ((x) - (((x)>>1)&0x77777777) \
+			     - (((x)>>2)&0x33333333) \
+			     - (((x)>>3)&0x11111111))
+
+
+#ifndef list_for_each_prev_safe
+#define list_for_each_prev_safe(pos, n, head) \
+	for (pos = (head)->prev, n = pos->prev; pos != (head); \
+		pos = n, n = pos->prev)
+#endif
+
+
+/* CURRENT_TIME passed through ocfs_get_seconds(). */
+#define OCFS_CURRENT_TIME               ocfs_get_seconds(CURRENT_TIME)
+/*
+ * Assign y to the time member x of *i.  ocfs_get_seconds() must expand
+ * to an lvalue for this to compile.  i is parenthesized so that any
+ * pointer expression, not just a plain identifier, expands correctly.
+ */
+#define OCFS_SET_INODE_TIME(i, x, y)    (ocfs_get_seconds((i)->x) = (y))
+
+
+#define  ONE_MEGA_BYTE           (1 * 1024 * 1024)   /* in bytes */
+
+#define  MISS_COUNT_VALUE        30
+
+/*
+** The following flag values reflect the operation to be performed
+**   by ocfs_create_modify_file
+*/
+// FILEFLAG MASK
+#define  FLAG_FILE_CREATE         0x00000001
+#define  FLAG_FILE_EXTEND         0x00000002
+#define  FLAG_FILE_DELETE         0x00000004
+#define  FLAG_FILE_RENAME         0x00000008
+#define  FLAG_FILE_UPDATE         0x00000010
+#define  FLAG_FILE_RECOVERY       0x00000020
+#define  FLAG_FILE_CREATE_DIR     0x00000040
+#define  FLAG_FILE_UPDATE_OIN     0x00000080
+#define  FLAG_FILE_RELEASE_MASTER 0x00000100
+#define  FLAG_RELEASE_DENTRY      0x00000200
+#define  FLAG_CHANGE_MASTER       0x00000400
+#define  FLAG_ADD_OIN_MAP         0x00000800
+#define  FLAG_DIR                 0x00001000
+#define  FLAG_REMASTER            0x00002000
+#define  FLAG_FAST_PATH_LOCK      0x00004000
+#define  FLAG_FILE_UNUSED5        0x00008000
+#define  FLAG_FILE_UNUSED6        0x00010000
+//#define  FLAG_DEL_NAME            0x00020000
+//#define  FLAG_DEL_INODE           0x00040000
+#define  FLAG_FILE_UNUSED7        0x00080000
+#define  FLAG_FILE_UNUSED8        0x00100000
+#define  FLAG_FILE_UNUSED9        0x00200000
+#define  FLAG_FILE_RELEASE_CACHE  0x00400000
+#define  FLAG_FILE_UNUSED10       0x00800000
+#define  FLAG_FILE_UNUSED11       0x01000000
+#define  FLAG_FILE_UNUSED12       0x02000000
+#define  FLAG_FILE_UNUSED13       0x04000000
+#define  FLAG_FILE_TRUNCATE       0x08000000
+#define  FLAG_DROP_READONLY       0x10000000 
+#define  FLAG_READDIR             0x20000000 
+#define  FLAG_ACQUIRE_LOCK        0x40000000 
+#define  FLAG_RELEASE_LOCK        0x80000000 
+									    
+
+
+#define  OCFS_MAXIMUM_NODES          32
+#define  OCFS_MAX_OSB_ID             65536
+
+
+#define  HEARTBEAT_METHOD_DISK       (1)
+#define  HEARTBEAT_METHOD_IPC        (2)
+
+
+enum
+{
+	LEFT_NO_OVERLAP,
+	LEFT_ADJACENT,
+	LEFT_OVERLAP,
+	FULLY_CONTAINED,
+	FULLY_CONTAINING,
+	RIGHT_OVERLAP,
+	RIGHT_ADJACENT,
+	RIGHT_NO_OVERLAP
+};
+
+
+/*
+** Extents Defines
+*/
+
+typedef enum _ocfs_ext_flag {
+	LOCAL_EXT = 1,
+	NONLOCAL_EXT = 2
+} ocfs_ext_flag;
+
+/*
+** Structure signatures 
+*/
+#define  OCFS_TYPE_OSB            (0x05534643)
+#define  OCFS_TYPE_GLOBAL_DATA    (0x07534643)
+
+// LOCKTYPE ONE
+#define  OCFS_DLM_NO_LOCK              (0x0)
+#define  OCFS_DLM_EXCLUSIVE_LOCK       (0x2)
+#define  OCFS_DLM_ENABLE_CACHE_LOCK    (0x8)
+
+#define  OCFS_INVALID_NODE_NUM         UINT_MAX
+
+typedef enum _ocfs_rw_mode
+{
+	OCFS_READ,
+	OCFS_WRITE
+}
+ocfs_rw_mode;
+
+
+/* lockres->lock_state flags */
+#define  FLAG_READONLY_DROPPING        0x00000008
+
+/* osb->osb_flags flags */
+#define  OCFS_OSB_FLAGS_BEING_DISMOUNTED  (0x00000004)
+#define  OCFS_OSB_FLAGS_SHUTDOWN          (0x00000008)
+#define  OCFS_OSB_FLAGS_OSB_INITIALIZED   (0x00000020)
+
+/* OcfsGlobalCtxt.flags flags */
+#define  OCFS_FLAG_GLBL_CTXT_RESOURCE_INITIALIZED (0x00000001)
+#define  OCFS_FLAG_MEM_LISTS_INITIALIZED          (0x00000002)
+#define  OCFS_FLAG_SHUTDOWN_VOL_THREAD            (0x00000004)
+
+/*
+** Information on Publish sector of each node
+*/
+#define  DISK_HBEAT_COMM_ON           20	/* in the order of 5 secs */
+#define  DISK_HBEAT_NO_COMM           4		/* in the order of 1 sec */
+#define  DISK_HBEAT_INVALID           0		/* in the order of 100ms */
+
+
+/*
+** Information on Vote sector of each node
+*/
+// VOTEFLAG MASK
+#define  FLAG_VOTE_NODE               0x1
+#define  FLAG_VOTE_OIN_UPDATED        0x2
+#define  FLAG_VOTE_OIN_ALREADY_INUSE  0x4
+#define  FLAG_VOTE_UPDATE_RETRY       0x8
+#define  FLAG_VOTE_FILE_DEL           0x10
+
+
+#define  INVALID_NODE_POINTER		-1
+#define  OCFS_LOCAL_ALLOC_SIGNATURE	"LCLBMP"
+
+#define  MAX_IP_ADDR_LEN	32
+
+#define SHUTDOWN_SIGS   (sigmask(SIGKILL) | sigmask(SIGHUP) | \
+			 sigmask(SIGINT) | sigmask(SIGQUIT))
+
+#define EFAIL                      999
+#define EWARNING                   998
+
+#define OCFS_MAGIC                 0xa156f7eb
+
+#define OCFS_LINUX_MAX_FILE_SIZE   9223372036854775807LL
+#define INITIAL_EXTENT_MAP_SIZE    10
+
+#define OCFS_UNUSED_INODE_NUMBER   1
+
+#define OCFS_VOLCFG_LOCK_ITERATE	(HZ/10)	/* in jiffies */
+#define OCFS_VOLCFG_LOCK_TIME		1000    /* in ms */
+#define OCFS_VOLCFG_HDR_SECTORS		2	/* in sectors */
+#define OCFS_VOLCFG_NEWCFG_SECTORS	4	/* in sectors */
+
+#define OCFS_NM_HEARTBEAT_TIME		500	/* in ms */
+#define OCFS_HEARTBEAT_INIT             10      /* number of NM iterations to stabilize the publish map */
+	
+#ifndef O_DIRECT
+#warning this depends on the architecture!
+#define O_DIRECT        040000
+#endif
+
+#define NOT_MOUNTED_EXCLUSIVE   (-1)
+
+
+#define BLOCKS_PER_CLEAN_LIST     ( ((PAGE_SIZE-sizeof(void *))/sizeof(unsigned long)) >> 3 )
+
+#define IORUN_ALLOC_SIZE    (OCFS_MAX_DATA_EXTENTS * sizeof (ocfs_io_runs))
+
+#ifndef  _OCFSDEF_H_
+#define  _OCFSDEF_H_
+
+/* sm - ocfs 1.0 fails to set fe->sig for dirs */
+#define  IS_VALID_FILE_ENTRY(ptr)     \
+	(!strcmp((ptr)->i_signature, OCFS2_FILE_ENTRY_SIGNATURE))
+#define  IS_VALID_EXTENT_BLOCK(ptr)  \
+	(!strcmp((ptr)->h_signature, OCFS2_EXTENT_BLOCK_SIGNATURE))
+
+#define  IS_VALID_NODE_NUM(node)      \
+	(((node) >= 0) && ((node) < OCFS_MAXIMUM_NODES))
+
+
+/*
+ * Take _sem only if the caller's bookkeeping flag _flg says it is not
+ * already held, and record that it is now held.  _flg must be an
+ * lvalue; it is evaluated more than once.
+ */
+#define down_with_flag(_sem, _flg)	\
+	do {				\
+		if (!(_flg)) {		\
+			down ((_sem));	\
+			(_flg) = 1;	\
+		}			\
+	} while (0)
+
+/*
+ * Counterpart of down_with_flag(): release _sem only if _flg says it
+ * is held, then clear the flag.
+ */
+#define up_with_flag(_sem, _flg)	\
+	do {				\
+		if ((_flg)) {		\
+			up ((_sem));	\
+			(_flg) = 0;	\
+		}			\
+	} while (0)
+
+#define ocfs_task_interruptible(_o)	((_o)->dlm_task != current && signal_pending(current))
+
+/*
+** Macros
+*/
+#define  OCFS_SET_FLAG(flag, value)    ((flag) |= (value))
+#define  OCFS_CLEAR_FLAG(flag, value)  ((flag) &= ~(value))
+
+
+/*
+** Structures...
+*/
+
+
+/*
+ * Allocate size bytes with kmalloc(GFP_KERNEL) and clear them.
+ * Returns the zeroed buffer, or NULL if the allocation failed.
+ */
+static inline void *ocfs_malloc(size_t size)
+{
+	void *buf;
+
+	buf = kmalloc(size, GFP_KERNEL);
+	if (buf == NULL)
+		return NULL;
+
+	memset(buf, 0, size);
+	return buf;
+}
+
+
+typedef enum _ocfs_protocol
+{
+	OCFS_TCP = 1,
+	OCFS_UDP
+}
+ocfs_protocol;
+
+
+/*
+ * Allocate / release an ocfs_extent from OcfsGlobalCtxt.extent_cache.
+ * Allocation uses GFP_NOFS; callers must check the result for NULL.
+ * The whole cast expression is parenthesized so the macro can be
+ * followed by a postfix operator without changing what the cast
+ * applies to.
+ */
+#define ocfs_allocate_extent_entry()  ((ocfs_extent *)kmem_cache_alloc ( \
+						    OcfsGlobalCtxt.extent_cache, GFP_NOFS))
+#define ocfs_free_extent_entry(ext)   kmem_cache_free(OcfsGlobalCtxt.extent_cache, (ext))
+
+
+#define  OCFS_NAME              "OCFS2"
+
+/* ioctl commands */
+#define  OCFS_IOC_MAGIC          'O'
+#define  OCFS_IOC_GETTYPE        _IOR(OCFS_IOC_MAGIC, 1, struct ocfs_ioc)
+
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)
+typedef long sector_t;
+/*
+ * Compatibility map_bh() for kernels older than 2.6: point bh at block
+ * blk on sb's device and set the BH_Mapped state bit.  Arguments are
+ * parenthesized so non-trivial expressions expand correctly; bh is
+ * evaluated three times.
+ */
+#define map_bh(bh, sb, blk)   \
+	({ \
+		(bh)->b_dev = (sb)->s_dev; \
+		(bh)->b_blocknr = (blk); \
+		(bh)->b_state |= (1UL << BH_Mapped); \
+	})
+#endif
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
+typedef dev_t ocfs_dev;
+#define OCFS_NODEV     0
+#else /* 2.4 kernel */
+typedef int ocfs_dev;
+#define OCFS_NODEV     NODEV
+#endif
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
+/* No longer exists in 2.5 */
+#define fsync_inode_buffers(inode) sync_mapping_buffers(inode->i_mapping)
+#endif /* >= 2.6.0  */
+
+#define OCFS_SB(sb)	    ((ocfs_super *)OCFS_GENERIC_SB_MEMBER(sb))
+
+
+#define  OCFS_MINOR_VERSION              (0)
+#define  OCFS_MAJOR_VERSION              (2)
+#define  OCFS_MINOR_VER_STRING           "0"
+#define  OCFS_MAJOR_VER_STRING           "2"
+
+#define  OCFS_VOLUME_SIGNATURE           "OracleCFS"
+#define  MAX_VOL_SIGNATURE_LEN		128
+#define  MAX_MOUNT_POINT_LEN		128
+
+#define DLOCK_FLAG_OPEN_MAP    (0x1)
+#define DLOCK_FLAG_LOCK        (0x2)
+#define DLOCK_FLAG_SEQ_NUM     (0x4)
+#define DLOCK_FLAG_MASTER      (0x8)
+#define DLOCK_FLAG_LAST_UPDATE (0x10)
+#define DLOCK_FLAG_ADD_SELF    (0x20)
+#define DLOCK_FLAG_ALL         (DLOCK_FLAG_OPEN_MAP | DLOCK_FLAG_LOCK | \
+				DLOCK_FLAG_SEQ_NUM | DLOCK_FLAG_MASTER | \
+				DLOCK_FLAG_LAST_UPDATE)
+
+
+
+#define OCFS_IPC_DEFAULT_PORT   7001
+
+		
+#define OCFS_IPC_DLM_VERSION    0x0201
+
+
+/* =========================================================== */
+
+#include "ocfs2_fs.h"
+
+typedef struct _ocfs_vol_disk_hdr		   // CLASS
+{
+	__u32 minor_version;                       // NUMBER RANGE(0,UINT_MAX)
+	__u32 major_version;                       // NUMBER RANGE(0,UINT_MAX)
+	__u8 signature[MAX_VOL_SIGNATURE_LEN];	 // CHAR[MAX_VOL_SIGNATURE_LEN]
+	__u8 mount_point[MAX_MOUNT_POINT_LEN];	 // CHAR[MAX_MOUNT_POINT_LEN]
+	__u64 serial_num;                          // NUMBER RANGE(0,ULONG_LONG_MAX)
+	/* Size of the device in bytes */           
+	__u64 device_size;	                 // NUMBER RANGE(0,ULONG_LONG_MAX)
+	/* Start of the volume... typically 0 */    
+	__u64 start_off;		                 // NUMBER RANGE(0,ULONG_LONG_MAX)
+	/* Offset to Volume Bitmap... */            
+	__u64 bitmap_off;		                 // NUMBER RANGE(0,ULONG_LONG_MAX)
+	/* Offset to the Publish Sector */          
+	__u64 publ_off;		                 // NUMBER RANGE(0,ULONG_LONG_MAX)
+	/* Offset to the Vote Sector */             
+	__u64 vote_off;		                 // NUMBER RANGE(0,ULONG_LONG_MAX)
+	__u64 root_bitmap_off;                     // NUMBER RANGE(0,ULONG_LONG_MAX)
+	__u64 data_start_off;                      // NUMBER RANGE(0,ULONG_LONG_MAX)
+	__u64 root_bitmap_size;                    // NUMBER RANGE(0,ULONG_LONG_MAX)
+	__u64 root_off;                            // NUMBER RANGE(0,ULONG_LONG_MAX)
+	__u64 root_size;                           // NUMBER RANGE(0,ULONG_LONG_MAX)
+	/* Cluster size as specified during format */        
+	__u64 cluster_size;	                 // CLUSTERSIZE
+	/* Max number of nodes.... OCFS_MAXIMUM_NODES */
+	__u64 num_nodes;		                 // NUMBER RANGE(0,32)
+	/* Number of free clusters at format */
+	__u64 num_clusters;	                 // NUMBER RANGE(0,ULONG_LONG_MAX)
+	/* OCFS_DEFAULT_FILE_NODE_SIZE */
+	__u64 file_node_size;	                 // NUMBER RANGE(0,ULONG_LONG_MAX)
+	__u64 internal_off;                        // NUMBER RANGE(0,ULONG_LONG_MAX)
+	/* Offset to Node Config */
+	__u64 node_cfg_off;	                 // NUMBER RANGE(0,ULONG_LONG_MAX)
+	/* Size of Node Config */
+	__u64 node_cfg_size;	                 // NUMBER RANGE(0,ULONG_LONG_MAX)
+	/* Offset to Node Config Lock */
+	__u64 new_cfg_off;	                 // NUMBER RANGE(0,ULONG_LONG_MAX)
+	__u32 prot_bits;                           // PERMS
+	__s32 excl_mount;                          // NODENUM
+}
+ocfs_vol_disk_hdr;				   // END CLASS
+
+typedef struct _ocfs_vol_label			 // CLASS
+{
+	ocfs2_disk_lock disk_lock;                // DISKLOCK
+	__u8 label[MAX_VOL_LABEL_LEN];            // CHAR[MAX_VOL_LABEL_LEN]
+	__u16 label_len;                           // NUMBER RANGE(0,MAX_VOL_LABEL_LEN)
+	__u8 vol_id[MAX_VOL_ID_LENGTH];           // HEX[MAX_VOL_ID_LENGTH]
+	__u16 vol_id_len;                          // NUMBER RANGE(0,MAX_VOL_ID_LENGTH)
+	__u8 cluster_name[MAX_CLUSTER_NAME_LEN];  // CHAR[MAX_CLUSTER_NAME_LEN]
+	__u16 cluster_name_len;                    // NUMBER RANGE(0,MAX_CLUSTER_NAME_LEN)
+}
+ocfs_vol_label;					  // END CLASS
+
+typedef struct _BARF_BARF_BARF
+{
+	char node_name[MAX_NODE_NAME_LENGTH];
+	ocfs_guid guid;
+	ocfs_ipc_config_info ipc_config;
+}
+BARF_BARF_BARF;
+
+typedef struct _ocfs_extent
+{
+	struct list_head list;
+	__s64 virtual;
+	__s64 physical;
+	__s64 sectors;
+}
+ocfs_extent;
+
+typedef struct _ocfs_extent_map
+{
+	spinlock_t lock;
+	__u32 count;
+	int initialized;
+	struct list_head head;
+	__u32 next_idx;
+	struct list_head *next_ptr;
+}
+ocfs_extent_map;
+
+
+typedef struct _ocfs_obj_id
+{
+	__u32 type;		/* 4 byte signature to uniquely identify the struct */
+	__u32 size;		/* sizeof the struct */
+}
+ocfs_obj_id;
+
+
+typedef struct _ocfs_super ocfs_super;
+typedef struct _ocfs_io_runs ocfs_io_runs;
+typedef struct _ocfs_lock_res ocfs_lock_res;
+
+/* this limits us to 256 nodes
+ * if we need more, we can do a kmalloc for the map */
+#define OCFS_NODE_MAP_MAX_NODES    256
+typedef struct _ocfs_node_map
+{
+	__u32 num_nodes;
+	unsigned long map[BITS_TO_LONGS(OCFS_NODE_MAP_MAX_NODES)];
+} ocfs_node_map;
+
+
+
+/* XXX: fields that can go if we move this to the inode private */
+struct _ocfs_lock_res
+{
+	__u32 master_node_num;	/* Master Node */
+	__u32 lock_state;
+	__u32 lock_holders;
+	__u8 lock_type;
+	atomic_t lr_ref_cnt;	/* When 0, freed */  // XXX
+	struct semaphore lock_mutex;
+	__u32 readonly_node;
+	ocfs_node_map readonly_map;
+	ocfs_node_map oin_openmap;
+	__u64 last_upd_seq_num;
+};
+
+struct _ocfs_journal_handle;
+
+/* OCFS2 Inode Private Data */
+typedef struct _ocfs_inode_private
+{
+	/* inode and feoff fields never change and are always safe to
+	 * read. */
+	struct inode     *inode;
+
+	__u64             feoff;
+
+	/* These fields are protected by priv_sem */
+	struct semaphore  priv_sem;
+	__u32             open_hndl_cnt;
+	__u64             chng_seq_num;
+	ocfs_extent_map   map;
+	__s64             alloc_size;
+	__u32             oin_flags;
+
+	atomic_t          ip_needs_verification;
+
+	/* This protects io on the metadata buffers related to this
+	 * inode. We also consider an "abort_trans" an I/O as it will
+	 * revert the buffer back to a previous state. */
+	struct rw_semaphore  ip_io_sem;
+
+	/* Used by the journalling code to attach an inode to a
+	 * handle.  These are protected by ip_io_sem in order to lock
+	 * out other I/O to the inode until we either commit or
+	 * abort. */
+	struct list_head            ip_handle_list;
+	struct _ocfs_journal_handle *ip_handle;
+
+	/* inode_extend_sem locks out extends on behalf of other nodes. */
+	struct rw_semaphore  inode_extend_sem;
+
+	struct list_head  recovery_list; /* protected by recovery_list_sem */
+	__u32             num_extends; /* protected by oin_num_ext_lock */
+
+	atomic_t          i_clean_buffer_seq;
+	__u32             flags; /* see below */
+
+	/* stolen right off of ocfs2_dinode */
+	union {
+		__u64 fe_private;
+		__u64 child_dirnode;
+		struct _ip_bitinfo {
+			__u32 used_bits;
+			__u32 total_bits;
+		} ip_bitinfo;
+	} u;
+
+	ocfs_lock_res     i_lockres;
+	__u32 		  i_dir_start_lookup;
+} ocfs_inode_private;
+
+/* Eventually, the 'flags' and 'oin_flags' fields need to be
+ * merged. */
+/* oin_flags flags */
+#define  OCFS_OIN_DIRECTORY                      (0x00000002)
+#define  OCFS_OIN_OPEN_FOR_DIRECTIO              (0x00000008)
+#define  OCFS_OIN_OPEN_FOR_WRITE                 (0x00000010)
+
+/* 'flags' flags. */
+/* has this inode been deleted, either from this node or from another node. */
+#define OCFS_INODE_DELETED          0x00000001
+/* is this the journal inode? */
+#define OCFS_INODE_JOURNAL          0x00000002
+/* set on init_private, cleared on clear_inode */
+#define OCFS_INODE_INITIALIZED      0x00000004
+/* is this a system file? */
+#define OCFS_INODE_SYSTEM_FILE      0x00000008
+/* are we going to let another node deal with deletion of this inode? */
+#define OCFS_INODE_SKIP_DELETE      0x00000010
+
+/*
+ * Accessors for the ocfs_inode_private stored in inode->u.generic_ip.
+ * Every macro argument and every expansion is parenthesized so the
+ * macros can be handed arbitrary pointer expressions and combined with
+ * other operators safely.  The argument may be evaluated more than
+ * once.
+ */
+#define OCFS_I(i)        ((ocfs_inode_private *)((i)->u.generic_ip))
+
+#define GET_INODE_CLEAN_SEQ(i)  ((atomic_t *)(&(OCFS_I(i)->i_clean_buffer_seq)))
+
+/* True when i is non-NULL and flagged OCFS_INODE_SYSTEM_FILE. */
+#define IS_SYSTEM_FILE_INODE(i)  ((i) && (OCFS_I(i)->flags & OCFS_INODE_SYSTEM_FILE))
+
+
+/* Test / set / clear OCFS_INODE_DELETED in the private 'flags' word. */
+#define INODE_DELETED(i) (OCFS_I(i)->flags & OCFS_INODE_DELETED)
+#define SET_INODE_DELETED(i) (OCFS_I(i)->flags |= OCFS_INODE_DELETED)
+#define CLEAR_INODE_DELETED(i) (OCFS_I(i)->flags &= (~OCFS_INODE_DELETED))
+
+/* Test / set / clear OCFS_INODE_JOURNAL in the private 'flags' word. */
+#define INODE_JOURNAL(i) (OCFS_I(i)->flags & OCFS_INODE_JOURNAL)
+#define SET_INODE_JOURNAL(i) (OCFS_I(i)->flags |= OCFS_INODE_JOURNAL)
+#define CLEAR_INODE_JOURNAL(i) (OCFS_I(i)->flags &= (~OCFS_INODE_JOURNAL))
+
+#define SET_INODE_FEOFF(i,o)						      \
+do {									      \
+	OCFS_I(i)->feoff = (o);					      \
+} while (0)
+
+#define GET_INODE_FEOFF(i) (OCFS_I(i)->feoff)
+
+
+/* Address of the inode's embedded lock resource; BUG()s on a NULL inode. */
+#define GET_INODE_LOCKRES(i) ({ if ((i) == NULL) BUG(); (&(OCFS_I(i)->i_lockres)); })
+
+typedef enum _ocfs_vol_state
+{
+	VOLUME_DISABLED,
+	VOLUME_INIT,
+	VOLUME_ENABLED,
+	VOLUME_LOCKED,
+	VOLUME_IN_RECOVERY,
+	VOLUME_MOUNTED,
+	VOLUME_BEING_DISMOUNTED,
+	VOLUME_DISMOUNTED
+}
+ocfs_vol_state;
+
+typedef struct _ocfs_vol_node_map
+{
+	__u64 time;
+	__u32 miss_cnt;
+	atomic_t dismount;
+}
+ocfs_vol_node_map;
+
+struct _ocfs_bitmap_free_head;
+
+typedef struct _ocfs_inode_hash {
+	spinlock_t        lock;     /* protects the whole hash */
+	int               size;     /* number of lists in the hash */
+	unsigned int      num_ents; /* global number of offsets in there */
+	struct list_head  *hash; 
+} ocfs_inode_hash;
+
+typedef struct _ocfs_inode_num {
+	enum {
+		INUM_UNBOUND = 0,  /* unbound to an inode*/
+		INUM_BOUND         /* we have an inode attached. */
+	}                 i_state;
+	struct list_head  i_list;
+	unsigned long     i_ino;
+	__u64             i_feoff;  /* used only for directory inodes,
+				     * points to parent fe of
+				     * dirnode. for files i_voteoff == i_feoff,
+				     * for root directory this is 0 */
+	struct inode     *i_inode;  /* may be null! */
+} ocfs_inode_num;
+
+typedef struct _ocfs_commit_task
+{
+	struct completion c_complete;
+	struct task_struct *c_task;
+	struct semaphore c_lock;
+	struct list_head c_list;
+} ocfs_commit_task;
+
+typedef struct _ocfs_dlm_stats
+{
+	atomic_t total;
+	atomic_t okay;
+	atomic_t etimedout;
+	atomic_t efail;
+	atomic_t eagain;
+	atomic_t enoent;
+	atomic_t def;
+}
+ocfs_dlm_stats;
+
+typedef struct _ocfs_alloc_stats
+{
+	atomic_t moves;
+	atomic_t local_data;
+	atomic_t bitmap_data;
+	atomic_t bitmap_meta;
+	atomic_t dir_allocs;
+	atomic_t dir_extends;
+	atomic_t ext_allocs;
+	atomic_t ext_extends;
+} ocfs_alloc_stats;
+
+extern char *system_file_names[];
+
+
+struct _ocfs_journal;
+
+/*
+ * ocfs_super
+ *
+ * A mounted volume is represented using the following structure.
+ */
+struct _ocfs_super
+{
+	ocfs_obj_id obj_id;
+	struct semaphore osb_res; /* resource to protect the ocfs_super */
+	struct list_head osb_next;	/* list of ocfs_super(s) */
+	__u32 osb_id;		/* id used by the proc interface */
+	struct completion dlm_complete;
+	struct task_struct *dlm_task;
+	ocfs_commit_task *commit;
+	__u32 osb_flags;
+	__s64 file_open_cnt;	/* num of open files/dirs. vol cannot be dismounted if > 0 */
+	ocfs_node_map publ_map;
+	struct list_head cache_lock_list;
+	struct super_block *sb;
+	struct inode *root_inode;
+	struct inode *sys_root_inode;
+	struct inode *system_inodes[NUM_SYSTEM_INODES];
+	
+	/* new */
+	u64 num_clusters;
+	u64 root_blkno;
+	u64 system_dir_blkno;
+	u64 bitmap_blkno;
+	u64 bitmap_blocks;
+	u64 publish_blkno;
+	u64 publish_blocks;
+	u64 vote_blkno;
+	u64 vote_blocks;
+	u64 autoconfig_blkno;
+	u64 autoconfig_blocks;
+	u64 new_autoconfig_blkno;
+	u64 new_autoconfig_blocks;
+	u64 total_autoconfig_blocks;
+	u8 *uuid;
+	u8 *vol_label;
+
+	
+	ocfs_vol_node_map *vol_node_map;
+	struct semaphore cfg_lock;
+	BARF_BARF_BARF **node_cfg_info;
+	__u64 cfg_seq_num;
+	int cfg_initialized;
+	u32 max_nodes;
+	u32 num_cfg_nodes;
+	u32 num_nodes;
+	u32 node_num;
+	int reclaim_id;		/* reclaim the original node number*/
+	__u32 hbt;
+	int s_sectsize_bits;
+	int s_clustersize;
+	int s_clustersize_bits;
+	int needs_flush;
+
+	ocfs_alloc_bm cluster_bitmap;
+	ocfs_vol_state vol_state;
+	struct semaphore recovery_lock;
+	spinlock_t recovery_map_lock;
+	ocfs_node_map recovery_map;
+	int disable_recovery;
+	atomic_t num_recovery_threads;
+	struct timer_list lock_timer;
+	atomic_t lock_stop;
+	wait_queue_head_t lock_event;
+	atomic_t lock_event_woken;
+	struct semaphore comm_lock;	/* protects ocfs_comm_process_vote_reply */
+	atomic_t nm_init;
+	wait_queue_head_t nm_init_event;
+	__u32 prealloc_lock;
+	struct buffer_head **autoconfig_bhs;
+	struct semaphore publish_lock;  /* protects r/w to publish sector */
+	atomic_t node_req_vote;         /* set when node's vote req pending */
+	int publish_dirty;
+	struct list_head needs_flush_head;
+	wait_queue_head_t flush_event;
+	atomic_t flush_event_woken;
+	struct _ocfs_journal *journal;
+	atomic_t clean_buffer_seq;
+	spinlock_t clean_buffer_lock;
+	struct list_head *lock_recovery_lists;
+	__u64 *last_publ_seq_num;
+	int have_local_alloc;
+	struct buffer_head *local_alloc_bh;
+	/* Protects local alloc */
+	struct semaphore local_alloc_sem;
+	ocfs_inode_hash inode_hash;
+	struct semaphore extend_sem;
+	__u8 check_mounted; /* tell nm to check mounted flag, protected by publish_lock*/
+	ocfs_dlm_stats net_reqst_stats;	/* stats of netdlm vote requests */
+	ocfs_dlm_stats net_reply_stats;	/* stats of netdlm vote responses */
+	ocfs_dlm_stats dsk_reqst_stats;	/* stats of diskdlm vote requests */
+	ocfs_dlm_stats dsk_reply_stats;	/* stats of diskdlm vote responses */
+	ocfs_alloc_stats alloc_stats;
+	char dev_str[20];		/* "major,minor" of the device */
+	struct semaphore vote_sem; /* protects calls to ocfs_process_vote */
+	struct list_head vote_obj_queue;
+	spinlock_t vote_obj_queue_lock;
+	unsigned long voting_ino; /* only safe from the process_vote pid */
+};
+
+typedef struct _ocfs_comm_info
+{
+	u16 ip_version;			/* IP version in NBO */
+	u16 ip_port;			/* IP port in NBO */
+	union {
+		__u32 ip_addr4;		/* IPv4 address in NBO */
+		__u32 ip_addr6[4];	/* IPv6 address in NBO */
+	} addr_u;
+} ocfs_comm_info;
+
+typedef struct _ocfs_global_ctxt
+{
+	ocfs_obj_id obj_id;
+	struct semaphore global_res;
+	struct list_head osb_next;	/* List of all volumes */
+	kmem_cache_t *inode_cache;
+	kmem_cache_t *extent_cache;
+	__u32 flags;
+	__u32 pref_node_num;		/* preferred... osb has the real one */
+	ocfs_guid guid;			/* uniquely identifies a node */
+	char *node_name;		/* human readable node identification */
+	char *cluster_name;		/* unused */
+	ocfs_comm_info comm_info;	/* ip address, etc for listener */
+	int comm_info_read;		/* ipc info loaded from config file */
+	spinlock_t comm_seq_lock;	/* protects comm_seq_num */
+	__u64 comm_seq_num;		/* local node seq num used in ipcdlm */
+	ocfs_dlm_stats net_reqst_stats;	/* stats of netdlm vote requests */
+	ocfs_dlm_stats net_reply_stats;	/* stats of netdlm vote responses */
+	ocfs_dlm_stats dsk_reqst_stats;	/* stats of diskdlm vote requests */
+	ocfs_dlm_stats dsk_reply_stats;	/* stats of diskdlm vote responses */
+}
+ocfs_global_ctxt;
+
+struct _ocfs_io_runs
+{
+	__u64 disk_off;
+	__u32 offset;
+	__u32 byte_cnt;
+};
+
+typedef struct _ocfs_ipc_ctxt
+{
+	__u32 dlm_msg_size;
+	__u16 version;
+	int init;
+	struct socket *send_sock;
+	struct socket *recv_sock;
+	struct completion complete;
+	struct task_struct *task;
+}
+ocfs_ipc_ctxt;
+
+
+extern ocfs_ipc_ctxt OcfsIpcCtxt;
+
+typedef struct _ocfs_ipc_dlm_config
+{
+	__u16 version;
+	__u32 msg_size;
+	__u32 num_recv_threads;
+}
+ocfs_ipc_dlm_config;
+
+/*
+** Globals ...
+*/
+extern ocfs_global_ctxt OcfsGlobalCtxt;
+
+typedef struct _ocfs_publish		// CLASS
+{
+	__u64 time;                     // NUMBER RANGE(0,ULONG_LONG_MAX)
+	__s32 vote;                     // BOOL
+	__u32 dirty;                     // BOOL
+	__u32 vote_type;                  // FILEFLAG
+	__u32 mounted;                   /* used for journaling */
+	__u32 reserved1[8];		// this is now used as the vote_map !!!!! was __u8[32]
+	__u64 vote_map;                   // NODEBITMAP  this is now unused!!!!
+	__u64 publ_seq_num;               // NUMBER RANGE(0,ULONG_LONG_MAX)
+	__u64 lock_id;                    // NUMBER RANGE(0,ULONG_LONG_MAX)
+	/* last seq num used in comm voting */
+	__u64 comm_seq_num;		// NUMBER RANGE(0,ULONG_LONG_MAX)
+	__u64 fe_off;                   /* needed to create inodes. */
+} ocfs_publish;				// END CLASS
+
+typedef struct _ocfs_vote		// CLASS
+{
+	__u8 type;
+	__u8 node;
+	__u8 reserved1[30];		// used to be vote[32]
+	__u64 vote_seq_num;              // NUMBER RANGE(0,ULONG_LONG_MAX)
+	__u64 lock_id;                   // NUMBER RANGE(0,ULONG_LONG_MAX)
+	__u8 open_handle;                // BOOL
+	__u8 ov_pad[7]; 		// UNUSED
+} ocfs_vote;				// END CLASS
+
+typedef struct _ocfs_dlm_msg_hdr
+{
+	__u64 lock_id;
+	__u64 lock_seq_num;
+	__u32 flags;
+	__u8 open_handle;
+	__u8 odmh_pad[3];
+} ocfs_dlm_msg_hdr;
+
+typedef ocfs_dlm_msg_hdr ocfs_dlm_req_master;
+typedef ocfs_dlm_msg_hdr ocfs_dlm_disk_vote_req;
+
+typedef struct _ocfs_dlm_reply_master
+{
+	ocfs_dlm_msg_hdr h;
+	__u32 status;
+}
+ocfs_dlm_reply_master;
+
+typedef struct _ocfs_dlm_disk_vote_reply
+{
+	ocfs_dlm_msg_hdr h;
+	__u32 status;
+}
+ocfs_dlm_disk_vote_reply;
+
+typedef struct _ocfs_dlm_msg
+{
+	__u32 magic;
+	__u32 msg_len;
+	__u8 vol_id[MAX_VOL_ID_LENGTH];
+	__u32 src_node;
+	__u32 dst_node;
+	__u32 msg_type;
+	__u32 check_sum;
+	__u8 msg_buf[0];
+} ocfs_dlm_msg;
+
+typedef struct _ocfs_vote_obj
+{
+	struct list_head list;
+	wait_queue_head_t voted_event;
+	atomic_t voted_event_woken;
+	atomic_t refcount;
+	spinlock_t lock;
+	__u32 vote_state;
+	__u32 req_lock_type;
+	__u32 vote_status;
+	ocfs_node_map req_vote_map;
+	ocfs_node_map got_vote_map;
+	ocfs_node_map tmp_openmap;
+	__u64 seq_num;
+	pid_t pid;
+	ocfs_dlm_msg m;
+} ocfs_vote_obj;
+
+enum {
+	VOTE_OBJ_STATE_UNSENT,
+	VOTE_OBJ_STATE_SENT,
+	VOTE_OBJ_STATE_PARTIAL_REPLY,
+	VOTE_OBJ_STATE_FULL_REPLY,
+	VOTE_OBJ_STATE_DESTROYING
+};
+
+	
+
+typedef struct _ocfs_vote_obj_lookup_data ocfs_vote_obj_lookup_data;
+
+struct _ocfs_vote_obj_lookup_data
+{
+	union {
+		struct {
+			__u64 seq_num;
+			__u64 lock_id;
+		} s;
+		struct {
+			char *page;
+			int *len;
+			int max;
+		} proc;
+	} u;
+	int (*func) (ocfs_vote_obj *obj, struct _ocfs_vote_obj_lookup_data *data);
+	ocfs_vote_obj **ret;
+};
+
+
+
+
+typedef struct _ocfs_recv_ctxt
+{
+	__s32 msg_len;
+	__u8 msg[OCFS_MAX_DLM_PKT_SIZE];
+	int status;
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
+	struct work_struct ipc_wq;
+#else
+	struct tq_struct ipc_tq;
+#endif
+}
+ocfs_recv_ctxt;
+
+typedef struct _ocfs_cfg_task
+{
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
+	struct work_struct cfg_wq;
+#else
+	struct tq_struct cfg_tq;
+#endif
+	ocfs_super *osb;
+	__u64 lock_off;
+	__u8 *buffer;
+	struct buffer_head *bh;
+}
+ocfs_cfg_task;
+
+typedef enum _ocfs_volcfg_op
+{
+	OCFS_VOLCFG_ADD,
+	OCFS_VOLCFG_UPD
+}
+ocfs_volcfg_op;
+
+typedef struct _ocfs_vote_request_ctxt
+{
+	int request_method;
+	__u32 node_num;
+	int status;
+	union {
+		ocfs_dlm_msg *dlm_msg;
+		ocfs_publish *publish;
+	} u;
+} ocfs_vote_request_ctxt;
+
+typedef struct _ocfs_vote_reply_ctxt
+{
+	int reply_method;
+	int *status;
+	ocfs_node_map *got_vote_map;
+	ocfs_node_map *open_map;
+	__u32 flags;
+	union {
+		ocfs_dlm_reply_master *reply;
+		ocfs_vote *vote;
+	} u;
+} ocfs_vote_reply_ctxt;
+
+
+/* these three used as 'type' in ocfs_bitmap_update */
+#if 0
+#define  DISK_ALLOC_DIR_NODE      1
+#endif
+#define  DISK_ALLOC_EXTENT_NODE   2
+#define  DISK_ALLOC_VOLUME        3
+#define  DISK_ALLOC_INODE	  4
+
+/* a bitmap update, currently used for freeing bits */
+typedef struct ocfs_bitmap_update
+{
+	__u64 length;
+	__u64 file_off;
+	__u32 type;
+	__u32 node_num;
+}
+ocfs_bitmap_update;
+
+#define  FREE_LOG_SIZE            150
+
+typedef struct _ocfs_free_rec
+{
+	__u32 num_updates;
+	struct list_head log_list;
+	ocfs_bitmap_update update[FREE_LOG_SIZE];
+} 
+ocfs_free_rec;
+
+typedef struct _ocfs_bitmap_free_head
+{
+	__u32 num_logs;
+	struct _ocfs_free_rec *tail;
+	struct list_head free_logs;
+} 
+ocfs_bitmap_free_head;
+
+
+struct ocfs_ioc
+{
+	char name[255];		/* "OCFS" */
+	char version[255];	/* version */
+	__u16 nodenum;		/* node number */
+	char nodename[255];	/* node name */
+};
+
+/* timeout structure taken from Ben's aio.c */
+typedef struct _ocfs_timeout {
+	struct timer_list	timer;
+	int			timed_out;
+	wait_queue_head_t	wait;
+} ocfs_timeout;
+
+#define NAMEI_RA_CHUNKS  2
+#define NAMEI_RA_BLOCKS  4
+#define NAMEI_RA_SIZE        (NAMEI_RA_CHUNKS * NAMEI_RA_BLOCKS)
+#define NAMEI_RA_INDEX(c,b)  (((c) * NAMEI_RA_BLOCKS) + (b))
+
+
+/*
+ * __ocfs_wait(wq, condition, timeo, ret)
+ *
+ * Sleep in TASK_INTERRUPTIBLE on wait queue wq until condition is true.
+ * The task is also queued on the wait queue of a local ocfs_timeout,
+ * which is armed with ocfs_set_timeout() only when timeo is non-zero.
+ *
+ * On exit ret holds:
+ *	0		condition evaluated true
+ *	-ETIMEDOUT	the local timeout's timed_out flag was set
+ *	-EINTR		a signal was pending after schedule() returned
+ *
+ * condition is re-evaluated on every pass through the loop, and timeo
+ * is expanded more than once.  wq is the wait queue head itself, not a
+ * pointer to it (the macro takes its address).
+ * NOTE(review): the unit of timeo is whatever ocfs_set_timeout()
+ * expects - confirm against its definition.
+ */
+#define __ocfs_wait(wq, condition, timeo, ret)			\
+do {								\
+	ocfs_timeout __to;					\
+								\
+	DECLARE_WAITQUEUE(__wait, current);			\
+	DECLARE_WAITQUEUE(__to_wait, current);			\
+								\
+	ocfs_init_timeout(&__to);				\
+								\
+	if (timeo) {						\
+		ocfs_set_timeout(&__to, timeo);			\
+		if (__to.timed_out) {				\
+			ocfs_clear_timeout(&__to);		\
+		}						\
+	}							\
+								\
+	add_wait_queue(&wq, &__wait);				\
+	add_wait_queue(&__to.wait, &__to_wait);			\
+	do {							\
+		ret = 0;					\
+		set_current_state(TASK_INTERRUPTIBLE);		\
+		if (condition)					\
+			break;					\
+		ret = -ETIMEDOUT;				\
+		if (__to.timed_out)				\
+			break;					\
+		schedule();					\
+		if (signal_pending(current)) {			\
+			ret = -EINTR;				\
+			break;					\
+		}						\
+	} while (1);						\
+								\
+	set_current_state(TASK_RUNNING);			\
+	remove_wait_queue(&wq, &__wait);			\
+	remove_wait_queue(&__to.wait, &__to_wait);		\
+								\
+	if (timeo)						\
+		ocfs_clear_timeout(&__to);			\
+								\
+} while(0)
+
+/*
+ * Expression form of __ocfs_wait().  Evaluates to 0 without sleeping
+ * when 'condition' already holds; otherwise to the value __ocfs_wait()
+ * leaves in its 'ret' argument (0, -ETIMEDOUT or -EINTR).  Relies on
+ * the GCC statement-expression extension.
+ */
+#define ocfs_wait(wq, condition, timeout)			\
+({								\
+	int __ret = 0;						\
+	if (!(condition))					\
+		__ocfs_wait(wq, condition, timeout, __ret);	\
+	__ret;							\
+})
+#endif				/* !USERSPACE_TOOL */
+
+
+
+/*
+ * Derive an inode number from the inode's file entry offset: the
+ * offset is shifted down by the superblock's blocksize bits and the
+ * result is masked to what fits in an unsigned long.
+ */
+static inline unsigned long ino_from_fe_off(struct inode *inode)
+{
+	__u64 blkno = GET_INODE_FEOFF(inode);
+
+	blkno >>= inode->i_sb->s_blocksize_bits;
+	return (unsigned long)(blkno & (__u64)ULONG_MAX);
+}
+
+/*
+ * Same conversion as ino_from_fe_off(), for a caller-supplied byte
+ * offset: shift down by sb's blocksize bits, then mask to an
+ * unsigned long.
+ */
+static inline unsigned long ino_from_off(struct super_block *sb,
+					 __u64 off)
+{
+	off >>= sb->s_blocksize_bits;
+
+	return (unsigned long)(off & (__u64)ULONG_MAX);
+}
+
+/*
+ * Store in de->file_type the OCFS2_FT_* value that ocfs_type_by_mode
+ * maps to the file-type bits of 'mode'.  'sb' is not used.
+ */
+static inline void ocfs_set_de_type(struct super_block *sb,
+				    struct ocfs2_dir_entry *de,
+				    umode_t mode)
+{
+	umode_t fmt = mode & S_IFMT;
+
+	de->file_type = ocfs_type_by_mode[fmt >> S_SHIFT];
+}
+
+/*
+ * Allocate an ocfs_bitmap_free_head with ocfs_malloc() and set it up
+ * empty: no logs, no tail, free_logs initialised as an empty list.
+ * Returns NULL when the allocation fails.
+ */
+static inline ocfs_bitmap_free_head * ocfs_alloc_bitmap_free_head(void)
+{
+	ocfs_bitmap_free_head *head;
+
+	head = ocfs_malloc(sizeof(*head));
+	if (!head)
+		return NULL;
+
+	head->num_logs = 0;
+	head->tail = NULL;
+	INIT_LIST_HEAD(&head->free_logs);
+
+	return head;
+}
+
+/*
+ * Release a free head and every ocfs_free_rec still chained on it.
+ * A NULL 'f' is a no-op.
+ *
+ * The head itself is always freed.  Previously it was only freed when
+ * num_logs was non-zero, so an empty head was leaked.
+ */
+static inline void ocfs_free_bitmap_free_head(ocfs_bitmap_free_head *f)
+{
+	ocfs_free_rec *log;
+	struct list_head *p, *n;
+
+	if (!f)
+		return;
+
+	if (f->num_logs) {
+		/* _safe variant: each entry is unlinked and freed in turn */
+		list_for_each_safe(p, n, &(f->free_logs)) {
+			log = list_entry(p, ocfs_free_rec, log_list);
+			list_del(&(log->log_list));
+			kfree(log);
+		}
+	}
+
+	kfree(f);
+}
+
+/*
+ * Number of clusters needed to hold 'bytes', rounding a partial
+ * cluster up to a whole one.
+ */
+static inline unsigned int ocfs_clusters_for_bytes(struct super_block *sb,
+						   __u64 bytes)
+{
+	int cl_bits = OCFS_SB(sb)->s_clustersize_bits;
+	__u64 rounded = bytes + (OCFS_SB(sb)->s_clustersize - 1);
+
+	/* OCFS2 just cannot have enough clusters to overflow this */
+	return (unsigned int)(rounded >> cl_bits);
+}
+
+/*
+ * Round 'bytes' up to a whole number of clusters and return the
+ * result in bytes.
+ */
+static inline __u64 ocfs_align_bytes_to_clusters(struct super_block *sb,
+						 __u64 bytes)
+{
+	unsigned int nr = ocfs_clusters_for_bytes(sb, bytes);
+	int cl_bits = OCFS_SB(sb)->s_clustersize_bits;
+
+	return (__u64)nr << cl_bits;
+}
+
+/*
+ * Take an extra reference on 'inode', but only when it already has at
+ * least one reference and I_CLEAR is not set in i_state.  Returns 0 on
+ * success; otherwise logs the inode's state and returns -EINVAL
+ * without touching i_count.
+ */
+static inline int ocfs_inc_icount(struct inode *inode)
+{
+	int usable = atomic_read(&inode->i_count) >= 1 &&
+		     !(inode->i_state & I_CLEAR);
+
+	if (!usable) {
+		LOG_ERROR_ARGS("eek! i_count=%d, i_state=%lu, i_ino=%lu, no dentry\n",
+			       atomic_read(&inode->i_count), inode->i_state,
+			       inode->i_ino);
+
+		LOG_ERROR_ARGS("feoff=%llu, deleted=%u\n",
+			       GET_INODE_FEOFF(inode),
+			       INODE_DELETED(inode));
+
+		return -EINVAL;
+	}
+
+	atomic_inc(&inode->i_count);
+	return 0;
+}
+
+#define ocfs_check_inode(inode)  __ocfs_check_inode(inode, __LINE__)
+
+/*
+ * Reference-count sanity check.  Call this right before an atomic_dec
+ * or iput.
+ *
+ * The expected minimum i_count is built up as: one for the inode hash
+ * (dropped if the inode is marked deleted), one if the inode has at
+ * least one dentry alias, plus one for the reference the caller is
+ * about to drop.  If i_count is below that, the inode's state is
+ * logged together with 'line' and BUG() is raised.  A NULL inode is
+ * ignored.
+ */
+static inline void __ocfs_check_inode(struct inode *inode, unsigned int line)
+{
+	struct dentry *dentry = NULL;
+	int max_cnt = 1; /* one for the inode hash */
+
+	if (!inode)
+		return;
+
+	if (INODE_DELETED(inode))
+		max_cnt--;
+
+	/* what the heck, take the first alias ;-) */
+	if (!list_empty(&(inode->i_dentry)))
+		dentry = list_entry(inode->i_dentry.next, struct dentry,
+				    d_alias);
+
+	/* add one for the dentrys ref... */
+	if (dentry)
+		max_cnt++;
+
+	/* +1 for the ref you're about to remove. */
+	if (atomic_read(&inode->i_count) >= (max_cnt+1))
+		return;
+
+	LOG_ERROR_ARGS("max_cnt = %d, line = %u\n", max_cnt, line);
+	if (dentry)
+		/* "%.*s" bounds the name by d_name.len; '*' takes an int */
+		LOG_ERROR_ARGS("eek! i_count=%d, i_state=%lu, i_ino=%lu, name='%.*s'\n",
+			       atomic_read(&inode->i_count), inode->i_state, inode->i_ino,
+			       (int)dentry->d_name.len, dentry->d_name.name);
+	else
+		LOG_ERROR_ARGS("eek! i_count=%d, i_state=%lu, i_ino=%lu, no dentry\n",
+			       atomic_read(&inode->i_count), inode->i_state, inode->i_ino);
+
+	LOG_ERROR_ARGS("feoff=%llu, deleted=%u\n",
+		       GET_INODE_FEOFF(inode),
+		       INODE_DELETED(inode));
+
+	BUG();
+}
+
+
+/*
+ * Return 1 when the inode's lock resource is of type
+ * OCFS_DLM_ENABLE_CACHE_LOCK and is mastered by this node
+ * (osb->node_num), 0 otherwise.
+ */
+static inline int ocfs_is_local_cache_lock(ocfs_super *osb, struct inode *inode)
+{
+	ocfs_lock_res *res = GET_INODE_LOCKRES(inode);
+
+	if (res->lock_type != OCFS_DLM_ENABLE_CACHE_LOCK)
+		return 0;
+
+	return res->master_node_num == osb->node_num ? 1 : 0;
+}
+
+/*
+ * Return i such that (1 << i) == num, for i in 0..31.  Returns 0 when
+ * num is not a single set bit, which callers cannot tell apart from
+ * num == 1.
+ */
+static inline int ocfs_get_right_shift_bits(int num)
+{
+	int i;
+
+	for (i = 0; i < 32; i++) {
+		/* shift an unsigned 1: (1 << 31) overflows a signed int */
+		if ((1U << i) == (unsigned int)num)
+			return i;
+	}
+
+	return 0;
+}
+
+/*
+ * Return the index of the first record in eb->h_list whose end
+ * (e_cpos + e_clusters, converted to bytes) is at or beyond 'vbo'.
+ * Returns el->l_count when no record qualifies.
+ *
+ * NOTE(review): the scan covers l_count records rather than
+ * l_next_free_rec -- confirm that unused slots cannot match.
+ */
+static inline int ocfs_extent_for_offset(struct super_block *sb,
+					 ocfs2_extent_block *eb,
+					 u64 vbo)
+{
+	int k;
+	ocfs2_extent_list *el = &eb->h_list;
+	ocfs2_extent_rec *ext;
+	u32 cluster;
+	u64 byte_off;
+
+	for (k = 0; k < el->l_count; k++) {
+		ext = &el->l_recs[k];
+
+		cluster = ext->e_cpos + ext->e_clusters;
+		/*
+		 * Widen before shifting: a 32-bit shift would wrap for
+		 * extents that end at or past 4GB.
+		 */
+		byte_off = (u64)cluster << OCFS_SB(sb)->s_clustersize_bits;
+
+		if (byte_off >= vbo)
+			break;
+	}
+
+	return k;
+}
+
+/*
+ * Convert a cluster count to a block count by shifting left by the
+ * difference between the cluster-size and block-size bit counts.
+ */
+static inline u64 ocfs_clusters_to_blocks(struct super_block *sb,
+					  u32 clusters)
+{
+	int shift = OCFS_SB(sb)->s_clustersize_bits;
+
+	shift -= sb->s_blocksize_bits;
+	return (u64)clusters << shift;
+}
+
+/*
+ * Convert a block count to a cluster count (rounding down) by
+ * shifting right by the difference between the cluster-size and
+ * block-size bit counts.  The result is truncated to 32 bits.
+ */
+static inline u32 ocfs_blocks_to_clusters(struct super_block *sb,
+					  u64 blocks)
+{
+	int shift = OCFS_SB(sb)->s_clustersize_bits;
+
+	shift -= sb->s_blocksize_bits;
+	return (u32)(blocks >> shift);
+}
+
+/*
+ * Number of bitmap chunks (OCFS_BITS_IN_CHUNK(sb) bits each) needed
+ * to hold 'num_bits' bits, rounding up.
+ */
+static inline int ocfs_blocks_for_bits(struct super_block *sb,
+				       unsigned int num_bits)
+{
+	int per_chunk = OCFS_BITS_IN_CHUNK(sb);
+	unsigned int padded = num_bits + (per_chunk - 1);
+
+	return padded / per_chunk;
+}
+
+/*
+ * Split a bitmap bit offset into a chunk index and a bit offset
+ * within that chunk, using OCFS_BITS_IN_CHUNK(sb) bits per chunk.
+ * Either output pointer may be NULL to skip that result.
+ */
+static inline void ocfs_bitmap_block_for_off(struct super_block *sb,
+					     unsigned int bm_off,
+					     unsigned int *block,
+					     unsigned int *block_off)
+{
+	if (block != NULL)
+		*block = bm_off / OCFS_BITS_IN_CHUNK(sb);
+
+	if (block_off != NULL)
+		*block_off = bm_off % OCFS_BITS_IN_CHUNK(sb);
+}
+
+/*
+ * Inverse of ocfs_bitmap_block_for_off(): combine a chunk index and a
+ * bit offset within that chunk into a single bitmap bit offset.
+ */
+static inline unsigned int ocfs_bitmap_off_for_block(struct super_block *sb,
+						     unsigned int block,
+						     unsigned int block_off)
+{
+	unsigned int base = block * OCFS_BITS_IN_CHUNK(sb);
+
+	return base + block_off;
+}
+
+/*
+ * Count the bitmap chunks touched by the bit range
+ * [start_bit, start_bit + num_bits).  When 'start_blk' is non-NULL it
+ * receives the index of the chunk holding start_bit, even if num_bits
+ * is zero (in which case 0 is returned).
+ */
+static inline int ocfs_bitmap_blocks_affected(struct super_block *sb,
+					      unsigned int start_bit,
+					      unsigned int num_bits,
+					      unsigned int *start_blk)
+{
+	unsigned int first, last;
+
+	ocfs_bitmap_block_for_off(sb, start_bit, &first, NULL);
+	if (start_blk)
+		*start_blk = first;
+
+	if (num_bits == 0)
+		return 0;
+
+	/* chunk holding the last bit of the range */
+	ocfs_bitmap_block_for_off(sb, start_bit + num_bits - 1, &last, NULL);
+
+	return last - first + 1;
+}
+
+
+typedef struct _ocfs_journal_handle ocfs_journal_handle;
+
+#endif /* !OCFS_H */

Added: trunk/src/ocfs1_fs_compat.h
===================================================================
--- trunk/src/ocfs1_fs_compat.h	2004-06-17 16:59:59 UTC (rev 1127)
+++ trunk/src/ocfs1_fs_compat.h	2004-06-17 21:06:33 UTC (rev 1128)
@@ -0,0 +1,112 @@
+/* -*- mode: c; c-basic-offset: 8; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
+ * ocfs1_fs_compat.h
+ *
+ * OCFS1 volume header definitions.  OCFS2 creates valid but unmountable
+ * OCFS1 volume headers on the first two sectors of an OCFS2 volume.
+ * This allows an OCFS1 volume to see the partition and cleanly fail to
+ * mount it.
+ *
+ * Copyright (C) 2002, 2004 Oracle.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License, version 2,  as published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ *
+ * Authors: Kurt Hackel, Mark Fasheh, Sunil Mushran, Wim Coekaerts,
+ *	    Manish Singh, Neeraj Goyal, Suchit Kaura, Joel Becker
+ */
+
+#ifndef _OCFS1_FS_COMPAT_H
+#define _OCFS1_FS_COMPAT_H
+
+#define MAX_VOL_SIGNATURE_LEN_V1          128
+#define MAX_MOUNT_POINT_LEN_V1            128
+#define MAX_VOL_ID_LENGTH_V1               16
+#define MAX_VOL_LABEL_LEN_V1               64
+#define MAX_CLUSTER_NAME_LEN_V1            64
+
+#define OCFS1_MAJOR_VERSION              (2)
+#define OCFS1_MINOR_VERSION              (0)
+#define OCFS1_VOLUME_SIGNATURE		 "OracleCFS"
+
+/*
+ * OCFS1 superblock.  Lives at sector 0.
+ */
+typedef struct _ocfs1_vol_disk_hdr
+{
+/*00*/	__u32 minor_version;
+	__u32 major_version;
+/*08*/	__u8 signature[MAX_VOL_SIGNATURE_LEN_V1];
+/*88*/	__u8 mount_point[MAX_MOUNT_POINT_LEN_V1];
+/*108*/	__u64 serial_num;
+/*110*/	__u64 device_size;
+	__u64 start_off;
+/*120*/	__u64 bitmap_off;
+	__u64 publ_off;
+/*130*/	__u64 vote_off;
+	__u64 root_bitmap_off;
+/*140*/	__u64 data_start_off;
+	__u64 root_bitmap_size;
+/*150*/	__u64 root_off;
+	__u64 root_size;
+/*160*/	__u64 cluster_size;
+	__u64 num_nodes;
+/*170*/	__u64 num_clusters;
+	__u64 dir_node_size;
+/*180*/	__u64 file_node_size;
+	__u64 internal_off;
+/*190*/	__u64 node_cfg_off;
+	__u64 node_cfg_size;
+/*1A0*/	__u64 new_cfg_off;
+	__u32 prot_bits;
+	__s32 excl_mount;
+/*1B0*/
+} ocfs1_vol_disk_hdr;
+
+
+/*
+ * OCFS1 on-disk lock.  Embedded at the start of ocfs1_vol_label.
+ * The leading markers are hexadecimal byte offsets.
+ */
+typedef struct _ocfs1_disk_lock
+{
+/*00*/	__u32 curr_master;
+	__u8 file_lock;
+	__u8 compat_pad[3];  /* Not in original definition.  Used to
+				make the already existing alignment
+				explicit */
+	__u64 last_write_time;
+/*10*/	__u64 last_read_time;
+	__u32 writer_node_num;
+	__u32 reader_node_num;
+/*20*/	__u64 oin_node_map;
+	__u64 dlock_seq_num;
+/*30*/
+} ocfs1_disk_lock;
+
+/*
+ * OCFS1 volume label.  Lives at sector 1.
+ * The leading markers are hexadecimal byte offsets.
+ */
+typedef struct _ocfs1_vol_label
+{
+/*00*/	ocfs1_disk_lock disk_lock;
+/*30*/	__u8 label[MAX_VOL_LABEL_LEN_V1];
+/*70*/	__u16 label_len;
+/*72*/	__u8 vol_id[MAX_VOL_ID_LENGTH_V1];
+/*82*/	__u16 vol_id_len;
+/*84*/	__u8 cluster_name[MAX_CLUSTER_NAME_LEN_V1];
+/*C4*/	__u16 cluster_name_len;
+/*C6*/
+} ocfs1_vol_label;
+
+
+#endif /* _OCFS1_FS_COMPAT_H */
+

Added: trunk/src/ocfs2_fs.h
===================================================================
--- trunk/src/ocfs2_fs.h	2004-06-17 16:59:59 UTC (rev 1127)
+++ trunk/src/ocfs2_fs.h	2004-06-17 21:06:33 UTC (rev 1128)
@@ -0,0 +1,526 @@
+/* -*- mode: c; c-basic-offset: 8; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
+ * ocfs2_fs.h
+ *
+ * On-disk structures for OCFS2.
+ *
+ * Copyright (C) 2002, 2004 Oracle.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License, version 2,  as published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ *
+ * Authors: Kurt Hackel, Mark Fasheh, Sunil Mushran, Wim Coekaerts,
+ *	    Manish Singh, Neeraj Goyal, Suchit Kaura, Joel Becker
+ */
+
+#ifndef _OCFS2_FS_H
+#define _OCFS2_FS_H
+
+/* Version */
+#define OCFS2_MAJOR_REV_LEVEL		2
+#define OCFS2_MINOR_REV_LEVEL          	0
+
+/*
+ * An OCFS2 volume starts this way:
+ * Sector 0: Valid ocfs1_vol_disk_hdr that cleanly fails to mount v1.
+ * Sector 1: Valid ocfs1_vol_label that cleanly fails to mount v1.
+ * Block OCFS2_SUPER_BLOCK_BLKNO: OCFS2 superblock.
+ *
+ * All other structures are found from the superblock information.
+ *
+ * OCFS2_SUPER_BLOCK_BLKNO is in blocks, not sectors.  eg, for a
+ * blocksize of 2K, it is 4096 bytes into disk.
+ */
+#define OCFS2_SUPER_BLOCK_BLKNO		2
+
+/*
+ * As OCFS2 has a minimum clustersize of 4K, it has a maximum blocksize
+ * of 4K
+ */
+#define OCFS2_MAX_BLOCKSIZE		4096
+
+/* Object signatures */
+#define OCFS2_SUPER_BLOCK_SIGNATURE	"OCFSV2"
+#define OCFS2_FILE_ENTRY_SIGNATURE	"INODE01"
+#define OCFS2_EXTENT_BLOCK_SIGNATURE	"EXBLK01"
+
+/*
+ * Flags on ocfs2_dinode.i_flags
+ */
+#define OCFS2_VALID_FL		(0x00000001)	/* Inode is valid */
+#define OCFS2_UNUSED2_FL	(0x00000002)
+#define OCFS2_ORPHANED_FL	(0x00000004)	/* On the orphan list */
+#define OCFS2_UNUSED3_FL	(0x00000008)
+/* System inode flags */
+#define OCFS2_SYSTEM_FL		(0x00000010)	/* System inode */
+#define OCFS2_SUPER_BLOCK_FL	(0x00000020)	/* Super block */
+#define OCFS2_LOCAL_ALLOC_FL	(0x00000040)	/* Node local alloc bitmap */
+#define OCFS2_BITMAP_FL		(0x00000080)	/* Allocation bitmap */
+#define OCFS2_JOURNAL_FL	(0x00000100)	/* Node journal */
+#define OCFS2_DLM_FL		(0x00000200)	/* DLM area */
+	
+
+/* Limit of space in ocfs2_dir_entry */
+#define OCFS2_MAX_FILENAME_LENGTH       255
+
+/* Limit of node map bits in ocfs2_disk_lock */
+#define OCFS2_MAX_NODES			256
+
+#define MAX_VOL_ID_LENGTH               16
+#define MAX_VOL_LABEL_LEN               64
+#define MAX_CLUSTER_NAME_LEN            64
+
+
+#define ONE_MEGA_BYTE           	(1 * 1024 * 1024)   /* in bytes */
+#define OCFS2_DEFAULT_JOURNAL_SIZE	(8 * ONE_MEGA_BYTE)
+
+
+/* System file index */
+enum {
+	GLOBAL_BITMAP_SYSTEM_INODE = 0,
+	GLOBAL_INODE_ALLOC_SYSTEM_INODE,
+	GLOBAL_INODE_ALLOC_BITMAP_SYSTEM_INODE,
+	//AUTOCONFIG_SYSTEM_INODE,
+	//PUBLISH_SYSTEM_INODE,
+	//VOTE_SYSTEM_INODE,
+	DLM_SYSTEM_INODE,
+	ORPHAN_DIR_SYSTEM_INODE,
+#define OCFS2_LAST_GLOBAL_SYSTEM_INODE ORPHAN_DIR_SYSTEM_INODE
+	EXTENT_ALLOC_SYSTEM_INODE,
+	EXTENT_ALLOC_BITMAP_SYSTEM_INODE,
+	INODE_ALLOC_SYSTEM_INODE,
+	INODE_ALLOC_BITMAP_SYSTEM_INODE,
+	JOURNAL_SYSTEM_INODE,
+	LOCAL_ALLOC_SYSTEM_INODE,
+	NUM_SYSTEM_INODES
+};
+
+/*
+ * Names of the system inodes, indexed by the system file index enum.
+ * Entries after OCFS2_LAST_GLOBAL_SYSTEM_INODE are printf-style
+ * formats taking the node number (see
+ * ocfs2_sprintf_system_inode_name()).
+ *
+ * Uses the standard "[index] = value" designated initializer form;
+ * the "[index] value" form is an obsolete GNU extension.
+ */
+static char *ocfs2_system_inode_names[NUM_SYSTEM_INODES] = {
+	/* Global system inodes (single copy) */
+	[GLOBAL_BITMAP_SYSTEM_INODE]		= "global_bitmap",
+	[GLOBAL_INODE_ALLOC_SYSTEM_INODE]	= "global_inode_alloc",
+	[GLOBAL_INODE_ALLOC_BITMAP_SYSTEM_INODE] = "global_inode_alloc_bitmap",
+	//[AUTOCONFIG_SYSTEM_INODE]		"autoconfig",
+	//[PUBLISH_SYSTEM_INODE]			"publish",
+	//[VOTE_SYSTEM_INODE]			"vote",
+	[DLM_SYSTEM_INODE]			= "dlm",
+	[ORPHAN_DIR_SYSTEM_INODE]		= "orphan_dir",
+
+	/* Node-specific system inodes (one copy per node) */
+	[EXTENT_ALLOC_SYSTEM_INODE]		= "extent_alloc:%04d",
+	[EXTENT_ALLOC_BITMAP_SYSTEM_INODE]	= "extent_alloc_bitmap:%04d",
+	[INODE_ALLOC_SYSTEM_INODE]		= "inode_alloc:%04d",
+	[INODE_ALLOC_BITMAP_SYSTEM_INODE]	= "inode_alloc_bitmap:%04d",
+	[JOURNAL_SYSTEM_INODE]			= "journal:%04d",
+	[LOCAL_ALLOC_SYSTEM_INODE]		= "local_alloc:%04d"
+};
+
+
+/* Default size for the local alloc bitmap */
+#define OCFS2_LOCAL_BITMAP_DEFAULT_SIZE		256
+
+/*
+ * OCFS2 directory file types.  Only the low 3 bits are used.  The
+ * other bits are reserved for now.
+ */
+#define OCFS2_FT_UNKNOWN	0
+#define OCFS2_FT_REG_FILE	1
+#define OCFS2_FT_DIR		2
+#define OCFS2_FT_CHRDEV		3
+#define OCFS2_FT_BLKDEV		4
+#define OCFS2_FT_FIFO		5
+#define OCFS2_FT_SOCK		6
+#define OCFS2_FT_SYMLINK	7
+
+#define OCFS2_FT_MAX		8
+
+/*
+ * OCFS2_DIR_PAD defines the directory entries boundaries
+ *
+ * NOTE: It must be a multiple of 4
+ */
+#define OCFS2_DIR_PAD			4
+#define OCFS2_DIR_ROUND			(OCFS2_DIR_PAD - 1)
+#define OCFS2_DIR_REC_LEN(name_len)	(((name_len) + 12 + \
+                                          OCFS2_DIR_ROUND) & \
+					 ~OCFS2_DIR_ROUND)
+#define OCFS2_LINK_MAX		32000
+
+#define S_SHIFT			12
+/*
+ * Maps (mode & S_IFMT) >> S_SHIFT to an OCFS2_FT_* directory entry
+ * type.  Slots not listed stay 0 (OCFS2_FT_UNKNOWN).
+ *
+ * The table has (S_IFMT >> S_SHIFT) + 1 slots so that every possible
+ * index, including S_IFMT >> S_SHIFT itself, is in bounds.
+ */
+static unsigned char ocfs_type_by_mode[(S_IFMT >> S_SHIFT) + 1] = {
+	[S_IFREG >> S_SHIFT]	= OCFS2_FT_REG_FILE,
+	[S_IFDIR >> S_SHIFT]	= OCFS2_FT_DIR,
+	[S_IFCHR >> S_SHIFT]	= OCFS2_FT_CHRDEV,
+	[S_IFBLK >> S_SHIFT]	= OCFS2_FT_BLKDEV,
+	[S_IFIFO >> S_SHIFT]	= OCFS2_FT_FIFO,
+	[S_IFSOCK >> S_SHIFT]	= OCFS2_FT_SOCK,
+	[S_IFLNK >> S_SHIFT]	= OCFS2_FT_SYMLINK,
+};
+
+
+/*
+ * Convenience casts
+ */
+#define OCFS2_RAW_SB(dinode)	(&((dinode)->id2.i_super))
+#define DISK_LOCK(dinode)	(&((dinode)->i_disk_lock))
+#define LOCAL_ALLOC(dinode)	(&((dinode)->id2.i_lab))
+
+/* TODO: change these?  */
+#define OCFS2_NODE_CONFIG_HDR_SIGN	"NODECFG"
+#define OCFS2_NODE_CONFIG_SIGN_LEN	8
+#define OCFS2_NODE_CONFIG_VER		2
+#define OCFS2_NODE_MIN_SUPPORTED_VER	2
+
+#define MAX_NODE_NAME_LENGTH	32
+
+#define OCFS2_GUID_HOSTID_LEN	20
+#define OCFS2_GUID_MACID_LEN	12
+#define OCFS2_GUID_LEN		(OCFS2_GUID_HOSTID_LEN + OCFS2_GUID_MACID_LEN)
+
+
+
+/*
+ * On disk extent record for OCFS2
+ * It describes a range of clusters on disk.
+ */
+typedef struct _ocfs2_extent_rec {
+/*00*/	__u32 e_cpos;		/* Offset into the file, in clusters */
+	__u32 e_clusters;	/* Clusters covered by this extent */
+	__u64 e_blkno;		/* Physical disk offset, in blocks */
+/*10*/
+} ocfs2_extent_rec;	
+
+/*
+ * On disk extent list for OCFS2 (node in the tree).  Note that this
+ * is contained inside ocfs2_dinode or ocfs2_extent_block, so the
+ * offsets are relative to ocfs2_dinode.id2.i_list or
+ * ocfs2_extent_block.h_list, respectively.
+ */
+typedef struct _ocfs2_extent_list {
+/*00*/	__s16 l_tree_depth;		/* Extent tree depth from this
+					   point.  -1 means data extents
+					   hang directly off this
+					   header (a leaf) */
+	__u16 l_count;			/* Number of extent records */
+	__u16 l_next_free_rec;		/* Next unused extent slot */
+	__u16 l_reserved1;
+	__u64 l_reserved2;		/* Pad to
+					   sizeof(ocfs2_extent_rec) */
+/*10*/	ocfs2_extent_rec l_recs[0];	/* Extent records */
+} ocfs2_extent_list;
+
+/*
+ * On disk extent block (indirect block) for OCFS2
+ */
+typedef struct _ocfs2_extent_block
+{
+/*00*/	__u8 h_signature[8];		/* Signature for verification */
+	__u64 h_suballoc_blkno;		/* Node suballocator offset,
+					   in blocks */
+/*10*/	__u16 h_suballoc_node;		/* Node suballocator this
+					   extent_header belongs to */
+	__u16 h_reserved1;
+	__u32 h_reserved2;
+	__u64 h_blkno;			/* Offset on disk, in blocks */
+/*20*/	__u64 h_parent_blk;		/* Offset on disk, in blocks,
+					   of this block's parent in the
+					   tree */
+	__u64 h_next_leaf_blk;		/* Offset on disk, in blocks,
+					   of next leaf header pointing
+					   to data */
+/*30*/	ocfs2_extent_list h_list;	/* Extent record list */
+/* Actual on-disk size is one block */
+} ocfs2_extent_block;
+
+/*
+ * On disk lock structure for OCFS2
+ */
+typedef struct _ocfs2_disk_lock
+{
+/*00*/	__u32 dl_master;	/* Node number of current master */
+	__u8 dl_level;		/* Lock level */
+	__u8 dl_reserved1[3];	/* Pad to u64 */
+	__u64 dl_seq_num;	/* Lock transaction seqnum */
+/*10*/	__u32 dl_node_map[8];	/* Bitmap of interested nodes,
+				   was __u32 */ 
+/*30*/
+} ocfs2_disk_lock;
+
+/*
+ * On disk superblock for OCFS2
+ * Note that it is contained inside an ocfs2_dinode, so all offsets
+ * are relative to the start of ocfs2_dinode.id2.
+ */
+typedef struct _ocfs2_super_block {
+/*00*/	__u16 s_major_rev_level;
+	__u16 s_minor_rev_level;
+	__u16 s_mnt_count;
+	__s16 s_max_mnt_count;
+	__u16 s_state;			/* File system state */
+	__u16 s_errors;			/* Behaviour when detecting errors */
+	__u32 s_checkinterval;		/* Max time between checks */
+/*10*/	__u64 s_lastcheck;		/* Time of last check */
+	__u32 s_creator_os;		/* OS */
+	__u32 s_feature_compat;		/* Compatible feature set */
+/*20*/	__u32 s_feature_incompat;	/* Incompatible feature set */
+	__u32 s_feature_ro_compat;	/* Readonly-compatible feature set */
+	__u64 s_root_blkno;		/* Offset, in blocks, of root directory
+					   dinode */
+/*30*/	__u64 s_system_dir_blkno;	/* Offset, in blocks, of system
+					   directory dinode */
+	__u32 s_blocksize_bits;		/* Blocksize for this fs */
+	__u32 s_clustersize_bits;	/* Clustersize for this fs */
+/*40*/	__u32 s_max_nodes;		/* Max nodes in this cluster before
+					   tunefs required */
+	__u32 s_reserved1;
+	__u64 s_reserved2;
+/*50*/	__u8  s_label[64];		/* Label for mounting, etc. */
+/*90*/	__u8  s_uuid[16];		/* Was vol_id */
+/*A0*/
+} ocfs2_super_block;
+
+/*
+ * Local allocation bitmap for OCFS2 nodes
+ * Note that it exists inside an ocfs2_dinode, so all offsets are
+ * relative to the start of ocfs2_dinode.id2.
+ */
+typedef struct _ocfs2_local_alloc
+{
+/*00*/	__u32 la_bm_off;	/* Starting bit offset in main bitmap */
+	/* Do we want to use id1.bitmap1? */
+	__u16 la_bm_bits;	/* Number of bits from main bitmap */
+	__u16 la_bits_set;	/* Number of set bits */
+	__u16 la_size;		/* Size of included bitmap, in bytes */
+	__u16 la_reserved1;
+	__u32 la_reserved2;
+/*10*/	__u8 la_bitmap[0];
+} ocfs2_local_alloc;
+
+/*
+ * On disk inode for OCFS2
+ * The leading markers are hexadecimal byte offsets.
+ */
+typedef struct _ocfs2_dinode {
+/*00*/	__u8 i_signature[8];		/* Signature for validation */
+	__u32 i_generation;		/* Generation number */
+	__u16 i_reserved1;
+	__u16 i_suballoc_node;		/* Node suballocator this inode
+					   belongs to */
+/*10*/	__u64 i_suballoc_blkno;		/* Node suballocator offset,
+					   in blocks */
+/*18*/	ocfs2_disk_lock i_disk_lock;	/* Lock structure */
+/*48*/	__u32 i_uid;			/* Owner UID */
+	__u32 i_gid;			/* Owning GID */
+/*50*/	__u64 i_size;			/* Size in bytes */
+	__u16 i_mode;			/* File mode */
+	__u16 i_links_count;		/* Links count */
+	__u32 i_flags;			/* File flags */
+/*60*/	__u64 i_atime;			/* Access time */
+	__u64 i_ctime;			/* Creation time */
+/*70*/	__u64 i_mtime;			/* Modification time */
+	__u64 i_dtime;			/* Deletion time */
+/*80*/	__u64 i_blkno;			/* Offset on disk, in blocks */
+	__u32 i_clusters;		/* Cluster count */
+	__u32 i_reserved2;
+/*90*/	__u64 i_last_eb_blk;		/* Pointer to last extent
+					   block */
+	__u64 i_reserved3;
+/*A0*/	__u64 i_reserved4;
+	__u64 i_reserved5;
+/*B0*/	__u64 i_reserved6;
+	union {
+		__u64 i_pad1;		/* Generic way to refer to this 64bit
+					   union */
+		struct {
+			__u64 i_rdev;	/* Device number */
+		} dev1;
+		struct {		/* Info for bitmap system inodes */
+			__u32 i_used;	/* Bits (ie, clusters) used  */
+			__u32 i_total;	/* Total bits (clusters) available */
+		} bitmap1;
+	} id1;				/* Inode type dependent 1 */
+/*C0*/	union {
+		ocfs2_super_block i_super;
+                ocfs2_local_alloc i_lab;
+		ocfs2_extent_list i_list;
+	} id2;
+/* Actual on-disk size is one block */
+} ocfs2_dinode;
+
+/*
+ * On-disk directory entry structure for OCFS2
+ */
+struct ocfs2_dir_entry {
+/*00*/	__u64   inode;                  /* Inode number */
+	__u16   rec_len;                /* Directory entry length */
+	__u8    name_len;               /* Name length */
+	__u8    file_type;
+/*0C*/	char    name[OCFS2_MAX_FILENAME_LENGTH];    /* File name */
+/* Actual on-disk length specified by rec_len */
+};
+
+/*
+ * On-disk IPC configuration for an OCFS2 node.
+ */
+typedef struct _ocfs_ipc_config_info
+{
+/*00*/	__u16 ip_version;		/* IP version in NBO */
+	__u16 ip_port;			/* IP port in NBO */
+	__u32 ip_reserved1;
+	__u64 ip_reserved2;
+/*10*/	union {
+		__u32 ip_addr4;		/* IPv4 address in NBO */
+		__u32 ip_addr6[4];	/* IPv6 address in NBO */
+	} addr_u;
+/*20*/
+} ocfs_ipc_config_info;
+
+/*
+ * On-disk structure representing a Global Unique ID for an OCFS2 node.
+ *
+ * The GUID has two parts.  The host_id is a generally-randomly-unique
+ * hex-as-ascii string of 20 characters (10 bytes).  The mac_id field
+ * is, unsurprisingly, the MAC address of the network card that the
+ * IPC mechanism will be using (the address in
+ * ocfs_ipc_config_info.addr_u).  This should (ha-ha) provide a unique
+ * identifier for a node in the OCFS2 cluster.  It has the added
+ * benefit of detecting when a node has changed network cards
+ * (host_id is the same, mac_id has changed) or when an identical
+ * MAC address is on a different node (the converse).
+ */
+typedef union _ocfs_guid
+{
+/*00*/	struct
+	{
+		char host_id[OCFS2_GUID_HOSTID_LEN];
+		char mac_id[OCFS2_GUID_MACID_LEN];
+	} id;
+	__u8 guid[OCFS2_GUID_LEN];
+/*20*/
+} ocfs_guid;
+
+/*
+ * On-disk configuration information for an OCFS2 node.  A node
+ * populates its own info for other nodes to read and use.
+ */
+typedef struct _ocfs_node_config_info
+{
+/*00*/	ocfs2_disk_lock disk_lock;		/* Lock on the info */
+/*30*/	ocfs_guid guid;				/* GUID */
+/*50*/	ocfs_ipc_config_info ipc_config;	/* IPC info */
+/*70*/	__u8 node_name[MAX_NODE_NAME_LENGTH+1]; /* Name */
+/*91*/	__u8 name_pad[7];			/* Pad to align (UGH) */
+/*98*/
+} ocfs_node_config_info;
+
+/*
+ * On-disk node configuration header for OCFS2: a disk lock followed
+ * by a signature, version, node counts and a config sequence number.
+ */
+typedef struct _ocfs_node_config_hdr
+{
+/*00*/	ocfs2_disk_lock disk_lock;
+/*30*/	__u8 signature[OCFS2_NODE_CONFIG_SIGN_LEN];
+	__u32 version;
+	__u32 num_nodes;
+/*40*/	__u32 last_node;
+	__u32 onch_pad;
+	__u64 cfg_seq_num;
+/*50*/	
+} ocfs_node_config_hdr;
+
+
+#ifdef __KERNEL__
+/*
+ * Number of extent records that fit in an inode block: the space left
+ * after the start of id2.i_list.l_recs, divided by the record size.
+ */
+static inline int ocfs2_extent_recs_per_inode(struct super_block *sb)
+{
+	int avail = sb->s_blocksize;
+
+	avail -= offsetof(struct _ocfs2_dinode, id2.i_list.l_recs);
+	return avail / sizeof(struct _ocfs2_extent_rec);
+}
+
+/*
+ * Number of extent records that fit in an extent block: the space left
+ * after the start of h_list.l_recs, divided by the record size.
+ */
+static inline int ocfs2_extent_recs_per_eb(struct super_block *sb)
+{
+	int avail = sb->s_blocksize;
+
+	avail -= offsetof(struct _ocfs2_extent_block, h_list.l_recs);
+	return avail / sizeof(struct _ocfs2_extent_rec);
+}
+
+/*
+ * Size of the local alloc bitmap.  Currently a constant; 'sb' is
+ * accepted so the size can later depend on sb->s_blocksize.
+ */
+static inline int ocfs2_local_alloc_size(struct super_block *sb)
+{
+	return OCFS2_LOCAL_BITMAP_DEFAULT_SIZE;
+}
+#else
+/* Userspace variants: same arithmetic, taking the block size directly. */
+static inline int ocfs2_extent_recs_per_inode(int blocksize)
+{
+	int avail = blocksize;
+
+	avail -= offsetof(struct _ocfs2_dinode, id2.i_list.l_recs);
+	return avail / sizeof(struct _ocfs2_extent_rec);
+}
+
+static inline int ocfs2_extent_recs_per_eb(int blocksize)
+{
+	int avail = blocksize;
+
+	avail -= offsetof(struct _ocfs2_extent_block, h_list.l_recs);
+	return avail / sizeof(struct _ocfs2_extent_rec);
+}
+
+static inline int ocfs2_local_alloc_size(int blocksize)
+{
+	return OCFS2_LOCAL_BITMAP_DEFAULT_SIZE;
+}
+#endif  /* __KERNEL__ */
+
+
+/*
+ * Non-zero when 'type' lies in the global (single copy) part of the
+ * system inode list, i.e. 0 .. OCFS2_LAST_GLOBAL_SYSTEM_INODE.
+ */
+static inline int ocfs2_system_inode_is_global(int type)
+{
+	if (type < 0)
+		return 0;
+
+	return type <= OCFS2_LAST_GLOBAL_SYSTEM_INODE;
+}
+
+/*
+ * Write the name of system inode 'type' into buf (at most len bytes,
+ * as for snprintf) and return snprintf's result.
+ *
+ * Global system inodes can only have one copy, so their name is used
+ * verbatim.  Everything after OCFS2_LAST_GLOBAL_SYSTEM_INODE in the
+ * system inode list has a copy per node, and its table entry is a
+ * format that takes 'node'.
+ *
+ * A 'type' outside the table returns -1 and leaves an empty string in
+ * buf (when len > 0) instead of indexing past the table.
+ */
+static inline int ocfs2_sprintf_system_inode_name(char *buf, int len,
+						  int type, int node)
+{
+	if (type < 0 || type >= NUM_SYSTEM_INODES) {
+		if (len > 0)
+			buf[0] = '\0';
+		return -1;
+	}
+
+	/* plain names go through "%s" so they are never parsed as formats */
+	if (type <= OCFS2_LAST_GLOBAL_SYSTEM_INODE)
+		return snprintf(buf, len, "%s",
+				ocfs2_system_inode_names[type]);
+
+	/* per-node entries are fixed "%04d" formats from the table */
+	return snprintf(buf, len, ocfs2_system_inode_names[type], node);
+}
+
+#endif  /* _OCFS2_FS_H */

Added: trunk/src/ocfs_buffer_head.h
===================================================================
--- trunk/src/ocfs_buffer_head.h	2004-06-17 16:59:59 UTC (rev 1127)
+++ trunk/src/ocfs_buffer_head.h	2004-06-17 21:06:33 UTC (rev 1128)
@@ -0,0 +1,237 @@
+/* -*- mode: c; c-basic-offset: 8; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
+ * ocfs_buffer_head.h
+ *
+ * Buffer cache handling functions defined
+ *
+ * Copyright (C) 2002, 2004 Oracle.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ * 
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ *
+ * Authors: Kurt Hackel, Mark Fasheh, Sunil Mushran, Wim Coekaerts,
+ *	    Manish Singh
+ */
+
+#ifndef OCFSIO_H
+#define OCFSIO_H
+
+#include <linux/compiler.h>
+
+/* UGLY place to put it, but it is for the best */
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)
+# include <linux/locks.h>
+#else
+# include <linux/buffer_head.h>
+#endif
+
+/* Yosh made me do it. */
+static inline int ocfs_write_bh  (ocfs_super          *osb, 
+				  struct buffer_head  *bh, 
+				  int                  flags, 
+				  struct inode        *inode);
+static inline int ocfs_read_bh   (ocfs_super          *osb, 
+				  __u64                off, 
+				  struct buffer_head **bh, 
+				  int                  flags, 
+				  struct inode        *inode);
+
+int ocfs_write_bhs (ocfs_super          *osb,
+		    struct buffer_head  *bh[], 
+		    int                  nr, 
+		    int                  flags, 
+		    struct inode        *inode);
+int ocfs_read_bhs  (ocfs_super          *osb, 
+		    __u64                off,
+		    __u64                len, 
+		    struct buffer_head  *bhs[], 
+		    int                  flags, 
+		    struct inode         *inode);
+
+
+#define OCFS_BH_CACHED            1
+#define OCFS_BH_COND_CACHED       2
+#define OCFS_BH_CONCURRENT_WRITE  4     /* This should only be used by ocfs_worker */
+#define OCFS_BH_IGNORE_JBD        8     /* This should only be used by ocfs_checkpoint_handle! */
+#define OCFS_BH_READAHEAD         16	/* use this to pass READA down to submit_bh */
+
+
+#define BH_Modified 18
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)
+/*
+ * Copied right out of the 2.6.2 kernel's buffer_head.h:
+ * macro tricks to expand the set_buffer_foo(), clear_buffer_foo()
+ * and buffer_foo() functions.
+ */
+#define BUFFER_FNS(bit, name)						\
+static inline void set_buffer_##name(struct buffer_head *bh)		\
+{									\
+	set_bit(BH_##bit, &(bh)->b_state);				\
+}									\
+static inline void clear_buffer_##name(struct buffer_head *bh)		\
+{									\
+	clear_bit(BH_##bit, &(bh)->b_state);				\
+}									\
+static inline int buffer_##name(struct buffer_head *bh)			\
+{									\
+	return test_bit(BH_##bit, &(bh)->b_state);			\
+}
+
+#undef buffer_uptodate
+#undef buffer_dirty
+BUFFER_FNS(Uptodate, uptodate)
+BUFFER_FNS(Dirty, dirty)
+
+#endif /* 2.4.x kernel */
+
+#define OCFS_BH_GET_DATA(bh)  ((void *) ((bh)->b_data))
+#define OCFS_BH_GET_DATA_READ(bh) OCFS_BH_GET_DATA(bh)
+#define OCFS_BH_GET_DATA_WRITE(bh) OCFS_BH_GET_DATA(bh)
+#define OCFS_BH_GET_DATA_WRITE_TRYLOCK(bh) OCFS_BH_GET_DATA(bh)
+#define OCFS_BH_PUT_DATA(bh) do { } while(0)
+
+#define STATE_BH_BITS	8
+#define USED_BH_BITS	23 /* Number of BH bits used up through JBD */
+
+#define STATE_BIT_MAX           (1 << STATE_BH_BITS)
+#define STATE_BIT_MAX_MASK      ((1 << STATE_BH_BITS)-1)
+#define STATE_BIT_MASK		((~0UL) << USED_BH_BITS)
+
+
+/*
+ * Clear every b_state bit from USED_BH_BITS upward, i.e. the bits
+ * that hold the sequence number.  This is a plain read-modify-write,
+ * not an atomic bit operation.
+ */
+static inline void CLEAR_BH_SEQNUM(struct buffer_head *bh)
+{
+	bh->b_state &= ~STATE_BIT_MASK;
+}
+
+/*
+ * Stamp 'bh' with the low STATE_BH_BITS bits of the inode's clean
+ * sequence counter, stored in b_state starting at bit USED_BH_BITS.
+ * Any previous stamp is cleared first.
+ */
+static inline void SET_BH_SEQNUM(struct inode *inode, struct buffer_head *bh)
+{
+	unsigned int seq = atomic_read(GET_INODE_CLEAN_SEQ(inode));
+
+	seq &= STATE_BIT_MAX_MASK;
+
+	CLEAR_BH_SEQNUM(bh);
+	bh->b_state |= seq << USED_BH_BITS;
+}
+
+/*
+ * Return non-zero when the sequence number stored in bh->b_state
+ * equals the inode's current clean sequence counter.
+ *
+ * NOTE(review): SET_BH_SEQNUM() stores only the low STATE_BH_BITS
+ * bits, while the comparison here is against the unmasked counter.
+ * Presumably the counter is kept below STATE_BIT_MAX elsewhere --
+ * confirm.
+ */
+static inline int TEST_BH_SEQNUM(struct inode *inode, struct buffer_head *bh)
+{
+	unsigned int stored = (bh->b_state & STATE_BIT_MASK) >> USED_BH_BITS;
+
+	return stored == atomic_read(GET_INODE_CLEAN_SEQ(inode));
+}
+
+/*
+ * Guard for writes to block 0.  For any other block this returns 0
+ * at once.  For block 0 the buffer must start with
+ * OCFS_VOLUME_SIGNATURE in the header's signature field; otherwise a
+ * warning is printed and -EIO is returned.
+ */
+static inline int check_block_zero_write(struct buffer_head *bh)
+{
+	ocfs_vol_disk_hdr *hdr;
+	int rc = 0;
+
+	if (likely(bh->b_blocknr != 0))
+		return 0;
+
+	hdr = OCFS_BH_GET_DATA_READ(bh);
+	if (hdr == NULL) {
+		printk ("ocfs2: failed to map bh page!!!\n");
+		return -EIO;
+	}
+
+	if (memcmp(hdr->signature, OCFS_VOLUME_SIGNATURE,
+		   strlen(OCFS_VOLUME_SIGNATURE)) != 0) {
+		printk("ocfs2: WARNING! attempting to write non "
+		       "volume header to block 0\n");
+		rc = -EIO;
+	}
+
+	OCFS_BH_PUT_DATA(bh);
+	return rc;
+}
+
+/*
+ * Debug helper: if 'bh' is non-NULL and holds a valid file entry,
+ * print its i_size and i_blkno together with the calling function and
+ * line.  Uses the ocfs2_dinode field names (the old file_size and
+ * this_sector members no longer exist).
+ */
+#define OCFS_PRINT_FE_SIZE(bh)						      \
+do {									      \
+	if (bh) {							      \
+		ocfs2_dinode *__fe = (ocfs2_dinode *)(bh)->b_data;	      \
+		if (IS_VALID_FILE_ENTRY(__fe)) {			      \
+			printk("ocfs2: %s:%d fe_check: i_size = (%llu), "     \
+				"i_blkno = (%llu)\n",			      \
+				__FUNCTION__, __LINE__,			      \
+				(unsigned long long)__fe->i_size,	      \
+				(unsigned long long)__fe->i_blkno);	      \
+		}							      \
+	}								      \
+} while(0)
+
+/*
+ * Return 1 when each of the first 512 bytes of bh->b_data is 'P',
+ * 0 as soon as any other byte is seen.
+ */
+static inline int ocfs_poison(struct buffer_head *bh)
+{
+	const char *p = bh->b_data;
+	const char *end = p + 512;
+
+	while (p < end) {
+		if (*p++ != 'P')
+			return 0;
+	}
+
+	return 1;
+}
+
+/*
+ * Debug helper: print bh's block number and the first 512 bytes of
+ * its data in hex, 16 bytes per line.
+ *
+ * The loop counter is local to the macro, so callers no longer need
+ * an 'i' in scope.  Bytes are printed as unsigned char so that values
+ * above 0x7f are not sign-extended, and the block number is cast so
+ * the format matches whatever width b_blocknr has.
+ */
+#define OCFS_DO_HEX_DUMP(bh)						      \
+do {									      \
+	int __i;							      \
+	printk("bh->b_blocknr = %llu, bh->b_data:\n",			      \
+	       (unsigned long long)(bh)->b_blocknr);			      \
+	for (__i = 0; __i < 512; __i++) {				      \
+		printk("%03x ", (unsigned char)(bh)->b_data[__i]);	      \
+		if (((__i + 1) % 16) == 0)				      \
+			printk("\n");					      \
+	}								      \
+	printk("\n");							      \
+} while (0)
+
+/*
+ * Write a single buffer head: a one-element call to ocfs_write_bhs()
+ * with the same flags and inode.  Returns that call's status.
+ */
+static inline int ocfs_write_bh (ocfs_super * osb, struct buffer_head *bh, 
+				 int flags, struct inode *inode)
+{
+	int status;
+	
+	IO_FUNC_TIMING_DECL
+
+	status = ocfs_write_bhs (osb, &bh, 1, flags, inode);
+
+	IO_FUNC_TIMING_PRINT("ocfs_write_bh", status);
+
+	return status;
+}
+
+/*
+ * Read one block's worth (osb->sb->s_blocksize bytes) at offset 'off'
+ * through ocfs_read_bhs(), storing the buffer head via 'bh'.
+ * Returns -EINVAL if 'bh' is NULL, otherwise the status of
+ * ocfs_read_bhs().
+ */
+static inline int ocfs_read_bh (ocfs_super * osb, __u64 off, 
+				struct buffer_head **bh, int flags, 
+				struct inode *inode)
+{
+	int status = 0;
+
+	IO_FUNC_TIMING_DECL
+
+	if (bh == NULL) {
+		printk("ocfs2: bh == NULL\n");
+		status = -EINVAL;
+		goto bail;
+	}
+	
+	status = ocfs_read_bhs(osb, off, osb->sb->s_blocksize, bh,
+			       flags, inode);
+
+bail:
+	/* reached on both the success and the error path */
+	IO_FUNC_TIMING_PRINT("ocfs_read_bh", status);
+
+	return status;
+}
+
+#endif /* OCFSIO_H */

Added: trunk/src/ocfs_compat.h
===================================================================
--- trunk/src/ocfs_compat.h	2004-06-17 16:59:59 UTC (rev 1127)
+++ trunk/src/ocfs_compat.h	2004-06-17 21:06:33 UTC (rev 1128)
@@ -0,0 +1,116 @@
+/* -*- mode: c; c-basic-offset: 8; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
+ * ocfs_compat.h
+ *
+ * Compatibility stuff for 2.4
+ *
+ * Copyright (C) 2004 Oracle.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License as published by the Free Software Foundation, version
+ * 2 of the License.
+ * 
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ *
+ * Authors: Kurt Hackel, Mark Fasheh, Sunil Mushran, Wim Coekaerts,
+ *	    Manish Singh, Neeraj Goyal, Suchit Kaura
+ */
+
+#ifndef OCFS_COMPAT_H
+#define OCFS_COMPAT_H
+
+#include <linux/version.h>
+#include <linux/types.h>
+#include <linux/kdev_t.h>
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)
+
+#ifdef __ia64__
+extern inline void prefetch(const void *x);
+extern inline void prefetchw(const void *x);
+#else
+static inline void prefetch(const void *x);
+static inline void prefetchw(const void *x);
+#endif
+extern inline int generic_fls(int x);
+extern inline int get_bitmask_order(unsigned int count);
+/* XXX Hack to avoid warning */
+struct mem_dqinfo;
+extern inline void mark_info_dirty(struct mem_dqinfo *info);
+
+#include <linux/sched.h>
+
+#define OCFS_GENERIC_SB_MEMBER(sb)  ((sb)->u.generic_sbp)
+
+#define ocfs_get_seconds(t) (t)
+
+#define wait_on_page_locked(p) wait_on_page(p)
+#define PageUptodate(p)        Page_Uptodate(p)
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,4,18)
+#define unlock_page(p)         UnlockPage(p)
+
+/*
+ * Compat definition of sb_getblk(), compiled only for kernels older
+ * than 2.4.18 (see the enclosing #if): forwards to getblk() using the
+ * superblock's s_dev and s_blocksize.
+ */
+static inline struct buffer_head * sb_getblk(struct super_block *sb, int block)
+{
+	return getblk(sb->s_dev, block, sb->s_blocksize);
+}
+#endif
+
+#define io_schedule		schedule
+#define io_schedule_timeout	schedule_timeout
+/* 2.4 compat: maps to get_hardsect_size() on the bdev's bd_dev.  No
+ * trailing semicolon, so the macro can be used inside an expression. */
+#define bdev_hardsect_size(b)	get_hardsect_size((b)->bd_dev)
+
+/*
+ * dequeue_signal_lock()
+ *
+ * 2.4 compat helper: calls dequeue_signal(blocked, info) while holding
+ * the signal lock of 'task' with interrupts disabled.  With HAVE_NPTL
+ * the lock is task->sighand->siglock; otherwise it is
+ * task->sigmask_lock.
+ *
+ * NOTE(review): 'task' is only used to pick the lock and is not passed
+ * to dequeue_signal(); presumably that call acts on the current task,
+ * so callers are expected to pass current -- confirm against callers.
+ */
+#ifdef HAVE_NPTL
+static inline void dequeue_signal_lock(struct task_struct *task,
+				       sigset_t *blocked, siginfo_t *info)
+{
+	spin_lock_irq(&task->sighand->siglock);
+	dequeue_signal(blocked, info);
+	spin_unlock_irq(&task->sighand->siglock);
+}
+#else
+static inline void dequeue_signal_lock(struct task_struct *task,
+				       sigset_t *blocked, siginfo_t *info)
+{
+	spin_lock_irq(&task->sigmask_lock);
+	dequeue_signal(blocked, info);
+	spin_unlock_irq(&task->sigmask_lock);
+}
+#endif
+
+/*
+ * 2.4 compat decode of a 64-bit encoded device number.  Only the low
+ * 16 bits of val are looked at: bits 15..8 become the major number and
+ * bits 7..0 the minor number handed to MKDEV().
+ */
+static inline dev_t huge_decode_dev(u64 val)
+{
+	u16 low16 = (u16)(val & 0xFFFFULL);
+	int dev_major = (low16 >> 8) & 0xFF;
+	int dev_minor = low16 & 0xFF;
+
+	return MKDEV(dev_major, dev_minor);
+}
+
+/*
+ * 2.4 compat encode of a dev_t into a 64-bit value: MAJOR(dev) shifted
+ * left by eight bits, OR-ed with MINOR(dev).
+ */
+static inline u64 huge_encode_dev(dev_t dev)
+{
+	u64 encoded = (MAJOR(dev) << 8) | MINOR(dev);
+
+	return encoded;
+}
+
+#else /* LINUX_VERSION_CODE < 2.6 */
+
+#define OCFS_GENERIC_SB_MEMBER(sb)  ((sb)->s_fs_info)
+
+#define ocfs_get_seconds(t) ((t).tv_sec)
+
+#endif  /* LINUX_VERSION_CODE < 2.6 */
+
+#ifndef BITS_TO_LONGS
+#define BITS_TO_LONGS(bits) \
+		(((bits)+BITS_PER_LONG-1)/BITS_PER_LONG)
+#endif
+
+#endif  /* OCFS_COMPAT_H */
+

Added: trunk/src/ocfs_journal.h
===================================================================
--- trunk/src/ocfs_journal.h	2004-06-17 16:59:59 UTC (rev 1127)
+++ trunk/src/ocfs_journal.h	2004-06-17 21:06:33 UTC (rev 1128)
@@ -0,0 +1,391 @@
+/* -*- mode: c; c-basic-offset: 8; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
+ * ocfs_journal.h
+ *
+ * Defines journalling api and structures.
+ *
+ * Copyright (C) 2003, 2004 Oracle.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ * 
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ *
+ * Authors: Kurt Hackel, Mark Fasheh, Sunil Mushran, Wim Coekaerts,
+ *	    Manish Singh
+ */
+
+#ifndef  _OCFSJOURNAL_H_
+#define  _OCFSJOURNAL_H_
+
+#include <linux/fs.h>
+#include <linux/jbd.h>
+
+#define OCFS_JOURNAL_CURRENT_VERSION	1
+#define OCFS_DEFAULT_COMMIT_INTERVAL 	(HZ * 5)
+#define OCFS_JOURNAL_DEFAULT_SIZE	(8 * ONE_MEGA_BYTE)
+
+#undef 	OCFS_JOURNAL_USE_CB
+
+enum ocfs_journal_state {
+	OCFS_JOURNAL_FREE = 0,
+	OCFS_JOURNAL_LOADED,
+	OCFS_JOURNAL_IN_SHUTDOWN,
+};
+
+struct _ocfs_super;
+struct _ocfs2_dinode;
+struct _ocfs_journal_handle;
+
+typedef struct _ocfs_journal ocfs_journal;
+struct _ocfs_journal {
+	enum ocfs_journal_state   state;      /* Journals current state   */
+
+	journal_t                 *k_journal; /* The kernels journal type */
+	struct inode              *k_inode;   /* Kernel inode pointing to
+					       * this journal             */
+	__u8                      version;    /* Journal version          */
+	__u64                     lock_id;    /* Lock id for journal file */ 
+	struct _ocfs_super        *osb;       /* pointer to the super
+					       * block for the node
+					       * we're currently
+					       * running on -- not
+					       * necessarily the super
+					       * block from the node
+					       * which we usually run
+					       * from (recovery,
+					       * etc)                     */
+	__u32                     node_num;   /* Whose journal are we?    */
+	struct buffer_head        *lockbh;    /* Journal disk lock, used 
+						 to access file entry	  */
+	atomic_t                  num_trans;  /* Number of transactions 
+					       * currently in the system. */
+	/* locking order: trans_lock -> commit_sem -> journal.curr.list_lock */
+	struct semaphore          commit_sem; /* protects *everything*
+					       * in the commited list
+					       * and also protects
+					       * 'curr' from
+					       * removal/creation. */
+	struct list_head          commited;   /* doubly linked list of all
+					       * commited handles awaiting
+					       * checkpointing.           */
+	struct rw_semaphore       trans_barrier;
+};
+
+typedef struct _ocfs_journal_lock ocfs_journal_lock;
+struct _ocfs_journal_lock {
+	__u32 type;
+	__u32 flags;
+	struct buffer_head *bh;
+	struct inode *inode;
+	int req_io_sem;
+	struct list_head lock_list;
+};
+
+typedef struct _ocfs_journal_copyout ocfs_journal_copyout;
+struct _ocfs_journal_copyout {
+	__u8                forget;  /* should we journal_forget this
+				      * buffer instead? */
+	unsigned long       blocknr; /* what block is this for? */
+	char                *data;   /* the actual data */
+};
+
+struct _ocfs_journal_handle {
+	handle_t            *k_handle; /* kernel handle.                */
+	ocfs_journal        *journal;
+	struct _ocfs_super  *osb;      /* what super block we belong to */
+	__u32               flags;     /* see flags below.              */
+	struct list_head    h_list;    /* points to whatever list we're 
+					* on.                           */
+	int                 max_buffs; /* Buffs reserved by this handle */
+
+	/* We know how many buffers (max) we'll have for this
+	 * transaction so we can just allocate an array of pointers at
+	 * the same time as the creation of this handle. */
+	int                 num_buffs;
+	struct buffer_head  **buffs;
+
+	/* The following three fields are for ocfs_handle_add_lock */
+	spinlock_t          list_lock; /* Used to protect the 'locks'
+					* list. Only used if the
+					* handle is the same as
+					* journal->curr. otherwise, we
+					* should be in the commited
+					* list in which case we're
+					* protected by commit_sem */
+	int                 num_locks; 
+	struct list_head    locks;     /* A bunch of locks to 
+					* release on commit/abort. This 
+					* should be a list_head */
+
+	int                  num_co;
+	ocfs_journal_copyout *co_buffs; /* Copy-out buffers. On 1st
+					 * journal_access of a buffer
+					 * we make a copy of it into
+					 * one of these. That way if we
+					 * abort we can place the
+					 * original copy back into the
+					 * buffer. */
+
+	struct _ocfs_bitmap_free_head *commit_bits; /* bits to be
+						     * freed ONLY if
+						     * we commit the
+						     * handle. */
+	__u64                new_file_lockid;  /* offset for the 
+						* most recently 
+						* created file
+						* sitting on this
+						* journal handle */  
+	struct list_head     inode_list;
+};
+
+/* should we checkpoint this handle on commit? */
+#define OCFS_HANDLE_CHECKPOINT			1
+/* should we sync-commit this handle? */
+#define OCFS_HANDLE_SYNC			2
+/* was local alloc used (should we release the sem?) */
+#define OCFS_HANDLE_LOCAL_ALLOC			4
+
+/*
+ * ocfs_handle_free_all_copyout()
+ *
+ * Release every copy-out buffer attached to the handle.  Walks co_buffs
+ * from the last used slot down to slot 0, freeing each non-NULL data
+ * pointer and decrementing handle->num_co as it goes (so num_co is 0
+ * afterwards), then frees the co_buffs array itself and clears the
+ * pointer.
+ */
+static inline void ocfs_handle_free_all_copyout(ocfs_journal_handle *handle)
+{
+	while (handle->num_co != 0) {
+		char *saved;
+
+		handle->num_co--;
+		saved = handle->co_buffs[handle->num_co].data;
+		if (saved != NULL)
+			kfree(saved);
+	}
+
+	kfree(handle->co_buffs);
+	handle->co_buffs = NULL;
+}
+
+
+/*
+ * Set (checkpoint != 0) or clear (checkpoint == 0) the
+ * OCFS_HANDLE_CHECKPOINT bit in handle->flags.
+ */
+static inline void ocfs_handle_set_checkpoint(ocfs_journal_handle *handle,
+					      int checkpoint)
+{
+	if (!checkpoint) {
+		handle->flags &= ~OCFS_HANDLE_CHECKPOINT;
+		return;
+	}
+
+	handle->flags |= OCFS_HANDLE_CHECKPOINT;
+}
+
+/*
+ * Set (sync != 0) or clear (sync == 0) the OCFS_HANDLE_SYNC bit in
+ * handle->flags.
+ */
+static inline void ocfs_handle_set_sync(ocfs_journal_handle *handle, int sync)
+{
+	if (!sync) {
+		handle->flags &= ~OCFS_HANDLE_SYNC;
+		return;
+	}
+
+	handle->flags |= OCFS_HANDLE_SYNC;
+}
+
+/*
+ * ocfs_handle_add_commit_bits()
+ *
+ * Record a range of bits on the handle's commit_bits list.  The list
+ * head is allocated with ocfs_alloc_bitmap_free_head() the first time
+ * it is needed; if that allocation fails, -ENOMEM is returned.
+ * Otherwise the result of ocfs_add_to_bitmap_free_head() is returned.
+ */
+static inline int ocfs_handle_add_commit_bits(ocfs_journal_handle *handle,
+					      __u32 len, __u32 fileoff,
+					      __u32 nodenum, __u32 type)
+{
+	if (handle->commit_bits == NULL) {
+		handle->commit_bits = ocfs_alloc_bitmap_free_head();
+		if (handle->commit_bits == NULL)
+			return -ENOMEM;
+	}
+
+	return ocfs_add_to_bitmap_free_head(handle->osb, handle->commit_bits,
+					    len, fileoff, nodenum, type);
+}
+
+
+/*
+ *  Journal Control:
+ *  Initialize, Load, Shutdown, Wipe, Create a journal.
+ *  
+ *  ocfs_journal_init     - Initialize journal structures in the OSB.
+ *  ocfs_journal_load     - Load the given journal off disk. Replay it if
+ *                          there's transactions still in there.
+ *  ocfs_journal_shutdown - Shutdown a journal, this will flush all 
+ *                          uncommitted, uncheckpointed transactions.
+ *  ocfs_journal_wipe     - Wipe transactions from a journal. Optionally 
+ *                          zero out each block.
+ *  ocfs_recovery_thread  - Perform recovery on a node. osb is our own osb.
+ */
+int    ocfs_journal_init(struct _ocfs_super *osb);
+void   ocfs_journal_shutdown(struct _ocfs_super *osb);
+int    ocfs_journal_wipe(ocfs_journal *journal, int full);
+int    ocfs_journal_load(ocfs_journal *journal);
+void   ocfs_recovery_thread(struct _ocfs_super *osb, int node_num);
+int    ocfs_journal_new_file_search(struct _ocfs_super *osb, __u64 lockid);
+
+/*
+ *  Transaction Handling:
+ *  Manage the lifetime of a transaction handle.
+ *
+ *  ocfs_start_trans      - Begin a transaction. Give it an upper estimate of 
+ *                          the number of blocks that will be changed during 
+ *                          this handle.
+ *  ocfs_commit_trans     - Complete a handle.
+ *  ocfs_abort_trans      - Abort a running handle.
+ *  ocfs_journal_access   - Notify the handle that we want to journal this 
+ *                          buffer. Call ocfs_journal_dirty once it's actually
+ *                          dirtied. Type is an OCFS_JOURNAL_ACCESS_* value.
+ *  ocfs_journal_dirty    - Mark a journalled buffer as having dirty data.
+ *  ocfs_handle_add_lock  - Sometimes we need to delay lock release
+ *                          until after a transaction has been completed. Use
+ *                          ocfs_handle_add_lock to indicate that a lock needs
+ *                          to be released at the end of that handle. Locks 
+ *                          will be released in the order that they are added. 
+ *  ocfs_handle_add_inode - Add a locked inode to a transaction.
+ */
+/* You must always start_trans with a number of buffs > 0, but it's
+ * perfectly legal to go through an entire transaction without having
+ * dirtied any buffers. */
+ocfs_journal_handle *ocfs_start_trans(struct _ocfs_super *osb, int max_buffs);
+void                 ocfs_commit_trans(ocfs_journal_handle *handle);
+void                 ocfs_abort_trans(ocfs_journal_handle *handle);
+/*
+ * Create access is for when we get a newly created buffer and we're
+ * not gonna read it off disk, but rather fill it ourselves. If it's
+ * not part of an already committing transaction, we'll mark it to be 
+ * passed to journal_forget in case of abort. Otherwise, it's treated 
+ * just like a OCFS_JOURNAL_ACCESS_WRITE buffer.
+ *
+ * Write access is for when we read a block off disk and are going to
+ * modify it. This way the journalling layer knows it may need to make
+ * a copy of that block (if it's part of another, uncommitted
+ * transaction) before we do so.
+ */
+#define OCFS_JOURNAL_ACCESS_CREATE 0
+#define OCFS_JOURNAL_ACCESS_WRITE  1
+#define OCFS_JOURNAL_ACCESS_UNDO   2
+int                  ocfs_journal_access(ocfs_journal_handle *handle, 
+					 struct buffer_head *bh, int type);
+/*
+ * A word about the journal_access/journal_dirty "dance". It is
+ * entirely legal to journal_access a buffer more than once (as long
+ * as the access type is the same -- I'm not sure what will happen if
+ * access type is different but this should never happen anyway) It is
+ * also legal to journal_dirty a buffer more than once. In fact, you
+ * can even journal_access a buffer after you've done a
+ * journal_access/journal_dirty pair. The only thing you cannot do
+ * however, is journal_dirty a buffer which you haven't yet passed to
+ * journal_access at least once.
+ *
+ * That said, 99% of the time this doesn't matter and this is what the
+ * path looks like: 
+ * 
+ *	<read a bh>
+ *	ocfs_journal_access(handle, bh,	OCFS_JOURNAL_ACCESS_WRITE); 
+ *	<modify the bh>
+ * 	ocfs_journal_dirty(handle, bh);
+ */
+int                  ocfs_journal_dirty(ocfs_journal_handle *handle, 
+					struct buffer_head *bh);
+void                 ocfs_handle_add_lock(ocfs_journal_handle *handle, 
+					  __u32 type, __u32 flags, 
+					  struct buffer_head *bh, 
+					  struct inode *inode,
+					  int req_io_sem);
+/*
+ * Some transactions require us to leave inodes in a locked state
+ * until we either commit or abort because the buffer state can change
+ * in abort_trans. Use this function to lock those inodes and put them
+ * on the handle where they'll be cleaned up after the transaction
+ * completes.
+ */
+void                 ocfs_handle_add_inode(ocfs_journal_handle *handle, 
+					   struct inode *inode);
+
+/*
+ *  Credit Macros:
+ *  Convenience macros to calculate number of credits needed.
+ *
+ *  For convenience sake, I have a set of macros here which calculate
+ *  the *maximum* number of sectors which will be changed for various
+ *  metadata updates. I also have a completely arbitrary 'fuzz' value
+ *  which I'll add to some of these in case of a miscalculation. The
+ *  journal is large enough, and we don't journal as much metadata
+ *  that I'm not worried about the 'fuzz' taking up potential credits.
+ */
+#define OCFS_JOURNAL_FUZZ_CREDITS (5)
+#define OCFS_SINGLE_FILE_EXTEND_CREDITS (10)
+
+/* locknode + new fe + dirnode head + new dirnode for parent directory
+ * + extending (diralloc, filealloc, dirallocbitmap, fileallocbitmap)
+ * + a second dirnode for handling mkdir + 2 * 2 = 4 blocks for
+ * setting the bits in the dir alloc bitmap and the metadata alloc
+ * bitmap if dir alloc needs to be extended + some fuzz. */
+#define OCFS_MKNOD_CREDITS (1 + 1 + 	      \
+			    (OCFS_SINGLE_FILE_EXTEND_CREDITS * 4) +	      \
+			    OCFS_JOURNAL_FUZZ_CREDITS)
+
+/* single file metadata updates * 3 because we might have to extend
+ * the file alloc and file alloc bitmap files + possible update to
+ * local bitmap. + 2 blocks for bits to set in the metadata alloc
+ * bitmap file */
+#define OCFS_FILE_EXTEND_CREDITS (OCFS_SINGLE_FILE_EXTEND_CREDITS * 3         \
+				  + 1 + 2 + OCFS_JOURNAL_FUZZ_CREDITS)
+
+
+/* Now that we journal bitmap writes, this might get a bit more
+ * complicated, use this function to determine how many credits are
+ * needed for an extend. Unfortunately, we're in bytes because the
+ * rest of the file system is. 
+ */
+static inline int ocfs_calc_extend_credits(struct super_block *sb,
+					   __u32 bytes_wanted)
+{
+	unsigned int clusters = ocfs_clusters_for_bytes(sb, bytes_wanted);
+	int main_bitmap, sysfile_bitmap;
+
+	/* We always allocate in one large chunk, so the bitmap blocks
+	 * touched are those covering the wanted bits, plus one. */
+	main_bitmap = ocfs_blocks_for_bits(sb, clusters) + 1;
+
+	/* Account for any bitmap system file extension.  System files
+	 * extend by up to 2mb, so the corresponding bitmap file (all
+	 * of which are blocksize allocators today) needs enough
+	 * credits to zero the new bitmap data for that 2mb.  Even on a
+	 * 512byte sect size this is only 512 bytes of bitmap data, so
+	 * it's really never more than two extra blocks.  Add some
+	 * extra in case the request spans the new blocks + old ones.
+	 *
+	 * NOTE: if we ever add an allocator that allocates something
+	 * other than blocksize chunks, or expect requests to be lots
+	 * of bits at a time, this assumption doesn't hold. */
+	sysfile_bitmap = 4;
+
+	return (main_bitmap + sysfile_bitmap + OCFS_FILE_EXTEND_CREDITS);
+}
+
+/* fe, anything along new 'edge' of tree + fuzz*/
+#define OCFS_FILE_TRUNCATE_CREDITS (1 + 4 + OCFS_JOURNAL_FUZZ_CREDITS)
+
+/* the file entry + the locknode + possibly the parent dirnode + fuzz */
+#define OCFS_FILE_DELETE_CREDITS  (1 + 1 + 1 + OCFS_JOURNAL_FUZZ_CREDITS)
+
+/* We need to create a new file and extend it to hold the info for the
+ * symlink, plus one for a potential write of the main bitmap. Because
+ * we reuse some macros we wind up with twice the fuzz, so we subtract
+ * one set of fuzz credits. */
+#define OCFS_SYMLINK_CREDITS (OCFS_MKNOD_CREDITS + OCFS_FILE_EXTEND_CREDITS   \
+			      + 1 - OCFS_JOURNAL_FUZZ_CREDITS)
+
+/* fe change, locknode change, dirnode head, times two plus a possible
+ * delete, plus a possible dirnode addition in insert_file, and fuzz */
+#define OCFS_FILE_RENAME_CREDITS  (2 * (1 + 1 + 1) + OCFS_FILE_DELETE_CREDITS \
+				   + OCFS_JOURNAL_FUZZ_CREDITS)
+
+/* On very rare open operations (it's a first open and we're not in
+ * the open map), we have to change the file entry. */
+#define OCFS_OPEN_CREDITS 1
+
+#endif /* _OCFSJOURNAL_H_ */

Added: trunk/src/ocfs_log.h
===================================================================
--- trunk/src/ocfs_log.h	2004-06-17 16:59:59 UTC (rev 1127)
+++ trunk/src/ocfs_log.h	2004-06-17 21:06:33 UTC (rev 1128)
@@ -0,0 +1,324 @@
+/* -*- mode: c; c-basic-offset: 8; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
+ * ocfs_log.h
+ *
+ * Defines macros and flags used for logging.
+ *
+ * Copyright (C) 2004 Oracle.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License as published by the Free Software Foundation, version
+ * 2 of the License.
+ * 
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ *
+ * Authors: Kurt Hackel, Mark Fasheh, Sunil Mushran, Wim Coekaerts,
+ *	    Manish Singh, Neeraj Goyal, Suchit Kaura
+ */
+
+#ifndef OCFS_LOG_H
+#define OCFS_LOG_H
+
+extern __u32 debug_context;
+extern __u32 debug_level;
+extern __u32 debug_exclude;
+
+
+
+/* Tracing Levels */
+#define OCFS_DEBUG_LEVEL_ERROR         0x00000001
+#define OCFS_DEBUG_LEVEL_TRACE         0x00000002
+
+#define OCFS_DEBUG_LEVEL_ENTRY         0x00000010
+#define OCFS_DEBUG_LEVEL_EXIT          0x00000020
+
+#define OCFS_DEBUG_LEVEL_TIMING        0x00000100
+#define OCFS_DEBUG_LEVEL_STACK         0x00000200
+
+#define OCFS_DEBUG_LEVEL_PRINTK        0x00001000
+#define OCFS_DEBUG_LEVEL_MALLOC        0x00002000
+
+/* Tracing Contexts */
+#define OCFS_DEBUG_CONTEXT_ALLOC       0x00000001	/* alloc.c    */
+#define OCFS_DEBUG_CONTEXT_DIR         0x00000002	/* dir.c      */
+#define OCFS_DEBUG_CONTEXT_EXTMAP      0x00000004	/* extmap.c   */
+#define OCFS_DEBUG_CONTEXT_HEARTBEAT   0x00000008	/* heartbeat.c */
+#define OCFS_DEBUG_CONTEXT_IOCTL       0x00000010	/* ioctl.c    */
+#define OCFS_DEBUG_CONTEXT_NM          0x00000020	/* nm.c       */
+#define OCFS_DEBUG_CONTEXT_PROC        0x00000040	/* proc.c     */
+#define OCFS_DEBUG_CONTEXT_SYMLINK     0x00000080	/* symlink.c  */
+#define OCFS_DEBUG_CONTEXT_BITMAP      0x00000100	/* bitmap.c   */
+#define OCFS_DEBUG_CONTEXT_FILE        0x00000200	/* file.c     */
+#define OCFS_DEBUG_CONTEXT_INODE       0x00000400	/* inode.c    */
+#define OCFS_DEBUG_CONTEXT_JOURNAL     0x00000800	/* journal.c  */
+#define OCFS_DEBUG_CONTEXT_UNUSED1     0x00001000	/*            */
+#define OCFS_DEBUG_CONTEXT_UNUSED2     0x00002000	/*            */
+#define OCFS_DEBUG_CONTEXT_SYSFILE     0x00004000	/* sysfile.c  */
+#define OCFS_DEBUG_CONTEXT_VOLCFG      0x00008000	/* volcfg.c   */
+#define OCFS_DEBUG_CONTEXT_DCACHE      0x00010000	/* dcache.c   */
+#define OCFS_DEBUG_CONTEXT_DLM         0x00020000	/* dlm.c      */
+#define OCFS_DEBUG_CONTEXT_HASH        0x00040000	/* hash.c     */
+#define OCFS_DEBUG_CONTEXT_IO          0x00080000	/* io.c       */
+#define OCFS_DEBUG_CONTEXT_NAMEI       0x00100000	/* namei.c    */
+#define OCFS_DEBUG_CONTEXT_OSB         0x00200000	/* osb.c      */
+#define OCFS_DEBUG_CONTEXT_SUPER       0x00400000	/* super.c    */
+#define OCFS_DEBUG_CONTEXT_UTIL        0x00800000	/* util.c     */
+#define OCFS_DEBUG_CONTEXT_VOTE        0x01000000	/* vote.c     */
+#define OCFS_DEBUG_CONTEXT_LOCKRES     0x02000000	/* lockres.c  */
+
+
+#ifdef OCFS_DBG_TIMING
+typedef union _my_timing_t
+{
+	__u64 q;
+	__u32 lohi[2];
+} my_timing_t;
+
+#define IO_FUNC_TIMING_DECL	my_timing_t begin, end;	rdtsc (begin.lohi[0], begin.lohi[1]); 
+
+#define IO_FUNC_TIMING_PRINT(_fn,_ret)					      \
+	do {								      \
+		rdtsc (end.lohi[0], end.lohi[1]);			      \
+		IF_LEVEL_NO_CONTEXT(OCFS_DEBUG_LEVEL_TIMING)		      \
+			printk("(%d) EXIT : %s() = %d  => [%llu]\n",	      \
+				current->pid, _fn, _ret,		      \
+				end.q-begin.q);			      \
+	} while(0)		      
+#else
+#define IO_FUNC_TIMING_DECL
+#define IO_FUNC_TIMING_PRINT(_fn,_ret)
+#endif
+
+
+#ifndef OCFS_DBG_TIMING
+#define DECL_U8_ARRAY(__t, __s)
+#define INIT_U8_ARRAY(__s)
+/* Non-timing build: there is no string to print, just end the line.
+ * No trailing semicolon, matching the OCFS_DBG_TIMING variant. */
+#define PRINT_STRING(__t)		printk("\n")
+#define PRINT_ENTRY(__t)		printk("(%d) ENTRY: %s", current->pid, __FUNCTION__)
+#else
+#define DECL_U8_ARRAY(__t, __s)		__u8 (__t)[(__s)]
+#define INIT_U8_ARRAY(__s)		*(__s) = '\0'
+#define PRINT_STRING(__t)		printk("%s\n", (__t))
+#define PRINT_ENTRY(__t)		printk("(%d) %sENTRY: %s", current->pid, (__t), __FUNCTION__)
+#endif
+
+
+#ifndef OCFS_DBG_TIMING
+# define GET_STACK(s)
+#else
+# define GET_STACK(s)							\
+	IF_LEVEL(OCFS_DEBUG_LEVEL_STACK) {				\
+		__s32 esp;						\
+		__asm__ __volatile__("andl %%esp,%0" : "=r" (esp) : 	\
+				     "0" (8191));			\
+		esp -= sizeof(struct task_struct);			\
+		sprintf((s), "[%ld] ", esp);				\
+	}
+#endif
+
+/*
+ * Privately used macros.
+ *
+ * IF_LEVEL(level) opens an `if` that is true when the OCFS_DEBUG_CONTEXT
+ * bit(s) are set in debug_context, at least one bit of 'level' is set
+ * in debug_level, and the current pid differs from debug_exclude.
+ * OCFS_DEBUG_CONTEXT is not defined in this header; presumably each
+ * including source file defines it -- confirm.
+ *
+ * IF_LEVEL_NO_CONTEXT(level) is the same test without the context check.
+ *
+ * Both expand to a bare `if (...)` head and must be followed by a
+ * statement or block.  'level' is parenthesized so that an argument
+ * such as (A | B) is masked as a whole.
+ */
+# define IF_LEVEL(level)	\
+	if ((debug_context & OCFS_DEBUG_CONTEXT) && (debug_level & (level)) && \
+	    current->pid!=debug_exclude)
+# define IF_LEVEL_NO_CONTEXT(level)	\
+	if ((debug_level & (level)) &&  current->pid!=debug_exclude)
+	
+#ifndef OCFS_DBG_TIMING
+# define ENTRY_TIMING_DECLS
+# define GET_TIMING(s, hi, lo)
+#else
+# define ENTRY_TIMING_DECLS    __u32 _HI = 0, _LO = 0
+# define GET_TIMING(s, hi, lo)					\
+	do {							\
+		IF_LEVEL(OCFS_DEBUG_LEVEL_TIMING) {		\
+			__u32 _lo, _hi;				\
+			rdtsc (_lo, _hi);			\
+			if ((s) == NULL) {			\
+				(hi) = _hi; (lo) = _lo;		\
+			} else {				\
+				__u64 _b, _e;			\
+				_b = hi; _b <<= 32; _b |= lo;	\
+				_e = _hi; _e <<= 32; _e |= _lo;	\
+				_e -= _b; 			\
+				sprintf((s), " => [%llu]", _e);	\
+			}					\
+		}						\
+	} while (0)
+#endif
+
+/* IF macro */
+#define IF_TRACE(func)						\
+	do {							\
+		if ((debug_context & OCFS_DEBUG_CONTEXT) &&	\
+		    (debug_level & OCFS_DEBUG_LEVEL_TRACE))	\
+			func;					\
+	} while (0)
+
+#define IF_LEVEL_PID()						\
+	if ((debug_level & OCFS_DEBUG_LEVEL_PRINTK) && 		\
+	    (current->pid!=debug_exclude))
+
+/*
+ * No-op value sinks, one per argument type.  When TRACE is not defined,
+ * the LOG_EXIT_* and LOG_TRACE_STATUS macros below expand to calls of
+ * these, so the macro argument is still evaluated and type-checked.
+ */
+static inline void eat_value_int(int val)
+{
+	(void) val;
+}
+
+static inline void eat_value_uint(unsigned int val)
+{
+	(void) val;
+}
+
+static inline void eat_value_long(long val)
+{
+	(void) val;
+}
+
+static inline void eat_value_ulong(unsigned long val)
+{
+	(void) val;
+}
+
+static inline void eat_value_ptr(void *val)
+{
+	(void) val;
+}
+
+/* TRACE disabled. ERROR macros are never disabled. */
+#if !defined(TRACE)
+# define  LOG_ENTRY()
+# define  LOG_EXIT()
+# define  LOG_EXIT_STATUS(val)                  eat_value_int(val)
+# define  LOG_EXIT_INT(val)                    eat_value_int(val)
+# define  LOG_EXIT_UINT(val)                   eat_value_uint(val)
+# define  LOG_EXIT_LONG(val)                    eat_value_long(val)
+# define  LOG_EXIT_ULONG(val)                   eat_value_ulong(val)
+# define  LOG_EXIT_PTR(val)                     eat_value_ptr(val)
+# define  LOG_TRACE_STR(str)
+# define  LOG_TRACE_STATUS(val)                 eat_value_int(val)
+# define  LOG_ENTRY_ARGS(fmt, arg...)
+# define  LOG_EXIT_ARGS(fmt, arg...)
+# define  LOG_TRACE_ARGS(fmt, arg...)
+# define  LOG_PID_PRINTK(fmt, arg...)
+# define  LOG_PID_STR(str)
+#endif                          /* !defined(TRACE) */
+
+	
+
+/* TRACE enabled */
+#if defined(TRACE)
+
+#define LOG_PID_PRINTK(fmt, arg...) 					\
+	do {								\
+		IF_LEVEL_PID() {					\
+			printk("(%d) %s(): ", current->pid, 		\
+			       __FUNCTION__); 				\
+			printk(fmt, ## arg);				\
+		}							\
+	} while (0)
+
+#define LOG_PID_STR(str) LOG_PID_PRINTK("%s\n", str)
+
+/* ENTRY macros */
+/* LOG_ENTRY_ARGS()
+ *
+ * Prints "(pid) ENTRY: function" followed by the caller's formatted
+ * arguments when OCFS_DEBUG_LEVEL_ENTRY is enabled for this context.
+ *
+ * Note: The macro expects the args to be terminated by a newline.
+ *
+ * Note: the expansion begins with ENTRY_TIMING_DECLS, which declares
+ * _HI and _LO when OCFS_DBG_TIMING is defined and is empty otherwise.
+ * It is therefore not wrapped as a single statement; LOG_EXIT_ARGS()
+ * passes _HI and _LO to GET_TIMING() in the same function.
+ */
+#define LOG_ENTRY_ARGS(fmt, arg...)					\
+	ENTRY_TIMING_DECLS;						\
+	do {								\
+		DECL_U8_ARRAY(_t, 16);                                  \
+		INIT_U8_ARRAY(_t);                                      \
+		GET_STACK(_t);						\
+		GET_TIMING(NULL, _HI, _LO);				\
+		IF_LEVEL(OCFS_DEBUG_LEVEL_ENTRY) {			\
+			PRINT_ENTRY(_t);				\
+			printk(fmt, ##arg);			\
+		}							\
+	} while (0)
+
+#define LOG_ENTRY()            LOG_ENTRY_ARGS("() \n")
+
+
+
+/* EXIT macros */
+/* LOG_EXIT_ARGS()
+ *
+ */
+#define LOG_EXIT_ARGS(fmt, arg...)					\
+	do {								\
+		IF_LEVEL(OCFS_DEBUG_LEVEL_EXIT) {			\
+			DECL_U8_ARRAY(_t, 50);				\
+			INIT_U8_ARRAY(_t);				\
+			GET_TIMING(_t, _HI, _LO);			\
+			printk("(%d) EXIT : %s() %s", 			\
+			       current->pid, __FUNCTION__, 		\
+			       (*fmt == '\n' ? "" : "= "));		\
+			if (*fmt != '\n')					\
+				printk(fmt, ## arg);			\
+			PRINT_STRING(_t);				\
+		}							\
+	}  while (0)
+
+#define LOG_EXIT()             LOG_EXIT_ARGS("\n") /* I hate you Sunil */
+#define LOG_EXIT_STATUS(val)   LOG_EXIT_ARGS("%d ", val)
+#define LOG_EXIT_INT(val)      LOG_EXIT_ARGS("%d ", val)
+#define LOG_EXIT_UINT(val)     LOG_EXIT_ARGS("%u ", val)
+#define LOG_EXIT_LONG(val)     LOG_EXIT_ARGS("%ld ", val)
+#define LOG_EXIT_ULONG(val)    LOG_EXIT_ARGS("%lu ", val)
+#define LOG_EXIT_PTR(val)      LOG_EXIT_ARGS("0x%p ", val)
+
+
+/* TRACE macros */
+/* LOG_TRACE_ARGS()
+ *
+ * Note: The macro expects the args to be terminated by a newline.
+ */
+#define LOG_TRACE_ARGS(fmt, arg...)					\
+	do {								\
+		IF_LEVEL(OCFS_DEBUG_LEVEL_TRACE) {			\
+			printk("(%d) TRACE: %s() ", current->pid,	\
+			       __FUNCTION__);				\
+			printk(fmt, ## arg);				\
+		}							\
+	} while (0)
+
+#define LOG_TRACE_STR(str)     LOG_TRACE_ARGS("%s\n", str)
+/* No trailing semicolon: the caller supplies it, which keeps
+ * "if (x) LOG_TRACE_STATUS(s); else ..." valid. */
+#define LOG_TRACE_STATUS(val)  LOG_TRACE_ARGS("%d\n", val)
+
+#endif				/* TRACE */
+
+
+
+/* ERROR macros are not compiled out */
+/* LOG_ERROR_ARGS()
+ *
+ * Note: The macro expects the args to be terminated by a newline.
+ */
+#define LOG_ERROR_ARGS(fmt, arg...)					\
+	do {								\
+		printk(KERN_ERR "(%d) ERROR at %s, %d: ", current->pid, __FILE__, __LINE__);        \
+		printk(fmt, ## arg);					\
+	} while (0)
+
+#define LOG_ERROR_STR(str)     LOG_ERROR_ARGS("%s\n", str)
+#define LOG_ERROR_STATUS(st)   LOG_ERROR_ARGS("status = %d\n", st)
+
+/*
+ * ocfs_trace()
+ *
+ * Print the current file and line together with the caller's return
+ * address, then call ocfs_show_trace(NULL).  The macro argument 'p' is
+ * not used.
+ *
+ * __builtin_return_address() yields a pointer, so it is cast to
+ * unsigned long to match the "%lx" conversion.
+ */
+#define ocfs_trace(p) \
+	do { \
+		printk("Trace in %s line %d called from [<%lx>]\n", \
+		       __FILE__, __LINE__, \
+		       (unsigned long) __builtin_return_address(0)); \
+		ocfs_show_trace(NULL); \
+	} while (0)
+
+#endif  /* OCFS_LOG_H */

Modified: trunk/src/proc.c
===================================================================
--- trunk/src/proc.c	2004-06-17 16:59:59 UTC (rev 1127)
+++ trunk/src/proc.c	2004-06-17 21:06:33 UTC (rev 1128)
@@ -1,4 +1,6 @@
-/*
+/* -*- mode: c; c-basic-offset: 8; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
  * proc.c
  *
  * proc interface
@@ -26,10 +28,20 @@
 
 #define OCFSPROC_PRIVATE_DECLS
 
+#include "ocfs_compat.h"
+
+#include <linux/fs.h>
 #include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/proc_fs.h>
+#include <linux/socket.h>
 
-#include "inc/ocfs_log.h"
-#include "inc/ocfs.h"
+#include "ocfs_log.h"
+#include "ocfs.h"
+
+#include "proc.h"
+#include "vote.h"
+
 extern __u32 comm_voting;
 
 
@@ -47,9 +59,6 @@
 static int ocfs_proc_device (char *page, char **start, off_t off, int count, int *eof, void *data);
 static int ocfs_proc_nodes (char *page, char **start, off_t off, int count, int *eof, void *data);
 static int ocfs_proc_net_vote_obj (char *page, char **start, off_t off, int count, int *eof, void *data);
-#ifdef OCFS_LINUX_MEM_DEBUG
-static int ocfs_proc_memallocs (char *page, char **start, off_t off, int count, int *eof, void *data);
-#endif
 static int ocfs_proc_alloc_stat(char *page, char **start, off_t off,
 				int count, int *eof, void *data);
 
@@ -71,9 +80,6 @@
 		{ OCFS2_PROC_BASENAME "/nodename", NULL, ocfs_proc_nodename },
 		{ OCFS2_PROC_BASENAME "/globalctxt", NULL, ocfs_proc_globalctxt },
 		{ OCFS2_PROC_BASENAME "/lockstat", NULL, ocfs_proc_dlm_stats },
-#ifdef OCFS_LINUX_MEM_DEBUG
-		{ OCFS2_PROC_BASENAME "/memallocs", NULL, ocfs_proc_memallocs },
-#endif
 		{ NULL, }
 	};
 
@@ -101,9 +107,6 @@
 	remove_proc_entry (OCFS2_PROC_BASENAME "/nodename", NULL);
 	remove_proc_entry (OCFS2_PROC_BASENAME "/globalctxt", NULL);
 	remove_proc_entry (OCFS2_PROC_BASENAME "/lockstat", NULL);
-#ifdef OCFS_LINUX_MEM_DEBUG
-	remove_proc_entry (OCFS2_PROC_BASENAME "/memallocs", NULL);
-#endif
 	remove_proc_entry (OCFS2_PROC_BASENAME, NULL);
 
 	LOG_EXIT ();
@@ -136,90 +139,7 @@
 	return len;
 }				/* ocfs_proc_calc_metrics */
 
-#ifdef OCFS_LINUX_MEM_DEBUG
-/*
- * ocfs_proc_memallocs()
- *
- */
-static int ocfs_proc_memallocs (char *page, char **start, off_t off,
-				int count, int *eof, void *data)
-{
-	int ret = 0;
-	int proc_overflow = 0;
-	struct list_head *iter;
-	struct list_head *temp_iter;
-	alloc_item *item;
-	int len = 0;
-	char *slabname;
-	char *tmpstr = NULL;
 
-	LOG_ENTRY ();
-
-#define	MEMDBG_LEN		255
-	tmpstr = ocfs_malloc(MEMDBG_LEN);
-	if (!tmpstr) {
-		LOG_ERROR_STATUS (-ENOMEM);
-		goto bail;
-	}
-
-	sprintf (tmpstr, "%-8s  %-9s  %s\n", "Pointer", "Size/Slab", "Line:File");
-	printk("%s", tmpstr);
-	ret = sprintf ((char *) (page + len), "%s", tmpstr);
-	len += ret;
-
-	list_for_each_safe (iter, temp_iter, &OcfsGlobalCtxt.item_list) {
-		if (len >= 4096)
-			proc_overflow = 1;
-		item = list_entry (iter, alloc_item, list);
-		switch (item->type) {
-		    case SLAB_ITEM:
-			if (item->u.slab == OcfsGlobalCtxt.fe_cache)
-				slabname = "fe";
-			else
-				slabname = "unknown";
-
-			sprintf(tmpstr, "%08x  %9s  %-40s\n", item->address,
-				       	slabname, item->tag);
-
-			printk("%s", tmpstr);
-			if (!proc_overflow) {
-				ret = snprintf ((char *) (page + len),
-					       	(4096 - len), "%s", tmpstr);
-				len += ret;
-			}
-			break;
-		    case KMALLOC_ITEM: 
-		    case VMALLOC_ITEM:
-		    default:
-			sprintf(tmpstr, "%08x  %9d  %s\n", item->address,
-			       	item->u.length, item->tag);
-			printk("%s", tmpstr);
-			if (!proc_overflow) {
-				ret = snprintf ((char *) (page + len),
-					       	(4096 - len), "%s", tmpstr);
-				len += ret;
-			}
-
-			break;
-		}
-		if (ret < 0)
-			proc_overflow = 1;
-	}
-
-
-	if (proc_overflow)
-		LOG_ERROR_STR ("proc output truncated");
-
-	ret = ocfs_proc_calc_metrics (page, start, off, count, eof, len);
-
-bail:
-	if (tmpstr)
-		kfree(tmpstr);
-	LOG_EXIT_LONG (ret);
-	return ret;
-}				/* ocfs_proc_memallocs */
-#endif
-
 /*
  * ocfs_proc_globalctxt()
  *
@@ -232,17 +152,17 @@
 
 	LOG_ENTRY ();
 
-	len += sprintf (page + len, "comm voting  : %d\n", comm_voting);
-	len += sprintf (page + len, "ip addr/port : %s/%d\n",
-		       	OcfsGlobalCtxt.comm_info.ip_addr,
-		       	OcfsGlobalCtxt.comm_info.ip_port);
-	len += sprintf (page + len, "guid         : ");
-	strncat (page + len, OcfsGlobalCtxt.guid.guid, GUID_LEN);
-	len += GUID_LEN;
-	strncat (page + len, "\n", 1);
+	len += sprintf(page + len, "comm voting  : %d\n", comm_voting);
+	len += sprintf(page + len, "ip addr/port : 0x%08x/%u\n",
+		       ntohl(OcfsGlobalCtxt.comm_info.addr_u.ip_addr4),
+		       ntohs(OcfsGlobalCtxt.comm_info.ip_port));
+	len += sprintf(page + len, "guid         : ");
+	strncat(page + len, OcfsGlobalCtxt.guid.guid, OCFS2_GUID_LEN);
+	len += OCFS2_GUID_LEN;
+	strncat(page + len, "\n", 1);
 	len++;
 
-	ret = ocfs_proc_calc_metrics (page, start, off, count, eof, len);
+	ret = ocfs_proc_calc_metrics(page, start, off, count, eof, len);
 
 	LOG_EXIT_INT (ret);
 	return ret;
@@ -471,7 +391,8 @@
 	}
 
 	ProcList[0].data = (char *) osb;
-	ProcList[1].data = osb->vol_layout.mount_point;
+#warning fix proc mountpoint
+	ProcList[1].data = "unknown";
 	ProcList[2].data = (char *) osb;
 	ProcList[3].data = (char *) osb;
 	ProcList[4].data = (char *) osb;
@@ -561,14 +482,12 @@
 	int len;
 	char *pubmap = NULL;
 	ocfs_super *osb;
-	ocfs_vol_layout *vol;
 	int ret = 0, i;
 	char *ptr;
 
 	LOG_ENTRY ();
 
 	osb = data;
-	vol = &(osb->vol_layout);
 
 	pubmap = ocfs_malloc (100);
 	if (!pubmap) {
@@ -577,8 +496,8 @@
 	}
 
 	ptr = pubmap;
-	for (i = 0; i < 32; i++) {
-		if (osb->publ_map & (1 << i))
+	for (i = 0; i < osb->max_nodes; i++) {
+		if (ocfs_node_map_test_bit(&osb->publ_map, i))
 			ptr += sprintf (ptr, "%d ", i);
 	}
 	if (pubmap != ptr)
@@ -588,17 +507,15 @@
   "File open count          : %lld\n"		\
   "Publish map              : %s\n"		\
   "Number of nodes          : %u\n"		\
-  "Cluster size             : %u\n"		\
+  "Cluster size             : %d\n"		\
   "Volume size              : %llu\n"		\
-  "Dir node size            : %llu\n"		\
-  "File node size           : %llu\n"		\
   "Failed Large Allocs      : %u\n"		\
   "Retry Large Allocs       : %u\n"		\
   "Inode Hash Objects       : %u\n"
 
 	len = sprintf (page, PROC_STATS, osb->file_open_cnt, pubmap,
-		       vol->num_nodes, vol->cluster_size, vol->size,
-		       vol->dir_node_size, vol->file_node_size,
+		       osb->num_nodes, osb->s_clustersize, 
+		       (u64)osb->cluster_bitmap.validbits << osb->s_clustersize_bits,
 		       osb->cluster_bitmap.failed, 
 		       osb->cluster_bitmap.ok_retries, 
 		       osb->inode_hash.num_ents);
@@ -646,7 +563,6 @@
 	int ret;
 	ocfs_super *osb;
 	BARF_BARF_BARF *node;
-	__u32 publ_map;
 	char mount;
 
 	LOG_ENTRY ();
@@ -655,19 +571,19 @@
 
 	if (osb) {
 		down (&(osb->cfg_lock));
-		publ_map = (__u32)osb->publ_map;
-		for (i = 0; i < OCFS_MAXIMUM_NODES; ++i, publ_map >>= 1) {
+		for (i = 0; i < osb->max_nodes; i++) {
 			node = osb->node_cfg_info[i];
 			if (!node)
 				continue;
-			mount = (publ_map & 0x1) ? 'M' : ' ';
+			mount = ocfs_node_map_test_bit(&osb->publ_map, i) ? 'M' : ' ';
 			len += sprintf (page + len,
-				       	"%2d %c %-32s %-15s %-6d ",
+				       	"%2d %c %-32s 0x%08x %-6u ",
 				       	i, mount, node->node_name,
-				       	node->ipc_config.ip_addr,
-				       	node->ipc_config.ip_port);
-			strncat (page + len, node->guid.guid, GUID_LEN);
-			len += GUID_LEN;
+		 			ntohl(node->ipc_config.addr_u.ip_addr4),
+				       	ntohs(node->ipc_config.ip_port));
+			strncat(page + len, node->guid.guid,
+		       		OCFS2_GUID_LEN);
+			len += OCFS2_GUID_LEN;
 			len += sprintf (page + len, "\n");
 		}
 		up (&(osb->cfg_lock));

Added: trunk/src/proc.h
===================================================================
--- trunk/src/proc.h	2004-06-17 16:59:59 UTC (rev 1127)
+++ trunk/src/proc.h	2004-06-17 21:06:33 UTC (rev 1128)
@@ -0,0 +1,37 @@
+/* -*- mode: c; c-basic-offset: 8; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
+ * proc.h
+ *
+ * Function prototypes
+ *
+ * Copyright (C) 2002, 2004 Oracle.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ * 
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ *
+ * Authors: Kurt Hackel, Mark Fasheh, Sunil Mushran, Wim Coekaerts,
+ *	    Manish Singh, Neeraj Goyal, Suchit Kaura
+ */
+
+#ifndef OCFS2_PROC_H
+#define OCFS2_PROC_H
+
+void ocfs_proc_add_volume(ocfs_super *osb);
+void ocfs_proc_deinit(void);
+int ocfs_proc_init(void);
+void ocfs_proc_remove_volume(ocfs_super *osb);
+
+#endif /* OCFS2_PROC_H */

Modified: trunk/src/super.c
===================================================================
--- trunk/src/super.c	2004-06-17 16:59:59 UTC (rev 1127)
+++ trunk/src/super.c	2004-06-17 21:06:33 UTC (rev 1128)
@@ -1,4 +1,6 @@
-/*
+/* -*- mode: c; c-basic-offset: 8; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
  * super.c
  *
  * load/unload driver, mount/dismount volumes
@@ -24,13 +26,47 @@
  *	    Manish Singh, Neeraj Goyal, Suchit Kaura
  */
 
+#include "ocfs_compat.h"
+
+#include <linux/module.h>
+#include <linux/fs.h>
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/highmem.h>
 #include <linux/utsname.h>
-#include <linux/types.h>
+#include <linux/init.h>
+#include <linux/sysctl.h>
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
+#include <linux/statfs.h>
+#include <linux/moduleparam.h>
+#endif
+#include <linux/blkdev.h>
+#include <linux/socket.h>
+#include <linux/inet.h>
 
-#include "inc/ocfs_log.h"
-#include "inc/ocfs.h"
-#include "inc/ocfs_journal.h"
+#include "ocfs_log.h"
+#include "ocfs.h"
 
+/* this should be the only file to include a version 1 header */
+#include "ocfs1_fs_compat.h"
+
+#include "alloc.h"
+#include "bitmap.h"
+#include "heartbeat.h"
+#include "inode.h"
+#include "journal.h"
+#include "nm.h"
+#include "proc.h"
+#include "super.h"
+#include "sysfile.h"
+#include "util.h"
+#include "ver.h"
+#include "volcfg.h"
+#include "vote.h"
+
+#include "ocfs_journal.h"
+#include "ocfs_buffer_head.h"
+
 #define OCFS_DEBUG_CONTEXT  OCFS_DEBUG_CONTEXT_SUPER
 
 /*
@@ -145,17 +181,18 @@
 static int ocfs_statfs (struct super_block *sb, struct statfs *buf);
 #endif
 
-static int ocfs_init_system_inodes(ocfs_super *osb);
+static int ocfs_init_global_system_inodes(ocfs_super *osb);
+static int ocfs_init_local_system_inodes(ocfs_super *osb);
 static int ocfs_release_system_inodes(ocfs_super *osb);
 static int ocfs_publish_set_unmounted(ocfs_super *osb, int node_num);
 static int ocfs_publish_set_mounted(ocfs_super *osb, int node_num);
 static int ocfs_publish_toggle_mounted(ocfs_super *osb, int node_num,
 				       int value);
 static int ocfs_check_volume(ocfs_super * osb);
-static int ocfs_verify_volume(ocfs_vol_disk_hdr *vdh);
-static int ocfs_initialize_osb(ocfs_super *osb, ocfs_vol_disk_hdr *vdh,
-			       ocfs_vol_label *vol_label,
-       			       __u32 sect_size);
+static int ocfs_verify_volume(ocfs2_dinode *di, struct buffer_head *bh,
+			      __u32 sectsize);
+static int ocfs_initialize_osb(ocfs_super *osb, struct buffer_head *bh);
+static int ocfs2_get_sector(struct super_block *sb, struct buffer_head **bh, int block, int sect_size);
 
 static struct super_operations ocfs_sops = {
 	.statfs = ocfs_statfs,
@@ -194,78 +231,71 @@
 }
 
 /*
- * ocfs_init_system_inodes()
+ * ocfs_init_global_system_inodes()
  */
-static int ocfs_init_system_inodes(ocfs_super *osb)
+static int ocfs_init_global_system_inodes(ocfs_super *osb)
 {
 	struct inode *new = NULL;
-	__u64 sys_off = 0;
 	int status = 0;
+	int i;
 
 	LOG_ENTRY();
 
-	/* the vol bitmap */
-	sys_off = OCFS_BITMAP_LOCK_OFFSET;
-	new = ocfs_iget(osb, sys_off);
+	new = ocfs_iget(osb, osb->root_blkno << osb->sb->s_blocksize_bits);
 	if (!new) {
-		status = -EINVAL;
-		LOG_ERROR_STATUS(status);
+		LOG_ERROR_STATUS(status = -EINVAL);
 		goto bail;
 	}
-	osb->system_inodes[GLOBAL_BITMAP_SYSTEM_INODE] = new;
+	osb->root_inode = new;
 
-	/* file alloc bitmap */
-	sys_off = osb->vol_layout.root_int_off + 
-		((OCFS_FILE_FILE_ALLOC_BITMAP + osb->node_num) 
-		 * osb->sect_size);
-	new = ocfs_iget(osb, sys_off);
+	new = ocfs_iget(osb, osb->system_dir_blkno << osb->sb->s_blocksize_bits);
 	if (!new) {
-		status = -EINVAL;
-		LOG_ERROR_STATUS(status);
+		LOG_ERROR_STATUS(status = -EINVAL);
 		goto bail;
 	}
-	osb->system_inodes[FILE_ALLOC_BITMAP_SYSTEM_INODE] = new;
+	osb->sys_root_inode = new;
 
-	/* inode alloc bitmap */
-	sys_off = osb->vol_layout.root_int_off + 
-		((OCFS_INODE_BITMAP + osb->node_num) 
-		 * osb->sect_size);
-	new = ocfs_iget(osb, sys_off);
-	if (!new) {
-		status = -EINVAL;
-		LOG_ERROR_STATUS(status);
-		goto bail;
+	for (i = 0; i <= OCFS2_LAST_GLOBAL_SYSTEM_INODE; i++) {
+		new = ocfs_get_system_file_inode(osb, i, osb->node_num);
+		if (!new) {
+			ocfs_release_system_inodes(osb);
+			LOG_ERROR_STATUS(status = -EINVAL);
+			goto bail;
+		}
+		osb->system_inodes[i] = new;
 	}
-	osb->system_inodes[INODE_ALLOC_BITMAP_SYSTEM_INODE] = new;
 
-	/* journal file */
-	sys_off = osb->vol_layout.root_int_off + 
-		((OCFS_JOURNAL_FILE + osb->node_num) * osb->sect_size);
-	new = ocfs_iget(osb, sys_off);
-	if (!new) {
-		status = -EINVAL;
-		LOG_ERROR_STATUS(status);
-		goto bail;
-	}
-	osb->system_inodes[JOURNAL_SYSTEM_INODE] = new;
+bail:
+	LOG_EXIT_STATUS(status);
+	return(status);
+} /* ocfs_init_global_system_inodes */
 
-	/* orphan inode dir */
-	sys_off = osb->vol_layout.root_int_off + 
-		(OCFS_ORPHAN_DIR * osb->sect_size);
-	new = ocfs_iget(osb, sys_off);
-	if (!new) {
-		status = -EINVAL;
-		LOG_ERROR_STATUS(status);
-		goto bail;
+/*
+ * ocfs_init_local_system_inodes()
+ */
+static int ocfs_init_local_system_inodes(ocfs_super *osb)
+{
+	struct inode *new = NULL;
+	int status = 0;
+	int i;
+
+	LOG_ENTRY();
+
+	for (i = OCFS2_LAST_GLOBAL_SYSTEM_INODE + 1; i < NUM_SYSTEM_INODES ; i++) {
+		new = ocfs_get_system_file_inode(osb, i, osb->node_num);
+		if (!new) {
+			ocfs_release_system_inodes(osb);
+			LOG_ERROR_STATUS(status = -EINVAL);
+			goto bail;
+		}
+		osb->system_inodes[i] = new;
 	}
-	osb->system_inodes[ORPHAN_DIR_SYSTEM_INODE] = new;
 
 bail:
 	LOG_EXIT_STATUS(status);
 	return(status);
-} /* ocfs_init_system_inodes */
+} /* ocfs_init_local_system_inodes */
 
-
 /*
  * ocfs_release_system_inodes()
  */
@@ -275,9 +305,18 @@
 
 	LOG_ENTRY();
 
-	for (i=0; i<NUM_SYSTEM_INODES; i++) {
-		iput(osb->system_inodes[i]);
+	for (i = 0; i < NUM_SYSTEM_INODES; i++) {
+		if (osb->system_inodes[i]) {
+			iput(osb->system_inodes[i]);
+			osb->system_inodes[i] = NULL;
+		}
 	}
+
+	if (osb->sys_root_inode) {
+		iput(osb->sys_root_inode);
+		osb->sys_root_inode = NULL;
+	}
+
 	LOG_EXIT_STATUS(status);
 	return(status);
 } /* ocfs_release_system_inodes */
@@ -469,15 +508,16 @@
  */
 static int __init ocfs_driver_entry (void)
 {
-	int status = 0;
+	int status;
 
 	LOG_ENTRY ();
 
 	ocfs_version_print();
 	
 	ocfs_hostname = kmalloc(strlen(system_utsname.nodename) + 1, GFP_KERNEL);
-	if (ocfs_hostname==NULL)
+	if (ocfs_hostname == NULL)
 		return -EINVAL;
+
 	strcpy(ocfs_hostname, system_utsname.nodename);
 	printk("ocfs2: hostname is %s\n", ocfs_hostname);
 	
@@ -490,9 +530,6 @@
 	memset (&OcfsGlobalCtxt, 0, sizeof (ocfs_global_ctxt));
 	memset (&OcfsIpcCtxt, 0, sizeof (ocfs_ipc_ctxt));
 
-#ifdef OCFS_LINUX_MEM_DEBUG
-	INIT_LIST_HEAD (&(OcfsGlobalCtxt.item_list));
-#endif
 	INIT_LIST_HEAD (&(OcfsGlobalCtxt.osb_next));
 	INIT_LIST_HEAD (&(OcfsGlobalCtxt.osb_next));
 
@@ -515,8 +552,6 @@
 		goto leave;
 	}
 	
-	OcfsGlobalCtxt.hbm = DISK_HBEAT_COMM_ON;
-
 	spin_lock_init (&osb_id_lock);
 	spin_lock (&osb_id_lock);
 	osb_id = 0;
@@ -575,24 +610,25 @@
 
 	/* Read remaining insmod params */
 	if (node_number != OCFS_INVALID_NODE_NUM) {
-		if (node_number >= 0 && node_number < OCFS_MAXIMUM_NODES) {
-			OcfsGlobalCtxt.pref_node_num = node_number;
-			LOG_TRACE_ARGS("Preferred node number: %d\n",
-				       node_number);
-		}
-		else {
-			status = -EINVAL;
-			LOG_ERROR_STR("'node_number' must be between 0 and 31");
-		}
+		// this will be validated later
+		OcfsGlobalCtxt.pref_node_num = node_number;
+		LOG_TRACE_ARGS("Preferred node number: %d\n", node_number);
 	}
 
 	if (ip_port_v2 == 0)
-		OcfsGlobalCtxt.comm_info.ip_port = OCFS_IPC_DEFAULT_PORT;
+		OcfsGlobalCtxt.comm_info.ip_port =
+			htons(OCFS_IPC_DEFAULT_PORT);
+	else if (ip_port_v2 & 0xFFFF0000) {
+		status = -EINVAL;
+		LOG_ERROR_STR("'ip_port_v2' is too large");
+	}
 	else
-		OcfsGlobalCtxt.comm_info.ip_port = ip_port_v2;
-	LOG_TRACE_ARGS("IP port: %d\n", OcfsGlobalCtxt.comm_info.ip_port);
+		OcfsGlobalCtxt.comm_info.ip_port =
+			htons((u16)ip_port_v2);
+	LOG_TRACE_ARGS("IP port: %d\n",
+		       ntohs(OcfsGlobalCtxt.comm_info.ip_port));
 
-	if (node_name && strlen (node_name) < MAX_NODE_NAME_LENGTH) {
+	if (node_name && strlen(node_name) < MAX_NODE_NAME_LENGTH) {
 		OcfsGlobalCtxt.node_name = node_name;
 		LOG_TRACE_ARGS ("Node name: %s\n", OcfsGlobalCtxt.node_name);
 	} else {
@@ -600,16 +636,18 @@
 		LOG_ERROR_STR ("'node_name' not set or too long");
 	}
 
-	if (ip_address && strlen (ip_address) < MAX_IP_ADDR_LEN) {
-		OcfsGlobalCtxt.comm_info.ip_addr = ip_address;
+#define MAX_IPv4_ADDR_STR_LEN	15 /* 4x '255' + 3x '.' */
+	if (ip_address && strlen (ip_address) <= MAX_IPv4_ADDR_STR_LEN) {
+		OcfsGlobalCtxt.comm_info.addr_u.ip_addr4 =
+			in_aton(ip_address);
 		LOG_TRACE_ARGS ("IP address: %s\n", ip_address);
 	} else {
 		status = -EINVAL;
 		LOG_ERROR_STR ("'ip_address' not set or too long");
 	}
 
-	if (guid && strlen (guid) == GUID_LEN) {
-		memcpy (&OcfsGlobalCtxt.guid.guid, guid, GUID_LEN);
+	if (guid && strlen (guid) == OCFS2_GUID_LEN) {
+		memcpy(&OcfsGlobalCtxt.guid.guid, guid, OCFS2_GUID_LEN);
 		LOG_TRACE_ARGS ("Node guid: %s\n", guid);
 	} else {
 		status = -EINVAL;
@@ -617,7 +655,7 @@
 	}
 
 	if (status == 0) {
-		for (i = 0; i < GUID_LEN; ++i)
+		for (i = 0; i < OCFS2_GUID_LEN; ++i)
 			check_sum += (__u32) guid[i];
 		if (cs != check_sum) {
 			status = -EINVAL;
@@ -626,56 +664,13 @@
 	}
 
 	/* hardcoding... not used yet */
-	OcfsGlobalCtxt.comm_info.type = OCFS_UDP;
-	OcfsGlobalCtxt.comm_info.ip_mask = NULL;
+	OcfsGlobalCtxt.comm_info.ip_version = htons(4);
 
 	return status;
 }				/* ocfs_read_params */
 
 
-#ifdef OCFS_LINUX_MEM_DEBUG
 /*
- * ocfs_memcheck()
- *
- */
-static void ocfs_memcheck (void)
-{
-	struct list_head *iter;
-	struct list_head *temp_iter;
-	alloc_item *item;
-	char *memtype;
-	char sizeinfo[20];
-
-	list_for_each_safe (iter, temp_iter, &OcfsGlobalCtxt.item_list) {
-		item = list_entry (iter, alloc_item, list);
-		switch (item->type)
-		{
-		    case SLAB_ITEM:
-			/* TODO: use the actual slab name */
-			memtype = "SLAB";
-			snprintf(sizeinfo, 20, "slab=%p", item->u.slab);
-			break;
-		    case KMALLOC_ITEM:
-			memtype = "KMALLOC";
-			snprintf(sizeinfo, 20, "size=%d", item->u.length);
-			break;
-		    case VMALLOC_ITEM:
-			memtype = "VMALLOC";
-			snprintf(sizeinfo, 20, "size=%d", item->u.length);
-			break;
-		    default:
-			memtype = "UNKNOWN";
-			snprintf(sizeinfo, 20, "size=%d", item->u.length);
-			break;
-		}
-		LOG_ERROR_ARGS ("unfreed %s mem %x: %s tag='%s'", memtype, 
-				item->address, sizeinfo, item->tag);
-	}
-}  /* ocfs_memcheck */
-
-#endif				/* OCFS_LINUX_MEM_DEBUG */
-
-/*
  * ocfs_driver_exit()
  *
  * Called on rmmod
@@ -702,10 +697,6 @@
 
 	unregister_filesystem (&ocfs_fs_type);
 
-#ifdef OCFS_LINUX_MEM_DEBUG
-	ocfs_memcheck ();
-#endif
-
 	printk("Unloaded OCFS Driver module\n");
 	LOG_EXIT ();
 	return;
@@ -737,57 +728,68 @@
  *
  */
 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
-static int ocfs_statfs (struct super_block *sb, struct kstatfs *buf)
+static int ocfs_statfs(struct super_block *sb, struct kstatfs *buf)
 #else
-static int ocfs_statfs (struct super_block *sb, struct statfs *buf)
+static int ocfs_statfs(struct super_block *sb, struct statfs *buf)
 #endif
 {
-	ocfs_super *osb = NULL;
-	__u32 numbits, freebits = 0;
+	ocfs_super *osb;
+	__u32 numbits, freebits;
 	unsigned int reserved_bits;
-	int status = 0;
-	ocfs_file_entry *bm_lock = NULL;
+	int status;
+	ocfs2_dinode *bm_lock;
 	struct buffer_head *bh = NULL;
+	struct inode *inode = NULL;
 
 	LOG_ENTRY_ARGS ("(%p, %p)\n", sb, buf);
 
 	osb = OCFS_SB(sb);
-	numbits = osb->cluster_bitmap.validbits;
- 
-	status = ocfs_read_bh (osb, OCFS_BITMAP_LOCK_OFFSET, &bh, 0, NULL);
+
+	inode = ocfs_get_system_file_inode(osb, GLOBAL_BITMAP_SYSTEM_INODE, -1);
+	if (!inode) {
+		LOG_ERROR_STR("failed to get bitmap inode");
+		return -EIO;
+	}
+
+	/* must this be a non cached read? */
+	status = ocfs_read_bh (osb, GET_INODE_FEOFF(inode), &bh, 0, inode);
 	if (status < 0) {
 		LOG_ERROR_STR("failed to read bitmap data");
 		return -EIO;
 	}
+
 	bm_lock = OCFS_BH_GET_DATA_READ(bh);
-	if (numbits >= bm_lock->u.bitinfo.used_bits)
-	    freebits = numbits - bm_lock->u.bitinfo.used_bits;
 
+	freebits = numbits = osb->cluster_bitmap.validbits;
+
+	if (numbits >= bm_lock->id1.bitmap1.i_used)
+	    freebits -= bm_lock->id1.bitmap1.i_used;
+
 	/* take out the space reserved for system files */
 	reserved_bits = ocfs_clusters_for_bytes(sb, 8 * ONE_MEGA_BYTE);
 	freebits -= reserved_bits;
 
 	buf->f_type = OCFS_MAGIC;
 	buf->f_bsize = sb->s_blocksize;
-	buf->f_namelen = OCFS_MAX_FILENAME_LENGTH;
+	buf->f_namelen = OCFS2_MAX_FILENAME_LENGTH;
 	buf->f_bavail = buf->f_bfree;
 #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)
 	buf->f_blocks =
 		(unsigned long) ((unsigned long) (numbits) *
-				 (unsigned long) (osb->vol_layout.cluster_size >> osb->sect_size_bits) -
+				 (unsigned long) (osb->s_clustersize >> osb->sb->s_blocksize_bits) -
 				 reserved_bits);
 	buf->f_bfree =
-	    (unsigned long) (freebits * (osb->vol_layout.cluster_size >> osb->sect_size_bits));
+	    (unsigned long) (freebits * (osb->s_clustersize >> osb->sb->s_blocksize_bits));
 	buf->f_bavail = buf->f_bfree;
 	buf->f_files = (unsigned long) (numbits);
 	buf->f_ffree = (unsigned long) (numbits) - freebits;
 #else
 	buf->f_blocks =
 		(sector_t) ((unsigned long) (numbits) *
-			    (unsigned long) (osb->vol_layout.cluster_size >> osb->sect_size_bits) -
+			    (unsigned long) (osb->s_clustersize >> osb->sb->s_blocksize_bits) -
 			    reserved_bits);
 	buf->f_bfree =
-	    (sector_t) (freebits * (osb->vol_layout.cluster_size >> osb->sect_size_bits));
+	    (sector_t) (freebits * (osb->s_clustersize >> osb->sb->s_blocksize_bits));
 	buf->f_files = (sector_t) (numbits);
 	buf->f_ffree = (sector_t) (numbits) - freebits;
 #endif
@@ -810,9 +812,6 @@
 		 sizeof(ocfs_inode_private), 0, SLAB_NO_REAP | SLAB_HWCACHE_ALIGN,
 		 NULL, NULL);
 
-	OcfsGlobalCtxt.fe_cache = kmem_cache_create ("ocfs2_fileentry",
-		512, 0, SLAB_NO_REAP | SLAB_HWCACHE_ALIGN, NULL, NULL);
-
 	OcfsGlobalCtxt.extent_cache = kmem_cache_create ("ocfs2_extent",
 		sizeof(ocfs_extent) + OCFS_POINTER_SIZE, 0, SLAB_NO_REAP | SLAB_HWCACHE_ALIGN,
 		NULL, NULL);
@@ -829,60 +828,137 @@
 static void ocfs_free_mem_lists (void)
 {
 	kmem_cache_destroy (OcfsGlobalCtxt.inode_cache);
-	kmem_cache_destroy (OcfsGlobalCtxt.fe_cache);
 	kmem_cache_destroy (OcfsGlobalCtxt.extent_cache);
 	OCFS_CLEAR_FLAG (OcfsGlobalCtxt.flags, OCFS_FLAG_MEM_LISTS_INITIALIZED);
 }				/* ocfs_free_mem_lists */
 
-
-/*
- * ocfs_mount_volume()
- *
- */
-static int ocfs_mount_volume (struct super_block *sb, int reclaim_id, struct inode *root)
+static int ocfs2_sb_probe(struct super_block *sb,
+			  struct buffer_head **bh, 
+			  int *sector_size)
 {
-	int status = 0;
-	ocfs_super *osb = NULL;
-	ocfs_vol_disk_hdr *vol_header = NULL;
-	ocfs_vol_label *vol_label = NULL;
-	int child_pid, i, sect_size;
-	struct buffer_head *bhs[] = { NULL, NULL };
-	struct inode *inode = NULL;
+	int status = 0, tmpstat;
+	ocfs1_vol_disk_hdr *hdr;
+	ocfs2_dinode *di;
+	int blksize;
 
-	LOG_ENTRY ();
+	*bh = NULL;
 
-	/* Force 512B sector size for vol_header reads */
-	status = ocfs_set_blocksize(sb, 512, 9);
+	/* may be > 512 */
+	*sector_size = bdev_hardsect_size(sb->s_bdev);
+	if (*sector_size > 4096) {
+		LOG_ERROR_ARGS("Hardware sector size too large: %d (max=4096)\n", *sector_size);
+		status = -EINVAL;
+		goto bail;
+	}
+	
+	/* check block zero for old format */
+	status = ocfs2_get_sector(sb, bh, 0, *sector_size);
 	if (status < 0) {
-		LOG_ERROR_STR("unable to set vol_header blocksize");
-		goto leave;
+		LOG_ERROR_STATUS(status);
+		goto bail;
 	}
+	hdr = OCFS_BH_GET_DATA_READ(*bh);
+	if (hdr->major_version == OCFS1_MAJOR_VERSION) {
+		LOG_ERROR_ARGS("incompatible version: %u.%u\n",
+			       hdr->major_version, hdr->minor_version);
+		status = -EINVAL;
+	}
+	if (memcmp (hdr->signature, OCFS1_VOLUME_SIGNATURE,
+		    strlen (OCFS1_VOLUME_SIGNATURE)) == 0) {
+		LOG_ERROR_ARGS("incompatible volume signature: %8s\n",
+			       hdr->signature);
+		status = -EINVAL;
+	}
+	OCFS_BH_PUT_DATA(*bh);
+	brelse(*bh);
+	*bh = NULL;
+	if (status < 0) {
+		LOG_ERROR_STR("This is an ocfs v1 filesystem which must be upgraded "
+			      "before mounting with ocfs v2");
+		goto bail;
+	}
 
-	/* get first two blocks */
-	for (i=0; i<2; i++) {
-		bhs[i] = getblk (OCFS_GET_BLOCKDEV(sb), i, 512);
-		if (bhs[i] == NULL) {
-			LOG_ERROR_STATUS(status = -EIO);
-			goto leave;
+	/*
+	 * Now check at magic offset for 512, 1024, 2048, 4096
+	 * blocksizes.  4096 is the maximum blocksize because it is
+	 * the minimum clustersize.
+	 */
+	status = -EINVAL;
+	for (blksize = *sector_size; blksize <= OCFS2_MAX_BLOCKSIZE; blksize <<= 1) {
+		tmpstat = ocfs2_get_sector(sb, bh, 
+					   OCFS2_SUPER_BLOCK_BLKNO, 
+					   blksize);
+		if (tmpstat < 0) {
+			LOG_ERROR_STATUS(status = tmpstat);
+			goto bail;
 		}
+		di = (ocfs2_dinode *) OCFS_BH_GET_DATA_READ(*bh);
+		status = ocfs_verify_volume(di, *bh, blksize);
+		OCFS_BH_PUT_DATA(*bh);
+		if (status >= 0)
+			goto bail;
+		brelse(*bh);
+		*bh = NULL;
+		if (status != -EAGAIN)
+			break;
+	}
 
-		lock_buffer(bhs[i]);
-		if (!buffer_dirty(bhs[i]))
-			clear_buffer_uptodate(bhs[i]);
-		unlock_buffer(bhs[i]);
+bail:
+	return status;
+}
+
+static int ocfs2_get_sector(struct super_block *sb, struct buffer_head **bh, int block, int sect_size)
+{
+	int i, bits=0, status = 0;
+	/* Set blocksize to sect_size, then read 'block' into *bh; bits = log2(sect_size). */
+	for (i=30; i>=0; i--) {	/* 1 << 31 would overflow a signed int */
+		if ((1 << i) == sect_size)
+			bits = i;
 	}
+	if (!bits) {
+		LOG_ERROR_STR("unable to get blocksize bits");
+		return -EINVAL;
+	}
+	
+	status = ocfs_set_blocksize(sb, sect_size, bits);
+	if (status < 0) {
+		LOG_ERROR_STR("unable to set blocksize");
+		return status;
+	}
 
-	ll_rw_block(READ, 2, bhs);
-	for (i=0; i<2; i++)
-		wait_on_buffer(bhs[i]);
+	*bh = sb_getblk(sb, block);
+	if (!*bh) {
+		LOG_ERROR_STATUS(-EIO);
+		return -EIO;
+	}
+	lock_buffer(*bh);
+	if (!buffer_dirty(*bh))
+		clear_buffer_uptodate(*bh);
+	unlock_buffer(*bh);
+	ll_rw_block(READ, 1, bh);
+	wait_on_buffer(*bh);
+	return 0;
+}
 
-	vol_header = OCFS_BH_GET_DATA_READ(bhs[0]);
-	vol_label = OCFS_BH_GET_DATA_READ(bhs[1]);
 
-	LOG_TRACE_STR ("ocfs_verify_volume...");
-	status = ocfs_verify_volume (vol_header);
+/*
+ * ocfs_mount_volume()
+ *
+ */
+static int ocfs_mount_volume (struct super_block *sb, int reclaim_id, struct inode *root)
+{
+	int status;
+	ocfs_super *osb;
+	int child_pid;
+	struct buffer_head *bh = NULL;
+	int sector_size;
+
+	LOG_ENTRY ();
+
+	/* probe for superblock */
+	status = ocfs2_sb_probe(sb, &bh, &sector_size);
 	if (status < 0) {
-		LOG_ERROR_STATUS (status);
+		LOG_ERROR_STR("superblock probe failed!");
 		goto leave;
 	}
 
@@ -896,30 +972,20 @@
 
 	osb->sb = sb;
 
+	/* Save off for ocfs_rw_direct */
+	osb->s_sectsize_bits = ocfs_get_right_shift_bits(sector_size);
+	if (!osb->s_sectsize_bits)
+		BUG();
+
 	osb->reclaim_id = reclaim_id;
 
-	/* FIXME: here it should use the actual blocksize */
-	sect_size = OCFS_SECTOR_SIZE;
-	status = ocfs_initialize_osb(osb, vol_header, vol_label,
-				     sect_size);
+	/* s_blocksize was set in the probe */
+	status = ocfs_initialize_osb(osb, bh);
 	if (status < 0) {
 		LOG_ERROR_STATUS (status);
 		goto leave;
 	}
 
-	/* Now set real blocksize */
-	status = ocfs_set_blocksize(sb, osb->sect_size,
-				    osb->sect_size_bits);
-	if (status < 0) {
-		LOG_ERROR_STR("unable to set final blocksize");
-		goto leave;
-	}
-
-	OCFS_BH_PUT_DATA(bhs[0]);
-	vol_header = NULL;
-	OCFS_BH_PUT_DATA(bhs[1]);
-	vol_label = NULL;
-
 	down(&(osb->osb_res));
 
 	/* Launch the NM thread for the mounted volume */
@@ -948,7 +1014,8 @@
 		OcfsIpcCtxt.dlm_msg_size = OCFS_DLM_MAX_MSG_SIZE;
 		OcfsIpcCtxt.version = OCFS_IPC_DLM_VERSION;
 		/* start the listener thread */
-		status = ocfs_init_udp_sock (&OcfsIpcCtxt.send_sock, &OcfsIpcCtxt.recv_sock);
+		status = ocfs_init_udp_sock(&OcfsIpcCtxt.send_sock,
+					    &OcfsIpcCtxt.recv_sock);
 		if (status < 0) {
 			LOG_ERROR_STATUS (status);
 			goto leave;
@@ -977,31 +1044,14 @@
 	ocfs_wait (osb->nm_init_event, (atomic_read (&osb->nm_init) >= OCFS_HEARTBEAT_INIT ), 0);
 
 	down(&(osb->osb_res));
-	osb->hbm = DISK_HBEAT_COMM_ON;
-	
 	down (&(osb->publish_lock));
 	ocfs_nm_heart_beat (osb, HEARTBEAT_METHOD_DISK, 1);
 	up (&(osb->publish_lock));
-	
-	osb->publ_map |= (1 << osb->node_num);
+
+	ocfs_node_map_set_bit(&osb->publ_map, osb->node_num);
 	osb->vol_state = VOLUME_ENABLED;
 	up (&(osb->osb_res));
 
-	inode = ocfs_iget(osb, OCFS_ROOT_INODE_FE_OFF(osb));
-	if (!inode) {
-		status = -EIO;
-		LOG_ERROR_STATUS (status);
-		goto leave;
-	}
-
-	osb->root_inode = inode;
-
-	status = ocfs_init_system_inodes(osb);
-	if (status < 0) {
-		LOG_ERROR_STATUS(status);
-		goto leave;
-	}
-
 	/* Read the publish sector for this node and cleanup dirent being */
 	/* modified when we crashed. */
 	LOG_TRACE_STR ("ocfs_check_volume...");
@@ -1040,16 +1090,8 @@
 	osb->vol_state = VOLUME_MOUNTED;
 
 leave:
-	if (bhs[0] != NULL) {
-		if (vol_header)
-			OCFS_BH_PUT_DATA(bhs[0]);
-		brelse(bhs[0]);
-	}
-	if (bhs[1] != NULL) {
-		if (vol_label)
-			OCFS_BH_PUT_DATA(bhs[1]);
-		brelse(bhs[1]);
-	}
+	if (bh != NULL)
+		brelse(bh);
 	LOG_EXIT_STATUS (status);
 	return status;
 }				/* ocfs_mount_volume */
@@ -1061,11 +1103,9 @@
  */
 int ocfs_dismount_volume (struct super_block *sb)
 {
-	int status = 0;
+	int status;
 	int AcquiredOSB = 0;
 	ocfs_super *osb = NULL;
-	__u32 nodemap;
-	__u32 tempmap;
 	int i;
 
 	LOG_ENTRY_ARGS ("(0x%p)\n", sb);
@@ -1077,7 +1117,7 @@
 
 	osb = OCFS_SB(sb);
 
-	if ( osb == NULL) {
+	if (osb == NULL) {
 		LOG_ERROR_STATUS (status = -EFAIL);
 		goto leave;
 	}
@@ -1135,13 +1175,9 @@
 	}
 
 
-	/* create map of all active nodes except self */
-	nodemap = (__u32)osb->publ_map;
-	tempmap = (1 << osb->node_num);
-	nodemap &= (~tempmap);
 
 	/* send dismount msg to all */
-	status = ocfs_send_dismount_msg (osb, (__u64)nodemap);
+	status = ocfs_send_dismount_msg (osb);
 	if (status < 0)
 		LOG_ERROR_STATUS (status);
 
@@ -1175,7 +1211,7 @@
 		osb->node_cfg_info[osb->node_num]->node_name, osb->node_num);
 
 	/* Free all nodecfgs */
-	for (i = 0; i < OCFS_MAXIMUM_NODES; ++i) {
+	for (i = 0; i < osb->max_nodes; ++i) {
 		BARF_BARF_BARF *p;
 
 		p = osb->node_cfg_info[i];
@@ -1212,18 +1248,17 @@
 /* true if mounted, false otherwise */
 int ocfs_publish_get_mount_state(ocfs_super *osb, int node_num) 
 {
-	int status = 0;
-	ocfs_publish *publish = NULL;
+	int status;
+	ocfs_publish *publish;
 	struct buffer_head *publish_bh = NULL;
-	__u64 offset = 0;
+	__u64 offset;
 	int retval = 0;
 	int flags = 0;
 
 	LOG_ENTRY();
 
 	/* read it in */
-	offset = osb->vol_layout.publ_sect_off +
-		(node_num * osb->sect_size);
+	offset = (osb->publish_blkno + node_num) << osb->sb->s_blocksize_bits;
 	/* we may be called during mount in which case our publish
 	 * sector might be dirty. */
 	if (node_num == osb->node_num)
@@ -1249,16 +1284,15 @@
 
 static int ocfs_publish_toggle_mounted(ocfs_super *osb, int node_num, int value) 
 {
-	int status = 0;
-	ocfs_publish *publish = NULL;
+	int status;
+	ocfs_publish *publish;
 	struct buffer_head * publish_bh = NULL;
-	__u64 offset = 0;
+	__u64 offset;
 
 	LOG_ENTRY_ARGS("(node_num=%d, value=%d)\n", node_num, value);
 
 	/* read it in */
-	offset = osb->vol_layout.publ_sect_off +
-		(node_num * osb->sect_size);
+	offset = (osb->publish_blkno + node_num) << osb->sb->s_blocksize_bits;
 	status = ocfs_read_bh(osb, offset, &publish_bh, 0, NULL);
 	if (status < 0) {
 		LOG_ERROR_STATUS (status);
@@ -1327,26 +1361,45 @@
 	return(retval);
 }
 
-
 /*
  * ocfs_initialize_osb()
  *
  */
-static int ocfs_initialize_osb(ocfs_super *osb, ocfs_vol_disk_hdr *vdh,
-			       ocfs_vol_label *vol_label,
-			       __u32 sect_size)
+static int ocfs_initialize_osb(ocfs_super *osb, struct buffer_head *bh)
 {
 	int status = 0;
 	ocfs_publish *publish = NULL;
-	__u32 bitmap_bits, length;
-	__u64 offset;
-	ocfs_vol_layout *vol_layout;
+	__u32 length;
+	__u64 offset, ret;
 	struct buffer_head *publish_bh = NULL;  /* our own publish sector */
-	struct buffer_head *publish_bhs[OCFS_MAXIMUM_NODES]; /* all the publish sectors */
+	struct buffer_head **publish_bhs = NULL; /* all the publish sectors */
 	int i;
+	ocfs2_dinode *di = NULL;
+	struct inode *inode = NULL;
 
 	LOG_ENTRY ();
 
+	
+	osb->vol_label = kmalloc(64, GFP_KERNEL);
+	if (!osb->vol_label) {
+		LOG_ERROR_STR("unable to alloc vol label");
+		status = -ENOMEM;
+		goto done_nojournal;
+	}
+	osb->uuid = kmalloc(MAX_VOL_ID_LENGTH, GFP_KERNEL);
+	if (!osb->uuid) {
+		LOG_ERROR_STR("unable to alloc uuid");
+		status = -ENOMEM;
+		goto done_nojournal;
+	}
+
+
+	/* this needs to be done before most other initializations */
+	di = OCFS_BH_GET_DATA_READ(bh);
+	osb->max_nodes = le32_to_cpu(di->id2.i_super.s_max_nodes);
+	printk("max_nodes for this device: %u\n", osb->max_nodes);
+	OCFS_BH_PUT_DATA(bh);
+
 	/* FIXME
 	 * This should be done in ocfs_journal_init(), but unknown
 	 * ordering issues will cause the filesystem to crash.
@@ -1355,6 +1408,7 @@
 	 * be my guest.
 	 */
 	/* initialize our journal structure */
+
 	osb->journal = kmalloc(sizeof(ocfs_journal), GFP_KERNEL);
 	if (!osb->journal) {
 		LOG_ERROR_STR("unable to alloc journal");
@@ -1363,17 +1417,46 @@
 	}
 	memset(osb->journal, 0, sizeof(ocfs_journal));
 
-	memset(publish_bhs, 0, OCFS_MAXIMUM_NODES * sizeof (struct buffer_head *));
-	if (osb == NULL) {
-		LOG_ERROR_STATUS(status = -EFAIL);
+	publish_bhs = kmalloc(sizeof(struct buffer_head *) * osb->max_nodes, GFP_KERNEL);
+	if (publish_bhs == NULL) {
+		LOG_ERROR_STATUS(status = -ENOMEM);
 		goto finally;
 	}
+	memset(publish_bhs, 0, sizeof(struct buffer_head *) * osb->max_nodes);
 
+	osb->vol_node_map = kmalloc(sizeof(ocfs_vol_node_map) * osb->max_nodes, GFP_KERNEL);
+	if (!osb->vol_node_map) {
+		LOG_ERROR_STATUS(status = -ENOMEM);
+		goto bail;
+	}
+	memset(osb->vol_node_map, 0, sizeof(ocfs_vol_node_map) * osb->max_nodes);
+
+	osb->lock_recovery_lists = kmalloc(sizeof(struct list_head) * osb->max_nodes, GFP_KERNEL);
+	if (!osb->lock_recovery_lists) {
+		LOG_ERROR_STATUS(status = -ENOMEM);
+		goto bail;
+	}
+	memset(osb->lock_recovery_lists, 0, sizeof(struct list_head) * osb->max_nodes);
+
+	osb->last_publ_seq_num = kmalloc(sizeof(__u64) * osb->max_nodes, GFP_KERNEL);
+	if (!osb->last_publ_seq_num) {
+		LOG_ERROR_STATUS(status = -ENOMEM);
+		goto bail;
+	}
+	memset(osb->last_publ_seq_num, 0, sizeof(__u64) * osb->max_nodes);
+	
+	osb->node_cfg_info = kmalloc(sizeof(BARF_BARF_BARF *) * osb->max_nodes, GFP_KERNEL);
+	if (!osb->node_cfg_info) {
+		LOG_ERROR_STATUS(status = -ENOMEM);
+		goto bail;
+	}
+	memset(osb->node_cfg_info, 0, sizeof(BARF_BARF_BARF *) * osb->max_nodes);
+
+	ocfs_node_map_init(osb, &osb->publ_map);
+
+		
 	OCFS_CLEAR_FLAG (osb->osb_flags, OCFS_OSB_FLAGS_SHUTDOWN);
 
-	vol_layout = &(osb->vol_layout);
-
-	vol_layout->cluster_size = (__u32) (vdh->cluster_size);
 	osb->obj_id.type = OCFS_TYPE_OSB;
 	osb->obj_id.size = sizeof (ocfs_super);
 	INIT_LIST_HEAD (&(osb->osb_next));
@@ -1390,7 +1473,7 @@
 	init_MUTEX (&(osb->local_alloc_sem));
 
 	spin_lock_init(&osb->recovery_map_lock);
-	osb->recovery_map = 0;
+	ocfs_node_map_init(osb, &osb->recovery_map);
 
 	osb->needs_flush = 0;
 	osb->disable_recovery = 0;
@@ -1413,89 +1496,162 @@
 	INIT_LIST_HEAD (&(osb->vote_obj_queue));
 	INIT_LIST_HEAD (&(osb->cache_lock_list));
 	INIT_LIST_HEAD (&(osb->needs_flush_head));
-	for (i=0; i<32; i++) {
+	for (i=0; i<osb->max_nodes; i++) {
 		INIT_LIST_HEAD(&(osb->lock_recovery_lists[i]));
 	}
-	osb->sect_size = sect_size;
-			
 	osb->node_num = OCFS_INVALID_NODE_NUM;
 
-	memcpy (vol_layout->mount_point, vdh->mount_point, strlen (vdh->mount_point));
-	vol_layout->serial_num = vdh->serial_num;
-	vol_layout->size = vdh->device_size;
-	vol_layout->start_off = vdh->start_off;
-	vol_layout->bitmap_off = vdh->bitmap_off;
-	vol_layout->publ_sect_off = vdh->publ_off;
-	vol_layout->vote_sect_off = vdh->vote_off;
-	vol_layout->root_bitmap_off = vdh->root_bitmap_off;
-	vol_layout->root_start_off = vdh->root_off;
-	vol_layout->root_int_off = vdh->internal_off;
-	vol_layout->root_size = vdh->root_size;
-	vol_layout->cluster_size = (__u32) vdh->cluster_size;
-	vol_layout->num_nodes = (__u32) vdh->num_nodes;
-	vol_layout->data_start_off = vdh->data_start_off;
-	vol_layout->root_bitmap_size = vdh->root_bitmap_size;
-	vol_layout->num_clusters = vdh->num_clusters;
-	vol_layout->dir_node_size = vdh->dir_node_size;
-	vol_layout->file_node_size = vdh->file_node_size;
-	vol_layout->node_cfg_off = vdh->node_cfg_off;
-	vol_layout->node_cfg_size = vdh->node_cfg_size;
-	vol_layout->new_cfg_off = vdh->new_cfg_off;
-	vol_layout->prot_bits = vdh->prot_bits;
-	vol_layout->uid = vdh->uid;
-	vol_layout->gid = vdh->gid;
+	/* not using any of these sb fields yet */
+#if 0
+di->i_ctime = cpu_to_le64(format_time); // use this as s_wtime (write time)
+di->i_mtime = cpu_to_le64(format_time); // use this as s_mtime (mount time)
+di->i_blkno = cpu_to_le64(super_off >> blocksize_bits);
+di->id2.i_super.s_mnt_count = 0;
+di->id2.i_super.s_max_mnt_count = cpu_to_le16(OCFS2_DFL_MAX_MNT_COUNT);
+di->id2.i_super.s_state = 0;
+di->id2.i_super.s_errors = 0;
+di->id2.i_super.s_lastcheck = cpu_to_le64(format_time);
+di->id2.i_super.s_checkinterval = cpu_to_le32(OCFS2_DFL_CHECKINTERVAL);
+di->id2.i_super.s_creator_os = cpu_to_le32(OCFS2_OS_LINUX);
+#endif	
 
-	memcpy (vol_layout->vol_id, vol_label->vol_id, MAX_VOL_ID_LENGTH);
+	di = OCFS_BH_GET_DATA_READ(bh);
 
-	if (vol_layout->dir_node_size == 0) 
-		vol_layout->dir_node_size = OCFS_DEFAULT_DIR_NODE_SIZE;
+	/* get some pseudo constants for clustersize bits */
+	osb->s_clustersize_bits = le32_to_cpu(di->id2.i_super.s_clustersize_bits);
+	osb->s_clustersize = 1 << osb->s_clustersize_bits;
+	printk("clusterbits=%d\n", osb->s_clustersize_bits);
+	OCFS_ASSERT(osb->s_clustersize_bits);
 
-	if (vol_layout->file_node_size == 0) 
-		vol_layout->file_node_size = OCFS_DEFAULT_FILE_NODE_SIZE;
+	strncpy(osb->vol_label, di->id2.i_super.s_label, 63);
+	osb->vol_label[63] = '\0';
+	memcpy(osb->uuid, di->id2.i_super.s_uuid, MAX_VOL_ID_LENGTH);
+	osb->root_blkno = le64_to_cpu(di->id2.i_super.s_root_blkno);
+	osb->system_dir_blkno = le64_to_cpu(di->id2.i_super.s_system_dir_blkno);
+	printk("vol_label: %s\n", osb->vol_label);
+	{
+		int ttt;
+		printk("uuid: ");
+		for (ttt=0; ttt<16; ttt++)
+		printk("%02x ", di->id2.i_super.s_uuid[ttt]);
+		printk("\n");
+	}
+	printk("root_blkno=%llu, system_dir_blkno=%llu\n", osb->root_blkno, osb->system_dir_blkno);
 
-	osb->inode_size = OCFS_DEFAULT_INODE_SIZE;
+	OCFS_BH_PUT_DATA(bh);
 
-	/* get some pseudo constants for >> bits */
-	osb->sect_size_bits = ocfs_get_right_shift_bits(sect_size);
-	osb->cluster_size_bits = ocfs_get_right_shift_bits(vol_layout->cluster_size);
-	osb->dir_alloc_bits = ocfs_get_right_shift_bits(vol_layout->dir_node_size);
-	osb->file_alloc_bits = ocfs_get_right_shift_bits(vol_layout->file_node_size);
-	osb->inode_alloc_bits = ocfs_get_right_shift_bits(osb->inode_size);
-	printk("sectbits=%d, clusterbits=%d, dirbits=%d, filebits=%d, inodebits=%d\n",
-	       osb->sect_size_bits, osb->cluster_size_bits, 
-	       osb->dir_alloc_bits, osb->file_alloc_bits, osb->inode_alloc_bits);
+	/* load root, system_dir, and all global system inodes */
+	status = ocfs_init_global_system_inodes(osb);
+	if (status < 0) {
+		LOG_ERROR_STATUS(status);
+		goto bail;
+	}
+
+	/*
+	 * an inode for the whole DLM area
+	 * autoconfig, new autoconfig, publish, vote
+	 */	
+	inode = ocfs_get_system_file_inode(osb, DLM_SYSTEM_INODE, -1);
+	if (!inode) {
+		LOG_ERROR_STATUS(status = -EINVAL);
+		goto bail;
+	}
+	/* We're in the mount path, pretend locked=1 */
+	status = ocfs_lookup_file_allocation(osb, 0ULL, &ret, 1, NULL,
+					     inode, 1);
+	if (status < 0) {
+		LOG_ERROR_STATUS(status);
+		goto bail;
+	}
+	// i_size must be at least
+	// (2 + osb->max_nodes + 4) + osb->max_nodes + osb->max_nodes
+	if (inode->i_size >> osb->sb->s_blocksize_bits < 
+	    (OCFS_VOLCFG_HDR_SECTORS + osb->max_nodes) + // autoconfig
+	    OCFS_VOLCFG_NEWCFG_SECTORS + // new autoconfig
+	    osb->max_nodes + // publish
+	    osb->max_nodes ) { // vote
+		LOG_ERROR_ARGS("dlm area size incorrect: "
+			       "found=%llu, need=%u\n", 
+			       inode->i_size,
+			       (OCFS_VOLCFG_HDR_SECTORS + 
+				OCFS_VOLCFG_NEWCFG_SECTORS +
+			       (osb->max_nodes*3)) << 
+			       osb->sb->s_blocksize_bits);
+		status = -EINVAL;
+		goto bail;
+
+	}
+	osb->autoconfig_blkno = ret >> osb->sb->s_blocksize_bits;
+	osb->autoconfig_blocks = OCFS_VOLCFG_HDR_SECTORS + osb->max_nodes;
 	
-	OCFS_ASSERT(osb->sect_size_bits);
-	OCFS_ASSERT(osb->cluster_size_bits);
-	OCFS_ASSERT(osb->dir_alloc_bits);
-	OCFS_ASSERT(osb->file_alloc_bits);
-	OCFS_ASSERT(osb->inode_alloc_bits);
+	osb->new_autoconfig_blkno = osb->autoconfig_blkno + osb->autoconfig_blocks;
+	osb->new_autoconfig_blocks = OCFS_VOLCFG_NEWCFG_SECTORS;
+	osb->total_autoconfig_blocks = OCFS_VOLCFG_NEWCFG_SECTORS + osb->max_nodes;
+	
+	osb->publish_blkno = osb->new_autoconfig_blkno + osb->new_autoconfig_blocks;
+	osb->publish_blocks = osb->max_nodes;
+	
+	osb->vote_blkno = osb->publish_blkno + osb->publish_blocks;
+	osb->vote_blocks = osb->max_nodes;
 
+	printk("autoconfig: blkno=%llu, blocks=%llu newblkno=%llu newblocks=%llu\n", 
+	       osb->autoconfig_blkno, osb->autoconfig_blocks, 
+	       osb->new_autoconfig_blkno, osb->new_autoconfig_blocks);
+	printk("publish: blkno=%llu, blocks=%llu\n", osb->publish_blkno, 
+	       osb->publish_blocks);
+	printk("vote: blkno=%llu, blocks=%llu\n", osb->vote_blkno, osb->vote_blocks);
 
-	osb->max_dir_node_ent = (__u32) (vol_layout->dir_node_size >> osb->sect_size_bits) - 2;
-	bitmap_bits = (__u32) vol_layout->num_clusters;
+	osb->autoconfig_bhs = ocfs_malloc (osb->total_autoconfig_blocks
+				    * sizeof(struct buffer_head *));
+	if (!osb->autoconfig_bhs) {
+		LOG_ERROR_STATUS (status = -ENOMEM);
+		goto bail;
+	}
+	memset(osb->autoconfig_bhs, 0, 
+	       osb->total_autoconfig_blocks * sizeof(struct buffer_head *));
 
-	ocfs_initialize_bitmap (&osb->cluster_bitmap, bitmap_bits, ONE_MEGA_BYTE * 8);
+	iput(inode);
+
+
+	
+	/* 
+	 * global bitmap 
+	 */	
+	inode = ocfs_get_system_file_inode(osb, GLOBAL_BITMAP_SYSTEM_INODE, -1);
+	if (!inode) {
+		LOG_ERROR_STATUS(status = -EINVAL);
+		goto bail;
+	}
+	/* We're in the mount path, pretend locked=1 */
+	status = ocfs_lookup_file_allocation(osb, 0ULL, &ret, 1, NULL,
+					     inode, 1);
+	if (status < 0) {
+		LOG_ERROR_STATUS(status);
+		goto bail;
+	}
+	/* for now, just one extent... but in the future... */
+	osb->bitmap_blkno = ret >> osb->sb->s_blocksize_bits;
+	osb->bitmap_blocks = OCFS_I(inode)->alloc_size >> osb->sb->s_blocksize_bits;
+	osb->num_clusters = OCFS_I(inode)->u.ip_bitinfo.total_bits;
+	printk("bitmap_blkno=%llu, bitmap_blocks=%llu, num_clusters=%llu\n",
+	       osb->bitmap_blkno, osb->bitmap_blocks, osb->num_clusters);
+	
+	ocfs_initialize_bitmap(osb->sb, &osb->cluster_bitmap,
+			       osb->num_clusters,
+			       OCFS_I(inode)->alloc_size << 3);
 	/* read the whole cluster bitmap off disk, even though we only
 	 * need the beginning of it. */
-	status = ocfs_read_bhs(osb, vol_layout->bitmap_off, ONE_MEGA_BYTE, osb->cluster_bitmap.chunk, 0, NULL);
+	status = ocfs_read_bhs(osb, ret, OCFS_I(inode)->alloc_size, 
+			       osb->cluster_bitmap.chunk, 0, inode);
 	if (status < 0) {
 		LOG_ERROR_STATUS (status);
 		goto bail;
 	}
+	iput(inode);
 
+
 	osb->prealloc_lock = 0;
 
-	osb->cfg_numblocks = OCFS_MAXIMUM_NODES + OCFS_VOLCFG_NEWCFG_SECTORS;
-	osb->cfg_len = osb->cfg_numblocks * osb->sect_size;
-	osb->cfg_bhs = ocfs_malloc (osb->cfg_numblocks
-				    * sizeof(struct buffer_head *));
-	if (!osb->cfg_bhs) {
-		LOG_ERROR_STATUS (status = -ENOMEM);
-		goto bail;
-	}
-	memset(osb->cfg_bhs, 0, 
-	       osb->cfg_numblocks * sizeof(struct buffer_head *));
 
 	status = ocfs_get_config (osb);
 	if (status < 0) {
@@ -1504,7 +1660,7 @@
 	}
 
 	/* Read the Publish Sector of local Node */
-	offset = vol_layout->publ_sect_off + (osb->node_num * osb->sect_size);
+	offset = (osb->publish_blkno + osb->node_num) << osb->sb->s_blocksize_bits;
 	status = ocfs_read_bh(osb, offset, &publish_bh, 0, NULL);
 	if (status < 0) {
 		LOG_ERROR_STATUS (status);
@@ -1512,7 +1668,14 @@
 	}
 	publish = OCFS_BH_GET_DATA_WRITE(publish_bh);
 
-	publish->time = ocfs_get_publish_time();
+	/* 
+	 * FIXME: This really ought to be something exported by the
+	 * identical code in heartbeat.c
+	 */
+	publish->time = jiffies;
+	/* Disallow 0 */
+	if (!publish->time)
+    		publish->time = 1;
 
 	OCFS_BH_PUT_DATA(publish_bh);
 	publish = NULL;
@@ -1525,19 +1688,27 @@
 	brelse(publish_bh);
 
 	/*  Read disk for all Publish Sectors  */
-	length = OCFS_MAXIMUM_NODES * osb->sect_size;
-	status = ocfs_read_bhs(osb, vol_layout->publ_sect_off, length, 
-			       publish_bhs, 0, NULL);
+	length = osb->max_nodes << osb->sb->s_blocksize_bits;
+	offset = osb->publish_blkno << osb->sb->s_blocksize_bits;
+	status = ocfs_read_bhs(osb, offset, length, publish_bhs, 0, NULL);
 	if (status < 0) {
 		LOG_ERROR_STATUS (status);
 		goto finally;
 	}
 
-	ocfs_update_publish_map (osb, publish_bhs, 1);
+	ocfs_update_publish_map(osb, publish_bhs, 1);
 
-	for(i = 0; i < OCFS_MAXIMUM_NODES; i++)
+	for(i = 0; i < osb->max_nodes; i++)
 		osb->last_publ_seq_num[i] = (__u64) (-1);
 
+
+	/* load all node-local system inodes */
+	status = ocfs_init_local_system_inodes(osb);
+	if (status < 0) {
+		LOG_ERROR_STATUS(status);
+		goto bail;
+	}
+
 	/* We might need to add a variable in Global List of osb to */
 	/* delay any creation, if any other node is already creating a file */
 
@@ -1567,9 +1738,16 @@
 	goto finally;
 
 bail:
-	if (osb->cfg_bhs)
-		kfree(osb->cfg_bhs);
-
+	if (osb->autoconfig_bhs)
+		kfree(osb->autoconfig_bhs);
+	if (osb->vol_node_map)
+		kfree(osb->vol_node_map);
+	if (osb->lock_recovery_lists)
+		kfree(osb->lock_recovery_lists);
+	if (osb->last_publ_seq_num)
+		kfree(osb->last_publ_seq_num);
+	if (osb->node_cfg_info)
+		kfree(osb->node_cfg_info);
 finally:
 	if (publish) {
 		if (publish_bh) {
@@ -1579,7 +1757,7 @@
 	}
 	if (publish_bhs[0]) {
 		int i;
-		for(i = 0; i < OCFS_MAXIMUM_NODES; i++)
+		for(i = 0; i < osb->max_nodes; i++)
 			if (publish_bhs[i])
 				brelse(publish_bhs[i]);
 	}
@@ -1591,53 +1769,59 @@
 
 /*
  * ocfs_verify_volume()
- *
+ * will return: -EAGAIN if it is ok to keep searching for superblocks
+ *              -EINVAL if there is a bad superblock
+ *              0 on success
  */
-static int ocfs_verify_volume(ocfs_vol_disk_hdr *vdh)
+static int ocfs_verify_volume(ocfs2_dinode *di, struct buffer_head *bh,
+			      __u32 sectsize)
 {
-	int status = 0;
+	int status = -EAGAIN;
 
 	LOG_ENTRY ();
 
-	if (vdh == NULL) {
-		LOG_ERROR_STATUS (status = -EFAIL);
-		goto bail;
-	}
-
-	/*  Compare the Signature with the one we read from disk  */
-	if (memcmp (vdh->signature, OCFS_VOLUME_SIGNATURE,
-		    strlen (OCFS_VOLUME_SIGNATURE)) != 0) {
-		LOG_ERROR_STR ("Invalid volume signature");
+	if (memcmp(di->i_signature, OCFS2_SUPER_BLOCK_SIGNATURE,
+		   strlen(OCFS2_SUPER_BLOCK_SIGNATURE)) == 0) {
 		status = -EINVAL;
-		goto bail;
+		if ((1 << le32_to_cpu(di->id2.i_super.s_blocksize_bits)) != sectsize) {
+			LOG_ERROR_ARGS("found superblock with incorrect sectorsize: "
+			       "found %u, should be %u\n", 
+			       1 << le32_to_cpu(di->id2.i_super.s_blocksize_bits),
+			       sectsize);
+		} else if (le16_to_cpu(di->id2.i_super.s_major_rev_level) !=
+			   OCFS2_MAJOR_REV_LEVEL ||
+			   le16_to_cpu(di->id2.i_super.s_minor_rev_level) !=
+			   OCFS2_MINOR_REV_LEVEL) {
+#warning dont know what is appropriate on minor rev difference and also need to check compat flags
+			LOG_ERROR_ARGS("found superblock with bad version: "
+				       "found %u.%u, should be %u.%u\n",
+				       le16_to_cpu(di->id2.i_super.s_major_rev_level),
+				       le16_to_cpu(di->id2.i_super.s_minor_rev_level),
+				       OCFS2_MAJOR_REV_LEVEL,
+				       OCFS2_MINOR_REV_LEVEL);
+		} else if (bh->b_blocknr != le64_to_cpu(di->i_blkno)) {
+			LOG_ERROR_ARGS("bad block number on superblock: "
+				       "found %llu, should be %lu\n",
+				       le64_to_cpu(di->i_blkno), bh->b_blocknr);
+		} else if (le32_to_cpu(di->id2.i_super.s_clustersize_bits) < 12 ||
+			    le32_to_cpu(di->id2.i_super.s_clustersize_bits) > 20) {
+			LOG_ERROR_ARGS("bad cluster size found: %u\n",
+			       1 << le32_to_cpu(di->id2.i_super.s_clustersize_bits));
+		} else if (!le64_to_cpu(di->id2.i_super.s_root_blkno)) {
+			LOG_ERROR_STR("bad root_blkno: 0");
+		} else if (!le64_to_cpu(di->id2.i_super.s_system_dir_blkno)) {
+			LOG_ERROR_STR("bad system_dir_blkno: 0");
+		} else if (le32_to_cpu(di->id2.i_super.s_max_nodes) > OCFS2_MAX_NODES) {
+			LOG_ERROR_ARGS("max_nodes found greater than maximum: "
+				       "found %u, max %u\n",
+				       le32_to_cpu(di->id2.i_super.s_max_nodes),
+				       OCFS2_MAX_NODES);
+		} else {
+			/* found it! */
+			status = 0;
+		}
 	}
 
-	/*  Check the Volume Length and the ClusterSize.  */
-	if (vdh->device_size == 0) {
-		LOG_ERROR_STR ("Device size cannot be zero");
-		status = -EINVAL;
-		goto bail;
-	}
-
-	if (vdh->cluster_size == 0) {
-		LOG_ERROR_STR ("Cluster size cannot be zero");
-		status = -EINVAL;
-		goto bail;
-	}
-
-	if (vdh->major_version != OCFS_MAJOR_VERSION) {
-		LOG_ERROR_ARGS ("Version number not compatible: %u.%u",
-				vdh->major_version, vdh->minor_version);
-		status = -EINVAL;
-		goto bail;
-	}
-
-	if (vdh->root_off == 0) {
-		LOG_ERROR_STR("No OCFS version 2 root directory found");
-		status = -EINVAL;
-		goto bail;
-	}
-bail:
 	LOG_EXIT_STATUS (status);
 	return status;
 }				/* ocfs_verify_volume */
@@ -1658,8 +1842,7 @@
 	LOG_ENTRY ();
 
 	/* Read the node's publish sector */
-	offset = osb->vol_layout.publ_sect_off + 
-		(osb->node_num * osb->sect_size);
+	offset = (osb->publish_blkno + osb->node_num) << osb->sb->s_blocksize_bits;
 	status = ocfs_read_bh(osb, offset, &publish_bh, 0, NULL);
 	if (status < 0) {
 		LOG_ERROR_STATUS (status);
@@ -1750,16 +1933,24 @@
 		list_del (&(osb->osb_next));
 	up (&(OcfsGlobalCtxt.global_res));
 
-	for (i=0; i<32; i++)
+	for (i=0; i<osb->max_nodes; i++)
 		ocfs_recover_oin_locks(osb, i);
 
-	if (osb->cfg_bhs) {
-		for(i = 0; i < osb->cfg_numblocks; i++)
-			if (osb->cfg_bhs[i])
-				brelse(osb->cfg_bhs[i]);
-		kfree(osb->cfg_bhs);
-	}
+	for(i = 0; i < osb->total_autoconfig_blocks; i++)
+		if (osb->autoconfig_bhs[i])
+			brelse(osb->autoconfig_bhs[i]);
 
+	if (osb->autoconfig_bhs)
+		kfree(osb->autoconfig_bhs);
+	if (osb->vol_node_map)
+		kfree(osb->vol_node_map);
+	if (osb->lock_recovery_lists)
+		kfree(osb->lock_recovery_lists);
+	if (osb->last_publ_seq_num)
+		kfree(osb->last_publ_seq_num);
+	if (osb->node_cfg_info)
+		kfree(osb->node_cfg_info);
+
 	/* FIXME
 	 * This belongs in journal shutdown, but because we have to
 	 * allocate osb->journal at the start of ocfs_initalize_osb(),

Added: trunk/src/super.h
===================================================================
--- trunk/src/super.h	2004-06-17 16:59:59 UTC (rev 1127)
+++ trunk/src/super.h	2004-06-17 21:06:33 UTC (rev 1128)
@@ -0,0 +1,36 @@
+/* -*- mode: c; c-basic-offset: 8; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
+ * super.h
+ *
+ * Function prototypes
+ *
+ * Copyright (C) 2002, 2004 Oracle.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ * 
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ *
+ * Authors: Kurt Hackel, Mark Fasheh, Sunil Mushran, Wim Coekaerts,
+ *	    Manish Singh, Neeraj Goyal, Suchit Kaura
+ */
+
+#ifndef OCFS2_SUPER_H
+#define OCFS2_SUPER_H
+
+void ocfs_delete_osb(ocfs_super *osb);
+int ocfs_dismount_volume(struct super_block *sb);
+int ocfs_publish_get_mount_state(ocfs_super *osb, int node_num);
+
+#endif /* OCFS2_SUPER_H */

Modified: trunk/src/symlink.c
===================================================================
--- trunk/src/symlink.c	2004-06-17 16:59:59 UTC (rev 1127)
+++ trunk/src/symlink.c	2004-06-17 21:06:33 UTC (rev 1128)
@@ -1,4 +1,6 @@
-/*
+/* -*- mode: c; c-basic-offset: 8; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
  *  linux/cluster/ssi/cfs/symlink.c
  *
  *	This program is free software; you can redistribute it and/or
@@ -30,12 +32,19 @@
  *  for CDSL support
  */
 
+#include "ocfs_compat.h"
+
+#include <linux/fs.h>
 #include <linux/types.h>
-
-#include "inc/ocfs_log.h"
-#include "inc/ocfs.h"
+#include <linux/slab.h>
+#include <linux/pagemap.h>
 #include <linux/utsname.h>
 
+#include "ocfs_log.h"
+#include "ocfs.h"
+
+#include "symlink.h"
+
 #define OCFS_DEBUG_CONTEXT    OCFS_DEBUG_CONTEXT_SYMLINK
 
 struct ocfs_symlink_ops {
@@ -157,15 +166,8 @@
 				NULL);
 	if (IS_ERR(page))
 		goto sync_fail;
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
 	wait_on_page_locked(page);
 	if (!PageUptodate(page))
-
-#else
-	wait_on_page(page);
-	if (!Page_Uptodate(page))
-
-#endif
 		goto async_fail;
 	*ppage = page;
 	return kmap(page);

Added: trunk/src/symlink.h
===================================================================
--- trunk/src/symlink.h	2004-06-17 16:59:59 UTC (rev 1127)
+++ trunk/src/symlink.h	2004-06-17 21:06:33 UTC (rev 1128)
@@ -0,0 +1,34 @@
+/* -*- mode: c; c-basic-offset: 8; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
+ * symlink.h
+ *
+ * Function prototypes
+ *
+ * Copyright (C) 2002, 2004 Oracle.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ * 
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ *
+ * Authors: Kurt Hackel, Mark Fasheh, Sunil Mushran, Wim Coekaerts,
+ *	    Manish Singh, Neeraj Goyal, Suchit Kaura
+ */
+
+#ifndef OCFS2_SYMLINK_H
+#define OCFS2_SYMLINK_H
+
+int ocfs_follow_link(struct dentry *dentry, struct nameidata *nd);
+
+#endif /* OCFS2_SYMLINK_H */

Modified: trunk/src/sysfile.c
===================================================================
--- trunk/src/sysfile.c	2004-06-17 16:59:59 UTC (rev 1127)
+++ trunk/src/sysfile.c	2004-06-17 21:06:33 UTC (rev 1128)
@@ -1,4 +1,6 @@
-/*
+/* -*- mode: c; c-basic-offset: 8; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
  * sysfile.c
  *
  * Initialize, read, write, etc. system files.
@@ -24,608 +26,166 @@
  *	    Manish Singh, Neeraj Goyal, Suchit Kaura
  */
 
-#ifdef __KERNEL__
+#include "ocfs_compat.h"
+
+#include <linux/fs.h>
 #include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/highmem.h>
 
-#include "inc/ocfs_log.h"
-#include "inc/ocfs.h"
-#include "inc/ocfs_journal.h"
-#else
-#include <dummy.h>
-#include <ocfsmalloc.h>
-#endif
+#include "ocfs_log.h"
+#include "ocfs.h"
 
+#include "alloc.h"
+#include "dir.h"
+#include "inode.h"
+#include "sysfile.h"
+#include "util.h"
+
+#include "ocfs_journal.h"
+#include "ocfs_buffer_head.h"
+
 /* Tracing */
 #define OCFS_DEBUG_CONTEXT    OCFS_DEBUG_CONTEXT_SYSFILE
 
-static int ocfs_find_extents_of_system_file (ocfs_super * osb, __u64 file_off, __u64 Length, ocfs_file_entry * fe, void **Buffer, __u32 * NumEntries);
+static struct inode * _ocfs_get_system_file_inode(ocfs_super *osb, int type, __u32 node);
 
+static inline int is_global_system_inode(int type);
+static inline int is_in_system_inode_array(ocfs_super *osb, int type, __u32 node);
 
-/*
- * ocfs_read_system_file()
- *
- * make sure that the bhs array is either all NULL'd out or you really
- * know what you're doing! Also, those bh's will have to be brelse'd
- * after you're done with them!
- *
- */
-int ocfs_read_system_file (ocfs_super * osb, __u32 FileId, struct buffer_head *bhs[], __u64 Length, __u64 Offset)
+static inline int is_global_system_inode(int type)
 {
-	int status = 0;
-	ocfs_file_entry *fe = NULL;
-	void *extentBuffer = NULL;
-	__u32 numExts = 0, i, j, numblocks;
-	ocfs_io_runs *IoRuns = NULL;
-	__u64 tmplen;
-	__u64 tmpoff = 0;
-	struct buffer_head * fe_bh = NULL;
+	return (type >= GLOBAL_BITMAP_SYSTEM_INODE &&
+		type <= ORPHAN_DIR_SYSTEM_INODE);
+}
 
-	LOG_ENTRY_ARGS ("(FileId = %u, offset = (%llu), "
-			"Length = (%llu))\n", FileId, Offset, Length);
+static inline int is_in_system_inode_array(ocfs_super *osb, int type, __u32 node)
+{
+	return (node == osb->node_num || is_global_system_inode(type));
+}
 
-	status = ocfs_read_bh(osb, (FileId * osb->sect_size) + 
-			      osb->vol_layout.root_int_off, &fe_bh, OCFS_BH_CACHED, NULL);
-	if (status < 0) {
-		LOG_ERROR_STATUS (status);
-		goto leave;
-	}
-
-	fe = OCFS_BH_GET_DATA_READ(fe_bh);
-
-	if (!IS_VALID_FILE_ENTRY (fe)) {
-		LOG_ERROR_STATUS(status = -EINVAL);
-		goto leave;
-	}
-
-	status = ocfs_find_extents_of_system_file (osb, Offset, Length,
-					  fe, &extentBuffer, &numExts);
-	if (status < 0) {
-		LOG_ERROR_STATUS (status);
-		goto leave;
-	}
-
-	OCFS_ASSERT (extentBuffer);
-
-	if (numExts < 1) {
-		LOG_ERROR_ARGS("numExts < 1  (= %d)\n", numExts);
-		goto leave;
-	}
-	IoRuns = extentBuffer;
-	tmpoff = 0;
-	tmplen = 0;
-	j = 0;
-	numblocks = 0;
-
-	for (i = 0; i < numExts; i++) {
-		//tempBuffer += tmplen;
-		/*  ?? need to align both the length and buffer and also */
-		/* offset ( atleast the starting one) */
-		tmpoff = IoRuns[i].disk_off;
-		tmplen = IoRuns[i].byte_cnt;
-
-		status = ocfs_read_bhs(osb, tmpoff, tmplen, &(bhs[j]), OCFS_BH_CACHED, NULL);
-		if (status < 0) {
-			LOG_ERROR_STATUS (status);
-			goto leave;
-		}
-
-		numblocks = tmplen >> osb->sect_size_bits;
-
-		j += numblocks;
-	}
-
-leave:
-	if (fe) {
-		OCFS_BH_PUT_DATA(fe_bh);
-		brelse(fe_bh);
-	}
-
-	if (extentBuffer)
-		vfree(extentBuffer);
-
-	LOG_EXIT_STATUS (status);
-	return status;
-}				/* ocfs_read_system_file */
-
-/*
- * ocfs_file_to_disk_off()
- *
- */
-__u64 ocfs_file_to_disk_off (ocfs_super * osb, __u32 FileId, __u64 Offset)
+struct inode *ocfs_get_system_file_inode(ocfs_super *osb, int type,
+					 __u32 node)
 {
-	int status = 0;
-	__u64 StartOffset = 0;
-	void *Buffer = NULL;
-	ocfs_file_entry *fe = NULL;
-	ocfs_io_runs *IoRuns;
-	__u32 NumExts = 0;
-	struct buffer_head *fe_bh = NULL;
+	struct inode *inode = NULL;
+	struct inode **arr = NULL;
 
-	LOG_ENTRY_ARGS ("(FileId = %u)\n", FileId);
+	/* avoid the lookup if cached in local system file array */
+	if (is_in_system_inode_array(osb, type, node))
+		arr = &(osb->system_inodes[type]);
 
-	/*  Read the File Entry corresponding to File Id */
-	status = ocfs_read_bh (osb, (FileId * osb->sect_size) +
-			       osb->vol_layout.root_int_off, &fe_bh, OCFS_BH_CACHED, NULL);
-	if (status < 0) {
-		LOG_ERROR_STATUS (status);
-		goto leave;
+	if (arr && (inode = *arr) != NULL) {
+		/* get a ref in addition to the array ref */
+		igrab(inode);
+		return inode;
 	}
+	
+	/* this gets one ref thru iget */
+	inode = _ocfs_get_system_file_inode(osb, type, node);
 
-	fe = OCFS_BH_GET_DATA_READ(fe_bh);
+	/* add one more if putting into array for first time */
+	if (arr)
+		*arr = igrab(inode);
+	return inode;
+}
 
-	if (!IS_VALID_FILE_ENTRY (fe)) {
-		LOG_ERROR_STATUS(status = -EINVAL);
-		goto leave;
-	}
-
-	status = ocfs_find_extents_of_system_file (osb, Offset, osb->sect_size,
-					  fe, &Buffer, &NumExts);
-	if (status < 0) {
-		LOG_ERROR_STATUS (status);
-		goto leave;
-	}
-
-	OCFS_ASSERT (Buffer);
-
-	IoRuns = Buffer;
-	/*  Return the disk offset of first run . */
-	StartOffset = (IoRuns[0].disk_off);
-
-leave:
-	if (fe_bh) {
-		OCFS_BH_PUT_DATA(fe_bh);
-		brelse(fe_bh);
-	}
-
-	if (Buffer)
-		vfree(Buffer);
-
-	LOG_EXIT_ARGS ("%llu", StartOffset);
-	return StartOffset;
-}				/* ocfs_file_to_disk_off */
-
-
-/*
- * ocfs_get_system_file_size()
- *
- */
-int ocfs_get_system_file_size (ocfs_super * osb, __u32 FileId, __u64 * Length, __u64 * AllocSize)
+static struct inode * _ocfs_get_system_file_inode(ocfs_super *osb, int type, __u32 node)
 {
+	char namebuf[40];
+	struct inode *inode = NULL;
+	__u64 fe_off = 0;
+	struct buffer_head *dirent_bh = NULL;
+	struct ocfs2_dir_entry *de = NULL;
 	int status = 0;
-	ocfs_file_entry *fe = NULL;
-	__u64 offset;
-	struct buffer_head *fe_bh = NULL;
 
-	LOG_ENTRY_ARGS ("(FileId = %u)\n", FileId);
-
-	*AllocSize = *Length = 0;
-
-	offset = (FileId * osb->sect_size) + osb->vol_layout.root_int_off;
-
-	status = ocfs_read_bh (osb, offset, &fe_bh, 0, NULL);
+	ocfs2_sprintf_system_inode_name(namebuf,
+					sizeof(namebuf),
+					type, node);
+	
+	status = ocfs_find_files_on_disk(osb, namebuf, strlen(namebuf),
+					 &fe_off, osb->sys_root_inode, 
+					 0, &dirent_bh, &de);
 	if (status < 0) {
-		LOG_ERROR_STATUS (status);
-		goto leave;
+		goto bail;
 	}
 
-	fe = OCFS_BH_GET_DATA_READ(fe_bh);
-
-	if (!IS_VALID_FILE_ENTRY (fe)) {
-		LOG_ERROR_ARGS("offset=%llu", offset);
-		status = -EINVAL;
-		goto leave;
+	inode = ocfs_iget(osb, fe_off);
+	if (!inode) {
+		LOG_ERROR_STR("Could not create inode!");
+		goto bail;
 	}
+bail:
+	if (dirent_bh)
+		brelse(dirent_bh);
+	return inode;
+}
 
-	*Length = fe->file_size;
-	*AllocSize = fe->alloc_size;
 
-leave:
-	if (fe) {
-		OCFS_BH_PUT_DATA(fe_bh);
-		brelse(fe_bh);
-	}
-
-	LOG_EXIT_STATUS (status);
-	return status;
-}				/* ocfs_get_system_file_size */
-
-#ifdef __KERNEL__
 /*
- * ocfs_extend_system_file()
- * 
- * fe_bh can be null and we'll just read it off disk.
- * of course, if you've already read it off disk, then give us fe_bh to avoid
- * an extra read. We always do the write out of the new fe.
+ * ocfs_read_system_file()
  *
- * You need to be holding node_alloc_sem!
+ * make sure that the bhs array is either all NULL'd out or you really
+ * know what you're doing! Also, those bh's will have to be brelse'd
+ * after you're done with them!
+ *
  */
-int ocfs_extend_system_file (ocfs_super * osb, __u32 FileId, __u64 FileSize, struct buffer_head *fe_bh, ocfs_journal_handle *handle, int zero)
+int ocfs_read_system_file(ocfs_super *osb, int type, __u32 node,
+			  struct buffer_head *bhs[], __u64 Length)
 {
-	int status = 0;
-	__u64 actualDiskOffset = 0, actualLength = 0;
-	int local_fe = 0;
-	ocfs_file_entry *fe = NULL;
-	__u64 alloc_size;
-	int numbhs, i;
-	char *data;
-	struct buffer_head **bhs;
-	struct inode *ext_alloc_inode = NULL;
+	int status = 0, i;
+	__u64 ret = 0;
+	struct inode *inode = NULL;
+	__u32 blocks;
+	__u64 off, contig_blocks, contig_bytes;
 
-	LOG_ENTRY_ARGS ("(FileId = %u, Size = %llu)\n", FileId, FileSize);
+	LOG_ENTRY_ARGS ("type=%d, node=%u, Length = %llu\n", type, node, Length);
 
-	OCFS_ASSERT (osb);
-
-	if (!fe_bh) {
-		local_fe = 1;
-		status = ocfs_read_bh(osb, (FileId * osb->sect_size) + osb->vol_layout.root_int_off, &fe_bh, OCFS_BH_COND_CACHED, NULL);
-		if (status < 0) {
-			LOG_ERROR_STATUS (status);
-			goto leave;
-		}
-	}
-	fe = OCFS_BH_GET_DATA_READ(fe_bh);
-
-	if (!IS_VALID_FILE_ENTRY (fe)) {
-		OCFS_BH_PUT_DATA(fe_bh);
-		LOG_ERROR_STATUS (status = -EINVAL);
+	inode = ocfs_get_system_file_inode(osb, type, node);
+	if (!inode) {
+		LOG_ERROR_STATUS(status=-EINVAL);
 		goto leave;
 	}
-	alloc_size = fe->alloc_size;
-	OCFS_BH_PUT_DATA(fe_bh);
-	fe = NULL;
-
-	status = ocfs_journal_access(handle, fe_bh, 
-				     OCFS_JOURNAL_ACCESS_WRITE);
-	if (status < 0) {
-		LOG_ERROR_STATUS (status);
-		goto leave;
-	}
-
-	if (FileSize > alloc_size) {
-		/*  We need to allocate from bitmap */
-		__u64 numClusterAlloc = 0, BitmapOffset = 0;
-
-		status =
-			ocfs_find_contiguous_space_from_bitmap (osb, handle,
-						   FileSize - alloc_size,
-						   &BitmapOffset,
-						   &numClusterAlloc, 1, 
-						   NULL, NULL);
-		if (status < 0) {
-			LOG_ERROR_STATUS (status);
+	
+	off = 0;
+	i = 0;
+	blocks = Length >> osb->sb->s_blocksize_bits;
+	while (blocks > 0) {
+		status = ocfs_lookup_file_allocation(osb, off, &ret,
+						     blocks, 
+						     &contig_bytes,
+						     inode, 1);
+		contig_blocks =
+			contig_bytes >> osb->sb->s_blocksize_bits;
+		if (contig_bytes !=
+		    (contig_blocks << osb->sb->s_blocksize_bits)) {
+			LOG_ERROR_ARGS("unaligned system file read! off=%llu, "
+				       "contig_bytes=%llu\n", off, contig_bytes);
+			LOG_ERROR_STATUS(status=-EINVAL);
 			goto leave;
 		}
-
-		actualDiskOffset =
-		    (BitmapOffset * osb->vol_layout.cluster_size) +
-		    osb->vol_layout.data_start_off;
-		actualLength = numClusterAlloc * osb->vol_layout.cluster_size;
-
-		ext_alloc_inode = igrab(osb->system_inodes[FILE_ALLOC_BITMAP_SYSTEM_INODE]);
-		if (!ext_alloc_inode) {
-			status = -EFAIL;
-			LOG_ERROR_STATUS(status);
+		if (status == 0) {
+			// found all remaining
+		} else if (status == -EFAIL && contig_blocks > 0) {
+			// found some
+		} else {
+			// failed
+			LOG_ERROR_STATUS(status = -EIO);
 			goto leave;
 		}
-
-		ocfs_handle_add_inode(handle, ext_alloc_inode);
-		status = ocfs_allocate_extent (osb, fe_bh, handle,  
-					       actualDiskOffset, actualLength, NULL);
+		status = ocfs_read_bhs(osb, ret, contig_bytes, &(bhs[i]), OCFS_BH_CACHED, inode);
 		if (status < 0) {
 			LOG_ERROR_STATUS (status);
 			goto leave;
 		}
-		
-		if (zero) {
-			/* I think at this point, this can be journalled too */
-			numbhs = actualLength >> osb->sect_size_bits;
-
-			bhs = ocfs_malloc(numbhs * sizeof(struct buffer_head *));
-			if (!bhs) {
-				status = -ENOMEM;
-				LOG_ERROR_STATUS(status);
-				goto leave;
-			}
-			memset(bhs, 0, numbhs * sizeof(struct buffer_head *));
-
-			status = ocfs_read_bhs(osb, actualDiskOffset, 
-					       actualLength, bhs, 0, NULL);
-			if (status < 0) {
-				ocfs_free(bhs);
-				LOG_ERROR_STATUS(status);
-				goto leave;
-			}
-
-			for(i = 0; i < numbhs; i++) {
-				data = OCFS_BH_GET_DATA_WRITE(bhs[i]);
-				memset(data, 0, osb->sect_size);
-				OCFS_BH_PUT_DATA(bhs[i]);
-			}
-
-			status = ocfs_write_bhs(osb, bhs, numbhs, 0, NULL);
-			for(i = 0; i < numbhs; i++)
-				brelse(bhs[i]);
-			ocfs_free(bhs);
-			if (status < 0) {
-				LOG_ERROR_STATUS(status);
-				goto leave;
-			}
-		}
-
-		atomic_inc (&osb->alloc_stats.bitmap_meta);
-	} else
-		actualLength = 0;
-		
-	fe = OCFS_BH_GET_DATA_WRITE(fe_bh);
-	fe->alloc_size += actualLength;
-	fe->file_size = FileSize;
-
-	DISK_LOCK_CURRENT_MASTER (fe) = osb->node_num;
-	DISK_LOCK_FILE_LOCK (fe) = OCFS_DLM_ENABLE_CACHE_LOCK;
-
-	OCFS_BH_PUT_DATA(fe_bh);
-	fe = NULL;
-
-	status = ocfs_journal_dirty(handle, fe_bh);
-	if (status < 0)
-		LOG_ERROR_STATUS (status);
-
-leave:
-	if (fe)
-		OCFS_BH_PUT_DATA(fe_bh);
-	if (local_fe)
-		brelse(fe_bh);
-	if (ext_alloc_inode)
-		iput(ext_alloc_inode);
-
-	LOG_EXIT_STATUS (status);
-	return status;
-}				/* ocfs_extend_system_file */
-
-#endif				/* __KERNEL__ */
-
-/*
- * ocfs_find_extents_of_system_file()
- *
- * Searches for the extents in the file entry passed starting from
- * file offset up to the length specified.
- */
-static int ocfs_find_extents_of_system_file (ocfs_super * osb, __u64 file_off, __u64 Length, ocfs_file_entry * fe, void **Buffer, __u32 * NumEntries)
-{
-	int status = -EFAIL;
-	__u32 size;
-	__u32 k = 0, j;
-	__u32 Runs, Runoffset;
-	struct buffer_head *extent_bh = NULL;
-	ocfs_extent_group *extent = NULL;
-	ocfs_io_runs *IoRuns;
-	__u64 newOffset = 0, searchVbo, remainingLength = 0;
-
-
-	LOG_ENTRY ();
-
-	OCFS_ASSERT (osb);
-
-	if (!IS_VALID_FILE_ENTRY (fe)) {
-		LOG_ERROR_STATUS (status = -EFAIL);
-		goto leave;
+		i += contig_blocks;
+		blocks -= contig_blocks;
+		off += contig_bytes;
 	}
 
-	if (fe->local_ext)
-       	{
-		size = OCFS_MAX_FILE_ENTRY_EXTENTS * sizeof (ocfs_io_runs);
-	}
-	else
-	{
-		int pow = fe->granularity + 1;
-		/* extent tree looks like
-		*             fe[0]        fe[1]    fe[2]
-		*        hdr[0]...hdr[17]  .....
-		* dat[0]..dat[17]
-		*
-		* granularity of fe is tree height
-		* so max runs (total of all leaves) is
-		* 3 x 18 ^ (granularity+1)
-		* (OCFS_MAX_DATA_EXTENTS = 18)
-		*
-		* g=0: 1296 bytes
-		* g=1: 23328 bytes
-		* g=2: 419904 bytes!
-		*/
-		size = 3;
-		while (pow)
-		{
-			size *= OCFS_MAX_DATA_EXTENTS;
-			pow--;
-		}
-		size *= sizeof (ocfs_io_runs);
-	}
-	size = OCFS_SECTOR_ALIGN (size);
-	IoRuns = vmalloc(size);
-
-	if (IoRuns == NULL) {
-		LOG_ERROR_STATUS (status = -ENOMEM);
-		goto leave;
-	}
-
-	memset (IoRuns, 0, size);
-
-	remainingLength = Length;
-	Runs = 1;
-	Runoffset = 0;
-	newOffset = file_off;
-
-	if (fe->local_ext) {
-		for (j = 0; j < OCFS_MAX_FILE_ENTRY_EXTENTS; j++) {
-			if ((fe->extents[j].file_off +
-			     fe->extents[j].num_bytes) > newOffset) {
-				IoRuns[Runoffset].disk_off =
-				    fe->extents[j].disk_off +
-				    (newOffset - fe->extents[j].file_off);
-				IoRuns[Runoffset].byte_cnt =
-				    (__u32) ((fe->extents[j].file_off +
-					    fe->extents[j].num_bytes) -
-					   newOffset);
-				if (IoRuns[Runoffset].byte_cnt >=
-				    remainingLength) {
-					IoRuns[Runoffset].byte_cnt =
-					    (__u32) remainingLength;
-					status = 0;
-					break;
-				} else {
-					newOffset += IoRuns[Runoffset].byte_cnt;
-					remainingLength -=
-					    IoRuns[Runoffset].byte_cnt;
-					Runs++;
-					Runoffset++;
-				}
-			}
-		}
-
-		*NumEntries = Runs;
-		*Buffer = IoRuns;
-		goto leave;
-	} else {
-		/* Extents are branched and we are no longer using Local 
-		 *  Extents for this File Entry. */
-
-		while (1) {
-			/* Keep going downwards looking for the Entry, till 
-			 * we hit the last Data entry */
-			for (k = 0; k < OCFS_MAX_FILE_ENTRY_EXTENTS; k++) {
-				if ((__s64) (fe->extents[k].file_off +
-					   fe->extents[k].num_bytes) >
-				    newOffset) {
-					break;
-				}
-			}
-
-			if (k == OCFS_MAX_FILE_ENTRY_EXTENTS) {
-				LOG_ERROR_STR ("data extents maxed");
-			}
-
-			if (fe->extents[k].disk_off == 0) {
-				LOG_ERROR_STR ("disk_off=0");
-			}
-
-			status = ocfs_read_bh(osb, fe->extents[k].disk_off,
-					      &extent_bh, OCFS_BH_COND_CACHED, NULL);
-			if (status < 0) {
-				LOG_ERROR_STATUS (status);
-				goto leave;
-			}
-
-			extent = OCFS_BH_GET_DATA_READ(extent_bh);
-			while (extent->type != OCFS_EXTENT_DATA) {
-				__u64 diskoffset;
-
-				if (!IS_VALID_EXTENT_HEADER (extent)) {
-					LOG_ERROR_STATUS(status = -EFAIL);
-					goto leave;
-				}
-
-				OCFS_GET_EXTENT ((__s64) newOffset, extent, k);
-				if (k == OCFS_MAX_DATA_EXTENTS) {
-					LOG_ERROR_STR ("data extents maxed");
-				}
-
-				if (extent->extents[k].disk_off == 0) {
-					LOG_ERROR_STR ("disk_off=0");
-				}
-
-				diskoffset = extent->extents[k].disk_off;
-
-				OCFS_BH_PUT_DATA(extent_bh);
-				brelse(extent_bh);
-				extent = NULL; extent_bh = NULL;
-
-				status = ocfs_read_bh(osb, diskoffset,
-						      &extent_bh, 
-						      OCFS_BH_CACHED, NULL);
-				if (status < 0) {
-					LOG_ERROR_STATUS (status);
-					goto leave;
-				}
-				extent = OCFS_BH_GET_DATA_READ(extent_bh);
-			}
-
-			searchVbo = newOffset;
-
-			OCFS_ASSERT (extent->type ==
-				     OCFS_EXTENT_DATA);
-
-			if (!IS_VALID_EXTENT_DATA (extent)) {
-				LOG_ERROR_STATUS(status = -EFAIL);
-				goto leave;
-			}
-
-			for (j = 0; j < OCFS_MAX_DATA_EXTENTS; j++) {
-				if ((extent->extents[j].file_off +
-				     extent->extents[j].
-				     num_bytes) > newOffset) {
-					IoRuns[Runoffset].disk_off =
-						extent->extents[j].
-						disk_off + (newOffset -
-							    extent->
-							    extents[j].
-							    file_off);
-					IoRuns[Runoffset].byte_cnt =
-						(__u32) ((extent->
-							  extents[j].
-							  file_off +
-							  extent->
-							  extents[j].
-							  num_bytes) -
-							 newOffset);
-					
-					if (IoRuns[Runoffset].
-					    byte_cnt >=
-					    remainingLength) {
-						IoRuns[Runoffset].
-							byte_cnt = (__u32)
-							remainingLength;
-						status = 0;
-						break;
-					} else {
-						newOffset +=
-							IoRuns[Runoffset].
-							byte_cnt;
-						remainingLength -=
-							IoRuns[Runoffset].
-							byte_cnt;
-						Runs++;
-						Runoffset++;
-						if (Runs >=
-						    OCFS_MAX_DATA_EXTENTS)
-						{
-							LOG_ERROR_ARGS ("Runs=%d", Runs);
-						}
-					}
-				}
-			}
-			
-			if (j == OCFS_MAX_DATA_EXTENTS) {
-				OCFS_BH_PUT_DATA(extent_bh);
-				brelse(extent_bh);
-				extent = NULL; extent_bh = NULL;
-				continue;
-			} else {
-				*NumEntries = Runs;
-				*Buffer = IoRuns;
-				goto leave;
-			}
-		} /* while (1) */
-	}
-
 leave:
-	/* Don't free the IoRuns Memory here */
-	if (extent_bh) {
-		if (extent)
-			OCFS_BH_PUT_DATA(extent_bh);
-		brelse(extent_bh);
-	}
+	if (inode)
+		iput(inode);
 
 	LOG_EXIT_STATUS (status);
 	return status;
-}				/* ocfs_find_extents_of_system_file */
-
+}				/* ocfs_read_system_file */

Added: trunk/src/sysfile.h
===================================================================
--- trunk/src/sysfile.h	2004-06-17 16:59:59 UTC (rev 1127)
+++ trunk/src/sysfile.h	2004-06-17 21:06:33 UTC (rev 1128)
@@ -0,0 +1,36 @@
+/* -*- mode: c; c-basic-offset: 8; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
+ * sysfile.h
+ *
+ * Function prototypes
+ *
+ * Copyright (C) 2002, 2004 Oracle.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ * 
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ *
+ * Authors: Kurt Hackel, Mark Fasheh, Sunil Mushran, Wim Coekaerts,
+ *	    Manish Singh, Neeraj Goyal, Suchit Kaura
+ */
+
+#ifndef OCFS2_SYSFILE_H
+#define OCFS2_SYSFILE_H
+
+struct inode * ocfs_get_system_file_inode(ocfs_super *osb, int type, __u32 node);
+int ocfs_read_system_file (ocfs_super *osb, int type, __u32 node,
+			   struct buffer_head *bhs[], __u64 Length);
+
+#endif /* OCFS2_SYSFILE_H */

Modified: trunk/src/util.c
===================================================================
--- trunk/src/util.c	2004-06-17 16:59:59 UTC (rev 1127)
+++ trunk/src/util.c	2004-06-17 21:06:33 UTC (rev 1128)
@@ -1,4 +1,6 @@
-/*
+/* -*- mode: c; c-basic-offset: 8; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
  * util.c
  *
  * General purpose code
@@ -24,15 +26,18 @@
  *	    Manish Singh
  */
 
-#ifdef __KERNEL__
+#include "ocfs_compat.h"
+
+#include <linux/fs.h>
 #include <linux/types.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
 
-#include "inc/ocfs_log.h"
-#include "inc/ocfs.h"
-#else
-#include <debugocfs.h>
-#endif
+#include "ocfs_log.h"
+#include "ocfs.h"
 
+#include "util.h"
+
 /* Tracing */
 #define OCFS_DEBUG_CONTEXT  OCFS_DEBUG_CONTEXT_UTIL
 
@@ -119,10 +124,6 @@
 	return 0;
 }				/* ocfs_sleep */
 
-#ifdef OCFS_LINUX_MEM_DEBUG
-#define SUPER_VERBOSE_MEM_DEBUG  1
-#endif
-
 /*
  * ocfs_dbg_slab_alloc()
  *
@@ -131,23 +132,10 @@
 {
     void *m;
     m = kmem_cache_alloc(slab, GFP_NOFS);
-#ifdef OCFS_LINUX_MEM_DEBUG
-    if (m == NULL) {
+#if 0 /* FIXME: Should we check? */
+    if (m == NULL)
 	LOG_ERROR_ARGS("failed to alloc from slab = %p", slab);
-    } else {
-		alloc_item *new;
-		new = kmalloc (sizeof (alloc_item), GFP_NOFS);
-		new->type = SLAB_ITEM;
-		new->address = m;
-		new->u.slab = slab;
-		snprintf (new->tag, 30, "%d:%s", line, file);
-		new->tag[29] = '\0';
-		list_add (&new->list, &OcfsGlobalCtxt.item_list);
-#ifdef SUPER_VERBOSE_MEM_DEBUG
-		LOG_TRACE_ARGS (" + %x (%p, '%s')\n", m, slab, new->tag);
 #endif
-    }
-#endif
     return m;
 }                               /* ocfs_dbg_slab_alloc */
 
@@ -157,105 +145,10 @@
  */
 void ocfs_dbg_slab_free (kmem_cache_t *slab, void *m)
 {
-
-#ifdef OCFS_LINUX_MEM_DEBUG
-	struct list_head *iter, *tmpiter;
-	alloc_item *item = NULL;
-	int do_free = 0;
-
-	list_for_each_safe (iter, tmpiter, &OcfsGlobalCtxt.item_list) {
-		item = list_entry (iter, alloc_item, list);
-
-		if (item->address == m && item->type == SLAB_ITEM) {
-#ifdef SUPER_VERBOSE_MEM_DEBUG
-			LOG_TRACE_ARGS (" - %x (%p, '%s')\n", m, item->u.slab, item->tag);
-#endif
-			list_del (&item->list);
-			do_free = 1;
-			break;
-		}
-	}
-
-	if (do_free) {
-		kmem_cache_free(slab, m);
-		kfree (item);
-		return;
-	}
-	LOG_ERROR_ARGS ("tried to free mem never allocated: %x", m);
-#endif
-#ifndef OCFS_LINUX_MEM_DEBUG
 	kmem_cache_free(slab, m);
-#endif
 }				/* ocfs_dbg_slab_free */
 
 
-
-/*
- * ocfs_linux_dbg_alloc()
- *
- */
-void *ocfs_linux_dbg_alloc (int Size, char *file, int line)
-{
-	void *m;
-
-	m = kmalloc (Size, GFP_NOFS);
-#ifdef OCFS_LINUX_MEM_DEBUG
-	if (m == NULL) {
-		LOG_ERROR_ARGS ("failed! (size=%d)", Size);
-	} else {
-		alloc_item *new;
-		new = kmalloc (sizeof (alloc_item), GFP_NOFS);
-		new->type = KMALLOC_ITEM;
-		new->address = m;
-		new->u.length = Size;
-		snprintf (new->tag, 30, "%d:%s", line, file);
-		new->tag[29] = '\0';
-		list_add (&new->list, &OcfsGlobalCtxt.item_list);
-#ifdef SUPER_VERBOSE_MEM_DEBUG
-		LOG_TRACE_ARGS (" + %x (%d, '%s')\n", m, Size, new->tag);
-#endif
-	}
-#endif
-	return m;
-}				/* ocfs_linux_dbg_alloc */
-
-/*
- * ocfs_linux_dbg_free()
- *
- */
-void ocfs_linux_dbg_free (const void *Buffer)
-{
-
-#ifdef OCFS_LINUX_MEM_DEBUG
-	struct list_head *iter, *tmpiter;
-	alloc_item *item = NULL;
-	int do_free = 0;
-
-	list_for_each_safe (iter, tmpiter, &OcfsGlobalCtxt.item_list) {
-		item = list_entry (iter, alloc_item, list);
-
-		if (item->address == Buffer && item->type == KMALLOC_ITEM) {
-#ifdef SUPER_VERBOSE_MEM_DEBUG
-			LOG_TRACE_ARGS (" - %x (%d, '%s')\n", Buffer,
-					item->u.length, item->tag);
-#endif
-			list_del (&item->list);
-			do_free = 1;
-			break;
-		}
-	}
-	if (do_free) {
-		kfree (Buffer);
-		kfree (item);
-		return;
-	}
-	LOG_ERROR_ARGS ("tried to free mem never allocated: %x", Buffer);
-#endif
-#ifndef OCFS_LINUX_MEM_DEBUG
-	kfree (Buffer);
-#endif
-}				/* ocfs_linux_dbg_free */
-
 /* prefetch has been declared to allow to build in debug mode */
 #ifdef DEBUG
 #ifndef ARCH_HAS_PREFETCH

Added: trunk/src/util.h
===================================================================
--- trunk/src/util.h	2004-06-17 16:59:59 UTC (rev 1127)
+++ trunk/src/util.h	2004-06-17 21:06:33 UTC (rev 1128)
@@ -0,0 +1,46 @@
+/* -*- mode: c; c-basic-offset: 8; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
+ * util.h
+ *
+ * Function prototypes
+ *
+ * Copyright (C) 2002, 2004 Oracle.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ * 
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ *
+ * Authors: Kurt Hackel, Mark Fasheh, Sunil Mushran, Wim Coekaerts,
+ *	    Manish Singh, Neeraj Goyal, Suchit Kaura
+ */
+
+#ifndef OCFS2_UTIL_H
+#define OCFS2_UTIL_H
+
+int ocfs_compare_qstr(struct qstr *s1, struct qstr *s2);
+void ocfs_clear_timeout(ocfs_timeout *to);
+void ocfs_daemonize(char *name, int len);
+void *ocfs_dbg_slab_alloc(kmem_cache_t *slab, char *file, int line);
+void ocfs_dbg_slab_free(kmem_cache_t *slab, void *m);
+void ocfs_init_timeout(ocfs_timeout *to);
+void *ocfs_linux_dbg_alloc(int Size, char *file, int line);
+void ocfs_linux_dbg_free(const void *Buffer);
+void ocfs_set_timeout(ocfs_timeout *to, __u32 timeout);
+void ocfs_show_stack(unsigned long *esp);
+void ocfs_show_trace(unsigned long *stack);
+int ocfs_sleep(__u32 ms);
+void ocfs_truncate_inode_pages(struct inode *inode, loff_t off);
+
+#endif /* OCFS2_UTIL_H */

Modified: trunk/src/ver.c
===================================================================
--- trunk/src/ver.c	2004-06-17 16:59:59 UTC (rev 1127)
+++ trunk/src/ver.c	2004-06-17 21:06:33 UTC (rev 1128)
@@ -1,4 +1,6 @@
-/*
+/* -*- mode: c; c-basic-offset: 8; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
  * ver.c
  *
  * version string
@@ -24,14 +26,20 @@
  *	    Manish Singh
  */
 
+#include "ocfs_compat.h"
+
+#include <linux/module.h>
+#include <linux/fs.h>
 #include <linux/types.h>
-
-#include "inc/ocfs_log.h"
-#include "inc/ocfs.h"
+#include <linux/slab.h>
 #include <linux/kernel.h>
 
-#include <linux/module.h>
+#include "ocfs_log.h"
+#include "ocfs.h"
 
+#include "ver.h"
+
+
 MODULE_DESCRIPTION("The Oracle Cluster Filesystem (version "
 		   OCFS_BUILD_VERSION
 		   ")");

Added: trunk/src/ver.h
===================================================================
--- trunk/src/ver.h	2004-06-17 16:59:59 UTC (rev 1127)
+++ trunk/src/ver.h	2004-06-17 21:06:33 UTC (rev 1128)
@@ -0,0 +1,34 @@
+/* -*- mode: c; c-basic-offset: 8; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
+ * ver.h
+ *
+ * Function prototypes
+ *
+ * Copyright (C) 2002, 2004 Oracle.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ * 
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ *
+ * Authors: Kurt Hackel, Mark Fasheh, Sunil Mushran, Wim Coekaerts,
+ *	    Manish Singh, Neeraj Goyal, Suchit Kaura
+ */
+
+#ifndef OCFS2_VER_H
+#define OCFS2_VER_H
+
+void ocfs_version_print(void);
+
+#endif /* OCFS2_VER_H */

Modified: trunk/src/volcfg.c
===================================================================
--- trunk/src/volcfg.c	2004-06-17 16:59:59 UTC (rev 1127)
+++ trunk/src/volcfg.c	2004-06-17 21:06:33 UTC (rev 1128)
@@ -1,4 +1,6 @@
-/* 
+/* -*- mode: c; c-basic-offset: 8; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
  * volcfg.c
  *
  * Auto configuration, namely, node number.
@@ -24,11 +26,21 @@
  *	    Manish Singh, Neeraj Goyal, Suchit Kaura
  */
 
+#include "ocfs_compat.h"
+
+#include <linux/fs.h>
 #include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/highmem.h>
 
-#include "inc/ocfs_log.h"
-#include "inc/ocfs.h"
+#include "ocfs_log.h"
+#include "ocfs.h"
 
+#include "util.h"
+#include "volcfg.h"
+
+#include "ocfs_buffer_head.h"
+
 /* Tracing */
 #define  OCFS_DEBUG_CONTEXT  OCFS_DEBUG_CONTEXT_VOLCFG
 
@@ -68,7 +80,7 @@
 	/* Obtain the volume for which we need to reiterate the lock */
 	osb = cfg_task->osb;
 	bh = cfg_task->bh;
-	length = osb->sect_size;
+	length = osb->sb->s_blocksize;
 	offset = cfg_task->lock_off;
 
 	/* Write the sector back */
@@ -132,21 +144,22 @@
 	ocfs_node_config_info *disk_node = NULL;
 	char *buf;
 	__u32 node_num;
-	__u32 sect_size;
 	__u32 size;
-	struct buffer_head *cfg_bhs[OCFS_MAXIMUM_NODES];
+	struct buffer_head **cfg_bhs = NULL;
 	int done = 0;
 
 	LOG_ENTRY ();
 
-	memset(cfg_bhs, 0, OCFS_MAXIMUM_NODES * sizeof(*cfg_bhs));
+	cfg_bhs = kmalloc(sizeof(struct buffer_head *) * osb->max_nodes, GFP_KERNEL);
+	if (cfg_bhs == NULL) {
+		LOG_ERROR_STATUS(status = -ENOMEM);
+		goto finally;
+	}
+	memset(cfg_bhs, 0, osb->max_nodes * sizeof(struct buffer_head *));
 
-	sect_size = osb->sect_size;
-
 	/* Read the nodecfg info for all nodes from disk */
-	size = OCFS_VOLCFG_HDR_SECTORS * sect_size;
-	offset = osb->vol_layout.node_cfg_off + size;
-	size = osb->vol_layout.node_cfg_size - size;
+	offset = (osb->autoconfig_blkno + OCFS_VOLCFG_HDR_SECTORS) << osb->sb->s_blocksize_bits;
+	size = (osb->autoconfig_blocks - OCFS_VOLCFG_HDR_SECTORS) << osb->sb->s_blocksize_bits;
 
 	status = ocfs_read_bhs(osb, offset, size, cfg_bhs, 0, NULL);
 	if (status < 0) {
@@ -156,7 +169,7 @@
 
 	/* Check if preferred node num is available */
 	node_num = OCFS_INVALID_NODE_NUM;
-	if (pref_node_num >= 0 && pref_node_num < OCFS_MAXIMUM_NODES) {
+	if (pref_node_num >= 0 && pref_node_num < osb->max_nodes) {
 		disk_node = OCFS_BH_GET_DATA_READ(cfg_bhs[pref_node_num]);
 		if (disk_node->node_name[0] == '\0')
 			node_num = pref_node_num;
@@ -165,7 +178,7 @@
 
 	/* if not, find the first available empty slot */
 	if (node_num == OCFS_INVALID_NODE_NUM) {
-		for (node_num = 0; node_num < OCFS_MAXIMUM_NODES; node_num++) {
+		for (node_num = 0; node_num < osb->max_nodes; node_num++) {
 			disk_node = OCFS_BH_GET_DATA_READ(cfg_bhs[node_num]);
 			if (disk_node->node_name[0] == '\0')
 				done = 1;
@@ -176,7 +189,7 @@
 	}
 
 	/* If no free slots, error out */
-	if (node_num >= OCFS_MAXIMUM_NODES) {
+	if (node_num >= osb->max_nodes) {
 		LOG_ERROR_STR ("Unable to allocate node number as no slots " \
 			       "are available");
 		status = -ENOSPC;
@@ -185,7 +198,7 @@
 
 	/* Copy the new nodecfg into the memory buffer */
 	buf = OCFS_BH_GET_DATA_WRITE(cfg_bhs[node_num]);
-	memcpy (buf, new_disk_node, sect_size);
+	memcpy (buf, new_disk_node, osb->sb->s_blocksize);
 	OCFS_BH_PUT_DATA(cfg_bhs[node_num]);
 
 	/* Write the new node details on disk */
@@ -203,9 +216,11 @@
 	}
 
 finally:
-	for (i = 0; i < OCFS_MAXIMUM_NODES; i++)
+	for (i = 0; i < osb->max_nodes; i++)
 		if (cfg_bhs[i])
 			brelse(cfg_bhs[i]);
+	if (cfg_bhs)
+		kfree(cfg_bhs);
 
 	LOG_EXIT_STATUS (status);
 	return status;
@@ -227,14 +242,14 @@
 
 	node_cfg_bhs[0] = node_cfg_bhs[1] = NULL;
 	/* Read the nodecfg header */
-	offset = osb->vol_layout.node_cfg_off;
+	offset = osb->autoconfig_blkno << osb->sb->s_blocksize_bits;
 	status = ocfs_read_bh(osb, offset, &node_cfg_bhs[0], 0, NULL);
 	if (status < 0) {
 		LOG_ERROR_STATUS (status);
 		goto bail;
 	}
 
-	offset = osb->vol_layout.new_cfg_off + osb->sect_size;
+	offset = (osb->new_autoconfig_blkno + 1) << osb->sb->s_blocksize_bits;
 	status = ocfs_read_bh(osb, offset, &node_cfg_bhs[1], 0, NULL);
 	if (status < 0) {
 		LOG_ERROR_STATUS (status);
@@ -250,7 +265,7 @@
 	/* Increment the seq# to trigger other nodes to re-read node cfg */
 	hdr->cfg_seq_num++;
 
-	memcpy(hdr_copy, hdr, osb->sect_size);
+	memcpy(hdr_copy, hdr, osb->sb->s_blocksize);
 	OCFS_BH_PUT_DATA(node_cfg_bhs[0]);
 	OCFS_BH_PUT_DATA(node_cfg_bhs[1]);
 	/* Write the nodecfg header */
@@ -303,17 +318,14 @@
 	int tried_acq = 0;
 	int break_lock = 0;
 	int bh_locked = 0;
-	ocfs_disk_lock *disk_lock;
+	ocfs2_disk_lock *disk_lock;
 	ocfs_cfg_task *cfg_task;
-	__u32 sect_size;
 	__u64 lock_node_num = OCFS_INVALID_NODE_NUM;
 	struct buffer_head *bh = NULL;
 	int i;
 
 	LOG_ENTRY ();
 
-	sect_size = osb->sect_size;
-
 	cfg_task = ocfs_malloc (sizeof (ocfs_cfg_task));
 	if (cfg_task == NULL)
 	{
@@ -353,21 +365,21 @@
 		lock_buf = OCFS_BH_GET_DATA_WRITE(bh);
 		bh_locked = 1;
 
-		disk_lock = (ocfs_disk_lock *) lock_buf;
-		lock_node_num = DISK_LOCK_CURRENT_MASTER(disk_lock);
+		disk_lock = (ocfs2_disk_lock *)lock_buf;
+		lock_node_num = disk_lock->dl_master;
 
-		if (DISK_LOCK_FILE_LOCK (disk_lock) == 0 || break_lock) {
-			if (DISK_LOCK_FILE_LOCK (disk_lock) != 0)
+		if (disk_lock->dl_level == 0 || break_lock) {
+			if (disk_lock->dl_level != 0)
 				LOG_TRACE_STR ("Try to break node config lock");
 			else
 				LOG_TRACE_STR ("Lock node config");
 
 			/* Attempt to lock volcfg */
-			memcpy(disk_lock, cfg_buf, osb->sect_size);
+			memcpy(disk_lock, cfg_buf, osb->sb->s_blocksize);
 
-			DISK_LOCK_CURRENT_MASTER (disk_lock) = osb->node_num;
-			DISK_LOCK_FILE_LOCK (disk_lock) = 1;
-			memcpy(cfg_buf, disk_lock, osb->sect_size);
+			disk_lock->dl_master = osb->node_num;
+			disk_lock->dl_level = 1;
+			memcpy(cfg_buf, disk_lock, osb->sb->s_blocksize);
 		
 			/* Write into volcfg lock sector... */
 			OCFS_BH_PUT_DATA(bh);
@@ -396,8 +408,8 @@
 		lock_buf = OCFS_BH_GET_DATA_WRITE(bh);
 
 		/* If we tried to acquire and we still own it we take it... */
-		if ((tried_acq) && (memcmp (lock_buf, cfg_buf, sect_size) == 0)) {
-			memcpy (lock_buf, cfg_buf, sect_size);
+		if ((tried_acq) && (memcmp (lock_buf, cfg_buf, osb->sb->s_blocksize) == 0)) {
+			memcpy (lock_buf, cfg_buf, osb->sb->s_blocksize);
 			OCFS_BH_PUT_DATA(bh);
 
 			/* Set timer to reiterate lock every few jiffies */
@@ -410,10 +422,9 @@
 			add_timer(&osb->lock_timer);
 
 			/* Write the config info into the disk */
-			disk_lock = (ocfs_disk_lock *) cfg_buf;
-			DISK_LOCK_CURRENT_MASTER (disk_lock) =
-							OCFS_INVALID_NODE_NUM;
-			DISK_LOCK_FILE_LOCK (disk_lock) = 0;
+			disk_lock = (ocfs2_disk_lock *)cfg_buf;
+			disk_lock->dl_master = OCFS_INVALID_NODE_NUM;
+			disk_lock->dl_level = 0;
 
 			if (op == OCFS_VOLCFG_ADD)
 				status = ocfs_add_to_disk_config (osb, node_num,
@@ -429,12 +440,12 @@
 			}
 			break;
 		} else {
-			disk_lock = (ocfs_disk_lock *) lock_buf;
-			if (DISK_LOCK_CURRENT_MASTER (disk_lock) == lock_node_num)
+			disk_lock = (ocfs2_disk_lock *)lock_buf;
+			if (disk_lock->dl_master == lock_node_num)
 				break_lock = 1;
 			else {
 				LOG_TRACE_ARGS ("Node config locked by node: %d\n",
-					DISK_LOCK_CURRENT_MASTER (disk_lock));
+					disk_lock->dl_master);
 				ocfs_sleep (OCFS_VOLCFG_LOCK_TIME);
 			}
 			OCFS_BH_PUT_DATA(bh);
@@ -468,7 +479,6 @@
 {
 	int status = 0;
 	__s8 *buffer = NULL;
-	__u32 sect_size = osb->sect_size;
 	struct buffer_head *bh = NULL;
 	struct super_block *sb;
 	__u64 blocknum;
@@ -477,14 +487,8 @@
 
 	sb = osb->sb;
 
-	if (sect_size != sb->s_blocksize) {
-		LOG_ERROR_STR("sector_size != sb->s_blocksize!");
-		status = -999;
-		goto finally;
-	}
-
 	blocknum = lock_off >> sb->s_blocksize_bits;
-	bh = getblk(OCFS_GET_BLOCKDEV(sb), blocknum, sb->s_blocksize);
+	bh = sb_getblk(sb, blocknum);
 	if (bh == NULL) {
 		LOG_ERROR_STATUS (status = -EIO);
 		goto finally;
@@ -493,7 +497,7 @@
 	buffer = OCFS_BH_GET_DATA_WRITE(bh);
 
 	/* reset lock... */
-	memset (buffer, 0, sect_size);
+	memset (buffer, 0, osb->sb->s_blocksize);
 	OCFS_BH_PUT_DATA(bh);
 
 	/* Release the lock */
@@ -512,7 +516,7 @@
 
 	buffer = OCFS_BH_GET_DATA_WRITE(bh);
 	/* reset lock... */
-	memset (buffer, 0, sect_size);
+	memset (buffer, 0, osb->sb->s_blocksize);
 	OCFS_BH_PUT_DATA(bh);
 
 	/* Release the lock */
@@ -539,16 +543,15 @@
 	ocfs_node_config_info *disk;
 	void *buffer;
 	__u64 offset;
-	__u32 sect_size = osb->sect_size;
 
 	LOG_ENTRY ();
 
-	buffer = ocfs_malloc (sect_size);
+	buffer = ocfs_malloc (osb->sb->s_blocksize);
 	if (buffer == NULL) {
 		LOG_ERROR_STATUS (status = -ENOMEM);
 		goto bail;
 	}
-	memset (buffer, 0, sect_size);
+	memset (buffer, 0, osb->sb->s_blocksize);
 
 	disk = buffer;
 
@@ -556,7 +559,7 @@
 	ocfs_volcfg_gblctxt_to_disknode (disk);
 
 	/* Write this nodes config onto disk */
-	offset = osb->vol_layout.new_cfg_off;
+	offset = (osb->new_autoconfig_blkno << osb->sb->s_blocksize_bits);
 	status = ocfs_config_with_disk_lock (osb, offset, (__u8 *) disk,
 					     OcfsGlobalCtxt.pref_node_num,
 					     OCFS_VOLCFG_ADD);
@@ -600,14 +603,12 @@
 
 	strncpy ((*node)->node_name, disk->node_name, MAX_NODE_NAME_LENGTH);
 
-	memcpy((*node)->guid.guid, disk->guid.guid, GUID_LEN);
+	memcpy((*node)->guid.guid, disk->guid.guid, OCFS2_GUID_LEN);
 
-	(*node)->ipc_config.type = disk->ipc_config.type;
 	(*node)->ipc_config.ip_port = disk->ipc_config.ip_port;
-	strncpy((*node)->ipc_config.ip_addr, disk->ipc_config.ip_addr,
-		MAX_IP_ADDR_LEN);
-	strncpy((*node)->ipc_config.ip_mask, disk->ipc_config.ip_mask,
-		MAX_IP_ADDR_LEN);
+	(*node)->ipc_config.addr_u.ip_addr4 =
+		disk->ipc_config.addr_u.ip_addr4;
+	(*node)->ipc_config.ip_version = disk->ipc_config.ip_version;
 
 bail:
 	LOG_EXIT_STATUS (status);
@@ -621,7 +622,6 @@
 static int ocfs_update_disk_config (ocfs_super * osb, __u32 node_num, ocfs_node_config_info * disk)
 {
 	int status = 0;
-	__u64 offset;
 	__u32 blocknum;
 	struct buffer_head *bh = NULL;
 	struct super_block *sb = NULL;
@@ -631,11 +631,9 @@
 
 	sb = osb->sb;
 	/* Write the node details */
-	offset = osb->vol_layout.node_cfg_off +
-		((node_num + OCFS_VOLCFG_HDR_SECTORS) * osb->sect_size);
-	blocknum = offset >> sb->s_blocksize_bits;
+	blocknum = osb->autoconfig_blkno + OCFS_VOLCFG_HDR_SECTORS + node_num;
 
-	bh = getblk(OCFS_GET_BLOCKDEV(sb), blocknum, sb->s_blocksize);
+	bh = sb_getblk(sb, blocknum);
 	if (bh == NULL) {
 		status = -EIO;
 		LOG_ERROR_STATUS(status);
@@ -643,7 +641,7 @@
 	}
 
 	buf = OCFS_BH_GET_DATA_WRITE(bh);
-	memcpy(buf, disk, osb->sect_size);
+	memcpy(buf, disk, osb->sb->s_blocksize);
 	OCFS_BH_PUT_DATA(bh);
 
 	status = ocfs_write_bh(osb, bh, 0, NULL);
@@ -684,14 +682,12 @@
 		strncpy (disk->node_name, OcfsGlobalCtxt.node_name,
 			 MAX_NODE_NAME_LENGTH);
 
-	memcpy(disk->guid.guid, OcfsGlobalCtxt.guid.guid, GUID_LEN);
+	memcpy(disk->guid.guid, OcfsGlobalCtxt.guid.guid,
+	       OCFS2_GUID_LEN);
 
-	ipc->type = g_ipc->type;
 	ipc->ip_port = g_ipc->ip_port;
-	if (g_ipc->ip_addr)
-		strncpy (ipc->ip_addr, g_ipc->ip_addr, MAX_IP_ADDR_LEN);
-	if (g_ipc->ip_mask)
-		strncpy (ipc->ip_mask, g_ipc->ip_mask, MAX_IP_ADDR_LEN);
+	ipc->ip_version = g_ipc->ip_version;
+	ipc->addr_u.ip_addr4 = g_ipc->addr_u.ip_addr4;
 
 	LOG_EXIT ();
 	return ;
@@ -715,14 +711,12 @@
 		strncpy (node->node_name, OcfsGlobalCtxt.node_name,
 			 MAX_NODE_NAME_LENGTH);
 
-	memcpy(node->guid.guid, OcfsGlobalCtxt.guid.guid, GUID_LEN);
+	memcpy(node->guid.guid, OcfsGlobalCtxt.guid.guid,
+	       OCFS2_GUID_LEN);
 
-	ipc->type = g_ipc->type;
 	ipc->ip_port = g_ipc->ip_port;
-	if (g_ipc->ip_addr)
-		strncpy (ipc->ip_addr, g_ipc->ip_addr, MAX_IP_ADDR_LEN);
-	if (g_ipc->ip_mask)
-		strncpy (ipc->ip_mask, g_ipc->ip_mask, MAX_IP_ADDR_LEN);
+	ipc->ip_version = g_ipc->ip_version;
+	ipc->addr_u.ip_addr4 = g_ipc->addr_u.ip_addr4;
 
 	LOG_EXIT ();
 	return ;
@@ -746,8 +740,8 @@
 	LOG_ENTRY ();
 
 	/* Read in the config on the disk */
-	offset = osb->vol_layout.node_cfg_off;
-	numblocks = osb->vol_layout.node_cfg_size >> sb->s_blocksize_bits;
+	offset = osb->autoconfig_blkno << sb->s_blocksize_bits;
+	numblocks = osb->autoconfig_blocks;
 
 	cfg_bhs = ocfs_malloc(numblocks * sizeof(*cfg_bhs));
 	if (cfg_bhs == NULL) {
@@ -757,7 +751,8 @@
 	}
 	memset(cfg_bhs, 0, numblocks * sizeof(*cfg_bhs));
 
-	status = ocfs_read_bhs(osb, offset, osb->vol_layout.node_cfg_size, cfg_bhs, 0, NULL);
+	status = ocfs_read_bhs(osb, offset, numblocks << sb->s_blocksize_bits,
+			       cfg_bhs, 0, NULL);
 	if (status < 0) {
 		LOG_ERROR_STATUS (status);
 		goto finally;
@@ -766,18 +761,18 @@
 	/* 1st block in buffer is the NodeCfgHdr */
 	hdr = OCFS_BH_GET_DATA_READ(cfg_bhs[0]);
 
-	if (strncmp (hdr->signature, NODE_CONFIG_HDR_SIGN,
-		     NODE_CONFIG_SIGN_LEN)) {
+	if (strncmp (hdr->signature, OCFS2_NODE_CONFIG_HDR_SIGN,
+		     OCFS2_NODE_CONFIG_SIGN_LEN)) {
 		LOG_ERROR_STR ("Invalid node config signature");
 		status = -EINVAL;
 		goto finally;
 	}
 
-	if  (hdr->version < NODE_MIN_SUPPORTED_VER ||
-	     hdr->version > NODE_CONFIG_VER) {
+	if  (hdr->version < OCFS2_NODE_MIN_SUPPORTED_VER ||
+	     hdr->version > OCFS2_NODE_CONFIG_VER) {
 		LOG_ERROR_ARGS ("Node config version mismatch, (%d) < minimum" \
 				" (%d) or > current (%d)", hdr->version,
-				NODE_MIN_SUPPORTED_VER, NODE_CONFIG_VER);
+				OCFS2_NODE_MIN_SUPPORTED_VER, OCFS2_NODE_CONFIG_VER);
 		status = -EINVAL;
 		goto finally;
 	}
@@ -791,7 +786,7 @@
 
 	/* Read the nodecfg for all possible nodes as there may be holes */
 	/* i.e., node numbers need not be dolled out in sequence */
-	for (i = 0; i < OCFS_MAXIMUM_NODES; i++) {//, p += sect_size) {
+	for (i = 0; i < osb->max_nodes; i++) {
 		int which;
 		which = i + OCFS_VOLCFG_HDR_SECTORS;
 		disk = OCFS_BH_GET_DATA_READ(cfg_bhs[which]);
@@ -821,14 +816,15 @@
 		 * the volume with the reclaimid option. Else, error.
 		 */
 		if (!memcmp(&OcfsGlobalCtxt.guid.guid, disk->guid.guid,
-			    GUID_LEN)) {
+			    OCFS2_GUID_LEN)) {
 			osb->node_num = i;
 			goto loop;
 		}
 
 		/* If the hostid does not match, goto next... */
 		if (memcmp(&OcfsGlobalCtxt.guid.id.host_id,
-			   disk->guid.id.host_id, HOSTID_LEN))
+			   disk->guid.id.host_id,
+			   OCFS2_GUID_HOSTID_LEN))
 			goto loop;
 
 		/* ...else allow node to reclaim the number if reclaimid set */
@@ -941,20 +937,15 @@
 		     MAX_NODE_NAME_LENGTH))
 		chg = 1;
 
-	if (!chg && ipc->type != g_ipc->type)
+	if (!chg && (ipc->ip_version != g_ipc->ip_version))
 		chg = 1;
 
-	if (!chg && ipc->ip_port != g_ipc->ip_port)
+	if (!chg && (ipc->ip_port != g_ipc->ip_port))
 		chg = 1;
 
-	if (!chg && g_ipc->ip_addr &&
-	    strncmp (ipc->ip_addr, g_ipc->ip_addr, MAX_IP_ADDR_LEN))
+	if (!chg && (ipc->addr_u.ip_addr4 != g_ipc->addr_u.ip_addr4))
 		chg = 1;
 
-	if (!chg && g_ipc->ip_mask &&
-	    strncmp (ipc->ip_mask, g_ipc->ip_mask, MAX_IP_ADDR_LEN))
-		chg = 1;
-
 	LOG_EXIT_INT (chg);
 	return chg;
 }				/* ocfs_has_node_config_changed */
@@ -973,13 +964,13 @@
 
 	LOG_ENTRY ();
 
-	buffer = ocfs_malloc (osb->sect_size);
+	buffer = ocfs_malloc (osb->sb->s_blocksize);
 	if (buffer == NULL) {
 		LOG_ERROR_STATUS (status = -ENOMEM);
 		goto bail;
 	}
 
-	memset (buffer, 0, osb->sect_size);
+	memset (buffer, 0, osb->sb->s_blocksize);
 	disk = (ocfs_node_config_info *) buffer;
 
 	/* populate the nodecfg info in disk from global context */
@@ -990,7 +981,7 @@
 	ocfs_volcfg_gblctxt_to_node (node);
 
 	/* Update the nodecfg on disk with the new info */
-	offset = osb->vol_layout.new_cfg_off;
+	offset = (osb->new_autoconfig_blkno << osb->sb->s_blocksize_bits);
 	status = ocfs_config_with_disk_lock (osb, offset, (__u8 *) disk,
 					     osb->node_num, OCFS_VOLCFG_UPD);
 	if (status < 0) {
@@ -1014,15 +1005,17 @@
 	BARF_BARF_BARF *node;
 	__u32 i;
 
-	for (i = 0; i < OCFS_MAXIMUM_NODES; i++) {
+	for (i = 0; i < osb->max_nodes; i++) {
 		node = osb->node_cfg_info[i];
 
 		if (!node || node->node_name[0] == '\0')
 			continue;
 
-		LOG_TRACE_ARGS ("Node (%u) is (%s)\n", i, node->node_name);
-		LOG_TRACE_ARGS ("ip=%s, port=%d\n", node->ipc_config.ip_addr,
-				node->ipc_config.ip_port);
+		LOG_TRACE_ARGS ("Node (%u) is (%s)\n", i,
+				node->node_name);
+		LOG_TRACE_ARGS ("ip=0x%08u, port=%d\n",
+				ntohl(node->ipc_config.addr_u.ip_addr4),
+				ntohs(node->ipc_config.ip_port));
 	}
 
 	return;

Added: trunk/src/volcfg.h
===================================================================
--- trunk/src/volcfg.h	2004-06-17 16:59:59 UTC (rev 1127)
+++ trunk/src/volcfg.h	2004-06-17 21:06:33 UTC (rev 1128)
@@ -0,0 +1,35 @@
+/* -*- mode: c; c-basic-offset: 8; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
+ * volcfg.h
+ *
+ * Function prototypes
+ *
+ * Copyright (C) 2002, 2004 Oracle.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ * 
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ *
+ * Authors: Kurt Hackel, Mark Fasheh, Sunil Mushran, Wim Coekaerts,
+ *	    Manish Singh, Neeraj Goyal, Suchit Kaura
+ */
+
+#ifndef OCFS2_VOLCFG_H
+#define OCFS2_VOLCFG_H
+
+int ocfs_chk_update_config(ocfs_super *osb);
+int ocfs_get_config(ocfs_super *osb);
+
+#endif /* OCFS2_VOLCFG_H */

Modified: trunk/src/vote.c
===================================================================
--- trunk/src/vote.c	2004-06-17 16:59:59 UTC (rev 1127)
+++ trunk/src/vote.c	2004-06-17 21:06:33 UTC (rev 1128)
@@ -1,4 +1,6 @@
-/*
+/* -*- mode: c; c-basic-offset: 8; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
  * vote.c
  *
  * netdlm listener, receive, verify and send messages
@@ -24,11 +26,25 @@
  *	    Manish Singh
  */
 
+#include "ocfs_compat.h"
+
+#include <linux/fs.h>
 #include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/inet.h>
+#include <linux/net.h>
+#include <linux/in.h>
 
-#include "inc/ocfs_log.h"
-#include "inc/ocfs.h"
+#include <asm/uaccess.h>
 
+#include "ocfs_log.h"
+#include "ocfs.h"
+
+#include "dlm.h"
+#include "nm.h"
+#include "util.h"
+#include "vote.h"
+
 /* Tracing */
 #define  OCFS_DEBUG_CONTEXT  OCFS_DEBUG_CONTEXT_VOTE
 
@@ -38,13 +54,14 @@
 
 static const char vote_state_str[] = { 'U', 'S', 'P', 'F', 'D' };
 
-static ocfs_vote_obj * ocfs_alloc_vote_obj (int bytes, __u32 reqlock, __u64 votemap);
+static ocfs_vote_obj * ocfs_alloc_vote_obj (ocfs_super *osb, int bytes, __u32 reqlock, ocfs_node_map * votemap);
 static void ocfs_dlm_recv_msg (void *val);
 static int ocfs_check_ipc_msg (__u8 * msg, __u32 msg_len);
 static int ocfs_comm_process_vote_reply (ocfs_super * osb, ocfs_dlm_msg * dlm_msg);
 static int ocfs_comm_process_msg (__u8 * msg);
 static void ocfs_init_dlm_msg (ocfs_super * osb, ocfs_dlm_msg * dlm_msg, __u32 msg_len, __u32 type);
 
+static int ocfs_send_bcast (ocfs_super * osb, ocfs_node_map *votemap, ocfs_dlm_msg * dlm_msg);
 
 static spinlock_t vote_obj_lock = SPIN_LOCK_UNLOCKED;
 
@@ -134,42 +151,57 @@
 	return status;
 }				/* ocfs_recv_udp_msg */
 
+
+/* Map the network-order IP version from a comm config to a socket family. */
+static inline int ocfs2_comm_ip_version_to_family(u16 ip_version)
+{
+	switch (ntohs(ip_version)) {
+		case 4:
+			return PF_INET;
+		case 6:
+			return PF_INET6;
+		default:
+			BUG();
+	}
+	return PF_INET;	/* only reached if BUG() returns; 4 is a version, not a family */
+}
+
 /*
  * ocfs_send_bcast()
  *
  */
-static int ocfs_send_bcast (ocfs_super * osb, __u64 votemap, ocfs_dlm_msg * dlm_msg)
+static int ocfs_send_bcast (ocfs_super * osb, ocfs_node_map *votemap, ocfs_dlm_msg * dlm_msg)
 {
 	int status = 0, error;
-	__u32 map, num;
+	__u32 num;
 	BARF_BARF_BARF *node;
 	struct sockaddr_in sin;
 	mm_segment_t oldfs;
 
-	LOG_ENTRY_ARGS ("(votemap=0x%x)\n",
-			(__u32)(votemap & 0xFFFFFFFFULL));
+	LOG_ENTRY ();
 
 	oldfs = get_fs ();
-	for (map = (__u32)(votemap & 0xFFFFFFFFULL), num = 0; map != 0; map >>= 1, num++) {
+	for (num=0; num<osb->max_nodes; num++) {
 		if (num == osb->node_num)
 			continue;
 
-		if (!(map & 0x1))
+		if (!ocfs_node_map_test_bit(votemap, num))
 			continue;
 
 		node = osb->node_cfg_info[num];
 		if (!node)
 			continue;
 
-		LOG_TRACE_ARGS ("Sending msg to node=%u, name=%s\n",
-				num, node->node_name);
+		LOG_TRACE_ARGS("Sending msg to node=%u, name=%s\n",
+       			       num, node->node_name);
 		memset (&sin, 0, sizeof (sin));
-		sin.sin_family = AF_INET;
-		sin.sin_addr.s_addr = in_aton(node->ipc_config.ip_addr);
-		sin.sin_port = htons(node->ipc_config.ip_port);
+		sin.sin_family = ocfs2_comm_ip_version_to_family(node->ipc_config.ip_version);
+		sin.sin_addr.s_addr = node->ipc_config.addr_u.ip_addr4;
+		sin.sin_port = node->ipc_config.ip_port;
 		
-		LOG_TRACE_ARGS ("about to send to %s:%u\n", node->ipc_config.ip_addr,
-				node->ipc_config.ip_port);
+		LOG_TRACE_ARGS("about to send to 0x%08u:%u\n",
+			       ntohl(node->ipc_config.addr_u.ip_addr4),
+			       ntohs(node->ipc_config.ip_port));
 		
 		status = -EFAIL;
 		if (OcfsIpcCtxt.send_sock) {
@@ -210,7 +242,8 @@
  * ocfs_init_udp_sock()
  *
  */
-int ocfs_init_udp_sock (struct socket **send_sock, struct socket **recv_sock)
+int ocfs_init_udp_sock(struct socket **send_sock,
+		       struct socket **recv_sock)
 {
 	struct sockaddr_in sin;
 	int status = -EFAIL;
@@ -218,43 +251,50 @@
 
 	LOG_ENTRY ();
 
+	comm = &(OcfsGlobalCtxt.comm_info);
+
 	/* Create Send Socket */
-	status = sock_create (PF_INET, SOCK_DGRAM, IPPROTO_UDP, send_sock);
+	status = sock_create(ocfs2_comm_ip_version_to_family(comm->ip_version),
+			     SOCK_DGRAM, IPPROTO_UDP,
+			     send_sock);
 	if (status < 0) {
 		LOG_ERROR_ARGS ("unable to create socket, error=%d", status);
 		goto bail;
 	}
 
 	/* Bind Send Socket */
-	memset (&sin, 0, sizeof (sin));
-	sin.sin_family = AF_INET;
+	memset(&sin, 0, sizeof (sin));
+	sin.sin_family = ocfs2_comm_ip_version_to_family(comm->ip_version);
 	sin.sin_addr.s_addr = htonl (INADDR_ANY);
-	sin.sin_port = htons (0);
+	sin.sin_port = htons(0);
 
-	status = (*send_sock)->ops->bind (*send_sock, (struct sockaddr *) &sin,
-					 sizeof (sin));
+	status = (*send_sock)->ops->bind(*send_sock,
+					 (struct sockaddr *)&sin,
+					 sizeof(sin));
 	if (status < 0) {
 		LOG_ERROR_ARGS ("unable to bind socket, error=%d", status);
 		goto bail;
 	}
 
 	/* Create Receive Socket */
-	status = sock_create (PF_INET, SOCK_DGRAM, IPPROTO_UDP, recv_sock);
+	status = sock_create(ocfs2_comm_ip_version_to_family(comm->ip_version),
+			     SOCK_DGRAM, IPPROTO_UDP,
+			     recv_sock);
 	if (status < 0) {
 		LOG_ERROR_ARGS ("unable to create socket, error=%d", status);
 		goto bail;
 	}
 
-	comm = &(OcfsGlobalCtxt.comm_info);
 
 	/* Bind Receive Socket */
-	memset (&sin, 0, sizeof (sin));
-	sin.sin_family = AF_INET;
-	sin.sin_addr.s_addr = htonl (INADDR_ANY);
-	sin.sin_port = htons (comm->ip_port);
+	memset(&sin, 0, sizeof(sin));
+	sin.sin_family = ocfs2_comm_ip_version_to_family(comm->ip_version);
+	sin.sin_addr.s_addr = htonl(INADDR_ANY);
+	sin.sin_port = comm->ip_port;
 
-	status = (*recv_sock)->ops->bind (*recv_sock, (struct sockaddr *) &sin,
-					 sizeof (sin));
+	status = (*recv_sock)->ops->bind(*recv_sock,
+					 (struct sockaddr *)&sin,
+					 sizeof(sin));
 	if (status < 0) {
 		LOG_ERROR_ARGS ("unable to bind socket, error=%d", status);
 		goto bail;
@@ -276,19 +316,21 @@
 	ocfs_dlm_reply_master *reply_master;
 	ocfs_dlm_msg *send_dlm_msg;
 	ocfs_vote_obj *obj;
-	__u64 vote_map;
 	int status = 0;
 	__u8 *buf = NULL;
 	__u32 msg_len, obj_len;
+	ocfs_node_map vote_map;
 
 	LOG_ENTRY ();
 
+	ocfs_node_map_init(osb, &vote_map);
+
 	req_master = (ocfs_dlm_req_master *) dlm_msg->msg_buf;
 
 	msg_len = sizeof (ocfs_dlm_msg) + sizeof (ocfs_dlm_reply_master);
 	obj_len = sizeof (ocfs_vote_obj) + sizeof (ocfs_dlm_reply_master);
 
-	obj = ocfs_alloc_vote_obj (obj_len, 0, 0ULL);
+	obj = ocfs_alloc_vote_obj (osb, obj_len, 0, NULL);
 	if (obj == NULL) {
 		LOG_ERROR_STATUS (status = -ENOMEM);
 		goto finally;
@@ -304,14 +346,15 @@
 	reply_master->h.lock_seq_num = req_master->lock_seq_num;
 	reply_master->h.open_handle = inode_open;
 	reply_master->h.flags = req_master->flags;
-	vote_map = (1 << dlm_msg->src_node);
-	obj->req_vote_map = vote_map;
 
+	ocfs_node_map_set_bit(&vote_map, dlm_msg->src_node);
+	ocfs_node_map_set(&obj->req_vote_map, &vote_map);
+
 	spin_lock(&osb->vote_obj_queue_lock);
 	list_add_tail(&obj->list, &osb->vote_obj_queue);
 	spin_unlock(&osb->vote_obj_queue_lock);
 
-	ocfs_send_bcast (osb, vote_map, send_dlm_msg);
+	ocfs_send_bcast (osb, &vote_map, send_dlm_msg);
 	spin_lock (&obj->lock);
 	obj->vote_state = VOTE_OBJ_STATE_SENT;
 	spin_unlock (&obj->lock);
@@ -363,12 +406,6 @@
 		goto bail;
 	}
 
-	if ((dlm_msg->src_node < 0) ||
-	    (dlm_msg->src_node > OCFS_MAXIMUM_NODES)) {
-		LOG_TRACE_ARGS ("Invalid source node in netdlm message: %d\n",
-				dlm_msg->src_node);
-		goto bail;
-	}
 
 	ret = 1;
 
@@ -388,6 +425,7 @@
 	int max = data->u.proc.max - *len;
 	char *p = data->u.proc.page + *len;
 	int ret = 0;
+	char *reqstr=NULL, *gotstr=NULL, *openstr=NULL;
 
 	/* just run thru everything to populate /proc */
 	/* return -ENOENT to keep going */
@@ -396,23 +434,32 @@
 	switch (dlm_msg->msg_type) {
 		case OCFS_VOTE_REQUEST:
 			request = (ocfs_dlm_msg_hdr *) dlm_msg->msg_buf;
-			ret = snprintf(p, max, "REQST: %d %c %3d %08x %21llu %21llu %08x | %08x %08x\n",
+
+			if (ocfs_node_map_stringify(&obj->req_vote_map, &reqstr) < 0)
+				break;
+			if (ocfs_node_map_stringify(&obj->got_vote_map, &gotstr) < 0)
+				break;
+			if (ocfs_node_map_stringify(&obj->tmp_openmap, &openstr) < 0)
+				break;
+			ret = snprintf(p, max, "REQST: %d %c %3d %s %21llu %21llu %08x | %s %s\n",
 				obj->pid,
 				vote_state_str[obj->vote_state],
 				obj->vote_status,
-				(__u32)(obj->req_vote_map & 0xFFFFFFFFULL),
+				reqstr,
 				request->lock_id,
-				request->lock_seq_num, request->flags, 
-				(__u32)(obj->got_vote_map & 0xFFFFFFFFULL),
-				(__u32)(obj->tmp_openmap & 0xFFFFFFFFULL));
+				request->lock_seq_num, 
+				request->flags, 
+				gotstr, openstr);
 			break;
 		case OCFS_VOTE_REPLY:
 			reply = (ocfs_dlm_reply_master *) dlm_msg->msg_buf;
-			ret = snprintf(p, max, "REPLY: %d %c %3d %08x %21llu %21llu %08x | %3d %c\n",
+			if (ocfs_node_map_stringify(&obj->req_vote_map, &reqstr) < 0)
+				break;
+			ret = snprintf(p, max, "REPLY: %d %c %3d %s %21llu %21llu %08x | %3d %c\n",
 				obj->pid,
 				vote_state_str[obj->vote_state],
 				obj->vote_status,
-				(__u32)(obj->req_vote_map & 0xFFFFFFFFULL),
+				reqstr,
 				reply->h.lock_id,
 				reply->h.lock_seq_num,
 				reply->h.flags, 
@@ -429,6 +476,13 @@
 	}
 	(*len) += ret;
 	p[max-1] = '\0';
+
+	if (reqstr)
+		kfree(reqstr);
+	if (gotstr)
+		kfree(gotstr);
+	if (openstr)
+		kfree(openstr);
 	return status;
 }
 
@@ -568,7 +622,7 @@
 
 	ocfs_process_one_vote_reply(osb, &ctxt, dlm_msg->src_node);
 
-	if (obj->got_vote_map == obj->req_vote_map)
+	if (ocfs_node_map_is_equal(&obj->got_vote_map, &obj->req_vote_map))
 		obj->vote_state = VOTE_OBJ_STATE_FULL_REPLY;
 	else 
 		obj->vote_state = VOTE_OBJ_STATE_PARTIAL_REPLY;
@@ -623,7 +677,6 @@
 	int status = 0;
 	ocfs_super *osb = NULL;
 	ocfs_dlm_msg *dlm_msg;
-	__u64 nodemap;
 	ocfs_dlm_req_master *req_master;
 	struct list_head *iter_osb, *temp_iter;
 	ocfs_vote_request_ctxt ctxt;
@@ -636,7 +689,7 @@
 	down (&(OcfsGlobalCtxt.global_res));
 	list_for_each_safe (iter_osb, temp_iter, &(OcfsGlobalCtxt.osb_next)) {
 		osb = list_entry (iter_osb, ocfs_super, osb_next);
-		if (!memcmp (osb->vol_layout.vol_id, dlm_msg->vol_id,
+		if (!memcmp (osb->uuid, dlm_msg->vol_id,
 			     MAX_VOL_ID_LENGTH))
 			break;
 		osb = NULL;
@@ -644,12 +697,18 @@
 	up (&(OcfsGlobalCtxt.global_res));
 	
 	if (osb == NULL) {
-		LOG_TRACE_STR("Ignoring netdlm message with invalid volume id");
+		LOG_ERROR_STR("Ignoring netdlm message with invalid volume id");
 		goto bail;
 	}
 
-	nodemap = (1 << dlm_msg->src_node);
-	if (!(osb->publ_map & nodemap)) {
+	if ((dlm_msg->src_node < 0) ||
+	    (dlm_msg->src_node > osb->max_nodes)) {
+		LOG_ERROR_ARGS ("Invalid source node in netdlm message: %d\n",
+				dlm_msg->src_node);
+		goto bail;
+	}
+
+	if (!ocfs_node_map_test_bit(&osb->publ_map, dlm_msg->src_node)) {
 		LOG_TRACE_STR("Ignoring netdlm message from dead node");
 		goto bail;
 	}
@@ -674,7 +733,7 @@
 			"from %s (node %d)\n", MAJOR(osb->sb->s_dev),
 			MINOR(osb->sb->s_dev), osb->node_cfg_info[src_node]->node_name,
 			src_node);
-		atomic_set (&(osb->vol_node_map.dismount[src_node]), 1);
+		atomic_set (&(osb->vol_node_map[src_node].dismount), 1);
 		break;
 
 	default:
@@ -687,25 +746,29 @@
 }				/* ocfs_comm_process_msg */
 
 
+
 /*
  * ocfs_send_dismount_msg()
  *
  */
-int ocfs_send_dismount_msg (ocfs_super * osb, __u64 vote_map)
+int ocfs_send_dismount_msg (ocfs_super * osb)
 {
 	int status = 0;
 	ocfs_dlm_msg *dlm_msg = NULL;
 	ocfs_dlm_msg_hdr *req;
 	ocfs_vote_obj *obj;
 	__u32 msg_len, obj_len;
+	ocfs_node_map map;
 
-	LOG_ENTRY_ARGS ("(osb=0x%p, vm=0x%08x)\n", osb,
-			(__u32)(vote_map & 0xFFFFFFFFULL));
+	LOG_ENTRY_ARGS ("(osb=0x%p)\n", osb);
 
+	ocfs_node_map_dup(osb, &map, &osb->publ_map);
+	ocfs_node_map_clear_bit(&map, osb->node_num);
+
 	msg_len = sizeof (ocfs_dlm_msg) + sizeof (ocfs_dlm_req_master);
 	obj_len = sizeof (ocfs_vote_obj) + sizeof (ocfs_dlm_req_master);
 
-	obj = ocfs_alloc_vote_obj (obj_len, 0, 0ULL);
+	obj = ocfs_alloc_vote_obj (osb, obj_len, 0, NULL);
 	if (obj == NULL) {
 		LOG_ERROR_STATUS (status = -ENOMEM);
 		goto finally;
@@ -722,7 +785,7 @@
 	list_add_tail(&obj->list, &osb->vote_obj_queue);
 	spin_unlock(&osb->vote_obj_queue_lock);
 
-	ocfs_send_bcast (osb, vote_map, dlm_msg);
+	ocfs_send_bcast (osb, &map, dlm_msg);
 	spin_lock (&obj->lock);
 	obj->vote_state = VOTE_OBJ_STATE_SENT;
 	spin_unlock (&obj->lock);
@@ -754,17 +817,17 @@
 	dlm_msg->msg_len = msg_len;
 	dlm_msg->src_node = osb->node_num;
 	dlm_msg->msg_type = type;
-	memcpy (dlm_msg->vol_id, osb->vol_layout.vol_id, MAX_VOL_ID_LENGTH);
+	memcpy (dlm_msg->vol_id, osb->uuid, MAX_VOL_ID_LENGTH);
 
 	LOG_EXIT ();
 	return;
 }				/* ocfs_init_dlm_msg */
 
 
-static ocfs_vote_obj * ocfs_alloc_vote_obj (int bytes, __u32 reqlock, __u64 votemap)
+static ocfs_vote_obj * ocfs_alloc_vote_obj (ocfs_super *osb, int bytes, __u32 reqlock, ocfs_node_map *votemap)
 {
 	ocfs_vote_obj *obj = NULL;
-	
+
 	obj = ocfs_malloc (bytes);
 	if (obj == NULL)
 		return NULL;
@@ -777,13 +840,17 @@
 	init_waitqueue_head (&obj->voted_event);
 	INIT_LIST_HEAD (&obj->list);
 
-	obj->req_vote_map = votemap;
-	obj->got_vote_map = 0ULL;
-	obj->tmp_openmap = 0ULL;
+	if (votemap)
+		ocfs_node_map_dup(osb, &obj->req_vote_map, votemap);
+	else
+		ocfs_node_map_init(osb, &obj->req_vote_map);
+	ocfs_node_map_init(osb, &obj->got_vote_map);
+	ocfs_node_map_init(osb, &obj->tmp_openmap);
+
 	obj->seq_num = 0ULL;
 	obj->req_lock_type = reqlock;
 	obj->vote_status = 0;
-	obj->pid = ocfs_getpid();
+	obj->pid = current->pid;
 	
 	return obj;
 }
@@ -794,7 +861,7 @@
  * ocfs_send_dlm_request_msg()
  * inode is definitely non NULL
  */
-int ocfs_send_dlm_request_msg (ocfs_super * osb, __u64 lock_id, __u32 lock_type, __u32 flags, __u64 * vote_map, __u64 *openmap, struct inode *inode, int *vote_status)
+int ocfs_send_dlm_request_msg (ocfs_super * osb, __u64 lock_id, __u32 lock_type, __u32 flags, ocfs_node_map *votemap, ocfs_node_map *openmap, struct inode *inode, int *vote_status)
 {
 	int status = 0;
 	ocfs_dlm_msg *dlm_msg = NULL;
@@ -802,13 +869,13 @@
 	ocfs_vote_obj *obj;
 	__u32 msg_len, obj_len;
 
-	LOG_ENTRY_ARGS ("(osb=0x%p, id=%llu, ty=%u, fl=%u, vm=0x%08x)\n",
-			osb, lock_id, lock_type, flags, (__u32)(*vote_map & 0xFFFFFFFFULL));
+	LOG_ENTRY_ARGS ("(osb=0x%p, id=%llu, ty=%u, fl=%u)\n",
+			osb, lock_id, lock_type, flags);
 
 	msg_len = sizeof (ocfs_dlm_msg) + sizeof (ocfs_dlm_req_master);
 	obj_len = sizeof (ocfs_vote_obj) + sizeof (ocfs_dlm_req_master);
 
-	obj = ocfs_alloc_vote_obj (obj_len, lock_type, *vote_map);
+	obj = ocfs_alloc_vote_obj (osb, obj_len, lock_type, votemap);
 	if (obj == NULL) {
 		LOG_ERROR_STATUS (status = -ENOMEM);
 		goto finally;
@@ -834,7 +901,7 @@
 	list_add_tail(&obj->list, &osb->vote_obj_queue);
 	spin_unlock(&osb->vote_obj_queue_lock);
 
-	ocfs_send_bcast (osb, *vote_map, dlm_msg);
+	ocfs_send_bcast (osb, votemap, dlm_msg);
 	spin_lock (&obj->lock);
 	obj->vote_state = VOTE_OBJ_STATE_SENT;
 	spin_unlock (&obj->lock);
@@ -847,19 +914,15 @@
 		// extend and update need to update the openmap
  	    	if (flags & (FLAG_FILE_EXTEND | FLAG_FILE_UPDATE)) 
 			if (openmap)
-				*openmap = obj->tmp_openmap;
-		LOG_TRACE_ARGS ("OK vote, lockid=%llu, map: 0x%08x\n",
-			lock_id, (__u32)(obj->got_vote_map & 0xFFFFFFFFULL));
+				ocfs_node_map_set(openmap, &obj->tmp_openmap);
+		LOG_TRACE_ARGS ("OK vote, lockid=%llu\n", lock_id);
 	} else {
-		LOG_ERROR_ARGS("vote_status=%d, vote_state=%d, lockid=%llu, map=0x%08x, got=0x%08x\n",
-			       obj->vote_status, obj->vote_state,
-			       lock_id,
-			       (__u32)(obj->req_vote_map & 0xFFFFFFFFULL),
-			       (__u32)(obj->got_vote_map & 0xFFFFFFFFULL));
+		LOG_ERROR_ARGS("vote_status=%d, vote_state=%d, lockid=%llu\n",
+			       obj->vote_status, obj->vote_state, lock_id);
 	}
 	*vote_status = obj->vote_status;
 	obj->vote_state = VOTE_OBJ_STATE_DESTROYING;
-	*vote_map = (*vote_map) & ~(obj->got_vote_map);
+	ocfs_node_map_clear_bits(votemap, &obj->got_vote_map);
 	spin_unlock (&obj->lock);
 
 	spin_lock(&osb->vote_obj_queue_lock);
@@ -884,10 +947,14 @@
 	int status;
 	int reply_status;
 	int open_handle = 0;
-	__u64 mask = 0;
 
 	if (ctxt->reply_method == DISK_VOTE) {
-		reply_status = ctxt->u.vote->vote[osb->node_num];
+		if (ctxt->u.vote->node != osb->node_num) {
+			LOG_ERROR_ARGS("vote->node(%hu) != this(%u)\n",
+				       ctxt->u.vote->node, osb->node_num);
+			reply_status = 0;
+		} else
+			reply_status = ctxt->u.vote->type;
 		open_handle = ctxt->u.vote->open_handle;
 	} else {
 		reply_status = ctxt->u.reply->status;
@@ -895,24 +962,23 @@
 	}
 
 	status = 0;
-	mask = 1 << node_num;
 
 	switch (reply_status) {
 		case FLAG_VOTE_NODE:
-			*(ctxt->got_vote_map) |= mask;
+			ocfs_node_map_set_bit(ctxt->got_vote_map, node_num);
 			if (ctxt->flags & (FLAG_FILE_EXTEND|FLAG_FILE_UPDATE) && 
 			    open_handle && ctxt->open_map)
-				*(ctxt->open_map) |= mask;
+				ocfs_node_map_set_bit(ctxt->open_map, node_num);
 			break;
 		case FLAG_VOTE_OIN_ALREADY_INUSE:
-			*(ctxt->got_vote_map) |= mask;
+			ocfs_node_map_set_bit(ctxt->got_vote_map, node_num);
 			status = -EFAIL;
 			if (ctxt->flags & FLAG_FILE_DELETE)
 				status = -EBUSY;
 			break;
 		case FLAG_VOTE_OIN_UPDATED:
 			status = 0;
-			*(ctxt->got_vote_map) |= mask;
+			ocfs_node_map_set_bit(ctxt->got_vote_map, node_num);
 			break;
 		case FLAG_VOTE_UPDATE_RETRY:
 			status = -EAGAIN;
@@ -923,3 +989,238 @@
 	}
 	*(ctxt->status) = status;
 }
+
+/* Prepare "map" for use: clear its words and take the width from osb.
+ * TODO: the bit helpers special-case -1; callers should never pass -1!!  */
+void ocfs_node_map_init(ocfs_super *osb, ocfs_node_map *map)
+{
+	memset(map->map, 0,
+	       sizeof(unsigned long) * BITS_TO_LONGS(OCFS_NODE_MAP_MAX_NODES));
+	map->num_nodes = osb->max_nodes;
+}
+
+void ocfs_node_map_set_bit(ocfs_node_map *map, int bit)
+{
+	if (bit != -1) {	/* -1 is silently ignored */
+		OCFS_ASSERT(bit < map->num_nodes);
+		set_bit(bit, map->map);
+	}
+}
+
+void ocfs_node_map_clear_bit(ocfs_node_map *map, int bit)
+{
+	if (bit != -1) {	/* -1 is silently ignored */
+		OCFS_ASSERT(bit < map->num_nodes);
+		clear_bit(bit, map->map);
+	}
+}
+
+// clear all the bits in "target" which are set in "mask"
+void ocfs_node_map_clear_bits(ocfs_node_map *target, ocfs_node_map *mask)
+{
+	int bit, prev=0;
+	while (1) {
+		bit = find_next_bit (mask->map, mask->num_nodes, prev);
+		if (bit >= mask->num_nodes) /* >= size means no bit is left */
+			break;
+		ocfs_node_map_clear_bit(target, bit);
+		prev = bit+1;
+	}
+}
+
+// set all the bits in "target" which are set in "mask"
+void ocfs_node_map_set_bits(ocfs_node_map *target, ocfs_node_map *mask)
+{
+	int bit, prev=0;
+	while (1) {
+		bit = find_next_bit (mask->map, mask->num_nodes, prev);
+		if (bit >= mask->num_nodes) /* >= size means no bit is left */
+			break;
+		ocfs_node_map_set_bit(target, bit);
+		prev = bit+1;
+	}
+}
+
+int ocfs_node_map_test_bit(ocfs_node_map *map, int bit)
+{
+	if (bit >= map->num_nodes) {	/* index beyond this map's width */
+		LOG_ERROR_ARGS("bit=%d map->num_nodes=%d\n", bit, map->num_nodes);
+		BUG();
+	}
+	return test_bit(bit, map->map);	/* NOTE(review): a negative bit is not rejected here */
+}
+
+/* Render the set bits of "map" as "%3d " fields into a kmalloc()ed, NUL-
+ * terminated *str (caller kfree()s it); on failure *str is left NULL. */
+int ocfs_node_map_stringify(ocfs_node_map *map, char **str)
+{
+	int i, n;
+	char *s;
+	OCFS_ASSERT(map->num_nodes > 0);
+	*str = s = kmalloc(strlen("123 ") * map->num_nodes + 1, GFP_KERNEL);
+	if (!s)
+		return -ENOMEM;
+	*s = '\0';	/* an empty map still yields a valid "" string */
+	for (i=0; i<map->num_nodes; i++) {
+		if (!ocfs_node_map_test_bit(map, i))
+			continue;
+		n = sprintf(s, "%3d ", i);
+		if (n != strlen("123 ")) {
+			kfree(*str);
+			*str = NULL;	/* callers kfree() any non-NULL ptr */
+			return -ENOMEM;
+		}
+		s += n;
+	}
+	return 0;
+}
+
+/* Returns 1 when no bit below map->num_nodes is set, 0 otherwise. */
+int ocfs_node_map_is_empty(ocfs_node_map *map)
+{
+	int first_set;
+
+	OCFS_ASSERT(map->num_nodes > 0);
+	first_set = find_first_bit(map->map, map->num_nodes);
+	return first_set >= map->num_nodes;
+}
+
+/* Returns 1 if both maps hold identical words, 0 otherwise. */
+int ocfs_node_map_is_equal(ocfs_node_map *map1, ocfs_node_map *map2)
+{
+	int word, nr_words;
+
+	OCFS_ASSERT(map1->num_nodes == map2->num_nodes);
+	OCFS_ASSERT(map1->num_nodes > 0);
+
+	nr_words = BITS_TO_LONGS(map1->num_nodes);
+	for (word = 0; word < nr_words; word++)
+		if (map1->map[word] != map2->map[word])
+			return 0;
+	return 1;
+}
+
+
+/* target &= mask, word by word; both maps must have the same num_nodes. */
+void ocfs_node_map_and(ocfs_node_map *target, ocfs_node_map *mask)
+{
+	int word, nr_words;
+
+	OCFS_ASSERT(target->num_nodes == mask->num_nodes);
+	OCFS_ASSERT(target->num_nodes > 0);
+	nr_words = BITS_TO_LONGS(target->num_nodes);
+	for (word = 0; word < nr_words; word++)
+		target->map[word] &= mask->map[word];
+}
+
+/* Copy the words of "from" into "target"; both must have equal num_nodes. */
+void ocfs_node_map_set(ocfs_node_map *target, ocfs_node_map *from)
+{
+	int word, nr_words;
+
+	OCFS_ASSERT(target->num_nodes == from->num_nodes);
+	OCFS_ASSERT(target->num_nodes > 0);
+	nr_words = BITS_TO_LONGS(target->num_nodes);
+	for (word = 0; word < nr_words; word++)
+		target->map[word] = from->map[word];
+}
+
+
+void ocfs_node_map_dup(ocfs_super *osb, ocfs_node_map *target, ocfs_node_map *from)
+{
+	OCFS_ASSERT(from->num_nodes > 0);
+	ocfs_node_map_init(osb, target);	/* width from osb->max_nodes, words zeroed */
+	ocfs_node_map_set(target, from);	/* asserts equal num_nodes, then copies */
+}
+
+// Load "target" from an on-disk le32 node map.  Expects that "target" is
+// already inited with correct num_nodes, and that "from" is wide enough.
+/* NOTE(review): map is accessed as u32[]; confirm layout on 64-bit BE hosts */
+#define OCFS_NODE_MAP_DISK_BITS_PER_LONG 32
+#define OCFS_NODE_MAP_DISK_BITS_TO_LONGS(bits) \
+	(((bits)+OCFS_NODE_MAP_DISK_BITS_PER_LONG-1)/OCFS_NODE_MAP_DISK_BITS_PER_LONG)
+void ocfs_node_map_set_from_disk(ocfs_node_map *target, void *from)
+{
+	int i, num_u32; 
+	u32 *t = (u32 *)target->map;
+	u32 *f = from;
+
+	OCFS_ASSERT(target->num_nodes > 0);
+
+	num_u32 = OCFS_NODE_MAP_DISK_BITS_TO_LONGS(target->num_nodes);
+	for (i = 0; i < num_u32; i++)
+		t[i] = le32_to_cpu(f[i]);
+}
+
+/* Store "from" into "target" as little-endian 32-bit words. */
+void ocfs_node_map_set_to_disk(void *target, ocfs_node_map *from)
+{
+	u32 *disk = target;
+	u32 *cpu = (u32 *)from->map;
+	int idx, nr_words;
+
+	OCFS_ASSERT(from->num_nodes > 0);
+	nr_words = OCFS_NODE_MAP_DISK_BITS_TO_LONGS(from->num_nodes);
+	for (idx = 0; idx < nr_words; idx++)
+		disk[idx] = cpu_to_le32(cpu[idx]);
+}
+
+/* returns 1 if no bit other than "bit" is set in target (this includes an
+ * entirely empty target), 0 otherwise */
+int ocfs_node_map_is_only(ocfs_super *osb, ocfs_node_map *target, int bit)
+{
+	ocfs_node_map others;
+
+	/* work on a private copy so target itself is left untouched */
+	ocfs_node_map_dup(osb, &others, target);
+	ocfs_node_map_clear_bit(&others, bit);
+	return ocfs_node_map_is_empty(&others);
+}
+
+/*
+**  Publish-map helper: sets bit "num" in the given publish map by
+**  delegating to the generic ocfs_node_map_set_bit().  As with that
+**  helper, num == -1 is ignored and num must be below pubmap->num_nodes.
+*/
+void ocfs_publish_map_set(ocfs_node_map *pubmap, int num)
+{
+	ocfs_node_map_set_bit(pubmap, num);
+}
+
+void ocfs_publish_map_clear(ocfs_node_map *pubmap, int num)
+{
+	ocfs_node_map_clear_bit(pubmap, num);	/* num == -1 is a no-op there */
+}
+
+/* mark node "num" in the recovery map and flag the volume VOLUME_IN_RECOVERY */
+void ocfs_recovery_map_set(ocfs_super *osb, int num)
+{
+	spin_lock(&osb->recovery_map_lock);
+	osb->vol_state = VOLUME_IN_RECOVERY;	/* set even if num is -1 (ignored below) */
+	ocfs_node_map_set_bit(&osb->recovery_map, num);
+	spin_unlock(&osb->recovery_map_lock);
+}
+
+void ocfs_recovery_map_clear(ocfs_super *osb, int num)
+{
+	spin_lock(&osb->recovery_map_lock);
+	ocfs_node_map_clear_bit(&osb->recovery_map, num);
+	if (ocfs_node_map_is_empty(&osb->recovery_map))	/* no node left in the map */
+		osb->vol_state = VOLUME_ENABLED;
+	spin_unlock(&osb->recovery_map_lock);
+}
+
+/* Nonzero if node "num" is set in osb->recovery_map; -1 always yields 0. */
+int ocfs_node_is_recovering(ocfs_super *osb, int num)
+{
+	return (num == -1) ? 0 :
+		ocfs_node_map_test_bit(&osb->recovery_map, num);
+}
+
+/* Nonzero if node "index" is set in the publish map; -1 always yields 0. */
+int ocfs_node_is_alive(ocfs_node_map *pubmap, int index)
+{
+	return (index == -1) ? 0 :
+		ocfs_node_map_test_bit(pubmap, index);
+}
+

Added: trunk/src/vote.h
===================================================================
--- trunk/src/vote.h	2004-06-17 16:59:59 UTC (rev 1127)
+++ trunk/src/vote.h	2004-06-17 21:06:33 UTC (rev 1128)
@@ -0,0 +1,83 @@
+/* -*- mode: c; c-basic-offset: 8; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
+ * vote.h
+ *
+ * Function prototypes
+ *
+ * Copyright (C) 2002, 2004 Oracle.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ * 
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ *
+ * Authors: Kurt Hackel, Mark Fasheh, Sunil Mushran, Wim Coekaerts,
+ *	    Manish Singh, Neeraj Goyal, Suchit Kaura
+ */
+
+#ifndef OCFS2_VOTE_H
+#define OCFS2_VOTE_H
+
+void ocfs_get_vote_obj(ocfs_vote_obj *obj);
+int ocfs_init_udp_sock(struct socket **send_sock,
+		       struct socket **recv_sock);
+int ocfs_lookup_obj_for_proc(ocfs_vote_obj *obj,
+			     ocfs_vote_obj_lookup_data *data);
+int ocfs_lookup_obj_by_lockid(ocfs_vote_obj *obj,
+			      ocfs_vote_obj_lookup_data *data);
+int ocfs_lookup_vote_request_obj(ocfs_super *osb,
+				 ocfs_vote_obj_lookup_data *data);
+void ocfs_process_one_vote_reply(ocfs_super *osb,
+				 ocfs_vote_reply_ctxt *ctxt,
+				 __u32 node_num);
+int ocfs_recv_udp_msg(ocfs_recv_ctxt *recv_ctxt);
+int ocfs_send_dismount_msg(ocfs_super *osb);
+int ocfs_send_dlm_request_msg (ocfs_super * osb, __u64 lock_id, 
+			       __u32 lock_type, __u32 flags, 
+			       ocfs_node_map *votemap, ocfs_node_map *openmap, 
+			       struct inode *inode, int *vote_status);
+int ocfs_send_vote_reply(ocfs_super *osb, ocfs_dlm_msg *dlm_msg,
+			 __u32 vote_status, int inode_open);
+int ocfs_lookup_vote_request_obj (ocfs_super *osb, 
+				  ocfs_vote_obj_lookup_data *data);
+
+void ocfs_node_map_init(ocfs_super *osb, ocfs_node_map *map);
+void ocfs_node_map_set_bit(ocfs_node_map *map, int bit);
+void ocfs_node_map_clear_bit(ocfs_node_map *map, int bit);
+// clear all the bits in "target" which are set in "mask"
+void ocfs_node_map_clear_bits(ocfs_node_map *target, ocfs_node_map *mask);
+// set all the bits in "target" which are set in "mask"
+void ocfs_node_map_set_bits(ocfs_node_map *target, ocfs_node_map *mask);
+int ocfs_node_map_test_bit(ocfs_node_map *map, int bit);
+int ocfs_node_map_stringify(ocfs_node_map *map, char **str);
+int ocfs_node_map_is_empty(ocfs_node_map *map);
+int ocfs_node_map_is_equal(ocfs_node_map *map1, ocfs_node_map *map2);
+void ocfs_node_map_and(ocfs_node_map *target, ocfs_node_map *mask);
+void ocfs_node_map_set(ocfs_node_map *target, ocfs_node_map *from);
+void ocfs_node_map_dup(ocfs_super *osb, ocfs_node_map *target, ocfs_node_map *from);
+// expects that "target" is already inited with correct 
+// num_nodes, and that "from" is of course wide enough
+void ocfs_node_map_set_from_disk(ocfs_node_map *target, void *from);
+void ocfs_node_map_set_to_disk(void *target, ocfs_node_map *from);
+/* returns 1 if bit is the only bit set in target, 0 otherwise */
+int ocfs_node_map_is_only(ocfs_super *osb, ocfs_node_map *target, int bit);
+
+int ocfs_node_is_recovering(ocfs_super *osb, int num);
+int ocfs_node_is_alive(ocfs_node_map *pubmap, int index);
+void ocfs_publish_map_set(ocfs_node_map *pubmap, int num);
+void ocfs_publish_map_clear(ocfs_node_map *pubmap, int num);
+void ocfs_recovery_map_set(ocfs_super *osb, int num);
+void ocfs_recovery_map_clear(ocfs_super *osb, int num);
+
+#endif /* OCFS2_VOTE_H */



More information about the Ocfs2-commits mailing list