[Ocfs2-commits] rev 15 - in trunk: . src src/inc

Sat Jan 24 01:22:17 CST 2004

Author: manish
Date: 2004-01-23 19:22:15 -0600 (Fri, 23 Jan 2004)
New Revision: 15

Modified:
   trunk/TODO
   trunk/config.guess
   trunk/config.sub
   trunk/configure.in
   trunk/install-sh
   trunk/mkinstalldirs
   trunk/src/Makefile
   trunk/src/alloc.c
   trunk/src/bitmap.c
   trunk/src/dcache.c
   trunk/src/dir.c
   trunk/src/dlm.c
   trunk/src/extmap.c
   trunk/src/file.c
   trunk/src/hash.c
   trunk/src/heartbeat.c
   trunk/src/inc/journal.h
   trunk/src/inc/ocfs.h
   trunk/src/inc/proto.h
   trunk/src/inode.c
   trunk/src/ioctl.c
   trunk/src/journal.c
   trunk/src/namei.c
   trunk/src/nm.c
   trunk/src/oin.c
   trunk/src/osb.c
   trunk/src/sem.c
   trunk/src/super.c
   trunk/src/sysfile.c
   trunk/src/util.c
   trunk/src/volcfg.c
Log:
Sync


Modified: trunk/TODO
===================================================================

--- trunk/TODO	2003-12-18 23:28:02 UTC (rev 14)
+++ trunk/TODO	2004-01-24 01:22:15 UTC (rev 15)
@@ -5,15 +5,11 @@
   for the main bitmap. Data writes to the bitmap files can be
   writethrough or journalled (with delayed playback).
 
-* Make bitmap reads/writes only read/write those blocks which we care about
+* Make bitmap reads only read those blocks which we care about
 
 * Make bitmap free functions do their job without relocking the bitmaps for
   each record.
 
-* Investigate whether we should put dirty cached writes into the
-  inodes dirty_data_buffers list or not. How does this interact with the
-  journalling code?
-
 * get rid of osb->curr_trans_id as it was never used (always zero)
 
 * get rid of all the:

Modified: trunk/config.guess
===================================================================
--- trunk/config.guess	2003-12-18 23:28:02 UTC (rev 14)
+++ trunk/config.guess	2004-01-24 01:22:15 UTC (rev 15)
@@ -3,7 +3,7 @@
 #   Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
 #   2000, 2001, 2002, 2003 Free Software Foundation, Inc.
 
-timestamp='2003-10-07'
+timestamp='2004-01-05'
 
 # This file is free software; you can redistribute it and/or modify it
 # under the terms of the GNU General Public License as published by
@@ -221,6 +221,9 @@
     mvmeppc:OpenBSD:*:*)
 	echo powerpc-unknown-openbsd${UNAME_RELEASE}
 	exit 0 ;;
+    pegasos:OpenBSD:*:*)
+	echo powerpc-unknown-openbsd${UNAME_RELEASE}
+	exit 0 ;;
     pmax:OpenBSD:*:*)
 	echo mipsel-unknown-openbsd${UNAME_RELEASE}
 	exit 0 ;;
@@ -307,6 +310,9 @@
     *:OS/390:*:*)
 	echo i370-ibm-openedition
 	exit 0 ;;
+    *:OS400:*:*)
+        echo powerpc-ibm-os400
+	exit 0 ;;
     arm:RISC*:1.[012]*:*|arm:riscix:1.[012]*:*)
 	echo arm-acorn-riscix${UNAME_RELEASE}
 	exit 0;;
@@ -742,6 +748,11 @@
         FUJITSU_REL=`echo ${UNAME_RELEASE} | sed -e 's/ /_/'`
         echo "${FUJITSU_PROC}-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}"
         exit 0 ;;
+    5000:UNIX_System_V:4.*:*)
+        FUJITSU_SYS=`uname -p | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/\///'`
+        FUJITSU_REL=`echo ${UNAME_RELEASE} | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/ /_/'`
+        echo "sparc-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}"
+	exit 0 ;;
     i*86:BSD/386:*:* | i*86:BSD/OS:*:* | *:Ascend\ Embedded/OS:*:*)
 	echo ${UNAME_MACHINE}-pc-bsdi${UNAME_RELEASE}
 	exit 0 ;;
@@ -986,6 +997,9 @@
     i*86:atheos:*:*)
 	echo ${UNAME_MACHINE}-unknown-atheos
 	exit 0 ;;
+	i*86:syllable:*:*)
+	echo ${UNAME_MACHINE}-pc-syllable
+	exit 0 ;;
     i*86:LynxOS:2.*:* | i*86:LynxOS:3.[01]*:* | i*86:LynxOS:4.0*:*)
 	echo i386-unknown-lynxos${UNAME_RELEASE}
 	exit 0 ;;
@@ -1172,7 +1186,7 @@
     *:QNX:*:4*)
 	echo i386-pc-qnx
 	exit 0 ;;
-    NSR-[DGKLNPTVWY]:NONSTOP_KERNEL:*:*)
+    NSR-?:NONSTOP_KERNEL:*:*)
 	echo nsr-tandem-nsk${UNAME_RELEASE}
 	exit 0 ;;
     *:NonStop-UX:*:*)
@@ -1216,6 +1230,9 @@
     SEI:*:*:SEIUX)
         echo mips-sei-seiux${UNAME_RELEASE}
 	exit 0 ;;
+    *:DRAGONFLY:*:*)
+	echo ${UNAME_MACHINE}-unknown-dragonfly${UNAME_RELEASE}
+	exit 0 ;;
 esac
 
 #echo '(No uname command or uname output not recognized.)' 1>&2

Modified: trunk/config.sub
===================================================================
--- trunk/config.sub	2003-12-18 23:28:02 UTC (rev 14)
+++ trunk/config.sub	2004-01-24 01:22:15 UTC (rev 15)
@@ -3,7 +3,7 @@
 #   Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
 #   2000, 2001, 2002, 2003 Free Software Foundation, Inc.
 
-timestamp='2003-10-07'
+timestamp='2004-01-05'
 
 # This file is (in principle) common to ALL GNU software.
 # The presence of a machine in this file suggests that SOME GNU software
@@ -118,7 +118,8 @@
 # Here we must recognize all the valid KERNEL-OS combinations.
 maybe_os=`echo $1 | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\2/'`
 case $maybe_os in
-  nto-qnx* | linux-gnu* | linux-dietlibc | kfreebsd*-gnu* | knetbsd*-gnu* | netbsd*-gnu* | storm-chaos* | os2-emx* | rtmk-nova*)
+  nto-qnx* | linux-gnu* | linux-dietlibc | linux-uclibc* | uclinux-uclibc* | uclinux-gnu* | \
+  kfreebsd*-gnu* | knetbsd*-gnu* | netbsd*-gnu* | storm-chaos* | os2-emx* | rtmk-nova*)
     os=-$maybe_os
     basic_machine=`echo $1 | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\1/'`
     ;;
@@ -379,6 +380,9 @@
 	amd64)
 		basic_machine=x86_64-pc
 		;;
+	amd64-*)
+		basic_machine=x86_64-`echo $basic_machine | sed 's/^[^-]*-//'`
+		;;
 	amdahl)
 		basic_machine=580-amdahl
 		os=-sysv
@@ -743,6 +747,10 @@
 		basic_machine=or32-unknown
 		os=-coff
 		;;
+	os400)
+		basic_machine=powerpc-ibm
+		os=-os400
+		;;
 	OSE68000 | ose68000)
 		basic_machine=m68000-ericsson
 		os=-ose
@@ -963,6 +971,10 @@
 	tower | tower-32)
 		basic_machine=m68k-ncr
 		;;
+	tpf)
+		basic_machine=s390x-ibm
+		os=-tpf
+		;;
 	udi29k)
 		basic_machine=a29k-amd
 		os=-udi
@@ -1137,13 +1149,13 @@
 	      | -udi* | -eabi* | -lites* | -ieee* | -go32* | -aux* \
 	      | -chorusos* | -chorusrdb* \
 	      | -cygwin* | -pe* | -psos* | -moss* | -proelf* | -rtems* \
-	      | -mingw32* | -linux-gnu* | -uxpv* | -beos* | -mpeix* | -udk* \
+	      | -mingw32* | -linux-gnu* | -linux-uclibc* | -uxpv* | -beos* | -mpeix* | -udk* \
 	      | -interix* | -uwin* | -mks* | -rhapsody* | -darwin* | -opened* \
 	      | -openstep* | -oskit* | -conix* | -pw32* | -nonstopux* \
 	      | -storm-chaos* | -tops10* | -tenex* | -tops20* | -its* \
 	      | -os2* | -vos* | -palmos* | -uclinux* | -nucleus* \
 	      | -morphos* | -superux* | -rtmk* | -rtmk-nova* | -windiss* \
-	      | -powermax* | -dnix* | -nx6 | -nx7 | -sei*)
+	      | -powermax* | -dnix* | -nx6 | -nx7 | -sei* | -dragonfly*)
 	# Remember, each alternative MUST END IN *, to match a version number.
 		;;
 	-qnx*)
@@ -1182,6 +1194,9 @@
 	-opened*)
 		os=-openedition
 		;;
+        -os400*)
+		os=-os400
+		;;
 	-wince*)
 		os=-wince
 		;;
@@ -1203,6 +1218,9 @@
 	-atheos*)
 		os=-atheos
 		;;
+	-syllable*)
+		os=-syllable
+		;;
 	-386bsd)
 		os=-bsd
 		;;
@@ -1225,6 +1243,9 @@
 	-sinix*)
 		os=-sysv4
 		;;
+        -tpf*)
+		os=-tpf
+		;;
 	-triton*)
 		os=-sysv3
 		;;
@@ -1473,9 +1494,15 @@
 			-mvs* | -opened*)
 				vendor=ibm
 				;;
+			-os400*)
+				vendor=ibm
+				;;
 			-ptx*)
 				vendor=sequent
 				;;
+			-tpf*)
+				vendor=ibm
+				;;
 			-vxsim* | -vxworks* | -windiss*)
 				vendor=wrs
 				;;

Modified: trunk/configure.in
===================================================================
--- trunk/configure.in	2003-12-18 23:28:02 UTC (rev 14)
+++ trunk/configure.in	2004-01-24 01:22:15 UTC (rev 15)
@@ -38,12 +38,19 @@
     ;;
 esac
 
+KERNEL_CFLAGS=
+
 case "$host_cpu" in
+  powerpc64)
+    OCFS_PROCESSOR="ppc64"
+    KERNEL_CFLAGS="-m64"
+    ;;
   ia64)
     OCFS_PROCESSOR="ia64"
     ;;
   x86_64)
     OCFS_PROCESSOR="x86_64"
+    KERNEL_CFLAGS="-m64"
     ;;
   i386|i486|i586|i686|i786|k6|k7)
     OCFS_PROCESSOR="i686"
@@ -84,7 +91,7 @@
 fi
 AC_SUBST(OCFS_AIO)
 
-AC_ARG_ENABLE(memdebug, [  --enable-mem-debug=[yes/no]     Turn on memory debugging [default=no]],,enable_memdebug=no)
+AC_ARG_ENABLE(memdebug, [  --enable-memdebug=[yes/no]     Turn on memory debugging [default=no]],,enable_memdebug=no)
 OCFS_MEMDEBUG=
 if test "x$enable_memdebug" = "xyes"; then
   OCFS_MEMDEBUG=yes
@@ -125,7 +132,10 @@
 fi
 
 saved_CPPFLAGS="$CPPFLAGS"
+saved_CFLAGS="$CFLAGS"
+
 CPPFLAGS="-I$KERNELINC $CPPFLAGS"
+CFLAGS="$KERNEL_CFLAGS $CFLAGS"
 
 AC_MSG_CHECKING(for kernel version)
 rm -f conf.kvertest
@@ -259,6 +269,7 @@
 AC_SUBST(MODVERSIONS)
 
 CPPFLAGS="$saved_CPPFLAGS"
+CFLAGS="$saved_CFLAGS"
 
 AC_MSG_CHECKING(for directory for kernel modules)
 AC_ARG_WITH(moddir, [  --with-moddir=/path     Path to where modules should be installed [/lib/modules/<KVER>/fs]], moddir="$withval", moddir="/lib/modules/$kversion/kernel/fs")

Modified: trunk/install-sh
===================================================================
--- trunk/install-sh	2003-12-18 23:28:02 UTC (rev 14)
+++ trunk/install-sh	2004-01-24 01:22:15 UTC (rev 15)
@@ -1,7 +1,8 @@
 #!/bin/sh
-#
 # install - install a program, script, or datafile
-#
+
+scriptversion=2004-01-12.10
+
 # This originates from X11R5 (mit/util/scripts/install.sh), which was
 # later released in X11R6 (xc/config/util/install.sh) with the
 # following copyright and license.
@@ -41,13 +42,11 @@
 # from scratch.  It can only install one file at a time, a restriction
 # shared with many OS's install programs.
 
-
 # set DOITPROG to echo to test this script
 
 # Don't use :- since 4.3BSD and earlier shells don't like it.
 doit="${DOITPROG-}"
 
-
 # put in absolute paths if you don't have them in your path; or use env. vars.
 
 mvprog="${MVPROG-mv}"
@@ -59,236 +58,253 @@
 rmprog="${RMPROG-rm}"
 mkdirprog="${MKDIRPROG-mkdir}"
 
-transformbasename=""
-transform_arg=""
+transformbasename=
+transform_arg=
 instcmd="$mvprog"
 chmodcmd="$chmodprog 0755"
-chowncmd=""
-chgrpcmd=""
-stripcmd=""
+chowncmd=
+chgrpcmd=
+stripcmd=
 rmcmd="$rmprog -f"
 mvcmd="$mvprog"
-src=""
-dst=""
-dir_arg=""
+src=
+dst=
+dir_arg=
 
-while [ x"$1" != x ]; do
-    case $1 in
-	-c) instcmd=$cpprog
-	    shift
-	    continue;;
+usage="Usage: $0 [OPTION]... SRCFILE DSTFILE
+   or: $0 [OPTION]... SRCFILES... DIRECTORY
+   or: $0 -d DIRECTORIES...
 
-	-d) dir_arg=true
-	    shift
-	    continue;;
+In the first form, install SRCFILE to DSTFILE, removing SRCFILE by default.
+In the second, install SRCFILES into DIRECTORY; in the third, create DIRECTORIES.
 
-	-m) chmodcmd="$chmodprog $2"
-	    shift
-	    shift
-	    continue;;
+Options:
+-b=TRANSFORMBASENAME
+-c         copy source (using $cpprog) instead of moving (using $mvprog).
+-d         create directories instead of installing files.
+-g GROUP   $chgrp installed files to GROUP.
+-m MODE    $chmod installed files to MODE.
+-o USER    $chown installed files to USER.
+-s         strip installed files (using $stripprog).
+-t=TRANSFORM
+--help     display this help and exit.
+--version  display version info and exit.
 
-	-o) chowncmd="$chownprog $2"
-	    shift
-	    shift
-	    continue;;
+Environment variables override the default commands:
+  CHGRPPROG CHMODPROG CHOWNPROG CPPROG MKDIRPROG MVPROG RMPROG STRIPPROG
+"
 
-	-g) chgrpcmd="$chgrpprog $2"
-	    shift
-	    shift
-	    continue;;
+while test -n "$1"; do
+  case $1 in
+    -b=*) transformbasename=`echo $1 | sed 's/-b=//'`
+        shift
+        continue;;
 
-	-s) stripcmd=$stripprog
-	    shift
-	    continue;;
+    -c) instcmd=$cpprog
+        shift
+        continue;;
 
-	-t=*) transformarg=`echo $1 | sed 's/-t=//'`
-	    shift
-	    continue;;
+    -d) dir_arg=true
+        shift
+        continue;;
 
-	-b=*) transformbasename=`echo $1 | sed 's/-b=//'`
-	    shift
-	    continue;;
+    -g) chgrpcmd="$chgrpprog $2"
+        shift
+        shift
+        continue;;
 
-	*)  if [ x"$src" = x ]
-	    then
-		src=$1
-	    else
-		# this colon is to work around a 386BSD /bin/sh bug
-		:
-		dst=$1
-	    fi
-	    shift
-	    continue;;
-    esac
-done
+    --help) echo "$usage"; exit 0;;
 
-if [ x"$src" = x ]
-then
-	echo "$0: no input file specified" >&2
-	exit 1
-else
-	:
-fi
+    -m) chmodcmd="$chmodprog $2"
+        shift
+        shift
+        continue;;
 
-if [ x"$dir_arg" != x ]; then
-	dst=$src
-	src=""
+    -o) chowncmd="$chownprog $2"
+        shift
+        shift
+        continue;;
 
-	if [ -d "$dst" ]; then
-		instcmd=:
-		chmodcmd=""
-	else
-		instcmd=$mkdirprog
-	fi
-else
+    -s) stripcmd=$stripprog
+        shift
+        continue;;
 
-# Waiting for this to be detected by the "$instcmd $src $dsttmp" command
-# might cause directories to be created, which would be especially bad
-# if $src (and thus $dsttmp) contains '*'.
+    -t=*) transformarg=`echo $1 | sed 's/-t=//'`
+        shift
+        continue;;
 
-	if [ -f "$src" ] || [ -d "$src" ]
-	then
-		:
-	else
-		echo "$0: $src does not exist" >&2
-		exit 1
-	fi
+    --version) echo "$0 $scriptversion"; exit 0;;
 
-	if [ x"$dst" = x ]
-	then
-		echo "$0: no destination specified" >&2
-		exit 1
-	else
-		:
-	fi
+    *)  # When -d is used, all remaining arguments are directories to create.
+	test -n "$dir_arg" && break
+        # Otherwise, the last argument is the destination.  Remove it from $@.
+	for arg
+	do
+          if test -n "$dstarg"; then
+	    # $@ is not empty: it contains at least $arg.
+	    set fnord "$@" "$dstarg"
+	    shift # fnord
+	  fi
+	  shift # arg
+	  dstarg=$arg
+	done
+	break;;
+  esac
+done
 
-# If destination is a directory, append the input filename; if your system
-# does not like double slashes in filenames, you may need to add some logic
-
-	if [ -d "$dst" ]
-	then
-		dst=$dst/`basename "$src"`
-	else
-		:
-	fi
+if test -z "$1"; then
+  if test -z "$dir_arg"; then
+    echo "$0: no input file specified." >&2
+    exit 1
+  fi
+  # It's OK to call `install-sh -d' without argument.
+  # This can happen when creating conditional directories.
+  exit 0
 fi
 
-## this sed command emulates the dirname command
-dstdir=`echo "$dst" | sed -e 's,[^/]*$,,;s,/$,,;s,^$,.,'`
+for src
+do
+  # Protect names starting with `-'.
+  case $src in
+    -*) src=./$src ;;
+  esac
 
-# Make sure that the destination directory exists.
-#  this part is taken from Noah Friedman's mkinstalldirs script
+  if test -n "$dir_arg"; then
+    dst=$src
+    src=
 
-# Skip lots of stat calls in the usual case.
-if [ ! -d "$dstdir" ]; then
-defaultIFS='
-	'
-IFS="${IFS-$defaultIFS}"
+    if test -d "$dst"; then
+      instcmd=:
+      chmodcmd=
+    else
+      instcmd=$mkdirprog
+    fi
+  else
+    # Waiting for this to be detected by the "$instcmd $src $dsttmp" command
+    # might cause directories to be created, which would be especially bad
+    # if $src (and thus $dsttmp) contains '*'.
+    if test ! -f "$src" && test ! -d "$src"; then
+      echo "$0: $src does not exist." >&2
+      exit 1
+    fi
 
-oIFS=$IFS
-# Some sh's can't handle IFS=/ for some reason.
-IFS='%'
-set - `echo "$dstdir" | sed -e 's@/@%@g' -e 's@^%@/@'`
-IFS=$oIFS
+    if test -z "$dstarg"; then
+      echo "$0: no destination specified." >&2
+      exit 1
+    fi
 
-pathcomp=''
+    dst=$dstarg
+    # Protect names starting with `-'.
+    case $dst in
+      -*) dst=./$dst ;;
+    esac
 
-while [ $# -ne 0 ] ; do
-	pathcomp=$pathcomp$1
-	shift
+    # If destination is a directory, append the input filename; won't work
+    # if double slashes aren't ignored.
+    if test -d "$dst"; then
+      dst=$dst/`basename "$src"`
+    fi
+  fi
 
-	if [ ! -d "$pathcomp" ] ;
-        then
-		$mkdirprog "$pathcomp"
-	else
-		:
-	fi
+  # This sed command emulates the dirname command.
+  dstdir=`echo "$dst" | sed -e 's,[^/]*$,,;s,/$,,;s,^$,.,'`
 
-	pathcomp=$pathcomp/
-done
-fi
+  # Make sure that the destination directory exists.
 
-if [ x"$dir_arg" != x ]
-then
-	$doit $instcmd "$dst" &&
+  # Skip lots of stat calls in the usual case.
+  if test ! -d "$dstdir"; then
+    defaultIFS='
+  	'
+    IFS="${IFS-$defaultIFS}"
 
-	if [ x"$chowncmd" != x ]; then $doit $chowncmd "$dst"; else : ; fi &&
-	if [ x"$chgrpcmd" != x ]; then $doit $chgrpcmd "$dst"; else : ; fi &&
-	if [ x"$stripcmd" != x ]; then $doit $stripcmd "$dst"; else : ; fi &&
-	if [ x"$chmodcmd" != x ]; then $doit $chmodcmd "$dst"; else : ; fi
-else
+    oIFS=$IFS
+    # Some sh's can't handle IFS=/ for some reason.
+    IFS='%'
+    set - `echo "$dstdir" | sed -e 's@/@%@g' -e 's@^%@/@'`
+    IFS=$oIFS
 
-# If we're going to rename the final executable, determine the name now.
+    pathcomp=
 
-	if [ x"$transformarg" = x ]
-	then
-		dstfile=`basename "$dst"`
-	else
-		dstfile=`basename "$dst" $transformbasename |
-			sed $transformarg`$transformbasename
-	fi
+    while test $# -ne 0 ; do
+      pathcomp=$pathcomp$1
+      shift
+      test -d "$pathcomp" || $mkdirprog "$pathcomp"
+      pathcomp=$pathcomp/
+    done
+  fi
 
-# don't allow the sed command to completely eliminate the filename
+  if test -n "$dir_arg"; then
+    $doit $instcmd "$dst" \
+      && { test -z "$chowncmd" || $doit $chowncmd "$dst"; } \
+      && { test -z "$chgrpcmd" || $doit $chgrpcmd "$dst"; } \
+      && { test -z "$stripcmd" || $doit $stripcmd "$dst"; } \
+      && { test -z "$chmodcmd" || $doit $chmodcmd "$dst"; }
 
-	if [ x"$dstfile" = x ]
-	then
-		dstfile=`basename "$dst"`
-	else
-		:
-	fi
+  else
+    # If we're going to rename the final executable, determine the name now.
+    if test -z "$transformarg"; then
+      dstfile=`basename "$dst"`
+    else
+      dstfile=`basename "$dst" $transformbasename \
+               | sed $transformarg`$transformbasename
+    fi
 
-# Make a couple of temp file names in the proper directory.
+    # don't allow the sed command to completely eliminate the filename.
+    test -z "$dstfile" && dstfile=`basename "$dst"`
 
-	dsttmp=$dstdir/_inst.$$_
-	rmtmp=$dstdir/_rm.$$_
+    # Make a couple of temp file names in the proper directory.
+    dsttmp=$dstdir/_inst.$$_
+    rmtmp=$dstdir/_rm.$$_
 
-# Trap to clean up temp files at exit.
+    # Trap to clean up those temp files at exit.
+    trap 'status=$?; rm -f "$dsttmp" "$rmtmp" && exit $status' 0
+    trap '(exit $?); exit' 1 2 13 15
 
-	trap 'status=$?; rm -f "$dsttmp" "$rmtmp" && exit $status' 0
-	trap '(exit $?); exit' 1 2 13 15
+    # Move or copy the file name to the temp name
+    $doit $instcmd "$src" "$dsttmp" &&
 
-# Move or copy the file name to the temp name
+    # and set any options; do chmod last to preserve setuid bits.
+    #
+    # If any of these fail, we abort the whole thing.  If we want to
+    # ignore errors from any of these, just make sure not to ignore
+    # errors from the above "$doit $instcmd $src $dsttmp" command.
+    #
+    { test -z "$chowncmd" || $doit $chowncmd "$dsttmp"; } \
+      && { test -z "$chgrpcmd" || $doit $chgrpcmd "$dsttmp"; } \
+      && { test -z "$stripcmd" || $doit $stripcmd "$dsttmp"; } \
+      && { test -z "$chmodcmd" || $doit $chmodcmd "$dsttmp"; } &&
 
-	$doit $instcmd "$src" "$dsttmp" &&
+    # Now remove or move aside any old file at destination location.  We
+    # try this two ways since rm can't unlink itself on some systems and
+    # the destination file might be busy for other reasons.  In this case,
+    # the final cleanup might fail but the new file should still install
+    # successfully.
+    {
+      if test -f "$dstdir/$dstfile"; then
+        $doit $rmcmd -f "$dstdir/$dstfile" 2>/dev/null \
+        || $doit $mvcmd -f "$dstdir/$dstfile" "$rmtmp" 2>/dev/null \
+        || {
+  	  echo "$0: cannot unlink or rename $dstdir/$dstfile" >&2
+  	  (exit 1); exit
+        }
+      else
+        :
+      fi
+    } &&
 
-# and set any options; do chmod last to preserve setuid bits
+    # Now rename the file to the real destination.
+    $doit $mvcmd "$dsttmp" "$dstdir/$dstfile"
+  fi || { (exit 1); exit; }
+done
 
-# If any of these fail, we abort the whole thing.  If we want to
-# ignore errors from any of these, just make sure not to ignore
-# errors from the above "$doit $instcmd $src $dsttmp" command.
-
-	if [ x"$chowncmd" != x ]; then $doit $chowncmd "$dsttmp"; else :;fi &&
-	if [ x"$chgrpcmd" != x ]; then $doit $chgrpcmd "$dsttmp"; else :;fi &&
-	if [ x"$stripcmd" != x ]; then $doit $stripcmd "$dsttmp"; else :;fi &&
-	if [ x"$chmodcmd" != x ]; then $doit $chmodcmd "$dsttmp"; else :;fi &&
-
-# Now remove or move aside any old file at destination location.  We try this
-# two ways since rm can't unlink itself on some systems and the destination
-# file might be busy for other reasons.  In this case, the final cleanup
-# might fail but the new file should still install successfully.
-
-{
-	if [ -f "$dstdir/$dstfile" ]
-	then
-		$doit $rmcmd -f "$dstdir/$dstfile" 2>/dev/null ||
-		$doit $mvcmd -f "$dstdir/$dstfile" "$rmtmp" 2>/dev/null ||
-		{
-		  echo "$0: cannot unlink or rename $dstdir/$dstfile" >&2
-		  (exit 1); exit
-		}
-	else
-		:
-	fi
-} &&
-
-# Now rename the file to the real destination.
-
-	$doit $mvcmd "$dsttmp" "$dstdir/$dstfile"
-
-fi &&
-
 # The final little trick to "correctly" pass the exit status to the exit trap.
-
 {
-	(exit 0); exit
+  (exit 0); exit
 }
+
+# Local variables:
+# eval: (add-hook 'write-file-hooks 'time-stamp)
+# time-stamp-start: "scriptversion="
+# time-stamp-format: "%:y-%02m-%02d.%02H"
+# time-stamp-end: "$"
+# End:

Modified: trunk/mkinstalldirs
===================================================================
--- trunk/mkinstalldirs	2003-12-18 23:28:02 UTC (rev 14)
+++ trunk/mkinstalldirs	2004-01-24 01:22:15 UTC (rev 15)
@@ -1,20 +1,32 @@
 #! /bin/sh
 # mkinstalldirs --- make directory hierarchy
-# Author: Noah Friedman <friedman at prep.ai.mit.edu>
+
+scriptversion=2003-11-08.23
+
+# Original author: Noah Friedman <friedman at prep.ai.mit.edu>
 # Created: 1993-05-16
-# Public domain
+# Public domain.
+#
+# This file is maintained in Automake, please report
+# bugs to <bug-automake at gnu.org> or send patches to
+# <automake-patches at gnu.org>.
 
 errstatus=0
 dirmode=""
 
 usage="\
-Usage: mkinstalldirs [-h] [--help] [-m mode] dir ..."
+Usage: mkinstalldirs [-h] [--help] [--version] [-m MODE] DIR ...
 
+Create each directory DIR (with mode MODE, if specified), including all
+leading file name components.
+
+Report bugs to <bug-automake at gnu.org>."
+
 # process command line arguments
 while test $# -gt 0 ; do
   case $1 in
     -h | --help | --h*)         # -h for help
-      echo "$usage" 1>&2
+      echo "$usage"
       exit 0
       ;;
     -m)                         # -m PERM arg
@@ -23,6 +35,10 @@
       dirmode=$1
       shift
       ;;
+    --version)
+      echo "$0 $scriptversion"
+      exit 0
+      ;;
     --)                         # stop option processing
       shift
       break
@@ -55,12 +71,25 @@
     if mkdir -p -- . 2>/dev/null; then
       echo "mkdir -p -- $*"
       exec mkdir -p -- "$@"
+    else
+      # On NextStep and OpenStep, the `mkdir' command does not
+      # recognize any option.  It will interpret all options as
+      # directories to create, and then abort because `.' already
+      # exists.
+      test -d ./-p && rmdir ./-p
+      test -d ./-- && rmdir ./--
     fi
     ;;
   *)
     if mkdir -m "$dirmode" -p -- . 2>/dev/null; then
       echo "mkdir -m $dirmode -p -- $*"
       exec mkdir -m "$dirmode" -p -- "$@"
+    else
+      # Clean up after NextStep and OpenStep mkdir.
+      for d in ./-m ./-p ./-- "./$dirmode";
+      do
+        test -d $d && rmdir $d
+      done
     fi
     ;;
 esac
@@ -84,17 +113,17 @@
       mkdir "$pathcomp" || lasterr=$?
 
       if test ! -d "$pathcomp"; then
-  	errstatus=$lasterr
+	errstatus=$lasterr
       else
-  	if test ! -z "$dirmode"; then
+	if test ! -z "$dirmode"; then
 	  echo "chmod $dirmode $pathcomp"
-    	  lasterr=""
-  	  chmod "$dirmode" "$pathcomp" || lasterr=$?
+	  lasterr=""
+	  chmod "$dirmode" "$pathcomp" || lasterr=$?
 
-  	  if test ! -z "$lasterr"; then
-  	    errstatus=$lasterr
-  	  fi
-  	fi
+	  if test ! -z "$lasterr"; then
+	    errstatus=$lasterr
+	  fi
+	fi
       fi
     fi
 
@@ -107,5 +136,8 @@
 # Local Variables:
 # mode: shell-script
 # sh-indentation: 2
+# eval: (add-hook 'write-file-hooks 'time-stamp)
+# time-stamp-start: "scriptversion="
+# time-stamp-format: "%:y-%02m-%02d.%02H"
+# time-stamp-end: "$"
 # End:
-# mkinstalldirs ends here

Modified: trunk/src/Makefile
===================================================================
--- trunk/src/Makefile	2003-12-18 23:28:02 UTC (rev 14)
+++ trunk/src/Makefile	2004-01-24 01:22:15 UTC (rev 15)
@@ -2,17 +2,12 @@
 
 include $(TOPDIR)/Preamble.make
 
-ifeq ($(OCFS_PROCESSOR),x86_64)
-  WARNINGS = -Wall -Wstrict-prototypes -Wno-format
-else
-  WARNINGS = -Wall -Wstrict-prototypes -Wno-format -Wmissing-prototypes \
-	     -Wmissing-declarations
+WARNINGS = -Wall -Wstrict-prototypes -Wno-format
+
+ifneq ($(OCFS_PROCESSOR),x86_64)
+WARNINGS += -Wmissing-prototypes -Wmissing-declarations
 endif
 
-#REMOVE THIS NEXT LINE AFTER DONE MOVING PROTOTYPES AROUND
-#  WARNINGS = -Wall -Wstrict-prototypes -Wno-format \
-#	     -Wmissing-declarations
-
 ifdef OCFS_DEBUG
 OPTS += -g
 endif
@@ -44,15 +39,17 @@
 DEFINES += -DUSE_JOURNAL_CREATE_REPLACEMENT
 endif
 
+DEFINES += -DVERBOSE_BH_SEM
 DEFINES += -DDEBUG_LOCK_BUFFER
 DEFINES += -DVERBOSE_BH_JBD_TRACE
 DEFINES += -DVERBOSE_LOCKING_TRACE
 
 ifneq ($(OCFS_PROCESSOR),ia64)
-DEFINES += -DOCFS_DBG_TIMING
+#DEFINES += -DOCFS_DBG_TIMING
 endif
 
 DEFINES += -DALLOW_NO_HANDLE_SYNCING
+DEFINES += -DOCFS_PARANOID_ABORTS
 
 ifeq ($(KVER),vmware)
   KERNELINC = /usr/src/linux-2.4/include
@@ -92,23 +89,27 @@
 	-fomit-frame-pointer $(MODVERSIONS) $(WARNINGS)
 LDADD=-nostdlib
 
+OPTIMIZE = -O2
+
+ifeq ($(OCFS_PROCESSOR),ppc64)
+  DEFINES += -D__LP64__
+  CFLAGS += -m64 -fsigned-char -fno-builtin -msoft-float -mminimal-toc
+  LDADD += -m elf64ppc
+endif
 ifeq ($(OCFS_PROCESSOR),x86_64)
-  DEFINES += -D__OPTIMIZE__
-  CFLAGS += -mcmodel=kernel
-  CFLAGS += -O0 -m64 -finline-functions
+  CFLAGS += -m64 -mcmodel=kernel
 endif
 ifeq ($(OCFS_PROCESSOR),ia64)
-  #DEFINES += -D__OPTIMIZE__
-  CFLAGS += -O2
 endif
 ifeq ($(OCFS_PROCESSOR),i686)
   DEFINES += -D__ILP32__
-  CFLAGS += -O2
 endif
 ifeq ($(OCFS_PROCESSOR),i586)
-  CFLAGS += -O2
+  DEFINES += -D__ILP32__
 endif
 
+CFLAGS += $(OPTIMIZE)
+
 MODULES = ocfs.o
 
 
@@ -123,6 +124,7 @@
 	hash.c		\
 	heartbeat.c	\
 	inode.c		\
+	io.c		\
 	ioctl.c		\
 	journal.c	\
 	namei.c		\
@@ -158,7 +160,7 @@
 HFILES = \
 	inc/journal.h	\
 	inc/ocfs.h	\
-	inc/ocfsio.h	\
+	inc/io.h	\
 	inc/proto.h
 
 $(CFILES): $(HFILES)

Modified: trunk/src/alloc.c
===================================================================
--- trunk/src/alloc.c	2003-12-18 23:28:02 UTC (rev 14)
+++ trunk/src/alloc.c	2004-01-24 01:22:15 UTC (rev 15)
@@ -22,7 +22,7 @@
 				  __u64 length, struct inode *inode);
 
 static int _squish_extent_entries(ocfs_super *osb, ocfs_alloc_ext *extarr, 
-				  __u32 *freeExtent, 
+				  __u8 *freeExtent, 
 				  ocfs_bitmap_free_head * free_head, 
 				  __u64 FileSize, bool flag, struct inode *inode) ;
 
@@ -44,7 +44,8 @@
 static inline int ocfs_free_main_bitmap(ocfs_super *osb, 
 					ocfs_free_rec *freelog);
 
-static int ocfs_alloc_new_window(ocfs_super *osb, struct buffer_head *lock_bh);
+static int ocfs_alloc_new_window(ocfs_super *osb, struct buffer_head *lock_bh,
+				 ocfs_journal_handle *handle);
 static int ocfs_sync_local_to_main(ocfs_super *osb, 
 				   ocfs_bitmap_free_head **f, 
 				   struct buffer_head *local_alloc_bh, 
@@ -98,6 +99,9 @@
 		       ( (type == DISK_ALLOC_EXTENT_NODE) ? 
 		         "DISK_ALLOC_EXTENT_NODE" : "DISK_ALLOC_DIR_NODE" ));
 
+	if (len == 0)
+		BUG();
+
 	log = f->tail;
 
 	/* need a new one? */
@@ -311,12 +315,10 @@
 	if (free_vol_bits != NULL) {
 		ocfs_bitmap_lock *bm_lock;
 
-		bm_lock = (ocfs_bitmap_lock *)OCFS_BH_GET_DATA(globalbh);
+		bm_lock = (ocfs_bitmap_lock *)OCFS_BH_GET_DATA_WRITE(globalbh);   /* write */
                 bm_lock->used_bits = ocfs_count_bits(&osb->cluster_bitmap);
 		OCFS_BH_PUT_DATA(globalbh);
 
-/*                status = ocfs_write_force_disk(osb, bm_lock, OCFS_SECTOR_SIZE, 
-		  OCFS_BITMAP_LOCK_OFFSET);*/
 		status = ocfs_write_bh(osb, globalbh, 0, NULL);
 		if (status < 0) {
 			LOG_ERROR_STATUS (status);
@@ -398,6 +400,8 @@
 	int status;
 	__u32 bitmapblocks; /* we only care about the valid blocks */
 
+	LOG_ENTRY();
+
 	bitmap = &osb->cluster_bitmap;
 
 	bitmapblocks = (OCFS_ALIGN(bitmap->validbits, OCFS_BITS_IN_CHUNK) / OCFS_BITS_IN_CHUNK);
@@ -418,6 +422,13 @@
 	for (i = 0; i < freelog->num_updates; i++)
 		ocfs_clear_bits(bitmap, freelog->update[i].file_off, freelog->update[i].length);
 
+	/* we don't know which blocks we've changed and which
+	 * haven't, so just write them all out */
+	for(i = 0; i < bitmapblocks; i++) {
+		OCFS_BH_GET_DATA_WRITE(bitmap->chunk[i]);
+		OCFS_BH_PUT_DATA(bitmap->chunk[i]);
+	}
+
 	status = ocfs_write_bhs(osb, bitmap->chunk, bitmapblocks, 0, NULL);
 	if (status < 0) {
 		LOG_ERROR_STATUS(status);
@@ -426,6 +437,7 @@
 
 	status = 0;
 bail:
+	LOG_EXIT_STATUS(status);
 	return(0);
 }
 
@@ -544,9 +556,14 @@
 	/* ocfs_free_main_bitmap handles all the reads/writes for the
 	 * main bitmap */
 	if (Type != DISK_ALLOC_VOLUME) {
-		status = ocfs_write_system_file(osb, fileId, tmpbitmap->chunk, 
-						bitmapblocks * osb->sect_size, 
-						offset);
+		/* we don't know which blocks we've changed and which
+		 * haven't, so just write them all out */
+		for(i = 0; i < bitmapblocks; i++) {
+			OCFS_BH_GET_DATA_WRITE(tmpbitmap->chunk[i]);
+			OCFS_BH_PUT_DATA(tmpbitmap->chunk[i]);
+		}
+		status = ocfs_write_bhs(osb, tmpbitmap->chunk, bitmapblocks, 
+					0, NULL);
 		if (status < 0) {
 			LOG_ERROR_STATUS (status);
 			goto leave;
@@ -602,7 +619,7 @@
 				goto finally;
 			}
 		}
-		extent_header = (ocfs_extent_group *) OCFS_BH_GET_DATA(extent_header_bh);
+		extent_header = (ocfs_extent_group *) OCFS_BH_GET_DATA_WRITE(extent_header_bh);  /* write */
 		bh_locked = 1;
 	}
 	if (extent_header != NULL) {
@@ -651,7 +668,7 @@
 				goto finally;
 			}
 		}
-		buff = OCFS_BH_GET_DATA(header_bhs[i]);
+		buff = OCFS_BH_GET_DATA_WRITE(header_bhs[i]);  /* write */
 		memset(buff, 0, osb->sect_size);
 
 		/* TODO: Do we really need to do this? */
@@ -687,7 +704,7 @@
 	/* Fill in all the headers and the leaf */
 	for (i = 0; i <= depth; i++) {
 		ocfs_extent_group *ext;
-		ext = (ocfs_extent_group *) OCFS_BH_GET_DATA(header_bhs[i]);
+		ext = (ocfs_extent_group *) OCFS_BH_GET_DATA_WRITE(header_bhs[i]);  /* write */
 
 		ext->last_ext_ptr = lastExtPointer;
 		ext->up_hdr_node_ptr = upHeaderPtr;
@@ -770,7 +787,7 @@
 					goto finally;
 				}
 			}
-			extent_header = (ocfs_extent_group *) OCFS_BH_GET_DATA(bh);
+			extent_header = (ocfs_extent_group *) OCFS_BH_GET_DATA_WRITE(bh);  /* write */
 			if (!IS_VALID_EXTENT_HEADER(extent_header)) {
 				OCFS_BH_PUT_DATA(bh);
 				brelse(bh);
@@ -848,7 +865,7 @@
 		LOG_ERROR_STATUS (status = -ENOMEM);
 		goto finally;
 	}
-	real_fe = (ocfs_file_entry *)OCFS_BH_GET_DATA(fe_bh);
+	real_fe = (ocfs_file_entry *)OCFS_BH_GET_DATA_READ(fe_bh); /* read */
 	memcpy(fe, real_fe, 512);
 	OCFS_BH_PUT_DATA(fe_bh);
 	real_fe = NULL;
@@ -881,7 +898,7 @@
 			LOG_ERROR_STATUS(status);
 			goto finally;
 		}
-		buf = OCFS_BH_GET_DATA(bhs[i]);
+		buf = OCFS_BH_GET_DATA_WRITE(bhs[i]);  /* write */
 		memset(buf, 0, osb->sect_size);
 #ifdef LINUX_2_5
 		set_buffer_uptodate(bhs[i]);
@@ -900,7 +917,7 @@
 		goto finally;
 	}
 
-	OcfsExtent = (ocfs_extent_group *) OCFS_BH_GET_DATA(bhs[0]);
+	OcfsExtent = (ocfs_extent_group *) OCFS_BH_GET_DATA_WRITE(bhs[0]);  /* write */
 	/* Copy the File Entry information in to the newly allocated sector */
 	for (k = 0; k < OCFS_MAX_FILE_ENTRY_EXTENTS; k++) {
 		OcfsExtent->extents[k].file_off = fe->extents[k].file_off;
@@ -928,7 +945,7 @@
 	upHeaderPtr = fe->this_sector;
 
 	for (i = 0; i < fe->granularity; i++) {
-		ExtentHeader = (ocfs_extent_group *) OCFS_BH_GET_DATA(bhs[i]);
+		ExtentHeader = (ocfs_extent_group *) OCFS_BH_GET_DATA_WRITE(bhs[i]);  /* write */
 
 		ExtentHeader->type = OCFS_EXTENT_HEADER;
 		ExtentHeader->granularity = (fe->granularity - 1) - i;
@@ -965,7 +982,7 @@
 	}
 
 	/* Update the Data Segment, which is the last one in our array */
-	OcfsExtent = (ocfs_extent_group *) OCFS_BH_GET_DATA(bhs[fe->granularity]);
+	OcfsExtent = (ocfs_extent_group *) OCFS_BH_GET_DATA_WRITE(bhs[fe->granularity]);  /* write */
 
 	i = (fe->granularity) ? 0 : OCFS_MAX_FILE_ENTRY_EXTENTS;
 
@@ -1022,7 +1039,7 @@
 			}
 		}
 
-		ext = (ocfs_extent_group *) OCFS_BH_GET_DATA(bh);
+		ext = (ocfs_extent_group *) OCFS_BH_GET_DATA_WRITE(bh);  /* write */
 		if (!IS_VALID_EXTENT_DATA(ext)) {
 			OCFS_BH_PUT_DATA(bh);
 			brelse(bh);
@@ -1030,7 +1047,7 @@
 			goto finally;
 		}
 
-		ext->next_data_ext = OcfsExtent->this_ext;
+		ext->next_data_ext = lastExtentPtr;
 		OCFS_BH_PUT_DATA(bh);
 		
 		if (handle)
@@ -1069,7 +1086,7 @@
 				}
 			}
 
-			ext = (ocfs_extent_group *) OCFS_BH_GET_DATA(bh);
+			ext = (ocfs_extent_group *) OCFS_BH_GET_DATA_WRITE(bh);  /* write */
 			ext->up_hdr_node_ptr = new_up_hdr_ptr;
 			OCFS_BH_PUT_DATA(bh);
 
@@ -1104,7 +1121,7 @@
 finally:
 
 	if (fe) {
-		real_fe = (ocfs_file_entry *)OCFS_BH_GET_DATA(fe_bh);
+		real_fe = (ocfs_file_entry *)OCFS_BH_GET_DATA_WRITE(fe_bh);  /* write */
 		memcpy(real_fe, fe, 512);
 		OCFS_BH_PUT_DATA(fe_bh);
 		real_fe = NULL;
@@ -1131,7 +1148,7 @@
 
 	LOG_ENTRY_ARGS("(actualDiskOffset=%u.%u, actualLength=%u.%u)\n", actualDiskOffset, actualLength);
 
-	FileEntry = (ocfs_file_entry *)OCFS_BH_GET_DATA(fe_bh);
+	FileEntry = (ocfs_file_entry *)OCFS_BH_GET_DATA_WRITE(fe_bh);  /* write */
 	OCFS_ASSERT (FileEntry);
 
 	if (!IS_VALID_FILE_ENTRY (FileEntry)) {
@@ -1140,6 +1157,7 @@
 	}
 
 	if (FileEntry->local_ext) {
+		LOG_TRACE_STR("Using local extents");
 		/* We are still using the local extents of File Entry */
 		if (FileEntry->next_free_ext > OCFS_MAX_FILE_ENTRY_EXTENTS) {
 			LOG_ERROR_STATUS(status = -EINVAL);
@@ -1170,6 +1188,7 @@
 		IncreaseTreeDepth = true;
 		goto increase_depth;
 	}
+	LOG_TRACE_STR("Using NON-local extents");
 
 	/*** Nonlocal Extents ***/
 	if (FileEntry->granularity > 3)
@@ -1183,7 +1202,7 @@
 		LOG_ERROR_STATUS (status = -EINVAL);
 		goto finally;
 	}
-	extent = (ocfs_extent_group *) OCFS_BH_GET_DATA(extent_bh);
+	extent = (ocfs_extent_group *) OCFS_BH_GET_DATA_WRITE(extent_bh); /* write */ /* but not if journalled */
 	if (!IS_VALID_EXTENT_DATA(extent)) {
 		LOG_ERROR_STATUS (status = -EINVAL);
 		goto finally;
@@ -1201,8 +1220,7 @@
 			OCFS_BH_PUT_DATA(extent_bh);
 			ocfs_journal_access(handle, extent_bh, 
 					    OCFS_JOURNAL_ACCESS_WRITE);
-			extent = (ocfs_extent_group *) 
-				OCFS_BH_GET_DATA(extent_bh);
+			extent = (ocfs_extent_group *) OCFS_BH_GET_DATA_WRITE(extent_bh); /* write */ /* journal_access */
 		}
 		extent->extents[k].num_bytes += actualLength;
 		status = 0;
@@ -1219,8 +1237,7 @@
 			OCFS_BH_PUT_DATA(extent_bh);
 			ocfs_journal_access(handle, extent_bh, 
 					    OCFS_JOURNAL_ACCESS_WRITE);
-			extent = (ocfs_extent_group *) 
-				OCFS_BH_GET_DATA(extent_bh);
+			extent = (ocfs_extent_group *) OCFS_BH_GET_DATA_WRITE(extent_bh); /* write */ /* journal access */
 		}
 		extent->extents[k].file_off = FileEntry->alloc_size;
 		extent->extents[k].num_bytes = actualLength;
@@ -1250,7 +1267,7 @@
 				LOG_ERROR_STATUS (status);
 				goto finally;
 			}
-			extent_header = (ocfs_extent_group *) OCFS_BH_GET_DATA(extent_header_bh);
+			extent_header = (ocfs_extent_group *) OCFS_BH_GET_DATA_READ(extent_header_bh); /* read */
 			if (!IS_VALID_EXTENT_HEADER(extent_header)) {
 				LOG_ERROR_STATUS (status = -EINVAL);
 				goto finally;
@@ -1299,8 +1316,7 @@
 			OCFS_BH_PUT_DATA(extent_bh);
 			ocfs_journal_access(handle, extent_bh, 
 					    OCFS_JOURNAL_ACCESS_WRITE);
-			extent = (ocfs_extent_group *) 
-				OCFS_BH_GET_DATA(extent_bh);
+			extent = (ocfs_extent_group *) OCFS_BH_GET_DATA_WRITE(extent_bh); /* write */ /* journal access */
 		}
 
 		extent->next_data_ext = newExtentOff;
@@ -1353,8 +1369,7 @@
 				}
 			}
 
-			extent_header = (ocfs_extent_group *) 
-				OCFS_BH_GET_DATA(extent_header_bh);
+			extent_header = (ocfs_extent_group *) OCFS_BH_GET_DATA_WRITE(extent_header_bh); /* write */
 			if (!IS_VALID_EXTENT_HEADER(extent_header)) {
 				LOG_ERROR_STATUS (status = -EINVAL);
 				goto finally;
@@ -1425,7 +1440,7 @@
 		 * mapping run.So just adding this entry will be
 		 * fine. */
 		if (FileEntry == NULL)
-			FileEntry = (ocfs_file_entry *)OCFS_BH_GET_DATA(fe_bh);
+			FileEntry = (ocfs_file_entry *)OCFS_BH_GET_DATA_READ(fe_bh); /* read */
 
 		Vbo = FileEntry->alloc_size;
 		Lbo = actualDiskOffset;
@@ -1453,6 +1468,7 @@
 	if (FileEntry) {
 		OCFS_BH_PUT_DATA(fe_bh);
 	}
+
 	LOG_EXIT_STATUS (status);
 	return (status);
 }				/* ocfs_allocate_extent */
@@ -1468,7 +1484,7 @@
  * 'flag' seems to be an indicator that (if true) tells us that we already know
  *	we're gonna have to clear out all of extarr.
  */
-int _squish_extent_entries(ocfs_super *osb, ocfs_alloc_ext *extarr, __u32 *freeExtent, ocfs_bitmap_free_head *free_head, __u64 FileSize, bool flag, struct inode *inode) 
+int _squish_extent_entries(ocfs_super *osb, ocfs_alloc_ext *extarr, __u8 *freeExtent, ocfs_bitmap_free_head *free_head, __u64 FileSize, bool flag, struct inode *inode) 
 {
         int status = 0;
 	bool FirstTime = true;
@@ -1484,6 +1500,7 @@
 	LOG_ENTRY ();
 
         firstfree = *freeExtent;
+
 	/* loop through the used alloc_extents */
         for (i = 0; i < firstfree; i++) { 
                 ext = &(extarr[i]); 
@@ -1541,6 +1558,9 @@
  * including itself, it's children, and any data blocks they point to.
  * Works fine with any granularity (up to 4, in which case we'd need
  * more stack space)
+ *
+ * extent_grp_bh will be unchanged, though it will be marked for
+ * deletion in free_head.
  */
 
 /* We can't recurse, so we keep a simple stack of ocfs_extent_groups. */
@@ -1554,20 +1574,22 @@
 	__u64 tmp_off;
 	__u32 num_sectors = 0, bitmap_offset = 0;
 	ocfs_alloc_ext *ext;
-	struct buffer_head * bh_stack[OCFS_TREE_STACK_SIZE];
+	struct buffer_head *tmp_bh = NULL;
+	char * stack[OCFS_TREE_STACK_SIZE];
 	ocfs_extent_group * cur_extent; /* convenience, points to TOS */
 	int tos = 0;
 
 	LOG_ENTRY();
 
 	for (i =0; i < OCFS_TREE_STACK_SIZE; i++)
-		bh_stack[i] = NULL;
+		stack[i] = NULL;
 
-	bh_stack[tos] = extent_grp_bh;
+	stack[tos] = ocfs_malloc(512);
+	memcpy(stack[tos], OCFS_BH_GET_DATA_READ(extent_grp_bh), 512);
+	OCFS_BH_PUT_DATA(extent_grp_bh);
 
 	do {
-		cur_extent = (ocfs_extent_group *) 
-			OCFS_BH_GET_DATA(bh_stack[tos]);
+		cur_extent = (ocfs_extent_group *) stack[tos];
 
 		if (!IS_VALID_EXTENT_DATA(cur_extent) && 
 		    !IS_VALID_EXTENT_HEADER(cur_extent)) {
@@ -1576,7 +1598,7 @@
 		}
 
 		if (IS_VALID_EXTENT_DATA(cur_extent)) {
-			LOG_PID_PRINTK("found some data to free (%u.%u)\n", HI(cur_extent->this_ext), LO(cur_extent->this_ext));
+			LOG_TRACE_ARGS("found some data to free (%u.%u)\n", HI(cur_extent->this_ext), LO(cur_extent->this_ext));
 			for(i = 0; i < cur_extent->next_free_ext; i++) {
 				/* Free the data associated with each header */
 				ext = &cur_extent->extents[i];
@@ -1594,7 +1616,7 @@
 			/* Did we already kill all his children, or
 			 * are they already dead? */
 			if (cur_extent->next_free_ext == 0) {
-				LOG_PID_PRINTK("Popping this header (%u.%u)\n", HI(cur_extent->this_ext), LO(cur_extent->this_ext), cur_extent->next_free_ext);
+				LOG_TRACE_ARGS("Popping this header (%u.%u)\n", HI(cur_extent->this_ext), LO(cur_extent->this_ext), cur_extent->next_free_ext);
 				goto free_meta;
 			}
 
@@ -1611,26 +1633,29 @@
 			tmp_off = cur_extent->extents[victim].disk_off;
 			cur_extent->next_free_ext--;
 
-			OCFS_BH_PUT_DATA(bh_stack[tos]);
 			cur_extent = NULL;
 			tos++;
 
 			/* should already be null, but we can do this
 			 * just in case. */
-			bh_stack[tos] = NULL;
+			stack[tos] = ocfs_malloc(512);
 
-			status = ocfs_read_bh(osb, tmp_off, &bh_stack[tos], 
+			status = ocfs_read_bh(osb, tmp_off, &tmp_bh, 
 					      OCFS_BH_COND_CACHED, inode);
 			if (status < 0) {
 				LOG_ERROR_STATUS (status);
 				goto bail;
 			}
 
+			memcpy(stack[tos], OCFS_BH_GET_DATA_READ(tmp_bh), 512);
+			OCFS_BH_PUT_DATA(tmp_bh);
+			brelse(tmp_bh);
+			tmp_bh = NULL;
 			/* We only want to free on our way back up the tree */
 			continue;
 		}
 
-	free_meta:
+free_meta:
 		/* Free the metadata associated with this extent group */
 		status = ocfs_add_to_bitmap_free_head(osb, free_head, 1, cur_extent->alloc_file_off, cur_extent->alloc_node, DISK_ALLOC_EXTENT_NODE);
 		if (status < 0) {
@@ -1638,22 +1663,17 @@
 			goto bail;
 		}
 		/* Pop one off the stack */
-		OCFS_BH_PUT_DATA(bh_stack[tos]);
-		brelse(bh_stack[tos]);
-		bh_stack[tos] = NULL;
+		ocfs_free(stack[tos]);
+		stack[tos] = NULL;
 		cur_extent = NULL;
 		tos--;
 	} while (tos >= 0);
 
 	status = 0;
 bail:
-	if (cur_extent)
-		OCFS_BH_PUT_DATA(bh_stack[tos]);
-	/* brelse the stack. We never brelse the bottom of the stack
-	 * because we were passed that guy from the caller */
-	for(i = 1; i < OCFS_TREE_STACK_SIZE; i++)
-		if (bh_stack[i])
-			brelse(bh_stack[i]);
+	for(i = 0; i < OCFS_TREE_STACK_SIZE; i++)
+		if (stack[i])
+			ocfs_free(stack[i]);
 
 	LOG_EXIT_STATUS (status);
 	return(status);
@@ -1675,7 +1695,7 @@
 		goto bail;
 	}
 
-	group = (ocfs_extent_group *) OCFS_BH_GET_DATA(group_bh);
+	group = (ocfs_extent_group *) OCFS_BH_GET_DATA_WRITE(group_bh); /* write */
 
 	if (!IS_VALID_EXTENT_DATA(group) && 
 	    !IS_VALID_EXTENT_HEADER(group)) {
@@ -1737,7 +1757,7 @@
 	   because I can't recreate one. */
 	if (gran == 3) {
 		LOG_ERROR_STR("Truncating file with granularity 3, this is not tested and may be unsafe!");
-		LOG_PID_STR("Found a granularity 3 tree, trimming it.\n");
+		LOG_TRACE_STR("Found a granularity 3 tree, trimming it.\n");
 
 		status = ocfs_journal_access(handle, extent_grp_bh, 
 					     OCFS_JOURNAL_ACCESS_WRITE);
@@ -1745,7 +1765,7 @@
 			LOG_ERROR_STATUS(status);
 			goto bail;
 		}
-		extent_grp = (ocfs_extent_group *) OCFS_BH_GET_DATA(extent_grp_bh);
+		extent_grp = (ocfs_extent_group *) OCFS_BH_GET_DATA_WRITE(extent_grp_bh); /* write */ /* journal access */
 		for(i = (extent_grp->next_free_ext - 1); i>=0; i--) {
 			ext = &extent_grp->extents[i];
 
@@ -1799,7 +1819,6 @@
 			goto bail;
 		}
 		extent_grp_bh = tmp_bh2;
-		LOG_PID_STR("Ok, continuing as if granularity = 2");
 
 		/* We want to do the next bit of stuff too */
 		gran = 2;
@@ -1810,7 +1829,7 @@
 	/* get rid of everything from the top level HDR that we can, then
 	   proceeed as if we're granularity 1 (which we know works) */
 	if (gran == 2) {
-		LOG_PID_STR("Found a granularity 2 tree, trimming it.\n");
+		LOG_TRACE_STR("Found a granularity 2 tree, trimming it.\n");
 
 		status = ocfs_journal_access(handle, extent_grp_bh, 
 					     OCFS_JOURNAL_ACCESS_WRITE);
@@ -1819,7 +1838,7 @@
 			goto bail;
 		}
 
-		extent_grp = (ocfs_extent_group *) OCFS_BH_GET_DATA(extent_grp_bh);
+		extent_grp = (ocfs_extent_group *) OCFS_BH_GET_DATA_WRITE(extent_grp_bh); /* write */ /* journal access */
 		for(i = (extent_grp->next_free_ext - 1); i>=0; i--) {
 			ext = &extent_grp->extents[i];
 
@@ -1874,7 +1893,6 @@
 		}
 
 		extent_grp_bh = tmp_bh;
-		LOG_PID_STR("Ok, continuing as if granularity = 1");
 
 		/* Right now, we don't use 'gran' below here, but just
 		 * in case */
@@ -1901,7 +1919,7 @@
 			goto bail;
 		}
 
-		AllocExtent = (ocfs_extent_group *) OCFS_BH_GET_DATA(bh_stack[tos]);
+		AllocExtent = (ocfs_extent_group *) OCFS_BH_GET_DATA_WRITE(bh_stack[tos]); /* write */ /* journal access */
 
 		if (!IS_VALID_EXTENT_DATA(AllocExtent) && 
 		    !IS_VALID_EXTENT_HEADER(AllocExtent)) {
@@ -1911,10 +1929,8 @@
 		
 		if (IS_VALID_EXTENT_DATA(AllocExtent)) {
 			/* shall we just do away with him? */
-			LOG_PID_STR("Found a whole data extent!");
-			/* changed this from > to >= */
 			if (AllocExtent->extents[0].file_off >= newsize) {
-				LOG_PID_PRINTK("Killing this data extent (%u, %u)\n", HI(AllocExtent->this_ext), LO(AllocExtent->this_ext));
+				LOG_TRACE_ARGS("Killing this data extent (%u, %u)\n", HI(AllocExtent->this_ext), LO(AllocExtent->this_ext));
 				/* Boundary case - what if this guy is
 				 * the last DAT we should delete
 				 * (i.e., split no more ;) */
@@ -1926,12 +1942,11 @@
 					goto bail;
 				}
 				/* silly, but what to do? */
-				AllocExtent = (ocfs_extent_group *) 
-					OCFS_BH_GET_DATA(bh_stack[tos]);
+				AllocExtent = (ocfs_extent_group *) OCFS_BH_GET_DATA_READ(bh_stack[tos]); /* read */
 			} else {
 				/* Alright, we know for sure that
 				 * we're splitting in this guy. */
-				LOG_PID_PRINTK("Splitting this data extent (%u, %u)\n", HI(AllocExtent->this_ext), LO(AllocExtent->this_ext));
+				LOG_TRACE_ARGS("Splitting this data extent (%u, %u)\n", HI(AllocExtent->this_ext), LO(AllocExtent->this_ext));
 				fe->last_ext_ptr = AllocExtent->this_ext;
 				AllocExtent->next_data_ext = 0;
 				/* total_bytes is used below to know
@@ -1944,7 +1959,7 @@
 				 * it: */
 				ext = &AllocExtent->extents[AllocExtent->next_free_ext - 1];
 				if ((ext->file_off + ext->num_bytes)==newsize){
-					LOG_PID_STR("Ok, hit that boundary in the DAT");
+					LOG_TRACE_STR("Ok, hit that boundary in the DAT");
 					goto fix_headers;
 				}
 
@@ -1987,7 +2002,6 @@
 					}
 				} /* For loop */
 
-				LOG_PID_PRINTK("Writing that data extent back out to disk now (%u,%u)\n", HI(AllocExtent->this_ext), LO(AllocExtent->this_ext));
 				/* Either way, we need to write this back out*/
 				OCFS_BH_PUT_DATA(bh_stack[tos]);
 				AllocExtent = NULL;
@@ -1998,20 +2012,19 @@
 					goto bail;
 				}
 
-				LOG_PID_PRINTK("Fixing the headers above us! (tos=%d)\n", tos);
-			fix_headers:
+				LOG_TRACE_ARGS("Fixing the headers above us! (tos=%d)\n", tos);
+fix_headers:
 				/*And here we should fix the headers above us*/
 				tos--;
 				while (tos >= 0) {
-					LOG_PID_PRINTK("at top of loop, tos=%d\n", tos);
+					LOG_TRACE_ARGS("at top of loop, tos=%d\n", tos);
 					status = ocfs_journal_access(handle, bh_stack[tos], OCFS_JOURNAL_ACCESS_WRITE);
 					if (status < 0) {
 						LOG_ERROR_STATUS(status);
 						goto bail;
 					}
 
-					AllocExtent = (ocfs_extent_group *) 
-					       OCFS_BH_GET_DATA(bh_stack[tos]);
+					AllocExtent = (ocfs_extent_group *) OCFS_BH_GET_DATA_WRITE(bh_stack[tos]); /* write */ /* journal access */
 					victim = AllocExtent->next_free_ext;
 					AllocExtent->next_free_ext++;
 					/* need to also update
@@ -2034,7 +2047,7 @@
 					}
 					tos--;
 				}
-				LOG_PID_STR("breaking to end function now!");
+				LOG_TRACE_STR("breaking to end function now!");
 				/* Ok, done! */
 				break;
 			}
@@ -2044,7 +2057,7 @@
                          * are they already dead? */
                         if (AllocExtent->next_free_ext == 0) {
 				/*Ok, we're done with this guy, pop the stack*/
-                                LOG_PID_PRINTK("Popping this header (%u.%u)\n",
+                                LOG_TRACE_ARGS("Popping this header (%u.%u)\n",
 					       HI(AllocExtent->this_ext), 
 					       LO(AllocExtent->this_ext), 
 					       AllocExtent->next_free_ext);
@@ -2064,7 +2077,7 @@
 			/* changed this from > to >= */
 			/* Do we just delete this whole part of the tree? */
 			if (AllocExtent->extents[0].file_off >= newsize) {
-				LOG_PID_PRINTK("whacking this tree: (%u.%u)\n",
+				LOG_TRACE_ARGS("whacking this tree: (%u.%u)\n",
 					       HI(AllocExtent->this_ext), 
 					       LO(AllocExtent->this_ext));
 
@@ -2094,8 +2107,7 @@
 					goto bail;
 				}
 
-				AllocExtent = (ocfs_extent_group *) 
-					OCFS_BH_GET_DATA(bh_stack[tos]);
+				AllocExtent = (ocfs_extent_group *) OCFS_BH_GET_DATA_WRITE(bh_stack[tos]); /* write */ /* journal access */
 
 				victim = AllocExtent->next_free_ext;
 				AllocExtent->extents[victim].file_off = 0;
@@ -2151,8 +2163,7 @@
 			}
 
 			/* need to get the next offset to read */
-			AllocExtent = (ocfs_extent_group *) 
-				OCFS_BH_GET_DATA(bh_stack[tos]);
+			AllocExtent = (ocfs_extent_group *) OCFS_BH_GET_DATA_WRITE(bh_stack[tos]); /* write */
 			AllocExtent->next_free_ext--;
 			victim = AllocExtent->next_free_ext;
 			ext = &AllocExtent->extents[victim];
@@ -2233,7 +2244,7 @@
 	LOG_ENTRY ();
 
 	if (fe->next_free_ext == 0) {
-		LOG_PID_STR("setting to zero as there isn't any used extents");
+		LOG_TRACE_STR("setting to zero as there isn't any used extents");
 		fe->last_ext_ptr = 0;
 		status = 0;
 		goto bail;
@@ -2247,7 +2258,7 @@
 		goto bail;
 	}
 
-	extent = (ocfs_extent_group *) OCFS_BH_GET_DATA(extent_bh);
+	extent = (ocfs_extent_group *) OCFS_BH_GET_DATA_READ(extent_bh); /* read */
 
 	if (!IS_VALID_EXTENT_DATA(extent) && 
 	    !IS_VALID_EXTENT_HEADER(extent)) {
@@ -2274,7 +2285,7 @@
 			LOG_ERROR_STATUS(status);
 			goto bail;
 		}
-		extent = (ocfs_extent_group *) OCFS_BH_GET_DATA(extent_bh);
+		extent = (ocfs_extent_group *) OCFS_BH_GET_DATA_READ(extent_bh); /* read */
 	}
 	
 	fe->last_ext_ptr = extent->this_ext;
@@ -2317,9 +2328,10 @@
 
         /* local extents */
 	if (FileEntry->local_ext) {
-		LOG_PID_STR("local extents, calling _squish_extent_entries");
-                status = _squish_extent_entries(osb, FileEntry->extents, (__u32 *)&FileEntry->next_free_ext, free_head, alloc_size, false, inode);
-		LOG_PID_PRINTK("return from _squish_extent_entries, status=%d", status);
+                status = _squish_extent_entries(osb, FileEntry->extents, 
+						&FileEntry->next_free_ext, 
+						free_head, alloc_size, false, 
+						inode);
                 if (status < 0) {
                         LOG_ERROR_STATUS (status);
                         goto finally;
@@ -2327,14 +2339,14 @@
 		goto finally;
         }
 
-	LOG_PID_PRINTK("non-local extents. taking that code path, truncating to alloc_size of (%u.%u)\n", HI(alloc_size), LO(alloc_size));
+	LOG_TRACE_ARGS("non-local extents. taking that code path, truncating to alloc_size of (%u.%u)\n", HI(alloc_size), LO(alloc_size));
 	/* non-local extents */
 
 	updated_lep = false;
 
 	/* Loop backwards through only the used free extent headers here */
 	for (i = (FileEntry->next_free_ext - 1); i >= 0; i--) {
-		LOG_PID_PRINTK("at top of loop, i = %d\n", i);
+		LOG_TRACE_ARGS("at top of loop, i = %d\n", i);
 		/* Go ahead and read that bit of the tree - we'll need it. */
 		status = ocfs_read_bh(osb, FileEntry->extents[i].disk_off,
 				      &extent_bh, OCFS_BH_CACHED, inode);
@@ -2344,7 +2356,7 @@
 		}
 		/* Figure out, do we want to kill this whole tree? */
 		if (FileEntry->extents[i].file_off >= alloc_size) {
-			LOG_PID_PRINTK("Found an entire tree to delete!\n");
+			LOG_TRACE_ARGS("Found an entire tree to delete!\n");
 			
 			status = ocfs_kill_this_tree(osb, extent_bh, free_head, inode);
 			if (status < 0) {
@@ -2361,7 +2373,7 @@
 			 * split this tree, but we call this function
 			 * anyways in order to update last_ext_ptr. */
 
-			LOG_PID_PRINTK("Splitting this tree!\n");
+			LOG_TRACE_ARGS("Splitting this tree!\n");
 			status = ocfs_split_this_tree(osb, extent_bh, free_head, FileEntry, handle, inode);
 			if (status < 0) {
 				LOG_ERROR_STATUS(status);
@@ -2369,7 +2381,7 @@
 			}
 
 			/* Ok, update the FileEntry */
-			LOG_PID_PRINTK("Alright. num_bytes = (%u,%u), alloc_size = (%u,%u) file_off = (%u,%u)\n", HI(FileEntry->extents[i].num_bytes), LO(FileEntry->extents[i].num_bytes), HI(alloc_size), LO(alloc_size), HI(FileEntry->extents[i].file_off), LO(FileEntry->extents[i].file_off));
+			LOG_TRACE_ARGS("Alright. num_bytes = (%u,%u), alloc_size = (%u,%u) file_off = (%u,%u)\n", HI(FileEntry->extents[i].num_bytes), LO(FileEntry->extents[i].num_bytes), HI(alloc_size), LO(alloc_size), HI(FileEntry->extents[i].file_off), LO(FileEntry->extents[i].file_off));
 			FileEntry->extents[i].num_bytes = alloc_size;
 			for (j=0; j < i; j++) 
 				FileEntry->extents[i].num_bytes += FileEntry->extents[j].num_bytes;
@@ -2393,7 +2405,6 @@
 	}
 
 	if (!updated_lep) {
-		LOG_PID_STR("Updating FileEntry->last_ext_ptr");
 		status = ocfs_update_last_ext_ptr(osb, FileEntry, inode);
 		if (status < 0) {
 			LOG_ERROR_STATUS(status);
@@ -2401,8 +2412,6 @@
 		}
 	}
 
-	LOG_PID_PRINTK("non-local extents, out of loop now, i = %d\n", i);
-
 finally:
 	if (extent_bh)
 		brelse(extent_bh);
@@ -2461,7 +2470,7 @@
 		goto finally;
 	}
 
-	fe = (ocfs_file_entry *) OCFS_BH_GET_DATA(fe_bh);
+	fe = (ocfs_file_entry *) OCFS_BH_GET_DATA_READ(fe_bh); /* read */
 
 	if (!IS_VALID_FILE_ENTRY (fe)) {
 		LOG_ERROR_STATUS (status = -EINVAL);
@@ -2493,7 +2502,7 @@
 			goto finally;
 		}
 
-		OcfsExtent = (ocfs_extent_group *) OCFS_BH_GET_DATA(ext_bh);
+		OcfsExtent = (ocfs_extent_group *) OCFS_BH_GET_DATA_READ(ext_bh); /* read */
 		while (1) {
 			status = ocfs_update_extent_map (osb, &oin->map, OcfsExtent,
 						 &localVbo, &remainingLength, NONLOCAL_EXT);
@@ -2528,7 +2537,7 @@
 					LOG_ERROR_STATUS(status);
 					goto finally;
 				}
-				OcfsExtent = (ocfs_extent_group *) OCFS_BH_GET_DATA(ext_bh);
+				OcfsExtent = (ocfs_extent_group *) OCFS_BH_GET_DATA_READ(ext_bh); /* read */
 				if (!IS_VALID_EXTENT_DATA(OcfsExtent)) {
 					LOG_ERROR_STATUS (status = -EINVAL);
 					goto finally;
@@ -2600,7 +2609,7 @@
 			goto finally;
 		}
 
-		ExtentHeader = (ocfs_extent_group *) OCFS_BH_GET_DATA(ext_bh);
+		ExtentHeader = (ocfs_extent_group *) OCFS_BH_GET_DATA_READ(ext_bh); /* read */
 		if (!IS_VALID_EXTENT_HEADER(ExtentHeader)) {
 			LOG_ERROR_STATUS (status = -EINVAL);
 			goto finally;
@@ -2634,7 +2643,7 @@
 		goto finally;
 	}
 
-	tmp = (ocfs_extent_group *) OCFS_BH_GET_DATA(*data_extent_bh);
+	tmp = (ocfs_extent_group *) OCFS_BH_GET_DATA_READ(*data_extent_bh); /* read */
 	if (!IS_VALID_EXTENT_DATA(tmp)) {
 		LOG_ERROR_STATUS (status = -EINVAL);
 		OCFS_BH_PUT_DATA(*data_extent_bh);
@@ -2709,7 +2718,7 @@
 		}
 		bLockAcquired = true;
 	}
-	bm_lock = (ocfs_bitmap_lock *)OCFS_BH_GET_DATA(bh);
+	bm_lock = (ocfs_bitmap_lock *)OCFS_BH_GET_DATA_WRITE(bh); /* write */
 
 	ByteCount = file_size;
 
@@ -2856,7 +2865,6 @@
 	return status;
 }				/* ocfs_find_contiguous_space_from_bitmap */
 
-
 /*
  * ocfs_alloc_node_block()
  *
@@ -2865,6 +2873,7 @@
 {
 	int status = 0;
 	int tmpstat = 0;
+	int startbh, numblocks;
 	__u64 fileSize = 0;
 	__u64 offset = 0;
 	__u64 lockId = 0;
@@ -2880,8 +2889,8 @@
 	__u32 blockSize = 0;
 	bool bLockAcquired = false;
 	ocfs_lock_res *pLockResource = NULL;
-	__u32 fileId = 0;
-	__u32 extendFileId = 0;
+	__u32 bm_file = 0;
+	__u32 alloc_file = 0;
 	struct buffer_head *bh = NULL;
 	ocfs_file_entry *fe = NULL;
 	bool needs_uninit = false;
@@ -2894,12 +2903,12 @@
 	ocfs_down_sem (&(osb->vol_alloc_lock), true);
 
 	if (Type == DISK_ALLOC_DIR_NODE) {
-		fileId = OCFS_FILE_DIR_ALLOC_BITMAP + NodeNum;
+		bm_file = OCFS_FILE_DIR_ALLOC_BITMAP + NodeNum;
 		blockSize = (__u32) osb->vol_layout.dir_node_size;
-		extendFileId = OCFS_FILE_DIR_ALLOC + NodeNum;
+		alloc_file = OCFS_FILE_DIR_ALLOC + NodeNum;
 	} else if (Type == DISK_ALLOC_EXTENT_NODE) {
-		fileId = OCFS_FILE_FILE_ALLOC_BITMAP + NodeNum;
-		extendFileId = OCFS_FILE_FILE_ALLOC + NodeNum;
+		bm_file = OCFS_FILE_FILE_ALLOC_BITMAP + NodeNum;
+		alloc_file = OCFS_FILE_FILE_ALLOC + NodeNum;
 		blockSize = (__u32) osb->vol_layout.file_node_size;
 	}
 
@@ -2907,7 +2916,7 @@
 
 	OCFS_ASSERT (blockSize);
 
-	lockId = (fileId * OCFS_SECTOR_SIZE) + osb->vol_layout.root_int_off;
+	lockId = (bm_file * OCFS_SECTOR_SIZE) + osb->vol_layout.root_int_off;
 
 	/* Get a lock on the file */
 	status = ocfs_acquire_lock (osb, lockId, OCFS_DLM_EXCLUSIVE_LOCK,
@@ -2924,25 +2933,24 @@
 
 	/* Read in the bitmap file for the dir alloc and look for the
 	 * required space, if found */
-	fe = (ocfs_file_entry *) OCFS_BH_GET_DATA(bh);
+	fe = (ocfs_file_entry *) OCFS_BH_GET_DATA_READ(bh); /* read */
 	fileSize = fe->file_size;
 	allocSize = fe->alloc_size;
 	OCFS_BH_PUT_DATA(bh);
 	
 	prevFileSize = fileSize;
-	
+
 	if ((fileSize != 0) && (allocSize != 0)) {
 		/* Round this off to dirnodesize */
 		ocfs_initialize_bitmap (&bitmap, (__u32) fileSize * 8, (__u32) allocSize * 8);
 		needs_uninit = true;
 
-		status = ocfs_read_system_file (osb, fileId, bitmap.chunk, 
+		status = ocfs_read_system_file (osb, bm_file, bitmap.chunk, 
 						allocSize, offset);
 		if (status < 0) {
 			LOG_ERROR_STATUS (status);
 			goto leave;
 		}
-		
 
 		/* Find the requisite number of bits... */
 
@@ -2958,7 +2966,6 @@
 
 	if (foundBit == -1) {
 		/* if not found add more allocation to the file and try again. */
-		
 		/* Lets get a 1MB chunks every time or clustersize which ever */
 		/* is greater or the number of bit asked */
 		extent = ((1 * ONE_MEGA_BYTE) > osb->vol_layout.cluster_size) ?
@@ -2969,7 +2976,7 @@
 		
 		extent = OCFS_ALIGN (extent, ONE_MEGA_BYTE);
 		
-		status = ocfs_get_system_file_size (osb, (extendFileId),
+		status = ocfs_get_system_file_size (osb, alloc_file,
 						    &newFileSize, &allocSize);
 		if (status < 0) {
 			LOG_ERROR_STATUS (status);
@@ -2982,8 +2989,9 @@
 		if (allocSize > 0)
 			extent *= 2;
 		
-		status = ocfs_extend_system_file (osb, (extendFileId),
-						  newFileSize + extent, NULL, handle);
+		status = ocfs_extend_system_file (osb, alloc_file,
+						  newFileSize + extent, NULL, 
+						  handle, false);
 		if (status < 0) {
 			LOG_ERROR_STATUS (status);
 			goto leave;
@@ -2996,12 +3004,13 @@
 		 * do a put_data first! */
 		/* Calculate the new bitmap size */
 
-		status = ocfs_extend_system_file (osb, fileId, bitMapSize, bh, handle);
+		status = ocfs_extend_system_file (osb, bm_file, bitMapSize, bh,
+						  handle, true);
 		if (status < 0) {
 			LOG_ERROR_STATUS (status);
 			goto leave;
 		}
-		fe = (ocfs_file_entry *) OCFS_BH_GET_DATA(bh);
+		fe = (ocfs_file_entry *) OCFS_BH_GET_DATA_READ(bh); /* read */
 		/* we wrote it back out in ocfs_extend_system_file so
 		 * we can trust the sizes here */
 		fileSize = fe->file_size;
@@ -3016,7 +3025,7 @@
 					       allocSize * 8);
 		needs_uninit = true;
 
-		status = ocfs_read_system_file (osb, fileId, bitmap.chunk, 
+		status = ocfs_read_system_file (osb, bm_file, bitmap.chunk, 
 						allocSize, offset);
 		if (status < 0) {
 			LOG_ERROR_STATUS (status);
@@ -3034,17 +3043,21 @@
 
 	ocfs_set_bits (&bitmap, (__u32) foundBit, (__u32) numBits);
 
+	/* only write out what has changed... */
+	startbh = OCFS_GLOBAL_OFF_TO_CHUNK(foundBit);
+	numblocks = OCFS_GLOBAL_OFF_TO_CHUNK(foundBit + numBits) - startbh + 1;
+
 	/* Write the bitmap file back */
-	status = ocfs_write_system_file (osb, fileId, bitmap.chunk, 
-					 allocSize, offset);
+	status = ocfs_write_bhs(osb, &bitmap.chunk[startbh], numblocks, 
+				0, NULL);
 	if (status < 0) {
 		LOG_ERROR_STATUS(status);
 		goto leave;
 	}
 
-	LOG_TRACE_ARGS ("offset=%u.%u, type=%x, blksz=%u, foundbit=%u, fileid=%u\n",
-			foundBit * blockSize, Type, blockSize, foundBit, extendFileId);
-	*DiskOffset = ocfs_file_to_disk_off (osb, (extendFileId),
+	LOG_TRACE_ARGS ("offset=%u, type=%x, blksz=%u, foundbit=%u, fileid=%u\n",
+			foundBit * blockSize, Type, blockSize, foundBit, alloc_file);
+	*DiskOffset = ocfs_file_to_disk_off (osb, (alloc_file),
 					(foundBit * blockSize));
 	if (*DiskOffset == 0) {
 		LOG_ERROR_STATUS(status = -EFAIL);
@@ -3052,6 +3065,7 @@
 	}
 
 	*file_off = (__u64) ((__u64) foundBit * (__u64) blockSize);
+
 	/* this can just fall through */
 	if (*file_off == 0) {
 		LOG_TRACE_ARGS ("offset=%u.%u, type=%x, blksz=%u, foundbit=%u\n",
@@ -3116,7 +3130,7 @@
 		goto leave;
 	}
 
-	dirnode = (ocfs_dir_node *) OCFS_BH_GET_DATA(dir_hdr_bh);
+	dirnode = (ocfs_dir_node *) OCFS_BH_GET_DATA_READ(dir_hdr_bh); /* read */
 
 	while ((dirnode->node_disk_off != INVALID_NODE_POINTER) &&
 	       (IS_VALID_DIR_NODE (dirnode))) {
@@ -3138,7 +3152,7 @@
 				LOG_ERROR_STATUS (status);
 				goto leave;
 			}
-			dirnode = (ocfs_dir_node *) OCFS_BH_GET_DATA(dir_hdr_bh);
+			dirnode = (ocfs_dir_node *) OCFS_BH_GET_DATA_READ(dir_hdr_bh); /* read */
 			continue;
 		} else {
 			break;
@@ -3170,12 +3184,23 @@
 	struct buffer_head *extent_bh = NULL;
 	ocfs_file_entry *fe = NULL;
 	struct inode *inode = NULL;
+	__u64 offset;
 
 	LOG_ENTRY ();
-	
-	inode = ocfs_get_inode_from_bh(osb, fe_bh);
-	fe = (ocfs_file_entry *)OCFS_BH_GET_DATA(fe_bh);
 
+	fe = (ocfs_file_entry *)OCFS_BH_GET_DATA_READ(fe_bh); /* read */
+	if (fe->attribs & OCFS_ATTRIB_DIRECTORY)
+		offset = fe->extents[0].disk_off;
+	else
+		offset = fe->this_sector;
+	OCFS_BH_PUT_DATA(fe_bh);
+
+	inode = ocfs_get_inode_from_offset(osb, offset, fe_bh);
+	if (inode)
+		SET_BH_SEQNUM(inode, fe_bh);
+
+	fe = (ocfs_file_entry *)OCFS_BH_GET_DATA_READ(fe_bh); /* read */
+
 	if (fe->local_ext) {
 		for (i = 0; i < fe->next_free_ext; i++) {
 			numBitsAllocated = (__u32) (fe->extents[i].num_bytes /
@@ -3199,7 +3224,7 @@
 				LOG_ERROR_STATUS (status);
 				goto leave;
 			}
-			extent = (ocfs_extent_group *) OCFS_BH_GET_DATA(extent_bh);
+			extent = (ocfs_extent_group *) OCFS_BH_GET_DATA_READ(extent_bh); /* read */
 			if ((fe->granularity && (!IS_VALID_EXTENT_HEADER(extent))) || !IS_VALID_EXTENT_DATA(extent)) {
 				status = -EINVAL;
 				LOG_ERROR_STATUS(status);
@@ -3346,7 +3371,7 @@
 	if (!local_alloc_bh)
 		local_alloc_bh = osb->local_alloc_bh;
 
-	alloc = (ocfs_local_alloc *) OCFS_BH_GET_DATA(local_alloc_bh);
+	alloc = (ocfs_local_alloc *) OCFS_BH_GET_DATA_READ(local_alloc_bh); /* read */
 	if (alloc->alloc_size == 0) {
 		OCFS_BH_PUT_DATA(local_alloc_bh);
 		LOG_TRACE_STR("nothing to sync!");
@@ -3392,7 +3417,7 @@
 		}
 	}
 
-	alloc = (ocfs_local_alloc *) OCFS_BH_GET_DATA(local_alloc_bh);
+	alloc = (ocfs_local_alloc *) OCFS_BH_GET_DATA_READ(local_alloc_bh); /* read */
 
 	LOG_TRACE_ARGS("alloc->alloc_size = %u, COUNT = %u, num_used = %u\n", 
 		       alloc->alloc_size, ocfs_alloc_count_bits(alloc), 
@@ -3446,7 +3471,8 @@
  *
  * pass it the bitmap lock in lock_bh if you have it. 
  */
-static int ocfs_alloc_new_window(ocfs_super *osb, struct buffer_head *lock_bh)
+static int ocfs_alloc_new_window(ocfs_super *osb, struct buffer_head *lock_bh, 
+				 ocfs_journal_handle *handle)
 {
 	int status = 0;
 	__u64 alloc_bytes, cluster_off, cluster_count;
@@ -3455,7 +3481,7 @@
 
 	LOG_ENTRY();
 
-	alloc = (ocfs_local_alloc *) OCFS_BH_GET_DATA(osb->local_alloc_bh);
+	alloc = (ocfs_local_alloc *) OCFS_BH_GET_DATA_READ(osb->local_alloc_bh); /* read */
 	if (alloc->alloc_size != 0)
 		LOG_TRACE_STR("asking me to alloc a new window over a"
 			      " non-empty one");
@@ -3476,7 +3502,7 @@
 		goto bail;
 	}
 
-	alloc = (ocfs_local_alloc *) OCFS_BH_GET_DATA(osb->local_alloc_bh);
+	alloc = (ocfs_local_alloc *) OCFS_BH_GET_DATA_WRITE(osb->local_alloc_bh); /* write */
 
 	alloc->bitmap_start = cluster_off;
 	alloc->alloc_size = cluster_count;
@@ -3492,6 +3518,25 @@
 	LOG_TRACE_ARGS("window alloc_size = %u\n", alloc->alloc_size);
 
 	OCFS_BH_PUT_DATA(osb->local_alloc_bh);
+	if (handle->abort_bits)
+		LOG_ERROR_STR("Multiple window allocations in a transaction "
+			      "-- this is illegal!");
+	else
+		handle->abort_bits = alloc_bitmap_free_head();
+
+	status = ocfs_add_to_bitmap_free_head(osb, handle->abort_bits, 
+					      cluster_count, 
+					      alloc->bitmap_start, -1, 
+					      DISK_ALLOC_VOLUME);
+	if (status < 0) {
+		LOG_ERROR_STATUS(status);
+
+		/* In case of this error, we want to shutdown the
+		 * local alloc bitmap. We'll let shutdown handling
+		 * deal with freeing newly allocated bits. */
+		free_bitmap_free_head(handle->abort_bits);
+		handle->abort_bits = NULL;
+	}
 bail:
 	LOG_EXIT_STATUS(status);
 	return(status);
@@ -3569,7 +3614,6 @@
 	int status = 0, tmpstat;
 	int startoff, tmpoff;
 	__u32 tmpwanted;
-	bool dontdirty = false;
 	/* main bitmap variables. */
 	struct buffer_head *main_bm_bh = NULL;
 	ocfs_lock_res *bm_lock_res = NULL;
@@ -3592,7 +3636,7 @@
 		goto bail;
 	}
 
-	alloc = (ocfs_local_alloc *) OCFS_BH_GET_DATA(osb->local_alloc_bh);
+	alloc = (ocfs_local_alloc *) OCFS_BH_GET_DATA_WRITE(osb->local_alloc_bh); /* write */ /* journal access */
 tryagain:
 	/* If we need to initialize a new window, do so now. */
 	if (alloc->alloc_size == 0) {
@@ -3600,23 +3644,31 @@
 		alloc = NULL;
 		LOG_TRACE_STR("Allocating a new window...");
 
-		status = ocfs_alloc_new_window(osb, main_bm_bh);
+		status = ocfs_alloc_new_window(osb, main_bm_bh, handle);
 		if (status < 0) {
 			if (status == -ENOSPC) {
+				/* TODO: Remove this printk */
 				printk("ocfs: disabling local alloc "
 				       "bitmap for this mount.\n");
-				/* at this point, we shouldn't have
-				 * anything allocated for the local
-				 * alloc, so shutting it down won't
-				 * wind up free'ing anything... */
-				ocfs_shutdown_local_alloc(osb, NULL, true);
+
+				ocfs_shutdown_local_alloc(osb, NULL, false);
+
+				/* we want to make sure an empty alloc
+				 * hits disk. */
+				ocfs_handle_set_sync(handle, true);
+
+				/* the bh might not have been dirtied to
+				 * the journal yet. */
+				tmpstat = ocfs_journal_dirty(handle, 
+							  osb->local_alloc_bh);
+				if (tmpstat < 0)
+					LOG_ERROR_STATUS(tmpstat);
 				goto bail;
 			}
 			LOG_ERROR_STATUS(status);
 			goto bail;
 		}
-		alloc = (ocfs_local_alloc *) 
-			OCFS_BH_GET_DATA(osb->local_alloc_bh);
+		alloc = (ocfs_local_alloc *) OCFS_BH_GET_DATA_WRITE(osb->local_alloc_bh); /* write */ /* journal access */
 	}
 
 	/* Alright, try to satisfy the request. */
@@ -3644,15 +3696,14 @@
 			goto bail;
 		}
 
-		status = ocfs_sync_local_to_main(osb, &(osb->alloc_free_head),
+		status = ocfs_sync_local_to_main(osb, &(handle->commit_bits),
 						 NULL, main_bm_bh);
 		if (status < 0) {
 			LOG_ERROR_STATUS(status);
 			goto bail;
 		}
 
-		alloc = (ocfs_local_alloc *) 
-			OCFS_BH_GET_DATA(osb->local_alloc_bh);
+		alloc = (ocfs_local_alloc *) OCFS_BH_GET_DATA_WRITE(osb->local_alloc_bh); /* write */ /* journal access */
 
 		ocfs_clear_local_alloc(alloc);
 
@@ -3666,16 +3717,7 @@
 		 * alloc put back! */
 		ocfs_handle_set_sync(handle, true);
 
-		/* skip our own abort handling. */
-		status = journal_dirty_metadata(handle->k_handle, 
-						osb->local_alloc_bh);
-		dontdirty = true;
-		if (status < 0) {
-			LOG_ERROR_STATUS(status);
-			goto bail;
-		}
-		alloc = (ocfs_local_alloc *) 
-			OCFS_BH_GET_DATA(osb->local_alloc_bh);
+		alloc = (ocfs_local_alloc *) OCFS_BH_GET_DATA_WRITE(osb->local_alloc_bh); /* write */ /* journal access */
 		goto tryagain;
 	}
 
@@ -3698,12 +3740,10 @@
 	OCFS_BH_PUT_DATA(osb->local_alloc_bh);
 	alloc = NULL;
 
-	if (!dontdirty) {
-		status = ocfs_journal_dirty(handle, osb->local_alloc_bh);
-		if (status < 0) {
-			LOG_ERROR_STATUS(status);
-			goto bail;
-		}
+	status = ocfs_journal_dirty(handle, osb->local_alloc_bh);
+	if (status < 0) {
+		LOG_ERROR_STATUS(status);
+		goto bail;
 	}
 bail:
 	/* if we locked the main bitmap, cleanup after ourselves. */
@@ -3820,7 +3860,7 @@
 		goto leave;
 	}
 
-	alloc = (ocfs_local_alloc *) OCFS_BH_GET_DATA(alloc_bh);
+	alloc = (ocfs_local_alloc *) OCFS_BH_GET_DATA_WRITE(alloc_bh); /* write */
 
 	memset(alloc, 0, sizeof(ocfs_local_alloc));
 	strcpy (alloc->signature, OCFS_LOCAL_ALLOC_SIGNATURE);
@@ -3864,7 +3904,7 @@
 		goto bail;
 	}
 
-	alloc = (ocfs_local_alloc *) OCFS_BH_GET_DATA(alloc_bh);
+	alloc = (ocfs_local_alloc *) OCFS_BH_GET_DATA_READ(alloc_bh); /* read */
 
 	/* do a little verification. */
 	num_used = ocfs_alloc_count_bits(alloc);
@@ -3918,8 +3958,6 @@
 	else
 		bh = osb->local_alloc_bh;
 
-	if (osb->alloc_free_head)
-		LOG_TRACE_STR("Shutting down with a pending bitmap_free_head!");
 	status = ocfs_sync_local_to_main(osb, &f, NULL, NULL);
 	if (status < 0)
 		LOG_ERROR_STATUS(status);
@@ -3931,7 +3969,7 @@
 		f = NULL;
 	}
 
-	alloc = (ocfs_local_alloc *) OCFS_BH_GET_DATA(bh);
+	alloc = (ocfs_local_alloc *) OCFS_BH_GET_DATA_WRITE(bh); /* write */
 	ocfs_clear_local_alloc(alloc);
 	OCFS_BH_PUT_DATA(bh);
 

Modified: trunk/src/bitmap.c
===================================================================
--- trunk/src/bitmap.c	2003-12-18 23:28:02 UTC (rev 14)
+++ trunk/src/bitmap.c	2004-01-24 01:22:15 UTC (rev 15)
@@ -160,13 +160,14 @@
 	  globalsize, bitmap->validbits, sysonly);*/
 	/*LOG_TRACE_ARGS("before loop: c=%u, lastbh=%u, size=%u, "
 	  "localstart=%u\n", c, lastbh, size, localstart);*/
-	buffer = OCFS_BH_GET_DATA(currbh);
+	buffer = OCFS_BH_GET_DATA_READ(currbh); /* read */
 
 	while ((bitoff = find_next_zero_bit(buffer, OCFS_BITS_IN_CHUNK, 
 					    localstart)) != -1) {
 		/*LOG_TRACE_ARGS("c=%u, globaloff=%u, bitoff=%u, "
 			       "localstart=%u\n", c, globaloff, bitoff, 
 			       localstart);*/
+
 		/* find_next_zero_bit returns:
 		   >= size passed in: if no zero bits in here.
 		   some number < size: at the next zero bit
@@ -188,9 +189,10 @@
 			localstart = bitoff = 0;
 			c++;
 			currbh = bitmap->chunk[c];
-			buffer = OCFS_BH_GET_DATA(currbh);
+			buffer = OCFS_BH_GET_DATA_READ(currbh); /* read */
 			if (c == lastbh)
 				size = globalsize-(OCFS_BITS_IN_CHUNK*lastbh);
+			globaloff = c * OCFS_BITS_IN_CHUNK;
 			continue;
 		}
 
@@ -234,24 +236,21 @@
  */
 int ocfs_count_bits (ocfs_alloc_bm * bitmap)
 {
-	__u32 size, count = 0, i, j;
+	__u32 count = 0, i, j;
 	struct buffer_head *currbh;
 	unsigned char tmp;
 	__u8 *buffer;
-	__u32 validbh;
+	int validbytes, size;
 
 	LOG_ENTRY ();
 
 	size = (bitmap->validbits >> 3);
-	validbh = OCFS_ALIGN(bitmap->validbits, OCFS_BITS_IN_CHUNK) / 
-		  OCFS_BITS_IN_CHUNK;
-
-	for (i = 0; i < validbh; i++) {
+	
+	for (i = 0, validbytes = (size >= OCFS_BITMAP_CHUNK ? OCFS_BITMAP_CHUNK : size);
+	     size > 0; size -= validbytes, i++) {
 		currbh = bitmap->chunk[i];
-		buffer = OCFS_BH_GET_DATA(currbh);
-		for (j = 0; j < (size % OCFS_BITMAP_CHUNK ? 
-				 size % OCFS_BITMAP_CHUNK : 
-				 OCFS_BITMAP_CHUNK); j++) {
+		buffer = OCFS_BH_GET_DATA_READ(currbh); /* read */
+		for (j = 0; j < validbytes; j++) {
 			memcpy (&tmp, buffer, 1);
 			count += BITCOUNT (tmp);
 			buffer++;
@@ -290,7 +289,7 @@
 	local = OCFS_GLOBAL_OFF_TO_LOCAL(start);
 	currbh = bitmap->chunk[i];
 
-	buff = OCFS_BH_GET_DATA(currbh);
+	buff = OCFS_BH_GET_DATA_WRITE(currbh); /* write */
 
 	while (num--) {
 		set_bit (local++, buff);
@@ -299,7 +298,7 @@
 			OCFS_BH_PUT_DATA(currbh);
 			i++;
 			currbh = bitmap->chunk[i];
-			buff = OCFS_BH_GET_DATA(currbh);
+			buff = OCFS_BH_GET_DATA_WRITE(currbh); /* write */
 		}
 	}
 
@@ -331,7 +330,7 @@
 	local = OCFS_GLOBAL_OFF_TO_LOCAL(start);
 	currbh = bitmap->chunk[i];
 
-	buff = OCFS_BH_GET_DATA(currbh);
+	buff = OCFS_BH_GET_DATA_WRITE(currbh); /* write */
 
 	while (num--) {
 		clear_bit (local++, buff);
@@ -340,7 +339,7 @@
 			OCFS_BH_PUT_DATA(currbh);
 			i++;
 			currbh = bitmap->chunk[i];
-			buff = OCFS_BH_GET_DATA(currbh);
+			buff = OCFS_BH_GET_DATA_WRITE(currbh); /* write */
 		}
 	}
 

Modified: trunk/src/dcache.c
===================================================================
--- trunk/src/dcache.c	2003-12-18 23:28:02 UTC (rev 14)
+++ trunk/src/dcache.c	2004-01-24 01:22:15 UTC (rev 15)
@@ -82,7 +82,7 @@
                 goto bail;
         }            
 	
-	fe = (ocfs_file_entry *) OCFS_BH_GET_DATA(fe_bh);
+	fe = (ocfs_file_entry *) OCFS_BH_GET_DATA_READ(fe_bh); /* read */
 
         /* we now have a file entry to call read_inode */
         q.name = fe->filename;
@@ -145,7 +145,7 @@
 	spin_unlock (&dcache_lock);
 	ret = 1;
 
-      bail:
+bail:
 	LOG_EXIT_LONG (ret);
 	return ret;
 }				/* ocfs_empty */

Modified: trunk/src/dir.c
===================================================================
--- trunk/src/dir.c	2003-12-18 23:28:02 UTC (rev 14)
+++ trunk/src/dir.c	2004-01-24 01:22:15 UTC (rev 15)
@@ -100,7 +100,7 @@
                             } else {
                                     if (ocfs_find_files_on_disk (osb, rootOff, NULL, &entry_bh, ofile, inode) < 0)
 					    break;
-				    entry = (ocfs_file_entry *) OCFS_BH_GET_DATA(entry_bh);
+				    entry = (ocfs_file_entry *) OCFS_BH_GET_DATA_READ(entry_bh); /* read */
                                     r=filldir (dirent, entry->filename, strlen (entry->filename), filp->f_pos,
                                                  LO (entry->this_sector), DT_UNKNOWN);
                             	    if (r < 0) {
@@ -303,25 +303,18 @@
 	__u64 offset;
 	ocfs_dir_node *DirNode = NULL;
 	ocfs_file_entry *fe = NULL;
-	bool sync_hdr_write = false, sync_fe_write = false;
-	bool cached_hdr_write = false, cached_fe_write = false;
+	bool sync_fe_write = false;
 	
 	LOG_ENTRY ();
 
-	DirNode = (ocfs_dir_node *)OCFS_BH_GET_DATA(bhs[0]);
+	DirNode = (ocfs_dir_node *)OCFS_BH_GET_DATA_READ(bhs[0]); /* read */
 	
 	offset = DirNode->node_disk_off + ((idx + 1) * 512);
 
-	if ((DISK_LOCK_CURRENT_MASTER (DirNode) == osb->node_num) &&
-	    (DISK_LOCK_FILE_LOCK (DirNode) == OCFS_DLM_ENABLE_CACHE_LOCK)) {
-		cached_hdr_write = true;
-	} else
-		sync_hdr_write = true;
-
 	OCFS_BH_PUT_DATA(bhs[0]);
 
 	if (idx != -1) {
-		fe = (ocfs_file_entry *)OCFS_BH_GET_DATA(bhs[idx+1]);
+		fe = (ocfs_file_entry *)OCFS_BH_GET_DATA_READ(bhs[idx+1]); /* read */
 
 		if(!IS_VALID_FILE_ENTRY(fe)) {
 			OCFS_BH_PUT_DATA(bhs[idx+1]);
@@ -329,29 +322,16 @@
 			goto bail;
 		}
 
-		if ((DISK_LOCK_CURRENT_MASTER (fe) == osb->node_num) &&
-		    (DISK_LOCK_FILE_LOCK (fe) == OCFS_DLM_ENABLE_CACHE_LOCK)) {
-			cached_fe_write = true;
-			if (!cached_hdr_write)
-				sync_fe_write = true;
-		} else {
-			ocfs_down_sem(&(osb->map_lock), true); 
-			ocfs_remove_extent_map_entry(osb, &osb->metadata_map, offset, 512); 
-			ocfs_remove_extent_map_entry(osb, &osb->trans_map, offset, 512); 
-			ocfs_up_sem(&(osb->map_lock)); 
-			sync_fe_write = true;
-			sync_hdr_write = true;
-		}
+		ocfs_down_sem(&(osb->map_lock), true); 
+		ocfs_remove_extent_map_entry(osb, &osb->metadata_map, offset, 512); 
+		ocfs_remove_extent_map_entry(osb, &osb->trans_map, offset, 512); 
+		ocfs_up_sem(&(osb->map_lock)); 
+		sync_fe_write = true;
 		
 		OCFS_BH_PUT_DATA(bhs[idx+1]);
 	}
 
 	/* Write the file entry at idx, if given */
-	if (cached_fe_write) {
-		status = ocfs_write_bh (osb, bhs[idx+1], OCFS_BH_CACHED, file_inode);
-		if (status < 0)
-			LOG_ERROR_STATUS (status);
-	}
 	if (sync_fe_write) {
 		status = ocfs_write_bh (osb, bhs[idx+1], 0, file_inode);
 		if (status < 0)
@@ -359,18 +339,10 @@
 	}
 	
 	/* Write the first sector last */
-	if (cached_hdr_write) {
-		status = ocfs_write_bh (osb, bhs[0], OCFS_BH_CACHED, dir_inode);
-		if (status < 0)
-			LOG_ERROR_STATUS (status);
-	}
-	if (sync_hdr_write) {
-		status = ocfs_write_bh (osb, bhs[0], 0, dir_inode);
-		if (status < 0)
-			LOG_ERROR_STATUS (status);
-	}
+	status = ocfs_write_bh (osb, bhs[0], 0, dir_inode);
+	if (status < 0)
+		LOG_ERROR_STATUS (status);
 
-	//IF_TRACE (ocfs_print_dir_node (osb, DirNode));
 bail:
 	LOG_EXIT_STATUS (status);
 	return status;
@@ -399,7 +371,7 @@
 		goto bail;
 	}
 
-	tmp = (ocfs_dir_node *)OCFS_BH_GET_DATA(bhs[0]);
+	tmp = (ocfs_dir_node *)OCFS_BH_GET_DATA_READ(bhs[0]); /* read */
 	memcpy(DirNode, tmp, 512);
 	OCFS_BH_PUT_DATA(bhs[0]);
 
@@ -410,6 +382,7 @@
 
 
 	if (!IS_VALID_DIR_NODE (DirNode)) {
+		LOG_TRACE_STR("Invalid Dir Node!\n");
 		bRet = false;
 		goto bail;
 	}
@@ -424,11 +397,7 @@
 			}
 
 			if (found) {
-				fe = FILEENT_GETBH(DirNode, bhs, i);
-				if (fe == NULL) {
-					// fe is locked by this kernel thread
-					continue;
-				}
+				fe = FILEENT_GETBH(DirNode, bhs, i); /* read */
 	
 				if (fe->sync_flags & OCFS_SYNC_FLAG_NAME_DELETED ||
 				    !(fe->sync_flags & OCFS_SYNC_FLAG_VALID)) {
@@ -471,11 +440,12 @@
 				goto bail;
 			}
 
-			tmp = (ocfs_dir_node *)OCFS_BH_GET_DATA(bhs[0]);
+			tmp = (ocfs_dir_node *)OCFS_BH_GET_DATA_READ(bhs[0]); /* read */
 			memcpy(DirNode, tmp, 512);
 			OCFS_BH_PUT_DATA(bhs[0]);
 
 			if (!IS_VALID_DIR_NODE (DirNode)) {
+				LOG_TRACE_STR("Invalid Dir Node!\n");
 				bRet = false;
 				goto bail;
 			}
@@ -492,7 +462,7 @@
 		OFile->curr_byte_off = i + 1;
 	}
 
-      bail:
+bail:
 	if (DirNode)
 		ocfs_safefree(DirNode);
 
@@ -504,6 +474,7 @@
 /*
  * ocfs_find_index()
  *
+ * Locks the dirnode bh, and then only one fe at a time.
  */
 static bool ocfs_find_index (ocfs_super * osb, struct buffer_head *bhs[], struct qstr * FileName, int *Index)
 {
@@ -516,7 +487,7 @@
 
 	LOG_ENTRY ();
 
-	DirNode = (ocfs_dir_node *)OCFS_BH_GET_DATA(bhs[0]);
+	DirNode = (ocfs_dir_node *)OCFS_BH_GET_DATA_READ(bhs[0]); /* read */
 	if (!IS_VALID_DIR_NODE (DirNode) || FileName==NULL) {
 		ret = false;
 		goto bail;
@@ -527,15 +498,11 @@
 
 	if (DirNode->index_dirty) {
 		for (index = start; index < DirNode->num_ent_used; index++) {
-			fe = FILEENT_GETBH(DirNode, bhs, index);
-			if (fe == NULL) {
-				// fe is locked by this kernel thread
-				continue;
-			}
+			fe = FILEENT_GETBH(DirNode, bhs, index); /* read */
 
 			if ((fe->sync_flags & OCFS_SYNC_FLAG_NAME_DELETED) ||
 			    (!(fe->sync_flags & OCFS_SYNC_FLAG_VALID))) {
-				OCFS_BH_PUT_DATA(bhs[index+1]);
+				FILEENT_PUTBH(DirNode, bhs, index);
 				continue;
 			}
                         q.name = fe->filename;
@@ -556,11 +523,7 @@
 
 	for (lowBnd = start, upBnd = (DirNode->num_ent_used - start); upBnd; upBnd >>= 1) {
 		index = lowBnd + (upBnd >> 1);
-		fe = FILEENT_GETBH(DirNode, bhs, index);
-		if (fe == NULL) {
-			// fe is locked by this kernel thread
-			continue;
-		}
+		fe = FILEENT_GETBH(DirNode, bhs, index); /* read */
 
 		if ((fe->sync_flags & OCFS_SYNC_FLAG_NAME_DELETED) ||
 		    (!(fe->sync_flags & OCFS_SYNC_FLAG_VALID))) {
@@ -568,11 +531,7 @@
 			FILEENT_PUTBH(DirNode, bhs, index);
 
 			for (index = lowBnd; index < (lowBnd + upBnd); index++) {
-				fe = FILEENT_GETBH(DirNode, bhs, index);
-				if (fe == NULL) {
-					// fe is locked by this kernel thread
-					continue;
-				}
+				fe = FILEENT_GETBH(DirNode, bhs, index); /* read */
 				if ((fe->sync_flags & OCFS_SYNC_FLAG_NAME_DELETED) ||
 				    (!(fe->sync_flags & OCFS_SYNC_FLAG_VALID))) {
 					FILEENT_PUTBH(DirNode, bhs, index);
@@ -634,7 +593,7 @@
 {
 	int status = 0;
 	ocfs_dir_node *dir = NULL;
-	ocfs_file_entry *target;
+	ocfs_file_entry *target = NULL;
 	ocfs_file_entry *fe;
 	__u32 i;
 	__u8 offset = 0;
@@ -652,6 +611,7 @@
 			LOG_ERROR_STATUS (status = -ENOMEM);
 			goto leave;
 		}
+		memset(arr, 0, bufsz);
 
 		status = ocfs_read_bhs (osb, DirNodeOffset, 
 					osb->vol_layout.dir_node_size, arr, 
@@ -663,7 +623,7 @@
 	} else
 		arr = bhs;
 
-	dir = (ocfs_dir_node *)OCFS_BH_GET_DATA(arr[0]);
+	dir = (ocfs_dir_node *)OCFS_BH_GET_DATA_WRITE(arr[0]); /* write */
 	if (!IS_VALID_DIR_NODE (dir)) {
 		OCFS_BH_PUT_DATA(arr[0]);
 		LOG_ERROR_STATUS(status = -EINVAL);
@@ -673,18 +633,25 @@
 	if (dir->index_dirty) {
 		offset = dir->bad_off;
 
-		target = (ocfs_file_entry *)OCFS_BH_GET_DATA(arr[offset+1]);
+		/* To preserve locking order, (we only want to lock 1
+		 * fe at a time, in incremental order), we copy this
+		 * one off. */
+		target = ocfs_allocate_file_entry();
+		if (target == NULL) {
+			OCFS_BH_PUT_DATA(arr[0]);
+			LOG_ERROR_STATUS(status = -ENOMEM);
+			goto leave;
+		}
+		memcpy(target, OCFS_BH_GET_DATA_READ(arr[offset+1]), 
+		       sizeof(ocfs_file_entry)); /* read */
+		OCFS_BH_PUT_DATA(arr[offset+1]);
 
 		for (i = 0; i < dir->num_ent_used; i++) {
 			/* don't need to check ourselves */
 			if (dir->index[i] == offset)
 				continue;
 
-			fe = FILEENT_GETBH(dir, arr, i);
-			if (fe == NULL) {
-				// buffer is locked, like the rename case
-				continue;
-			}
+			fe = FILEENT_GETBH(dir, arr, i); /* read */
 
 			if ((fe->sync_flags & OCFS_SYNC_FLAG_NAME_DELETED) ||
 			    (!(fe->sync_flags & OCFS_SYNC_FLAG_VALID))) {
@@ -701,8 +668,6 @@
 			FILEENT_PUTBH(dir, arr, i);
 		}
 
-		OCFS_BH_PUT_DATA(arr[offset+1]);
-
 		if (i < dir->num_ent_used - 1) {
 			memmove (&dir->index[i+1], &dir->index[i], 
 				 dir->num_ent_used - i);
@@ -712,17 +677,22 @@
 		dir->index_dirty = 0;
 		OCFS_BH_PUT_DATA(arr[0]);
 
-		status = ocfs_write_dir_node (osb, arr, -1, dir_inode, NULL);
-		if (status < 0) {
-			LOG_ERROR_STATUS (status);
-			goto leave;
+		if (!handle) {
+			status = ocfs_write_dir_node (osb, arr, -1, dir_inode, 
+						      NULL);
+			if (status < 0) {
+				LOG_ERROR_STATUS (status);
+				goto leave;
+			}
 		}
 	} else
 		OCFS_BH_PUT_DATA(arr[0]);
 
-      leave:
+leave:
 	if (bhs == NULL)
 		ocfs_safefree (arr);
+	if (target)
+		ocfs_release_file_entry(target);
 
 	LOG_EXIT_STATUS (status);
 	return status;
@@ -745,7 +715,7 @@
 
 	LOG_ENTRY ();
 
-	DirNode = (ocfs_dir_node *)OCFS_BH_GET_DATA(bhs[0]);
+	DirNode = (ocfs_dir_node *)OCFS_BH_GET_DATA_WRITE(bhs[0]); /* write */
 
 	if (!IS_VALID_DIR_NODE (DirNode)) {
 		LOG_ERROR_STATUS(status = -EINVAL);
@@ -762,7 +732,7 @@
 			LOG_ERROR_STATUS (status);
 			goto bail;
 		}
-		DirNode = (ocfs_dir_node *)OCFS_BH_GET_DATA(bhs[0]);
+		DirNode = (ocfs_dir_node *)OCFS_BH_GET_DATA_WRITE(bhs[0]); /* write */
 	}
 
 	/* Should status be updated here? */
@@ -784,21 +754,17 @@
 			status = -EEXIST;
 			goto bail;
 		}
-		DirNode = (ocfs_dir_node *)OCFS_BH_GET_DATA(bhs[0]);
+		DirNode = (ocfs_dir_node *)OCFS_BH_GET_DATA_WRITE(bhs[0]); /* write */
 		
 		if (index < DirNode->num_ent_used) {
-			fe = FILEENT_GETBH(DirNode, bhs, index);
-			if (fe == NULL) {
-				// fe is locked by this kernel thread
-				index = 0;
-			} else {
-				res = strcmp (fe->filename, InsertEntry->filename);
-				FILEENT_PUTBH(DirNode, bhs, index);
-				if (res > 0) {
-					/* We are greater than the entry in question we
-				 	* should be less than the one next to it */
-					index++;
-				}
+			fe = FILEENT_GETBH(DirNode, bhs, index); /* read */
+
+			res = strcmp (fe->filename, InsertEntry->filename);
+			FILEENT_PUTBH(DirNode, bhs, index);
+			if (res > 0) {
+				/* We are greater than the entry in question we
+				 * should be less than the one next to it */
+				index++;
 			}
 		}
 	} else {
@@ -819,7 +785,7 @@
 			freeOffset = DirNode->first_del;
 			DirNode->num_del--;
 			if (DirNode->num_del) {
-				lastEntry = (ocfs_file_entry *)OCFS_BH_GET_DATA(bhs[freeOffset+1]);
+				lastEntry = (ocfs_file_entry *)OCFS_BH_GET_DATA_READ(bhs[freeOffset+1]); /* read */
 				DirNode->first_del = lastEntry->next_del;
 				OCFS_BH_PUT_DATA(bhs[freeOffset+1]);
 			}
@@ -849,7 +815,7 @@
 	}
 
 	/* Put the entry at the end */
-	lastEntry = (ocfs_file_entry *)OCFS_BH_GET_DATA(bhs[freeOffset+1]);
+	lastEntry = (ocfs_file_entry *)OCFS_BH_GET_DATA_WRITE(bhs[freeOffset+1]); /* write */ /* journal access */
 	InsertEntry->dir_node_ptr = DirNode->node_disk_off;
 	memcpy (lastEntry, InsertEntry, osb->sect_size);
 	OCFS_SET_FLAG (lastEntry->sync_flags, OCFS_SYNC_FLAG_VALID);
@@ -898,7 +864,6 @@
 	struct buffer_head *dirbh = NULL;
 	struct buffer_head **dirbhs = NULL;
 	__u64 lock_off, head_del, parent_off;
-	bool journal_lockbh = false;
 	const int numbhs = 256;
 	const int length = numbhs * sizeof(struct buffer_head *);
 
@@ -912,10 +877,8 @@
 	}
 	memset(dirbhs, 0, length);
 	
-	EntryToDel = (ocfs_file_entry *) OCFS_BH_GET_DATA(febh);
-
-	/* briefly grab LockNode and get useful bits of info */	
-	LockNode = (ocfs_dir_node *) OCFS_BH_GET_DATA(lockbh);
+	/* briefly grab LockNode and get useful bits of info. */
+	LockNode = (ocfs_dir_node *) OCFS_BH_GET_DATA_READ(lockbh); /* read */
 	lock_off = LockNode->node_disk_off;
 	head_del = LockNode->head_del_ent_node;
 	OCFS_BH_PUT_DATA(lockbh);
@@ -927,13 +890,13 @@
 		goto leave;
 	}
 
+	EntryToDel = (ocfs_file_entry *) OCFS_BH_GET_DATA_WRITE(febh); /* write */
+
 	/* if fe comes from lower down in the dir chain, get the ocfs_dir_node 
 	 * for that chain.  otherwise, use the lockbh (toplevel) */
 	if (EntryToDel->dir_node_ptr == lock_off) {
-		journal_lockbh = false;
 		dirbhs[0] = lockbh;
 	} else {
-		journal_lockbh = true;
 		status = ocfs_read_bh(osb, EntryToDel->dir_node_ptr, 
 				      &dirbh, OCFS_BH_CACHED, dir_inode);
 		if (status < 0) {
@@ -942,14 +905,14 @@
 		}
 		dirbhs[0] = dirbh;
 
-		status= ocfs_journal_access(handle, dirbh, 
+		status= ocfs_journal_access(handle, dirbhs[0], 
 					    OCFS_JOURNAL_ACCESS_WRITE);
 		if (status < 0) {
 			LOG_ERROR_STATUS (status);
 			goto leave;
 		}
 	}
-	PDirNode = (ocfs_dir_node *) OCFS_BH_GET_DATA(dirbhs[0]);
+	PDirNode = (ocfs_dir_node *) OCFS_BH_GET_DATA_WRITE(dirbhs[0]); /* write */ /* journal access */
 	parent_off = PDirNode->node_disk_off;
 	offset= ((EntryToDel->this_sector - parent_off) >> 9) - 1;
 	for (index = 0; index < PDirNode->num_ent_used; index++)
@@ -996,12 +959,10 @@
 	EntryToDel = NULL;
 	PDirNode = NULL;
 
-	LockNode = (ocfs_dir_node *) OCFS_BH_GET_DATA(lockbh);
-	if (LockNode->head_del_ent_node == INVALID_NODE_POINTER) {
-		if (lock_off != parent_off)
-			journal_lockbh = true;
+	LockNode = (ocfs_dir_node *) OCFS_BH_GET_DATA_WRITE(lockbh); /* write */ /* journal access */
+	if (LockNode->head_del_ent_node == INVALID_NODE_POINTER)
 		LockNode->head_del_ent_node = parent_off;
-	}
+
 	OCFS_BH_PUT_DATA(lockbh);
 	LockNode = NULL;
 
@@ -1010,12 +971,10 @@
 		LOG_ERROR_STATUS (status);
 		goto leave;
 	}
-	if (journal_lockbh) {
-		status = ocfs_journal_dirty(handle, lockbh);
-		if (status < 0) {
-			LOG_ERROR_STATUS (status);
-			goto leave;
-		}
+	status = ocfs_journal_dirty(handle, lockbh);
+	if (status < 0) {
+		LOG_ERROR_STATUS (status);
+		goto leave;
 	}
 
 leave:
@@ -1093,7 +1052,7 @@
 	DISK_LOCK_READER_NODE (InsertEntry) = osb->node_num;
 
 	/* route the new file entry to the proper dir_off */
-	LockNode = (ocfs_dir_node *)OCFS_BH_GET_DATA(lock_bh);
+	LockNode = (ocfs_dir_node *)OCFS_BH_GET_DATA_READ(lock_bh); /* read */
 	locknode_off = LockNode->node_disk_off;
 	locknode_head_del = LockNode->head_del_ent_node;
 
@@ -1123,7 +1082,7 @@
 	}
 
 	/* see if it fits at dir_off */
-	DirNode = (ocfs_dir_node *)OCFS_BH_GET_DATA(bhs[0]);
+	DirNode = (ocfs_dir_node *)OCFS_BH_GET_DATA_READ(bhs[0]); /* read */
 	dir_num_ent_used = DirNode->num_ent_used;
 	dir_next_node = DirNode->next_node_ptr;
 	dir_cache_lock = ((DISK_LOCK_FILE_LOCK(DirNode) == OCFS_DLM_ENABLE_CACHE_LOCK) && 
@@ -1158,7 +1117,7 @@
 			goto leave;
 		}
 
-		DirNode = (ocfs_dir_node *)OCFS_BH_GET_DATA(bhs[0]);
+		DirNode = (ocfs_dir_node *)OCFS_BH_GET_DATA_READ(bhs[0]); /* read */
 		dir_num_ent_used = DirNode->num_ent_used;
 		dir_next_node = DirNode->next_node_ptr;
 		OCFS_BH_PUT_DATA(bhs[0]);
@@ -1182,7 +1141,7 @@
 		}
 	}
 		
-	DirNode = (ocfs_dir_node *)OCFS_BH_GET_DATA(bhs[0]);
+	DirNode = (ocfs_dir_node *)OCFS_BH_GET_DATA_READ(bhs[0]); /* read */
 	dir_num_ent_used = DirNode->num_ent_used;
 	dir_next_node = DirNode->next_node_ptr;
 	dir_cache_lock = ((DISK_LOCK_FILE_LOCK(DirNode) == OCFS_DLM_ENABLE_CACHE_LOCK) && 
@@ -1204,12 +1163,14 @@
 		goto leave;
 	}
 
-	if (new_head_del != 0) {
-		LockNode = (ocfs_dir_node *)OCFS_BH_GET_DATA(lock_bh);
+	/* we always want to mark lock_bh at least once as it's going
+	 * to the journal, so leave this get_data_write outside of the
+	 * if statement. */
+	LockNode = (ocfs_dir_node *)OCFS_BH_GET_DATA_WRITE(lock_bh); /* write */ /* journal access */
+	if (new_head_del != 0)
 		LockNode->head_del_ent_node = new_head_del;
-		OCFS_BH_PUT_DATA(lock_bh);
-		LockNode = NULL;
-	}
+	OCFS_BH_PUT_DATA(lock_bh);
+	LockNode = NULL;
 
 	/* If we have a list of dir nodes go to the last dirnode */
 	/* and insert in that. */
@@ -1296,11 +1257,11 @@
 					LOG_ERROR_STATUS (status);
 					goto leave;
 				}
-				buf = OCFS_BH_GET_DATA(newbhs[i]);
+				buf = OCFS_BH_GET_DATA_WRITE(newbhs[i]); /* write */
 				memset(buf, 0, 512);
 				OCFS_BH_PUT_DATA(newbhs[i]);
 			}
-			pNewDirNode = (ocfs_dir_node *)OCFS_BH_GET_DATA(newbhs[0]);
+			pNewDirNode = (ocfs_dir_node *)OCFS_BH_GET_DATA_WRITE(newbhs[0]); /* write */
 			ocfs_initialize_dir_node (osb, pNewDirNode, 
 						  bitmapOffset, fileOffset, 
 						  osb->node_num);
@@ -1308,7 +1269,7 @@
 			pNewDirNode = NULL;
 		}
 
-		pNewDirNode = (ocfs_dir_node *)OCFS_BH_GET_DATA(newbhs[0]);
+		pNewDirNode = (ocfs_dir_node *)OCFS_BH_GET_DATA_WRITE(newbhs[0]); /* write */
 		new_disk_off = pNewDirNode->node_disk_off;
 		if (dir_cache_lock) {
 			DISK_LOCK_CURRENT_MASTER (pNewDirNode) = osb->node_num;
@@ -1323,13 +1284,13 @@
 					       file_inode);
 
 		if (status >= 0) {
-			LockNode = (ocfs_dir_node *)OCFS_BH_GET_DATA(lock_bh);
+			LockNode = (ocfs_dir_node *)OCFS_BH_GET_DATA_WRITE(lock_bh); /* write */
 			LockNode->free_node_ptr = new_disk_off;
 			OCFS_BH_PUT_DATA(lock_bh);
 			LockNode = NULL;
 
 			/* Setup the pointer to this new directory block */
-			DirNode = (ocfs_dir_node *)OCFS_BH_GET_DATA(bhs[0]);
+			DirNode = (ocfs_dir_node *)OCFS_BH_GET_DATA_WRITE(bhs[0]); /* write */
 			DirNode->next_node_ptr = new_disk_off;
 			OCFS_BH_PUT_DATA(bhs[0]);
 			DirNode = NULL;

Modified: trunk/src/dlm.c
===================================================================
--- trunk/src/dlm.c	2003-12-18 23:28:02 UTC (rev 14)
+++ trunk/src/dlm.c	2004-01-24 01:22:15 UTC (rev 15)
@@ -189,7 +189,7 @@
 	}
 
 	for (i = 0; i < numnodes; i++) {
-		p = OCFS_BH_GET_DATA(bhs[i]);
+		p = OCFS_BH_GET_DATA_READ(bhs[i]); /* read */
 		pubsect = (ocfs_publish *) p;
 		if (pubsect->time == (__u64) 0 || pubsect->publ_seq_num <= largestseqno) {
 			OCFS_BH_PUT_DATA(bhs[i]);
@@ -220,7 +220,7 @@
 
 	/* Increment the largest sequence number by one & */
 	/* write it in its own Publish Sector and set the Dirty Bit */
-	p = OCFS_BH_GET_DATA(bhs[osb->node_num]);
+	p = OCFS_BH_GET_DATA_WRITE(bhs[osb->node_num]); /* write */
 	pubsect = (ocfs_publish *)p;
 	largestseqno++;
 	LOG_TRACE_ARGS ("largestseqno : %u.%u\n", HILO (largestseqno));
@@ -277,6 +277,8 @@
 	__u32 timewaited = 0;
 	ocfs_file_entry *fe = NULL;
 	struct buffer_head *bh = NULL;
+	__u32 curr_master;
+	__u8 lock_level;
 
 	LOG_ENTRY ();
 
@@ -292,42 +294,39 @@
 			LOG_ERROR_STATUS (status = tmpstat);
 			goto finally;
 		}
-		fe = (ocfs_file_entry *)OCFS_BH_GET_DATA(bh);
+		fe = (ocfs_file_entry *)OCFS_BH_GET_DATA_READ(bh); /* read */
+		curr_master = DISK_LOCK_CURRENT_MASTER (fe);
+		lock_level = DISK_LOCK_FILE_LOCK (fe);
+		OCFS_BH_PUT_DATA(bh);
 
 		/* This will always be zero when the first Node comes up after reboot */
 		/* (for volume lock) */
-		if ((DISK_LOCK_CURRENT_MASTER (fe) == OCFS_INVALID_NODE_NUM) ||
-		    (DISK_LOCK_CURRENT_MASTER (fe) == osb->node_num)) {
+		if ((curr_master == OCFS_INVALID_NODE_NUM) ||
+		    (curr_master == osb->node_num)) {
 			goto got_it;
 		}
 
-		if (!IS_NODE_ALIVE (osb->publ_map,
-				    DISK_LOCK_CURRENT_MASTER (fe),
-				    OCFS_MAXIMUM_NODES)) {
-//			LOG_TRACE_ARGS ("old_ocfs_recover_vol(%u)\n",
-//					DISK_LOCK_CURRENT_MASTER (fe));
-//			old_ocfs_recover_vol(osb, DISK_LOCK_CURRENT_MASTER(fe));
-
+		if (!IS_NODE_ALIVE (osb->publ_map, curr_master, OCFS_MAXIMUM_NODES)) {
 			/* Reset the lock as not owned and return success?? */
 			/* This needs to be under some sort of cluster wide lock */
+			fe = (ocfs_file_entry *)OCFS_BH_GET_DATA_WRITE(bh); /* write */
 			DISK_LOCK_CURRENT_MASTER (fe) = OCFS_INVALID_NODE_NUM;
 			DISK_LOCK_FILE_LOCK (fe) = OCFS_DLM_NO_LOCK;
+			OCFS_BH_PUT_DATA(bh);
 			goto got_it;
 		}
 
 		/* If we are here in the code it means the local node is not the master */
-		if (DISK_LOCK_FILE_LOCK (fe) <= lock_type)
+		if (lock_level <= lock_type)
 			goto got_it;
 		
-		OCFS_BH_PUT_DATA(bh);
 		brelse(bh);
 		ocfs_sleep (WAIT_FOR_VOTE_INCREMENT);
 		timewaited += WAIT_FOR_VOTE_INCREMENT;
 		continue;
 got_it:
+		brelse(bh);
 		status = 0;
-		OCFS_BH_PUT_DATA(bh);
-		brelse(bh);
 		break;
 	}
 
@@ -347,6 +346,8 @@
 	__u32 timewaited = 0;
 	ocfs_file_entry *fe = NULL;
 	struct buffer_head *bh = NULL;
+	__u32 curr_master;
+	__u8 lock_level;
 
 	LOG_ENTRY_ARGS ("(0x%08x, %u.%u, %u, 0x%08x, %u)\n", osb,
 			HI (offset), LO (offset), time_to_wait,
@@ -361,54 +362,53 @@
 			LOG_ERROR_STATUS (status = tmpstat);
 			goto finally;
 		}
-		fe = (ocfs_file_entry *)OCFS_BH_GET_DATA(bh);
+		fe = (ocfs_file_entry *)OCFS_BH_GET_DATA_READ(bh); /* read */
+		curr_master = DISK_LOCK_CURRENT_MASTER (fe);
+		lock_level = DISK_LOCK_FILE_LOCK (fe);
+		OCFS_BH_PUT_DATA(bh);
 
-		if ((DISK_LOCK_CURRENT_MASTER (fe) == OCFS_INVALID_NODE_NUM) ||
-		    (DISK_LOCK_CURRENT_MASTER (fe) == osb->node_num)) {
+		if ((curr_master == OCFS_INVALID_NODE_NUM) ||
+		    (curr_master == osb->node_num)) {
 			goto got_it;
 		}
 
-		if (!IS_NODE_ALIVE (osb->publ_map, DISK_LOCK_CURRENT_MASTER(fe),
-				    OCFS_MAXIMUM_NODES)) {
-//			LOG_ERROR_ARGS ("old_ocfs_recover_vol(%u)",
-//					DISK_LOCK_CURRENT_MASTER (fe));
-//			old_ocfs_recover_vol(osb, DISK_LOCK_CURRENT_MASTER(fe));
-
+		if (!IS_NODE_ALIVE (osb->publ_map, curr_master, OCFS_MAXIMUM_NODES)) {
 			/* Reset the lock as not owned and return success?? */
 			/* This needs to be under some sort of cluster wide lock, */
+			fe = (ocfs_file_entry *)OCFS_BH_GET_DATA_WRITE(bh); /* write */
 			DISK_LOCK_CURRENT_MASTER (fe) = OCFS_INVALID_NODE_NUM;
 			DISK_LOCK_FILE_LOCK (fe) = OCFS_DLM_NO_LOCK;
+			OCFS_BH_PUT_DATA(bh);
 			goto got_it;
 		}
 
 		/* The local node is not the master */
-		if (DISK_LOCK_FILE_LOCK (fe) >= OCFS_DLM_ENABLE_CACHE_LOCK) {
+		if (lock_level == OCFS_DLM_ENABLE_CACHE_LOCK) {
 			int tmpstat;
 
-			lockres->lock_type = DISK_LOCK_FILE_LOCK (fe);
-			lockres->master_node_num = DISK_LOCK_CURRENT_MASTER (fe);
+			lockres->lock_type = lock_level;
+			lockres->master_node_num = curr_master;
 			status = ocfs_break_cache_lock (osb, lockres, inode);
 			if (status < 0) {
 				if (status != -EINTR)
 					LOG_ERROR_STATUS (status);
 				goto finally;
 			}
-			OCFS_BH_PUT_DATA(bh);
-			brelse(bh);
 			tmpstat = ocfs_read_bh (osb, offset, &bh, 0, inode);
 			if (tmpstat < 0) {
 				LOG_ERROR_STATUS (tmpstat);
 				status = tmpstat;
 				goto finally;
 			}
-			fe = (ocfs_file_entry *)OCFS_BH_GET_DATA(bh);
+			fe = (ocfs_file_entry *)OCFS_BH_GET_DATA_WRITE(bh); /* write */
 			DISK_LOCK_FILE_LOCK (fe) = OCFS_DLM_NO_LOCK;
+			lock_level = OCFS_DLM_NO_LOCK;
+			OCFS_BH_PUT_DATA(bh);
 		}
 
-		if (DISK_LOCK_FILE_LOCK (fe) <= lock_type)
+		if (lock_level <= lock_type)
 			goto got_it;
 	
-		OCFS_BH_PUT_DATA(bh);
 		brelse(bh);
 		ocfs_sleep (WAIT_FOR_VOTE_INCREMENT);
 		timewaited += WAIT_FOR_VOTE_INCREMENT;
@@ -421,18 +421,18 @@
 finally:
 	if (lockres && status >= 0) {
 		ocfs_acquire_lockres (lockres);
-		if (fe) {
-			lockres->lock_type = DISK_LOCK_FILE_LOCK (fe);
-			lockres->master_node_num = DISK_LOCK_CURRENT_MASTER (fe);
-			lockres->oin_openmap = DISK_LOCK_OIN_MAP (fe);
-			lockres->last_lock_upd = DISK_LOCK_LAST_WRITE (fe);
-		} else
-			LOG_ERROR_STR("fe was null!");
+		fe = (ocfs_file_entry *)OCFS_BH_GET_DATA_READ(bh); /* read */
+		lockres->lock_type = DISK_LOCK_FILE_LOCK (fe);
+		lockres->master_node_num = DISK_LOCK_CURRENT_MASTER (fe);
+		lockres->oin_openmap = DISK_LOCK_OIN_MAP (fe);
+		lockres->last_lock_upd = DISK_LOCK_LAST_WRITE (fe);
 		ocfs_release_lockres (lockres);
 		OCFS_BH_PUT_DATA(bh);
-		brelse(bh);
 	}
 
+	if (bh)
+		brelse(bh);
+
 	LOG_EXIT_STATUS (status);
 	return status;
 }				/* ocfs_wait_for_lock_release */
@@ -520,7 +520,7 @@
 		bool node_in_map = (IS_NODE_ALIVE (vote_map, i, numnodes));
 
 		status = 0;
-		p = OCFS_BH_GET_DATA(bhs[i]);
+		p = OCFS_BH_GET_DATA_READ(bhs[i]); /* read */
 		vote = (ocfs_vote *) p;
 
 		/* A node we were asking to vote is dead */
@@ -586,7 +586,7 @@
 		LOG_ERROR_STATUS (status);
 		goto finally;
 	}
-	pubsect = (ocfs_publish *)OCFS_BH_GET_DATA(bh);
+	pubsect = (ocfs_publish *)OCFS_BH_GET_DATA_WRITE(bh); /* write */
 
 	pubsect->dirty = false;
 	pubsect->vote = 0;
@@ -607,7 +607,7 @@
 
 	atomic_set (&osb->node_req_vote, 0);
 
-      finally:
+finally:
 	if (bh != NULL)
 		brelse(bh);
 	up (&(osb->publish_lock));
@@ -778,7 +778,7 @@
 			    atomic_read (&lockres->voted_event_woken), 1000);
 	atomic_set (&lockres->voted_event_woken, 0);
 
-      finally:
+finally:
 	ocfs_safefree (dlm_msg);
 	LOG_EXIT_STATUS (status);
 	return status;
@@ -800,7 +800,7 @@
 	LOG_ENTRY ();
 
 	ocfs_acquire_lockres (lockres);
-	fe = (ocfs_file_entry *)OCFS_BH_GET_DATA(bh);
+	fe = (ocfs_file_entry *)OCFS_BH_GET_DATA_READ(bh); /* read */
 
 	vote_map = osb->publ_map;
 	if (((flags & FLAG_FILE_DELETE) || (flags & FLAG_FILE_RENAME)) &&
@@ -951,7 +951,7 @@
 		}
 	}
 
-      bail:
+bail:
 	LOG_EXIT_STATUS (status);
 	return status;
 }				/* ocfs_acquire_lockres_ex */
@@ -1001,7 +1001,7 @@
 		LOG_ERROR_STATUS (status);
 		goto finally;
 	}
-	fe = (ocfs_file_entry *)OCFS_BH_GET_DATA(*bh);
+	fe = (ocfs_file_entry *)OCFS_BH_GET_DATA_WRITE(*bh); /* write */
 
 	if (flags & DLOCK_FLAG_MASTER)
 		DISK_LOCK_CURRENT_MASTER (fe) = lockres->master_node_num;
@@ -1090,7 +1090,7 @@
 		}
 	}
 
-      bail:
+bail:
 	LOG_EXIT_STATUS (status);
 	return status;
 }				/* ocfs_update_master_on_open */
@@ -1272,7 +1272,7 @@
 
 	ocfs_release_lockres (lockres);
 
-      bail:
+bail:
 	ocfs_put_lockres(lockres);
 	LOG_EXIT_STATUS (status);
 	return status;
@@ -1368,8 +1368,9 @@
 			LOG_ERROR_STATUS (status);
 			goto finally;
 		}
-		disklock = (ocfs_file_entry *)OCFS_BH_GET_DATA(*b);
+		disklock = (ocfs_file_entry *)OCFS_BH_GET_DATA_READ(*b); /* read */
 
+#ifdef SUSPICIOUS_CODE
 		// This code is added to avoid the case when fileentry is not yet updated 
 		// but the lockresource is updated by NMthread and needsflush is set to FALSE. 
 		if (lockres->master_node_num != osb->node_num &&
@@ -1379,6 +1380,7 @@
 			ocfs_sleep (1000);
 			goto again;
 		}
+#endif
 
 		if (lockres->master_node_num != osb->node_num || 
 		    lockres->master_node_num != DISK_LOCK_CURRENT_MASTER (disklock)) {
@@ -1417,19 +1419,26 @@
 		fast_path = true;
 	if (local_lock && truncate_extend) {
 #ifdef VERBOSE_LOCKING_TRACE
-		printk("local_lock but an extend or truncate request!  will do a master_request.\n");
+		LOG_TRACE_ARGS("local_lock but an extend or truncate request!  will do a master_request.\n");
 #endif
 		become_master = true;
 	}
 	if (!(fast_path || become_master || get_x || wait_for_release))
 		master_request = true;
 
+	/* hack upon hack... if the cachelock is still sitting around, skip voting */
+	if (!fast_path && (become_master || get_x)) {
+		if (ocfs_journal_new_file_search(osb, lock_id)==0) {
+			fast_path = true;
+		}
+	}
+
 	/* possible locking paths:               */
         /*   fast_path, become_master, get_x,    */
 	/*   wait_for_release, master_request    */
 
 #ifdef VERBOSE_LOCKING_TRACE
-	printk("acquire_lock: lock path is %s\n", 
+	LOG_TRACE_ARGS("acquire_lock: lock path is %s\n", 
 	       fast_path ? "fast_path" : 
 	        (become_master ? "become_master" : 
 		 (get_x ? "get_x" : 
@@ -1441,7 +1450,7 @@
 	if (fast_path) {
 		/* specifically keep an exclusive if we already have one on */
 		/* this node even if we are asking for a cache lock */
-		disklock = (ocfs_file_entry *)OCFS_BH_GET_DATA(*b);
+		disklock = (ocfs_file_entry *)OCFS_BH_GET_DATA_READ(*b); /* read */
 		keep_exclusive = (DISK_LOCK_FILE_LOCK (disklock) == OCFS_DLM_EXCLUSIVE_LOCK);
 		OCFS_BH_PUT_DATA(*b);
 		goto got_lock;
@@ -1546,40 +1555,32 @@
 
 got_lock:
 
-	disklock = (ocfs_file_entry *)OCFS_BH_GET_DATA(*b);
+	disklock = (ocfs_file_entry *)OCFS_BH_GET_DATA_READ(*b); /* read */
 	have_cache_already = (DISK_LOCK_CURRENT_MASTER (disklock) == osb->node_num &&
 			      DISK_LOCK_FILE_LOCK (disklock) == OCFS_DLM_ENABLE_CACHE_LOCK);
-	DISK_LOCK_CURRENT_MASTER (disklock) = osb->node_num;
+	OCFS_BH_PUT_DATA(*b);
 
-	if (!keep_exclusive) {
+	if (!keep_exclusive && !have_cache_already) {
+		disklock = (ocfs_file_entry *)OCFS_BH_GET_DATA_WRITE(*b); /* write */
+		DISK_LOCK_CURRENT_MASTER (disklock) = osb->node_num;
 		DISK_LOCK_FILE_LOCK (disklock) = lock_type;
 		OCFS_BH_PUT_DATA(*b);
 
-		if (have_cache_already)
-			LOG_TRACE_STR("have cachelock already... skip the write");
-		else if (lock_type == OCFS_DLM_ENABLE_CACHE_LOCK) {
-			status = ocfs_write_bh (osb, *b, 0, inode);
-			if (status < 0) {
-				LOG_ERROR_STATUS (status);
-				goto finally;
-			}
+		status = ocfs_write_bh (osb, *b, 0, inode);
+		if (status < 0) {
+			LOG_ERROR_STATUS (status);
+			goto finally;
 		}
-	} else
-		OCFS_BH_PUT_DATA(*b);
-
-	status = ocfs_write_bh (osb, *b, lockflags, inode);
-	if (status < 0) {
-		LOG_ERROR_STATUS (status);
-	} else {
-		/* We got the lock */
-		disklock = (ocfs_file_entry *)OCFS_BH_GET_DATA(*b);
-		lockres->lock_type = lock_type;
-		lockres->master_node_num = osb->node_num;
-		lockres->oin_openmap = DISK_LOCK_OIN_MAP (disklock);
-		OCFS_BH_PUT_DATA(*b);
-		status = 0;
 	}
 
+	/* We got the lock */
+	disklock = (ocfs_file_entry *)OCFS_BH_GET_DATA_READ(*b); /* read */
+	lockres->lock_type = lock_type;
+	lockres->master_node_num = osb->node_num;
+	lockres->oin_openmap = DISK_LOCK_OIN_MAP (disklock);
+	OCFS_BH_PUT_DATA(*b);
+	status = 0;
+
 skip_lock_write:
 	lockres->lock_holders++;
 	LOG_TRACE_ARGS("lockres->lock_holders = %u\n", lockres->lock_holders);
@@ -1617,15 +1618,18 @@
 	__u64 oin_node_map;
 	__u32 curr_master;
 	int lockflags = (lock_id >= osb->vol_layout.bitmap_off ? OCFS_BH_CACHED : 0);
+	bool clear_tmp = false;
 
 	LOG_ENTRY_ARGS ("(0x%08x, %u.%u, %u, %u, 0x%08x)\n", osb, HI (lock_id),
 			LO (lock_id), lock_type, flags, lockres);
 
 	if (bh != NULL)
 		b = &bh;
-	else
+	else {
 		b = &tmpbh;
-	
+		clear_tmp = true;
+	}
+
 	if (bh == NULL) {	
 		status = ocfs_read_bh (osb, lock_id, b, lockflags, inode);
 		if (status < 0) {
@@ -1633,7 +1637,7 @@
 			goto finito;
 		}
 	}
-	fe = (ocfs_file_entry *)OCFS_BH_GET_DATA(*b);
+	fe = (ocfs_file_entry *)OCFS_BH_GET_DATA_READ(*b); /* read */
 	oin_node_map = DISK_LOCK_OIN_MAP (fe);
 	curr_master = DISK_LOCK_CURRENT_MASTER (fe);
 	OCFS_BH_PUT_DATA(*b);
@@ -1757,7 +1761,7 @@
 			LOG_ERROR_STATUS (tmpstat);
 	}
 
-	fe = (ocfs_file_entry *)OCFS_BH_GET_DATA(*b);
+	fe = (ocfs_file_entry *)OCFS_BH_GET_DATA_WRITE(*b); /* write */
 
 	if (flags & FLAG_FILE_RELEASE_MASTER)
 		DISK_LOCK_CURRENT_MASTER (fe) = OCFS_INVALID_NODE_NUM;
@@ -1774,15 +1778,18 @@
 
 	/* Reset the lock on the disk */
 	if (!cachelock) {
-		tmpstat = ocfs_write_bh (osb, *b, lockflags, inode);
+		tmpstat = ocfs_write_bh (osb, *b, 0, inode);
 		if (tmpstat < 0)
 			LOG_ERROR_STATUS (tmpstat);
+		clear_tmp = false;
 	}
 
 finito:
-	if (tmpbh)
+	if (tmpbh) {
+		if (clear_tmp)
+			ocfs_clear_buffer_modified(tmpbh);
 		brelse(tmpbh);
-
+	}
 	LOG_EXIT_STATUS (status);
 	return status;
 }				/* ocfs_disk_release_lock */
@@ -1802,6 +1809,14 @@
 
 	ocfs_acquire_lockres (lockres);
 
+	if (bh) {
+		/* always get the write lock on the bh */
+		/* make sure to do this AFTER the lockres acquire */
+		OCFS_BH_GET_DATA_WRITE(bh);
+		OCFS_BH_PUT_DATA(bh);
+	}
+
+
 	if (lock_type == OCFS_DLM_SHARED_LOCK) {
 		if (atomic_dec_and_test (&lockres->lr_share_cnt)) {
 			if (lockres->lock_type == OCFS_DLM_SHARED_LOCK)
@@ -1837,6 +1852,8 @@
 	}
 
 finally:
+	if (bh)
+		ocfs_clear_buffer_modified(bh);
 	lockres->lock_holders--;
 	LOG_TRACE_ARGS("lockres->lock_holders = %u\n", lockres->lock_holders);
 	ocfs_release_lockres (lockres);
@@ -2006,7 +2023,8 @@
 			LOG_TRACE_STR ("Network vote");
 			jif = jiffies;
 			status = ocfs_send_dlm_request_msg (osb, lockres->sector_num,
-							lockres->lock_type, FLAG_FILE_RELEASE_CACHE,
+							lockres->lock_type, 
+							FLAG_ACQUIRE_LOCK|FLAG_FILE_RELEASE_CACHE,
 							lockres, votemap);
 			if (status >= 0) {
 				status = lockres->vote_status;
@@ -2031,8 +2049,8 @@
 		disk_reset = false;
 		status = ocfs_request_vote (osb, lockres->sector_num,
 					    lockres->lock_type,
-					    FLAG_FILE_RELEASE_CACHE, votemap,
-					    &lockseqno, inode);
+					    FLAG_ACQUIRE_LOCK|FLAG_FILE_RELEASE_CACHE, 
+					    votemap, &lockseqno, inode);
 		if (status < 0) {
 			if (status == -EAGAIN) {
 				retry = true;
@@ -2044,8 +2062,8 @@
 
 		status = ocfs_wait_for_vote (osb, lockres->sector_num,
 					     lockres->lock_type,
-					     FLAG_FILE_RELEASE_CACHE, votemap,
-					     15000, lockseqno, lockres);
+					     FLAG_ACQUIRE_LOCK|FLAG_FILE_RELEASE_CACHE, 
+					     votemap, 15000, lockseqno, lockres);
 		if (status < 0) {
 			if (status == -EAGAIN) {
 				retry = true;

Modified: trunk/src/extmap.c
===================================================================
--- trunk/src/extmap.c	2003-12-18 23:28:02 UTC (rev 14)
+++ trunk/src/extmap.c	2004-01-24 01:22:15 UTC (rev 15)
@@ -217,7 +217,7 @@
 		victim->sectors = sectors;
 	}
 
-      bail:
+bail:
 
 	LOG_EXIT_ULONG (ret);
 	return ret;
@@ -628,7 +628,7 @@
 			status = 0;
 	}
 
-      bail:
+bail:
 	LOG_EXIT_STATUS (status);
 	return status;
 }				/* ocfs_update_extent_map */

Modified: trunk/src/file.c
===================================================================
--- trunk/src/file.c	2003-12-18 23:28:02 UTC (rev 14)
+++ trunk/src/file.c	2004-01-24 01:22:15 UTC (rev 15)
@@ -41,7 +41,6 @@
 	__u64 parent_off;
 	ocfs_sem *oin_sem = NULL;
 
-
 	LOG_ENTRY_ARGS ("(0x%08x, 0x%08x, '%*s')\n", inode, file, 
                         file->f_dentry->d_name.len, file->f_dentry->d_name.name);
 	atomic_inc (&parent->i_count);
@@ -66,6 +65,7 @@
 	if (inode_data_is_oin (inode))
 		oin = GET_INODE_OIN(inode);
 	status = -EFAIL;
+
 	if (oin != NULL) {
 		if (!(oin->oin_flags & OCFS_OIN_IN_TEARDOWN) &&
 			!(oin->oin_flags & OCFS_OIN_DELETE_ON_CLOSE)) {
@@ -98,6 +98,8 @@
 			oin = NULL;
 			ocfs_down_sem (&(osb->osb_res), true);
 			status = ocfs_create_oin_from_entry (osb, fe_bh, &oin, parent_off, inode);
+			if (status < 0)
+				LOG_ERROR_STATUS(status);
 			new_oin = true;
 			ocfs_up_sem (&(osb->osb_res));
 		}
@@ -105,7 +107,10 @@
 
 	if (status < 0) {	/* not found on disk or in mem */
 		if (status != -EINTR) {
-			LOG_ERROR_STR ("Open request made for nonexistent file!");
+			LOG_ERROR_ARGS("Open request made for nonexistent "
+				       "file! ('%*s')", 
+				       file->f_dentry->d_name.len, 
+				       file->f_dentry->d_name.name);
 			status = -ENOENT;
 		}
 		goto leave;
@@ -156,6 +161,7 @@
 			goto leave;
 		}
 	}
+
 	if (oin->open_hndl_cnt > 0) {
 		/*  The OIN is currently in use by some thread. */
 		/*  We must check whether the requested access/share access */
@@ -431,7 +437,6 @@
 	int tmpstat;
 	ocfs_file_entry *fileEntry = NULL;
 	__u64 dirOffset = 0;
-	__u32 size;
 	bool bAcquiredLock = false;
 	ocfs_lock_res *pLockResource = NULL;
 	__u64 changeSeqNum = 0;
@@ -452,7 +457,7 @@
 		goto leave;
 	}
 
-	fileEntry = (ocfs_file_entry *)OCFS_BH_GET_DATA(bh);
+	fileEntry = (ocfs_file_entry *)OCFS_BH_GET_DATA_READ(bh); /* read */
 
 	if (!IS_VALID_FILE_ENTRY(fileEntry)) {
 		LOG_ERROR_ARGS ("Invalid fe at offset %u.%u", HILO (*file_off));
@@ -500,7 +505,7 @@
 		goto leave;
 	}
 
-	fileEntry = (ocfs_file_entry *)OCFS_BH_GET_DATA(bh);
+	fileEntry = (ocfs_file_entry *)OCFS_BH_GET_DATA_WRITE(bh); /* write */
 
 	if (bCacheLock) {
 		DISK_LOCK_FILE_LOCK (fileEntry) = OCFS_DLM_ENABLE_CACHE_LOCK;
@@ -517,7 +522,6 @@
 	fileEntry->sync_flags &= ~(OCFS_SYNC_FLAG_CHANGE);
 
 	dirOffset = fileEntry->this_sector;
-	size = (__u32) OCFS_SECTOR_ALIGN (sizeof (ocfs_file_entry));
 
 	flags = OCFS_FE_CACHE_FLAGS(osb, fileEntry);
 	OCFS_BH_PUT_DATA(bh);
@@ -551,7 +555,7 @@
 			LOG_ERROR_STATUS (tmpstat);
 		ocfs_put_lockres (pLockResource);
 	}
-	
+
 	if (bh != NULL)
 		brelse(bh);
 
@@ -617,11 +621,22 @@
 		ret = -EIO;
 		goto bail;
 	}
+	
+	if (filp->f_flags & O_APPEND) {
+		LOG_TRACE_ARGS("O_APPEND: inode->i_size=%u, ppos was %u\n",
+			       inode->i_size, *ppos);
+		*ppos = inode->i_size;
+	}
 
 	if (filp->f_flags & O_DIRECT) {
 		/* anything special for o_direct? */
 		LOG_TRACE_STR ("O_DIRECT");
-	} else {
+		if (((*ppos) & 511) || (count & 511) || 
+		    ((unsigned long)buf & 511) || (inode->i_size & 511)) {
+			filp->f_flags &= ~O_DIRECT;
+		}
+	}
+	if (!(filp->f_flags & O_DIRECT)) {
 		/* FIXME: is the down_sem supposed to be here?! */
 		LOG_TRACE_ARGS ("non O_DIRECT write, fileopencount=%d\n",
 				oin->open_hndl_cnt);
@@ -668,13 +683,12 @@
 			HI (*ppos), LO (*ppos), HI (newsize), LO (newsize),
 			HI (inode->i_size), LO (inode->i_size));
 
-	if (newsize > oin->alloc_size) {
+	if (writingAtEOF) {
 		LOG_TRACE_ARGS
 		    ("Will need more allocation: have=%u.%u, need=%u.%u\n",
 		     HI (oin->alloc_size), LO (oin->alloc_size), HI (newsize),
 		     LO (newsize));
 
-
 		status = ocfs_extend_file (osb, oin->parent_dirnode_off, oin, newsize, &oin->file_disk_off, NULL, inode, NULL);
 		if (status < 0) {
 			if (status != -EINTR && status != -ENOSPC) {
@@ -747,6 +761,11 @@
 	if (filp->f_flags & O_DIRECT) {
 		/* anything special for o_direct? */
 		LOG_TRACE_STR ("O_DIRECT");
+
+		if (((*ppos) & 511) || (count & 511) || 
+		    ((unsigned long)buf & 511) || (inode->i_size & 511)) {
+			filp->f_flags &= ~O_DIRECT;
+		}
 	}
 
 	if (OIN_NEEDS_VERIFICATION (oin)) {
@@ -796,7 +815,8 @@
 	ocfs_bitmap_free_head *free_head = NULL;
 	ocfs_journal_handle *handle = NULL;
 
-	LOG_ENTRY ();
+	LOG_ENTRY_ARGS ("(file_off = %u.%u, file_size = %u.%u\n", 
+		   HILO(file_off), HILO(file_size));
 
 	changeSeqNum = osb->curr_trans_id;
 
@@ -815,7 +835,7 @@
 		goto leave;
 	}
 
-	fe = (ocfs_file_entry *)OCFS_BH_GET_DATA(bh);
+	fe = (ocfs_file_entry *)OCFS_BH_GET_DATA_READ(bh); /* read */
 	if (!IS_VALID_FILE_ENTRY(fe)) {
 		LOG_ERROR_ARGS ("Invalid fe at offset %u.%u", HILO (file_off));
 		status = -EFAIL;
@@ -858,7 +878,34 @@
 		goto leave;
 	}
 
-	fe = (ocfs_file_entry *)OCFS_BH_GET_DATA(bh);
+	/* if oin {
+	      take oin->main_res 
+	      take fe bh lock
+	      make file and alloc _size changes
+	      release bh lock
+	      drop oin->main_res
+          } else {
+ 	      take fe bh lock
+	      make file and alloc _size changes
+	      release bh lock
+	      recheck inode to see if we have oin now
+	      if we have it now {
+	          take oin->main_res 
+		  drop oin->main_res
+	      }
+	  }
+	*/
+
+	/* alright, we're going to try to get the oin at least twice
+	 * in this function if it hasn't already been passed to
+	 * us.. This is our first try... */
+	if (!oin && inode_data_is_oin(inode))
+		oin = GET_INODE_OIN(inode);
+
+	if (oin)
+		ocfs_down_sem(&oin->main_res, true);
+
+	fe = (ocfs_file_entry *)OCFS_BH_GET_DATA_WRITE(bh); /* write */
 	
 	/* the file entry might have changed underneath us (while
 	 * waiting on the lock). make sure the size is still a valid
@@ -869,6 +916,8 @@
 			       "to size (%u.%u)!\n", HILO(fe->file_size), 
 			       HILO(file_size));
 		OCFS_BH_PUT_DATA(bh);
+		if (oin)
+			ocfs_up_sem(&oin->main_res);
 		status = -EINVAL;
 		LOG_ERROR_STATUS(status);
 		goto leave;
@@ -880,6 +929,8 @@
 	status = ocfs_free_extents_for_truncate (osb, fe, handle, free_head, inode);
 	if (status < 0) {
 		OCFS_BH_PUT_DATA(bh);
+		if (oin)
+			ocfs_up_sem(&oin->main_res);
 		LOG_ERROR_STATUS (status);
 		goto leave;
 	}
@@ -899,13 +950,20 @@
 
 	status = ocfs_journal_dirty(handle, bh);
 	if (status < 0) {
+		if (oin)
+			ocfs_up_sem(&oin->main_res);
 		LOG_ERROR_STATUS (status);
 		goto leave;
 	}
 
+	/* second try for the oin... */
+	if (!oin && inode_data_is_oin(inode)) {
+		oin = GET_INODE_OIN(inode);
+		ocfs_down_sem(&oin->main_res, true);
+	}
+
 	if (oin) {
-	/* if we updated correctly then we can update the OIN */
-		ocfs_down_sem (&(oin->main_res), true);
+		/* if we updated correctly then we can update the OIN */
 		oin->alloc_size = new_alloc_size;
 		ocfs_up_sem (&(oin->main_res));
 	}
@@ -984,7 +1042,7 @@
 		goto leave;
 	}
 
-	fileEntry = (ocfs_file_entry *)OCFS_BH_GET_DATA(bh);
+	fileEntry = (ocfs_file_entry *)OCFS_BH_GET_DATA_READ(bh); /* read */
 
 	if (!IS_VALID_FILE_ENTRY(fileEntry)) {
 		printk("fe->signature=%8s\n", fileEntry->signature);
@@ -994,11 +1052,12 @@
 		OCFS_BH_PUT_DATA(bh);
 		goto leave;
 	}
+	OCFS_BH_PUT_DATA(bh);
+	fileEntry = NULL;
 
+
 	if (passed_handle == NULL) {
 		/* cannot call start_trans with a locked buffer head. */
-		OCFS_BH_PUT_DATA(bh);
-
 		handle = ocfs_start_trans(osb, OCFS_FILE_EXTEND_CREDITS);
 		if (handle == NULL) {
 			LOG_ERROR_STATUS(status = -ENOMEM);
@@ -1012,14 +1071,15 @@
 #endif
 		/* Grab a lock on the entry found if we have more than
 		 * 1 extents and also make this node the master*/
-		fileEntry = (ocfs_file_entry *)OCFS_BH_GET_DATA(bh);
+		fileEntry = (ocfs_file_entry *)OCFS_BH_GET_DATA_READ(bh); /* read */
 
 		/* now we always take an EXTEND lock */
 		lockId = fileEntry->this_sector;
 		lockFlags = FLAG_FILE_EXTEND;
 		bFileLockAcquired = true;
 		
-		if ((DISK_LOCK_FILE_LOCK (fileEntry) == OCFS_DLM_ENABLE_CACHE_LOCK) && (DISK_LOCK_CURRENT_MASTER (fileEntry) == osb->node_num)) {
+		if ((DISK_LOCK_FILE_LOCK (fileEntry) == OCFS_DLM_ENABLE_CACHE_LOCK) && 
+		    (DISK_LOCK_CURRENT_MASTER (fileEntry) == osb->node_num)) {
 			bCacheLock = true;
 		}
 		OCFS_BH_PUT_DATA(bh);
@@ -1037,8 +1097,6 @@
 		}
 		bAcquiredLock = true;
 	} else {
-		OCFS_BH_PUT_DATA(bh);
-		fileEntry = NULL;
 		handle = passed_handle;
 	}
 
@@ -1048,7 +1106,7 @@
 		goto leave;
 	}
 
-	fileEntry = (ocfs_file_entry *)OCFS_BH_GET_DATA(bh);
+	fileEntry = (ocfs_file_entry *)OCFS_BH_GET_DATA_WRITE(bh); /* write */ /* journal access */
 
 	if (bCacheLock) {
 		DISK_LOCK_FILE_LOCK (fileEntry) = OCFS_DLM_ENABLE_CACHE_LOCK;
@@ -1093,6 +1151,21 @@
 		OCFS_BH_PUT_DATA(bh);
 		fileEntry = NULL;
 
+		{
+			struct buffer_head *alloc_bh;
+			unsigned long block;
+			struct super_block *sb = osb->sb;
+
+			for (block = actualDiskOffset >> 9; 
+			     block < (actualDiskOffset+actualLength) >> 9;
+			     block++) {
+				LOG_TRACE_ARGS("setting block %lu as new!\n", block);
+				alloc_bh = getblk(OCFS_GET_BLOCKDEV(sb), block, sb->s_blocksize);
+				alloc_bh->b_state |= (1UL << BH_New);
+				brelse(alloc_bh);
+			}
+		}
+
 		/* note: ok if oin is null here, not used in
 		 * ocfs_allocate_extent */
 		status = ocfs_allocate_extent (osb, oin, bh, handle,
@@ -1102,7 +1175,7 @@
 			LOG_ERROR_STATUS (status);
 			goto leave;
 		}
-		fileEntry = (ocfs_file_entry *)OCFS_BH_GET_DATA(bh);
+		fileEntry = (ocfs_file_entry *)OCFS_BH_GET_DATA_WRITE(bh); /* write */ /* journal access */
 
 		/* update the total allocation size here */
 		fileEntry->alloc_size += actualLength;
@@ -1149,9 +1222,6 @@
 leave:
 	if (passed_handle == NULL) {
 		if (handle) {
-			ocfs_bitmap_free_head *f = osb->alloc_free_head;
-			osb->alloc_free_head = NULL;
-
 			if (status < 0) {
 				ocfs_abort_trans(handle);
 			} else {
@@ -1167,11 +1237,6 @@
 				if (status < 0)
 					LOG_ERROR_STATUS(status);
 			}
-
-			if (f) {
-				ocfs_process_bitmap_free_head(osb, f);
-				free_bitmap_free_head(f);
-			}
 		}
 	}
 
@@ -1192,6 +1257,7 @@
 	if (bh != NULL)
 		brelse(bh);
 
+
 	LOG_EXIT_STATUS (status);
 	return status;
 }				/* ocfs_extend_file */

Modified: trunk/src/hash.c
===================================================================
--- trunk/src/hash.c	2003-12-18 23:28:02 UTC (rev 14)
+++ trunk/src/hash.c	2004-01-24 01:22:15 UTC (rev 15)
@@ -27,10 +27,21 @@
 
 #include <ocfs.h>
 
+
 /* Tracing */
 #define OCFS_DEBUG_CONTEXT      OCFS_DEBUG_CONTEXT_HASH
 
 
+extern int atomic_dec_and_lock(atomic_t *atomic, spinlock_t *lock);
+
+struct _ocfs_inode_num;
+
+static int ocfs_inode_hash_prune_all(ocfs_inode_hash *h);
+static struct _ocfs_inode_num * __ocfs_inode_hash_lookup(ocfs_inode_hash *h, 
+						 __u64 off);
+static inline struct _ocfs_inode_num * __ocfs_hash_remove(ocfs_inode_hash *h, 
+						  __u64 off);
+
 /*
  * ocfs_insert_sector_node()
  *
@@ -73,7 +84,7 @@
 				HILO(lock_res->sector_num));
 	}	
 
-      bail:
+bail:
 	LOG_EXIT_STATUS (status);
 	return status;
 }				/* ocfs_insert_sector_node */
@@ -115,7 +126,7 @@
 	} else
 		status = -ENOENT;		
 
-      bail:
+bail:
 	LOG_EXIT_STATUS (status);
 	return status;
 }				/* ocfs_lookup_sector_node */
@@ -147,7 +158,7 @@
 
 	ocfs_put_lockres (lock_res);
 
-      bail:
+bail:
 	LOG_EXIT ();
 	return ;
 }				/* ocfs_remove_sector_node */
@@ -190,7 +201,7 @@
 	memset (ht->buckets, 0, (ht->size * sizeof (HASHBUCKET)));
 	ret = 1;
 
-      bail:
+bail:
 	LOG_EXIT_LONG (ret);
 	return ret;
 }				/* ocfs_hash_create */
@@ -242,7 +253,7 @@
 	ocfs_safefree (ht->buckets);
 	ht->buckets = NULL;
 
-      bail:
+bail:
 	LOG_EXIT ();
 	return;
 }				/* ocfs_hash_destroy */
@@ -356,7 +367,7 @@
 	/* Increment the number of entries */
 	ht->entries++;
 
-      bail:
+bail:
 	/* Release Lock */
 	if (lockacqrd)
 		ocfs_up_sem (&(ht->hashlock));
@@ -427,7 +438,7 @@
 		bucket = bucket->next;
 	}
 
-      bail:
+bail:
 	/* Release Lock */
 	if (lockacqrd)
 		ocfs_up_sem (&(ht->hashlock));
@@ -472,7 +483,7 @@
 		bucket = bucket->next;
 	}
 
-      bail:
+bail:
 	/* Release Lock */
 	if (lockacqrd)
 		ocfs_up_sem (&(ht->hashlock));
@@ -530,7 +541,7 @@
 
 	data[datalen - 1] = '\0';
 
-      bail:
+bail:
 	/* Release Lock */
 	if (lockacqrd)
 		ocfs_up_sem (&(ht->hashlock));
@@ -621,3 +632,1063 @@
    /*-------------------------------------------- report the result */
 	return c;
 }				/* hash */
+
+
+
+
+/* bh semaphore hashtable stuff */
+
+/* Allocate one ocfs_bh_sem from OcfsGlobalCtxt.bh_sem_cache with GFP_NOFS; this wrapper sets no fields itself. */
+ocfs_bh_sem * ocfs_bh_sem_alloc()
+{
+	return kmem_cache_alloc(OcfsGlobalCtxt.bh_sem_cache, GFP_NOFS);
+}
+/* Hand a bh semaphore back to the OcfsGlobalCtxt.bh_sem_cache slab cache. */
+void ocfs_bh_sem_free(ocfs_bh_sem *sem)
+{
+	kmem_cache_free(OcfsGlobalCtxt.bh_sem_cache, sem);
+}
+/* Take one reference on sem by atomically incrementing s_refcnt. */
+void ocfs_bh_sem_get(ocfs_bh_sem *sem)
+{
+	atomic_inc(&sem->s_refcnt);
+}
+/* Drop one reference on sem; the put that takes s_refcnt to zero releases the pinned buffer_head. */
+void ocfs_bh_sem_put(ocfs_bh_sem *sem)
+{
+	/* atomic_dec_and_lock() returns nonzero, with the hash lock held, only when the count hits zero */
+	if (atomic_dec_and_lock(&sem->s_refcnt, &OcfsGlobalCtxt.bh_sem_hash_lock)) {
+		if (buffer_modified(sem->s_bh))
+			LOG_ERROR_ARGS("putting last refcount of a modified buffer!  block %lu\n",
+				       (unsigned long)sem->s_bh->b_blocknr);
+		put_bh(sem->s_bh);
+		sem->s_bh = NULL;	/* ocfs_bh_sem_lookup() expects NULL here when the refcount is zero */
+		spin_unlock(&OcfsGlobalCtxt.bh_sem_hash_lock);
+	}
+}
+/* Acquire sem's semaphore (down() on s_sem); blocks while another holder has it. */
+void ocfs_bh_sem_down(ocfs_bh_sem *sem)
+{
+	down(&sem->s_sem);
+}
+/* Release sem's semaphore (up() on s_sem). */
+void ocfs_bh_sem_up(ocfs_bh_sem *sem)
+{
+	up(&sem->s_sem);
+}
+/* Bucket selection for the bh_sem hash: mix (b_dev, b_blocknr) in _hashfn, then AND with (table size - 1). */
+/* ripped right out of inode.c. NOTE(review): ocfs_bh_hash_shift is (bh_sem_hash_sz - 1) yet _hashfn also uses it as a shift count -- mask and shift look conflated; confirm. */
+#define ocfs_bh_hash_shift  (OcfsGlobalCtxt.bh_sem_hash_sz-1)
+#define _hashfn(dev,block)      \
+		((((dev)<<(ocfs_bh_hash_shift - 6)) ^ ((dev)<<(ocfs_bh_hash_shift - 9))) ^ \
+		(((block)<<(ocfs_bh_hash_shift - 6)) ^ ((block) >> 13) ^ \
+		((block) << (ocfs_bh_hash_shift - 12))))
+#define ocfs_bh_sem_hash_fn(_b)   \
+	(_hashfn((unsigned int)((_b)->b_dev), (_b)->b_blocknr) & ocfs_bh_hash_shift)
+/* Set up the global bh_sem hash: lock, order-2 bucket array, empty buckets. Returns 0 or -ENOMEM. */
+int ocfs_bh_sem_hash_init()
+{
+	int idx, status = -ENOMEM;
+	struct list_head *table;
+
+	LOG_ENTRY();
+
+	spin_lock_init (&OcfsGlobalCtxt.bh_sem_hash_lock);
+	table = (struct list_head *)__get_free_pages(GFP_KERNEL, 2);
+	OcfsGlobalCtxt.bh_sem_hash = table;
+	if (table == NULL) {
+		LOG_ERROR_STR("ENOMEM allocating ocfs_bh_sem_hash");
+	} else {
+		/* four pages' worth of list heads, matching the order-2 allocation above */
+		OcfsGlobalCtxt.bh_sem_hash_sz = (PAGE_SIZE * 4) / sizeof(struct list_head);
+		for (idx = 0; idx < OcfsGlobalCtxt.bh_sem_hash_sz; idx++)
+			INIT_LIST_HEAD(&table[idx]);
+		/* -1 is the value ocfs_bh_sem_hash_prune() treats as "nothing to prune" */
+		atomic_set(&OcfsGlobalCtxt.bh_sem_hash_target_bucket, -1);
+		status = 0;
+	}
+	LOG_EXIT();
+	return status;
+}
+/* Tear down the global bh_sem hash: prune until no entry is still referenced, free the table, return 0. */
+int ocfs_bh_sem_hash_destroy()
+{
+	int missed;
+
+	LOG_ENTRY();
+
+	while (1) {	/* retry until ocfs_bh_sem_hash_prune_all() reports nothing left in use */
+		missed = ocfs_bh_sem_hash_prune_all();
+		if (missed == 0)
+			break;
+		LOG_TRACE_ARGS("still have %d entries in use in hashtable\n", missed);
+	}
+
+	spin_lock (&OcfsGlobalCtxt.bh_sem_hash_lock);
+	free_pages((unsigned long)OcfsGlobalCtxt.bh_sem_hash, 2);	/* order 2, as allocated in ocfs_bh_sem_hash_init() */
+	OcfsGlobalCtxt.bh_sem_hash = NULL;
+	spin_unlock (&OcfsGlobalCtxt.bh_sem_hash_lock);	/* do not return with the hash lock still held */
+	LOG_EXIT();
+	return 0;
+}
+/* Find, or create and insert, the ocfs_bh_sem matching bh's (b_dev, b_blocknr) in the global hash. */
+/* Returns it with s_refcnt raised by one, or NULL when a new entry was needed and its allocation failed. */
+ocfs_bh_sem * ocfs_bh_sem_lookup(struct buffer_head *bh)
+{
+	int depth, bucket;
+	struct list_head *head, *iter = NULL;
+	ocfs_bh_sem *sem = NULL, *newsem = NULL;
+
+#ifdef VERBOSE_BH_SEM
+	LOG_ENTRY();
+#endif
+	bucket = ocfs_bh_sem_hash_fn(bh);
+	head = &OcfsGlobalCtxt.bh_sem_hash[bucket];
+again:
+	depth = 0;
+	spin_lock (&OcfsGlobalCtxt.bh_sem_hash_lock);
+	/* scan the bucket for an entry with the same block number and device */
+	list_for_each(iter, head) {
+		if (++depth > OCFS_BH_SEM_HASH_PRUNE_TRIGGER) {
+			/* Grandma, what a long list you have? (flag this bucket for ocfs_bh_sem_hash_prune) */
+			atomic_set(&OcfsGlobalCtxt.bh_sem_hash_target_bucket, bucket);
+		}
+		sem = list_entry (iter, ocfs_bh_sem, s_list);
+		if (sem->s_blocknr == bh->b_blocknr &&
+		    sem->s_dev == bh->b_dev) {
+			if (atomic_read(&sem->s_refcnt)==0) {	/* unreferenced entry: s_bh must be NULL, pin bh again */
+				if (sem->s_bh) {
+					LOG_ERROR_STR("refcount was zero but s_bh not NULL!");
+					BUG();
+				}
+				get_bh(bh);
+				sem->s_bh = bh;
+			}
+			if (sem->s_bh != bh) {
+				LOG_ERROR_STR("ocfs_bh_sem bufferhead does not match!");
+				BUG();
+			}
+			break;
+		}
+		sem = NULL;	/* no match on this entry; stays NULL if the loop runs off the end */
+	}
+
+	if (newsem && !sem) {
+		/* second pass, we are first to insert */
+		sem = newsem;
+		list_add(&sem->s_list, head);
+		get_bh(bh);
+		sem->s_bh = bh;
+	}
+
+	if (sem) {
+		/* found something on first or second pass */
+		ocfs_bh_sem_get(sem);
+		if (newsem != sem) {
+			/* if not just added, mru to front */
+			list_del(&sem->s_list);
+			list_add(&sem->s_list, head);
+		}
+		//LOG_TRACE_ARGS("found bh_sem for %d, modified=%s, pid=%d\n",
+		//	      sem->s_bh->b_blocknr,
+		//	      buffer_modified(sem->s_bh) ? "true" : "false",
+		//	      sem->s_pid);
+			      
+		if (buffer_modified(sem->s_bh) && sem->s_pid == 0) {
+			LOG_ERROR_ARGS("found a%s sem with a modified bh but no pid!!! (block=%d)\n", 
+				       newsem != sem ? "n old" : " new",
+				       sem->s_bh->b_blocknr);
+		}
+	} else {
+		/* first pass. not found. do alloc */
+		spin_unlock (&OcfsGlobalCtxt.bh_sem_hash_lock);	/* lock is dropped across the allocation */
+		newsem = ocfs_bh_sem_alloc();
+		if (newsem) {
+			newsem->s_bh = NULL;
+			atomic_set(&newsem->s_refcnt, 0);
+			newsem->s_blocknr = bh->b_blocknr;
+			newsem->s_dev = bh->b_dev;
+			init_MUTEX (&newsem->s_sem);
+			init_waitqueue_head(&newsem->s_wait);
+			newsem->s_pid = 0;
+			goto again;	/* rescan under the lock before inserting newsem */
+		}
+		sem = NULL;
+		goto bail;	/* allocation failed; the lock was already released above */
+	}
+
+	spin_unlock (&OcfsGlobalCtxt.bh_sem_hash_lock);
+
+	if (newsem && newsem != sem) {
+		/* another thread inserted while we were sleeping */
+		ocfs_bh_sem_free(newsem);
+	}
+
+bail:	
+#ifdef VERBOSE_BH_SEM
+	LOG_EXIT_PTR(sem);
+#endif
+	return sem;
+}
+
+/* Returns OCFS_BH_SEM_GOT_LOCK, or OCFS_BH_SEM_WAIT_ON_MODIFY when bh is marked modified by another pid. */
+/* look up the semaphore for this blocknum, and lock it too    */
+/* this can obviously block if someone else already has the bh */
+int ocfs_bh_sem_lock(struct buffer_head *bh)
+{
+	ocfs_bh_sem *sem;
+	int ret;
+
+#ifdef VERBOSE_BH_SEM
+	LOG_ENTRY_ARGS("(blocknr=%u)\n", bh->b_blocknr);
+#endif
+	sem = ocfs_bh_sem_lookup(bh);
+	if (!sem)
+		BUG();
+	/* neither the semaphore nor the lookup reference is released in this function, whichever value is returned */
+	ocfs_bh_sem_down(sem);
+	if (buffer_modified(bh) && sem->s_pid != current->pid) {
+#ifdef VERBOSE_BH_SEM
+		LOG_TRACE_ARGS("need to wait... modified and pid is %d\n", sem->s_pid);
+#endif
+		ret = OCFS_BH_SEM_WAIT_ON_MODIFY;
+	} else {
+#ifdef VERBOSE_BH_SEM
+		LOG_TRACE_ARGS("got the lock\n");
+#endif
+		ret = OCFS_BH_SEM_GOT_LOCK;
+	}
+
+#ifdef VERBOSE_BH_SEM
+	LOG_EXIT_ULONG(ret);
+#endif
+	return ret;
+}
+/* Lock bh's semaphore for modification: mark bh modified and record current->pid as owner unless another pid already owns it. */
+int ocfs_bh_sem_lock_modify(struct buffer_head *bh)
+{
+	ocfs_bh_sem *sem;
+	int ret;
+	/* returns OCFS_BH_SEM_GOT_LOCK, or OCFS_BH_SEM_WAIT_ON_MODIFY when a different pid owns the modified bh */
+#ifdef VERBOSE_BH_SEM
+	LOG_ENTRY_ARGS("(blocknr=%u)\n", bh->b_blocknr);
+#endif
+	sem = ocfs_bh_sem_lookup(bh);
+	if (!sem)
+		BUG();
+	/* the semaphore is taken here and not released in this function, whichever value is returned */
+	ocfs_bh_sem_down(sem);
+	ret = OCFS_BH_SEM_GOT_LOCK;
+	if (buffer_modified(bh)) {
+		//LOG_TRACE_ARGS("buffer modified\n");
+		if (sem->s_pid == 0) {
+			LOG_ERROR_ARGS("modified, but pid is 0!\n");
+			// BUG();
+			sem->s_pid = current->pid;
+
+			/* this should really be a bug, but for now, up the */
+			/* refcount as if it weren't modified */
+			ocfs_bh_sem_get(sem);
+		} else if (sem->s_pid != current->pid) {
+			LOG_TRACE_ARGS("need to wait... modified and pid is %d\n", sem->s_pid);
+			ret = OCFS_BH_SEM_WAIT_ON_MODIFY;
+		}	/* else: already modified by this pid, nothing more to record */
+	} else {
+		//LOG_TRACE_ARGS("buffer NOT modified\n");
+		/* this is the first call to modify it */
+		if (sem->s_pid != 0)
+			LOG_ERROR_ARGS("first to modify, but pid is NOT 0!\n");
+		sem->s_pid = current->pid;
+		
+		/* extra refcount for the modified bh */
+		ocfs_bh_sem_get(sem);
+		
+		set_buffer_modified(bh);
+	}
+
+#ifdef VERBOSE_BH_SEM
+	LOG_EXIT_ULONG(ret);
+#endif
+	return ret;
+}
+
+/* Release bh's semaphore and drop two references: the one from the lookup done here and the one */
+/* left by the earlier lock call.  BUG()s if the lookup returns NULL; otherwise always returns 0. */
+int ocfs_bh_sem_unlock(struct buffer_head *bh)
+{
+	ocfs_bh_sem *sem;
+
+#ifdef VERBOSE_BH_SEM
+	LOG_ENTRY();
+#endif	
+	sem = ocfs_bh_sem_lookup(bh);
+	if (!sem)
+		BUG();
+
+	/* take away one ref from this lookup */
+	ocfs_bh_sem_put(sem);
+	
+	ocfs_bh_sem_up(sem);
+
+	/* take away another ref from the lock lookup */
+	ocfs_bh_sem_put(sem);
+
+#ifdef VERBOSE_BH_SEM
+	LOG_EXIT();
+#endif
+	return 0;
+}
+
+/* Prune the one bucket recorded in bh_sem_hash_target_bucket, freeing entries whose s_refcnt is below 1. */
+/* returns number of pruned entries */
+int ocfs_bh_sem_hash_prune()
+{
+	int bucket, pruned;
+	struct list_head *head, *iter = NULL, *tmpiter = NULL;
+	ocfs_bh_sem *sem = NULL;
+	LIST_HEAD(tmp);
+
+	LOG_ENTRY();
+
+	/* The better to prune you with, my dear! */
+	bucket = atomic_read(&OcfsGlobalCtxt.bh_sem_hash_target_bucket);
+	if (bucket == -1) {	/* unlocked early-out: no bucket is flagged */
+		pruned = 0;
+		goto bail;
+	}
+
+	spin_lock(&OcfsGlobalCtxt.bh_sem_hash_lock);
+	/* re-read the target now that the lock is held */
+	bucket = atomic_read(&OcfsGlobalCtxt.bh_sem_hash_target_bucket);
+	if (bucket == -1) {
+		spin_unlock (&OcfsGlobalCtxt.bh_sem_hash_lock);
+		pruned = 0;
+		goto bail;
+	}
+
+	head = &OcfsGlobalCtxt.bh_sem_hash[bucket];
+	pruned = 0;
+
+	/* run in lru order */
+	list_for_each_prev_safe(iter, tmpiter, head) {
+		sem = list_entry (iter, ocfs_bh_sem, s_list);
+		if (atomic_read(&sem->s_refcnt) < 1) {
+			list_del(&sem->s_list);
+			list_add(&sem->s_list, &tmp);	/* park on a private list; freed after unlocking */
+			pruned++;
+		}
+		if (pruned >= OCFS_BH_SEM_HASH_PRUNE_MAX)
+			break;
+	}
+	atomic_set(&OcfsGlobalCtxt.bh_sem_hash_target_bucket, -1);
+
+	spin_unlock (&OcfsGlobalCtxt.bh_sem_hash_lock);
+
+	list_for_each_safe(iter, tmpiter, &tmp) {
+		sem = list_entry (iter, ocfs_bh_sem, s_list);
+		if (sem->s_bh) {
+			LOG_ERROR_STR("s_bh is NOT NULL");
+			BUG();
+		}
+		list_del(&sem->s_list);
+		ocfs_bh_sem_free(sem);
+	}
+bail:
+	LOG_EXIT_ULONG(pruned);
+	return pruned;
+}
+/* Scan every bucket for entries whose buffer is marked modified with s_pid == pid; reset each one and return how many were found. */
+int ocfs_bh_sem_hash_cleanup_pid(pid_t pid)
+{
+	int bucket, found = 0;
+	struct list_head *head, *iter = NULL;
+	ocfs_bh_sem *sem = NULL;
+
+	LOG_ENTRY();
+
+	bucket = 0;
+again:
+	spin_lock(&OcfsGlobalCtxt.bh_sem_hash_lock);
+
+	head = &OcfsGlobalCtxt.bh_sem_hash[bucket];
+
+	list_for_each(iter, head) {
+		sem = list_entry (iter, ocfs_bh_sem, s_list);
+
+		if (sem->s_bh && 
+		    buffer_modified(sem->s_bh) && 
+		    sem->s_pid == pid) {
+			found++;
+
+			/* only do one buffer at a time. */
+			spin_unlock(&OcfsGlobalCtxt.bh_sem_hash_lock);
+			/* under the entry's own semaphore: clear the owner pid and the modified mark */
+			ocfs_bh_sem_down(sem);
+			sem->s_pid = 0;
+			clear_buffer_modified(sem->s_bh);
+			ocfs_bh_sem_up(sem);
+
+			/* remove ref from ocfs_bh_sem_lock_modify */
+			ocfs_bh_sem_put(sem);
+			/* bucket is unchanged here, so this rescans the same bucket from its head */
+			goto again;
+		}
+	}
+
+	spin_unlock (&OcfsGlobalCtxt.bh_sem_hash_lock);
+
+	if (++bucket < OcfsGlobalCtxt.bh_sem_hash_sz)
+		goto again;
+
+	if (found)
+		LOG_ERROR_ARGS("Found %d modified buffers!\n", found);
+
+	LOG_EXIT_ULONG(found);
+	return found;
+}
+/* Sweep every bucket, freeing entries whose s_refcnt is below 1 and counting the ones still referenced. */
+/* returns number of missed entries */
+int ocfs_bh_sem_hash_prune_all()
+{
+	int bucket, missed;
+	struct list_head *head, *iter = NULL, *tmpiter = NULL;
+	ocfs_bh_sem *sem = NULL;
+	LIST_HEAD(tmp);
+
+	LOG_ENTRY();
+
+	missed = 0;
+	bucket = 0;
+	spin_lock(&OcfsGlobalCtxt.bh_sem_hash_lock);
+	atomic_set(&OcfsGlobalCtxt.bh_sem_hash_target_bucket, -1);
+again:
+	head = &OcfsGlobalCtxt.bh_sem_hash[bucket];
+
+	/* run in lru order */
+	list_for_each_prev_safe(iter, tmpiter, head) {
+		sem = list_entry (iter, ocfs_bh_sem, s_list);
+		if (atomic_read(&sem->s_refcnt) < 1) {
+			list_del(&sem->s_list);
+			list_add(&sem->s_list, &tmp);
+		} else {
+			missed++;
+			LOG_TRACE_ARGS("missed block %lu, refcount %u, "
+				       "pid = %u\n",
+				       sem->s_blocknr,
+				       atomic_read(&sem->s_refcnt),	/* s_refcnt is an atomic_t; pass its value, not the struct */
+				       sem->s_pid);
+		}
+	}
+
+	if (++bucket < OcfsGlobalCtxt.bh_sem_hash_sz)
+		goto again;
+
+	LOG_TRACE_ARGS("finished pruning, missed %d entries\n", missed);
+
+	spin_unlock (&OcfsGlobalCtxt.bh_sem_hash_lock);
+	/* the collected entries are off the hash now; free them without holding the lock */
+	list_for_each_safe(iter, tmpiter, &tmp) {
+		sem = list_entry (iter, ocfs_bh_sem, s_list);
+		if (sem->s_bh) {
+			LOG_ERROR_STR("s_bh is NOT NULL");
+			BUG();
+		}
+		list_del(&sem->s_list);
+		ocfs_bh_sem_free(sem);
+	}
+
+	LOG_EXIT_ULONG(missed);
+	return missed;
+}
+/* Sleep uninterruptibly on the bh_sem wait queue until bh is not marked modified, or is marked by this pid. */
+void wait_on_buffer_modified(struct buffer_head * bh)
+{
+	ocfs_bh_sem *sem = ocfs_bh_sem_lookup(bh);
+	struct task_struct *tsk = current;
+	DECLARE_WAITQUEUE(wait, tsk);
+
+	if (!sem)	/* lookup returns NULL if allocation fails; same guard as ocfs_bh_sem_lock() */
+		BUG();
+	LOG_ENTRY_ARGS("(block=%lu, sem->s_pid=%d)\n", bh->b_blocknr, 
+		       sem->s_pid );
+
+	add_wait_queue(&sem->s_wait, &wait);
+	do {
+		run_task_queue(&tq_disk);
+		set_task_state(tsk, TASK_UNINTERRUPTIBLE);
+		if (!buffer_modified(bh) || sem->s_pid == current->pid)
+			break;
+		schedule();
+	} while (buffer_modified(bh) && sem->s_pid != current->pid);
+	tsk->state = TASK_RUNNING;
+	remove_wait_queue(&sem->s_wait, &wait);
+	ocfs_bh_sem_put(sem);	/* drop the reference taken by the lookup above */
+	LOG_EXIT();
+}
+
+/*
+ * ocfs_clear_buffer_modified()
+ *
+ * If the buffer is flagged as modified: take the bh_sem via
+ * ocfs_bh_sem_lock_modify(), clear the flag and the owning pid,
+ * unlock, wake anyone sleeping on the sem's wait queue and drop the
+ * two references described below. Does nothing for a buffer that is
+ * not flagged. Failing to get the lock is treated as fatal.
+ */
+void ocfs_clear_buffer_modified(struct buffer_head *bh)
+{
+	ocfs_bh_sem *bh_sem;
+
+	LOG_ENTRY();
+
+	if (!buffer_modified(bh))
+		goto done;
+
+	bh_sem = ocfs_bh_sem_lookup(bh);
+	if (ocfs_bh_sem_lock_modify(bh) != OCFS_BH_SEM_GOT_LOCK) {
+		printk("ocfs: sem->s_pid=%d, my pid=%d\n", bh_sem->s_pid,
+		       current->pid);
+		BUG();
+	}
+
+	clear_buffer_modified(bh);
+	bh_sem->s_pid = 0;
+	ocfs_bh_sem_unlock(bh);
+
+	if (waitqueue_active(&bh_sem->s_wait))
+		wake_up(&bh_sem->s_wait);
+
+	/* first put: the ref taken by the lookup above */
+	ocfs_bh_sem_put(bh_sem);
+
+	/* second put: the additional ref from ocfs_bh_sem_lock_modify */
+	ocfs_bh_sem_put(bh_sem);
+
+done:
+	LOG_EXIT();
+}
+
+typedef struct _ocfs_inode_num {	/* one entry of the per-volume inode hash */
+	struct list_head  i_list;	/* link in the hash bucket */
+	unsigned long     i_ino;	/* inode number handed out for i_off */
+	__u64             i_off;    /* fe->this_sector OR
+				     * fe->extents[0].disk_off */
+	__u64             i_fe_off; /* used only for directory inodes,
+				     * points to parent fe of
+				     * dirnode. for files i_off == i_fe_off,
+				     * for root directory this is 0 */
+	atomic_t          i_refcnt; /* this is very short lived */
+} ocfs_inode_num;
+
+/*
+ * ocfs_create_inode_num()
+ *
+ * Allocate a zeroed ocfs_inode_num with an initialised (unlinked)
+ * list head and a reference count of one. Logs -ENOMEM and returns
+ * NULL if the allocation fails.
+ */
+static inline ocfs_inode_num *ocfs_create_inode_num(void)
+{
+	ocfs_inode_num *fresh = ocfs_malloc(sizeof(ocfs_inode_num));
+
+	if (fresh == NULL) {
+		LOG_ERROR_STATUS(-ENOMEM);
+		return(NULL);
+	}
+
+	memset(fresh, 0, sizeof(ocfs_inode_num));
+	INIT_LIST_HEAD(&fresh->i_list);
+	atomic_set(&fresh->i_refcnt, 1);
+
+	return(fresh);
+}
+/* counterpart of ocfs_create_inode_num() */
+#define ocfs_free_inode_num(inum) (ocfs_free(inum))
+
+/*
+ * ocfs_inode_hash_init()
+ *
+ * Set up the inode hash embedded in an osb: initialise its lock,
+ * allocate a single page of bucket heads and make every bucket an
+ * empty list. Returns 0 on success or -ENOMEM if the page could not
+ * be allocated.
+ */
+int ocfs_inode_hash_init(ocfs_super *osb)
+{
+	ocfs_inode_hash *hash = &osb->inode_hash;
+	int status = 0;
+	int idx;
+
+	LOG_ENTRY();
+
+	spin_lock_init(&hash->lock);
+	hash->num_ents = 0;
+
+	/* the bucket array is exactly one page (order 0) */
+	hash->hash = (struct list_head *)__get_free_pages(GFP_KERNEL, 0);
+	if (hash->hash) {
+		hash->size = PAGE_SIZE / sizeof(struct list_head);
+
+		LOG_TRACE_ARGS("h->size = %d\n", hash->size);
+
+		for (idx = 0; idx < hash->size; idx++)
+			INIT_LIST_HEAD(&(hash->hash[idx]));
+	} else {
+		LOG_ERROR_STATUS(status = -ENOMEM);
+	}
+
+	LOG_EXIT_STATUS(status);
+	return status;
+} /* ocfs_inode_hash_init */
+
+/*
+ * ocfs_inode_hash_prune_all
+ *
+ * Forcefully empties the hash: every entry is unlinked and freed no
+ * matter what its refcount is (a refcount other than 1 is only
+ * traced). Returns the number of entries that were pruned.
+ */
+static int ocfs_inode_hash_prune_all(ocfs_inode_hash *h)
+{
+	int pruned = 0;
+	int bucket;
+	struct list_head *pos = NULL;
+	struct list_head *next = NULL;
+	ocfs_inode_num *ent;
+
+	LOG_ENTRY();
+
+	spin_lock(&h->lock);
+
+	for (bucket = 0; bucket < h->size; bucket++) {
+		/* walking an empty bucket is simply a no-op */
+		list_for_each_safe(pos, next, &h->hash[bucket]) {
+			ent = list_entry(pos, ocfs_inode_num, i_list);
+
+			if (atomic_read(&ent->i_refcnt) != 1) {
+				LOG_TRACE_ARGS("inum %lu has refcount %u "
+					       "(offset = %u.%u)\n",
+					       ent->i_ino,
+					       atomic_read(&ent->i_refcnt),
+					       HILO(ent->i_off));
+			}
+
+			list_del(&ent->i_list);
+			ocfs_free_inode_num(ent);
+			h->num_ents--;
+			pruned++;
+		}
+	}
+
+	spin_unlock(&h->lock);
+
+	LOG_EXIT_STATUS(pruned);
+	return pruned;
+} /* ocfs_inode_hash_prune_all */
+
+/*
+ * ocfs_inode_hash_destroy()
+ *
+ * Tear down an inode hash: prune whatever entries are still present,
+ * release the page holding the bucket heads and reset the bookkeeping
+ * fields. The hash must not be used again until it is re-initialised.
+ */
+void ocfs_inode_hash_destroy(ocfs_inode_hash *h)
+{
+	int n;
+
+	LOG_ENTRY();
+
+	/* by shutdown, we shouldn't have anything left in the hash. */
+	n = ocfs_inode_hash_prune_all(h);
+	if (n)
+		LOG_TRACE_ARGS("%d items pruned from inode hash.\n", n);
+
+	spin_lock(&h->lock);
+	free_pages((unsigned long) h->hash, 0);
+	h->hash = NULL;
+	h->num_ents = 0;
+	h->size = 0;
+	/* release the lock taken above; it used to be left held */
+	spin_unlock(&h->lock);
+
+	LOG_EXIT();
+	return;
+} /* ocfs_inode_hash_destroy */
+
+/* bucket index for a disk offset; arguments are parenthesized so that
+ * expressions can be passed safely */
+#define OCFS_INODE_HASH(h, off) (((off) / 512) % ((h)->size))
+
+/*
+ * __ocfs_inode_hash_lookup()
+ *
+ * Find the entry whose i_off equals 'off'. Returns the entry, or NULL
+ * if the bucket holds no match. No reference is taken.
+ *
+ * You MUST be holding the inode hash lock before calling this! 
+ */
+static ocfs_inode_num * __ocfs_inode_hash_lookup(ocfs_inode_hash *h, 
+						 __u64 off)
+{
+	ocfs_inode_num *inum = NULL;
+	int bucket;
+	struct list_head *head;
+	struct list_head *iter = NULL;
+
+#ifdef CONFIG_SMP
+	/* Assert that the caller holds h->lock. This can only be tested
+	 * on SMP builds: on uniprocessor kernels without spinlock
+	 * debugging spin_trylock() always reports success, which would
+	 * turn every lookup into a BUG(). */
+	if (spin_trylock(&h->lock))
+		BUG();
+#endif
+
+	bucket = OCFS_INODE_HASH(h, off);
+	LOG_TRACE_ARGS("off = %u.%u, bucket = %d\n", HILO(off), bucket);
+
+	head = &h->hash[bucket];
+
+	if (list_empty(head))
+		goto bail;
+
+	list_for_each(iter, head) {
+		inum = list_entry(iter, ocfs_inode_num, i_list);
+
+		if (inum->i_off == off)
+			break;
+		/* reset so that falling off the end returns NULL */
+		inum = NULL;
+	}
+
+bail:
+	return(inum);
+} /* __ocfs_inode_hash_lookup */
+
+/* 
+ * ocfs_inode_hash_lookup()
+ * 
+ * Look up an offset in the hash while holding the hash lock. Returns
+ * '0' if the offset is not present, otherwise the inode number stored
+ * for it; when fe_off is non-NULL it also receives the entry's
+ * i_fe_off. You have no guarantee that the entry will stay in the
+ * hash after this call, or that it won't get inserted either!
+ */
+unsigned long ocfs_inode_hash_lookup(ocfs_inode_hash *h, 
+				     __u64 offset, 
+				     __u64 *fe_off)
+{
+	ocfs_inode_num *ent;
+	unsigned long found_ino = 0;
+
+	LOG_ENTRY_ARGS("(offset=%u.%u)\n", HILO(offset));
+
+	spin_lock(&h->lock);
+	ent = __ocfs_inode_hash_lookup(h, offset);
+	if (ent != NULL) {
+		found_ino = ent->i_ino;
+		if (fe_off != NULL)
+			*fe_off = ent->i_fe_off;
+	}
+	spin_unlock(&h->lock);
+
+	LOG_EXIT_ULONG(found_ino);
+	return(found_ino);
+} /* ocfs_inode_hash_lookup */
+
+/*
+ * ocfs_inode_hash_insert()
+ *
+ * returns the inode number for that offset if it already exists in
+ * the hash, otherwise inserts a new inode and returns the inode
+ * number passed in. 
+ *
+ * Returns 0 if a new entry was needed but could not be allocated;
+ * nothing is inserted in that case.
+ * NOTE(review): callers should treat 0 as failure -- confirm that
+ * they do; previously this path dereferenced a NULL pointer.
+ *
+ * The allocation happens with the lock dropped, so the lookup is
+ * repeated ("second pass") before the new entry is linked in.
+ */
+unsigned long ocfs_inode_hash_insert(ocfs_super *osb,
+				     __u64 offset,
+				     __u64 fe_off,
+				     unsigned long ino)
+{
+	ocfs_inode_hash *h = &osb->inode_hash;
+	ocfs_inode_num *inum = NULL;
+	ocfs_inode_num *new_inum = NULL;
+	struct list_head *head;
+	int bucket;
+	int found;
+	unsigned long retval = ino;
+
+	LOG_ENTRY_ARGS("(offset = %u.%u, ino = %lu, fe_off = %u.%u)\n", 
+		       HILO(offset), ino, HILO(fe_off));
+
+again:
+	spin_lock(&h->lock);
+
+	inum = __ocfs_inode_hash_lookup(h, offset);
+	found = (inum != NULL);
+
+	/* whoa, offset better be the same! */
+	if (inum && (inum->i_off != offset))
+		BUG();
+
+	if (inum && (inum->i_fe_off != fe_off))
+		BUG();
+
+	if (inum) {
+		/* copy the number out while we still hold the lock; the
+		 * entry may be removed and freed once we drop it */
+		retval = inum->i_ino;
+	} else if (new_inum) {
+		/* (second pass) we didn't find anything, insert new one. */
+		bucket = OCFS_INODE_HASH(h, offset);
+		head = &h->hash[bucket];
+
+		list_add(&new_inum->i_list, head);
+		h->num_ents++;
+	}
+
+	spin_unlock(&h->lock);
+
+	/* if this is our first pass and we haven't found anything,
+	 * create it now and go back up to try an insert. */
+	if (!found && new_inum == NULL) {
+		new_inum = ocfs_create_inode_num();
+		if (new_inum == NULL) {
+			LOG_ERROR_STATUS(-ENOMEM);
+			retval = 0;
+			goto bail;
+		}
+		new_inum->i_ino = ino;
+		new_inum->i_off = offset;
+		new_inum->i_fe_off = fe_off;
+
+		goto again;
+	}
+
+	/* we created a new one to add, but someone added it before we
+	 * could start our second pass, so just clean up. */
+	if (found && new_inum) {
+		ocfs_free(new_inum);
+		new_inum = NULL;
+	}
+
+	if (retval != ino)
+		LOG_TRACE_ARGS("Returning a different i_ino! "
+			       "(offset = %u.%u, passed ino = %lu, "
+			       "returned = %lu\n", HILO(offset), ino, retval);
+
+bail:
+	LOG_EXIT_ULONG(retval);
+
+	return retval;
+} /* ocfs_inode_hash_insert */
+
+/* 
+ * __ocfs_hash_remove()
+ *
+ * Drop one reference on the entry for 'off'. If that was the last
+ * reference the entry is unlinked from the hash and returned so the
+ * caller can free it; otherwise NULL is returned. Asking to remove an
+ * offset that is not in the hash is fatal.
+ */
+static inline ocfs_inode_num * __ocfs_hash_remove(ocfs_inode_hash *h, 
+						  __u64 off)
+{
+	ocfs_inode_num *ent = __ocfs_inode_hash_lookup(h, off);
+
+	if (!ent) {
+		printk("Cannot remove a nonexistent inum from hash! (%u.%u)\n",
+		       HILO(off));
+
+		BUG();
+	}
+
+	/* someone else still holds a reference: nothing to free */
+	if (!atomic_dec_and_test(&ent->i_refcnt))
+		return(NULL);
+
+	list_del(&ent->i_list);
+	h->num_ents--;
+
+	return(ent);
+} /* __ocfs_hash_remove */
+
+/*
+ * ocfs_inode_hash_remove()
+ *
+ * Locked wrapper around __ocfs_hash_remove(): drops a reference on
+ * the entry for 'off' and frees the entry, after releasing the lock,
+ * if it was unlinked from the hash.
+ */
+void ocfs_inode_hash_remove(ocfs_inode_hash *h, __u64 off)
+{
+	ocfs_inode_num *victim;
+
+	LOG_ENTRY_ARGS("(off = %u.%u)\n", HILO(off));
+
+	spin_lock(&h->lock);
+	victim = __ocfs_hash_remove(h, off);
+	spin_unlock(&h->lock);
+
+	if (victim != NULL)
+		ocfs_free(victim);
+
+	LOG_EXIT();
+	return;
+} /* ocfs_inode_hash_remove */
+
+/* 
+ * ocfs_inode_rehash()
+ *
+ * Re-key an existing entry: the entry found under 'oldoff' is moved
+ * to the bucket for 'newoff' and its i_off / i_fe_off are updated.
+ * Used during rename. The entry must exist. Always returns 0.
+ *
+ * TODO: This should also take an inode argument and reset
+ * the offset on that while holding the hash lock. 
+ */
+int ocfs_inode_rehash(ocfs_inode_hash *h, 
+		      __u64 oldoff, 
+		      __u64 newoff, 
+		      __u64 new_fe_off)
+{
+	ocfs_inode_num *ent;
+	int bucket;
+	int status = 0;
+
+	LOG_ENTRY_ARGS("(oldoff = %u.%u, newoff = %u.%u, "
+		       "new_fe_off = %u.%u)\n", 
+		       HILO(oldoff), HILO(newoff), HILO(new_fe_off));
+
+	spin_lock(&h->lock);
+
+	ent = __ocfs_inode_hash_lookup(h, oldoff);
+	if (!ent)
+		BUG();
+
+	/* unlink from the old bucket ... */
+	list_del(&ent->i_list);
+
+	ent->i_off = newoff;
+	ent->i_fe_off = new_fe_off;
+
+	/* ... and relink under the new key */
+	bucket = OCFS_INODE_HASH(h, newoff);
+	list_add(&ent->i_list, &h->hash[bucket]);
+
+	spin_unlock(&h->lock);
+
+	LOG_EXIT_STATUS(status);
+	return status;
+} /* ocfs_inode_rehash */
+
+#if 0	/* compiled out: this helper is currently not built */
+/* NOTE: After a put, you can't trust 'inum', as it may have been
+ * freed. Returns true if we freed it from memory. */
+static inline int __put_inum(ocfs_inode_hash *h, ocfs_inode_num *inum)
+{
+	int free = 0;
+
+	free = atomic_dec_and_lock(&inum->i_refcnt, &h->lock);	/* nonzero => count hit 0 and h->lock is now held */
+
+	if (free) {
+		list_del(&inum->i_list);
+		h->num_ents--;
+		spin_unlock(&h->lock);
+		ocfs_free(inum);
+	}
+
+	return(free);
+}
+#endif
+
+/* 
+ * ocfs_get_inode_from_offset()
+ *
+ * Return the inode for the given disk offset, creating the hash
+ * entry (and allocating a fresh inode number) if the offset is not
+ * known yet. fe_bh is read to obtain fe->this_sector and is handed
+ * on to iget4() through the find-inode arguments. Returns NULL when
+ * no inode is wanted for this offset or when anything fails.
+ *
+ * Ok, because we don't have inode->i_sem when going into this, things
+ * are a bit tricky. Basically the kernel can call clear_inode on it
+ * while we're looking up the inode number. Clear inode will call
+ * remove, and though we've got a number, it'll have been deleted from
+ * the hash. So we up a refcount on the inode_num to avoid it being
+ * deleted during remove. This doesn't prevent the inode itself from
+ * being removed however, and we might have to recreate it. 
+ */
+struct inode *ocfs_get_inode_from_offset(ocfs_super *osb, 
+					 __u64 offset, 
+					 struct buffer_head *fe_bh)
+{
+	struct inode *inode = NULL;
+	ocfs_inode_num *inum = NULL;
+	ocfs_inode_num *new_inum = NULL;
+	struct super_block *sb = osb->sb;
+	unsigned long new_ino = 0;
+	ocfs_inode_hash *h = &(osb->inode_hash);
+	int bucket;
+	struct list_head *head;
+	ocfs_find_inode_args args;
+	__u64 fe_off;
+	ocfs_file_entry *fe;
+
+	LOG_ENTRY_ARGS("(offset = %u.%u)\n", HILO(offset));
+
+	/* This is ugly, but...
+	 * There are several cases where we may not want an inode:
+	 * 1) any time during 1st mount (root_start_off will be 0)
+	 * 2) any system file, EXCEPT the journal as JBD requires one
+	 */
+	if (osb->vol_layout.root_start_off == 0 
+	    || offset < osb->vol_layout.root_start_off) {
+		/* OHMYGODTHISISTHEUGLIESTIFEVER */
+		if (offset < (JOURNAL_FILE_BASE_ID * osb->sect_size 
+			      + osb->vol_layout.root_int_off) 
+		    || 
+		    offset >= ((JOURNAL_FILE_BASE_ID + OCFS_MAXIMUM_NODES)
+			       * osb->sect_size 
+			       + osb->vol_layout.root_int_off)) {
+			printk("skipping inode create for %u.%u\n", 
+			       HILO(offset));
+			goto bail;
+		}
+	}
+
+	/* if they ask for the root dirnode, just return it. */
+	if (offset == osb->vol_layout.root_start_off) {
+		LOG_TRACE_ARGS("Asked for root dirnode (%u.%u)\n",
+			       HILO(offset));
+
+		inode = osb->sb->s_root->d_inode;
+
+		/* should we iget it or not? i suppose if you're in
+		 * here and you've asked for the root inode you don't
+		 * know what it is and will prolly iput it later... */
+		if (inode)
+			atomic_inc(&inode->i_count);
+		goto bail;
+	}
+
+	/* if it's a directory, we want the parent fe off so get it here. */
+	fe = (ocfs_file_entry *) OCFS_BH_GET_DATA_READ(fe_bh);
+	if (!IS_VALID_FILE_ENTRY(fe)) {
+		OCFS_BH_PUT_DATA(fe_bh);
+		LOG_ERROR_STATUS(-EINVAL);
+		goto bail;
+	}
+	fe_off = fe->this_sector;
+	OCFS_BH_PUT_DATA(fe_bh);
+
+	/* this is allowed to be slow. Create the inode num 1st to
+	 * simplify stuff.*/
+	new_inum = ocfs_create_inode_num();
+	if (new_inum == NULL) {
+		LOG_ERROR_STATUS(-ENOMEM);
+		goto bail;
+	}
+
+	spin_lock(&h->lock);
+
+	inum = __ocfs_inode_hash_lookup(h, offset);
+	/* print the pointer with %p; %x is the wrong width for a
+	 * pointer on 64-bit builds */
+	LOG_TRACE_ARGS("return from lookup, inum=0x%p\n", inum);
+
+	/* if not found, insert it into hash (create new one) and inc
+	 * refcount */
+	if (!inum) {
+		inum = new_inum;
+
+		new_ino = iunique(sb, OCFS_ROOT_INODE_NUMBER);
+		inum->i_ino = new_ino;
+		inum->i_off = offset;
+		inum->i_fe_off = fe_off;
+
+		LOG_TRACE_ARGS("Allocating a new inode number, "
+			       "(offset = %u.%u, i_ino = %lu\n", 
+			       HILO(offset), new_ino);
+
+		bucket = OCFS_INODE_HASH(h, offset);
+		head = &h->hash[bucket];
+		list_add(&inum->i_list, head);
+		h->num_ents++;
+	}
+
+	/* temporary reference that keeps the entry alive across iget4 */
+	atomic_inc(&inum->i_refcnt);
+
+	spin_unlock(&h->lock);
+
+	/* an entry already existed, so the preallocated one is unused */
+	if (inum != new_inum)
+		ocfs_free(new_inum);
+
+	/* call iget4, return inode */
+	args.offset = offset;
+	args.fe_bh = fe_bh;
+	inode = iget4(sb, inum->i_ino, (find_inode_t) ocfs_find_inode, &args);
+	if (!inode || is_bad_inode (inode)) {
+		LOG_ERROR_STATUS(-EINVAL);
+		if (inode) {
+			iput(inode);
+			inode = NULL;
+		}
+		/* we want to cleanup after ourselves. */
+		atomic_dec(&inum->i_refcnt);
+		ocfs_inode_hash_remove(h, inum->i_off);
+		inum = NULL;
+	}
+
+bail:
+	/* We don't have to worry about freeing the inum after this
+	 * dec because the inode cannot have been destroyed yet (it's
+	 * still got a refcount of at least 1) */
+	if (inum)
+		atomic_dec(&inum->i_refcnt);
+
+	if (inode)
+		LOG_TRACE_ARGS("returning inode with number %lu\n", 
+			       inode->i_ino);
+
+	LOG_EXIT_PTR(inode);
+
+	return(inode);
+} /* ocfs_get_inode_from_offset */

Modified: trunk/src/heartbeat.c
===================================================================
--- trunk/src/heartbeat.c	2003-12-18 23:28:02 UTC (rev 14)
+++ trunk/src/heartbeat.c	2004-01-24 01:22:15 UTC (rev 15)
@@ -71,7 +71,7 @@
 		        }
                 }
 
-		publish = (ocfs_publish *) OCFS_BH_GET_DATA(*pub_bh);
+		publish = (ocfs_publish *) OCFS_BH_GET_DATA_WRITE(*pub_bh); /* write */
 		if ((publish->dirty) && (!osb->publish_dirty)) { 
 			LOG_TRACE_STR(("NMThread reads the bit as dirty")); 
 			publish->dirty = false; 
@@ -136,7 +136,7 @@
 	if (first_time) {
 #if !defined(USERSPACE_TOOL)
 		/* Read the last comm_seq_num */
-		publish = (ocfs_publish *) OCFS_BH_GET_DATA(bhs[osb->node_num]);
+		publish = (ocfs_publish *) OCFS_BH_GET_DATA_READ(bhs[osb->node_num]); /* read */
 		spin_lock (&OcfsGlobalCtxt.comm_seq_lock);
 		OcfsGlobalCtxt.comm_seq_num = publish->comm_seq_num + 10;
 		spin_unlock (&OcfsGlobalCtxt.comm_seq_lock);
@@ -144,7 +144,7 @@
 #endif
 		/* Refresh local buffers */
 		for (i = 0;  i < num_nodes; i++) {
-			publish = (ocfs_publish *) OCFS_BH_GET_DATA(bhs[i]);
+			publish = (ocfs_publish *) OCFS_BH_GET_DATA_READ(bhs[i]); /* read */
 			node_map->time[i] = publish->time;
 			node_map->scan_rate[i] = publish->hbm[i];
 			node_map->scan_time[i] = curr_time;
@@ -166,7 +166,7 @@
 	}
 
 	for (i = 0; i < num_nodes; i++) {
-		publish = (ocfs_publish *) OCFS_BH_GET_DATA(bhs[i]);
+		publish = (ocfs_publish *) OCFS_BH_GET_DATA_READ(bhs[i]); /* read */
 
 		/* Loop if slot is unused */
 		if (publish->time == (__u64) 0) {

Modified: trunk/src/inc/journal.h
===================================================================
--- trunk/src/inc/journal.h	2003-12-18 23:28:02 UTC (rev 14)
+++ trunk/src/inc/journal.h	2004-01-24 01:22:15 UTC (rev 15)
@@ -102,6 +102,25 @@
 	struct list_head lock_list;
 };
 
+#ifdef OCFS_PARANOID_ABORTS
+typedef struct _ocfs_journal_copyout ocfs_journal_copyout;
+struct _ocfs_journal_copyout {
+	unsigned long       blocknr; /* what block is this for? */
+	char                *data;   /* the actual data */
+};
+
+#define ocfs_handle_free_all_copyout(handle)				      \
+do {	/* free every copy-out data buffer, then the array itself */	      \
+	while ((handle)->num_co) {					      \
+		(handle)->num_co--;					      \
+		if ((handle)->co_buffs[(handle)->num_co].data)		      \
+			ocfs_free((handle)->co_buffs[(handle)->num_co].data); \
+	}								      \
+	ocfs_free((handle)->co_buffs);					      \
+	(handle)->co_buffs = NULL;					      \
+} while (0)
+#endif
+
 typedef struct _ocfs_journal_handle ocfs_journal_handle;
 struct _ocfs_journal_handle {
 	handle_t            *k_handle; /* kernel handle.                */
@@ -130,6 +149,32 @@
 	struct list_head    locks;     /* A bunch of locks to 
 					* release on commit/abort. This 
 					* should be a list_head */
+
+#ifdef OCFS_PARANOID_ABORTS
+	int                  num_co;
+	ocfs_journal_copyout *co_buffs; /* Copy-out buffers. On 1st
+					 * journal_access of a buffer
+					 * we make a copy of it into
+					 * one of these. That way if we
+					 * abort we can place the
+					 * original copy back into the
+					 * buffer. */
+#endif
+	/* The next two structures are ONLY to be used for local alloc
+	 * code. It's very, very ugly. */
+	struct _ocfs_bitmap_free_head *commit_bits; /* bits to be
+						     * freed ONLY if
+						     * we commit the
+						     * handle. */
+	struct _ocfs_bitmap_free_head *abort_bits;  /* bits to be
+						     * freed ONLY if
+						     * we abort the
+						     * handle. */
+	__u64                new_file_lockid;  /* offset for the 
+						* most recently 
+						* created file
+						* sitting on this
+						* journal handle */  
 };
 
 /* should we checkpoint this handle on commit? */
@@ -184,6 +229,7 @@
 int    ocfs_journal_wipe(ocfs_journal *journal, int full);
 int    ocfs_journal_load(ocfs_journal *journal);
 void   ocfs_recovery_thread(struct _ocfs_super *osb, int node_num);
+int    ocfs_journal_new_file_search(struct _ocfs_super *osb, __u64 lockid);
 
 /*
  *  Transaction Handling:
@@ -311,7 +357,9 @@
 			      - OCFS_JOURNAL_FUZZ_CREDITS)
 
 /* fe change, locknode change, dirnode head, times two plus a possible
- * delete, and fuzz */
+ * delete, three to fix the up_node_hdr_ptr values of any extents
+ * below the moved fe, and fuzz */
 #define OCFS_FILE_RENAME_CREDITS  (2 * (1 + 1 + 1) + OCFS_FILE_DELETE_CREDITS \
-	 			   + OCFS_JOURNAL_FUZZ_CREDITS)
+	 			   + OCFS_MAX_FILE_ENTRY_EXTENTS              \
+				   + OCFS_JOURNAL_FUZZ_CREDITS)
 #endif /* _OCFSJOURNAL_H_ */

Modified: trunk/src/inc/ocfs.h
===================================================================
--- trunk/src/inc/ocfs.h	2003-12-18 23:28:02 UTC (rev 14)
+++ trunk/src/inc/ocfs.h	2004-01-24 01:22:15 UTC (rev 15)
@@ -84,8 +84,7 @@
 #define OCFS_GCC_ATTR_PACKED	__attribute__ ((packed))
 #define OCFS_GCC_ATTR_ALIGNED	__attribute__ ((aligned(4)))
 #define OCFS_GCC_ATTR_PACKALGN	__attribute__ ((aligned(4), packed))
-#endif
-#ifdef __i386__
+#else
 #define OCFS_GCC_ATTR_PACKED
 #define OCFS_GCC_ATTR_ALIGNED
 #define OCFS_GCC_ATTR_PACKALGN
@@ -173,7 +172,13 @@
 	for (pos = (head)->next, n = pos->next; pos != (head); pos = n, n = pos->next)
 #endif
 
+#ifndef list_for_each_prev_safe
+#define list_for_each_prev_safe(pos, n, head) \
+	for (pos = (head)->prev, n = pos->prev; pos != (head); \
+		pos = n, n = pos->prev)
+#endif
 
+
 #ifdef LINUX_2_5
 #define OcfsQuerySystemTime(t)						      \
 	do {								      \
@@ -205,7 +210,7 @@
 typedef struct _ocfs_inode_private
 {
 	void *           generic_ip;
-	__u8             pad[8];
+	__u64            offset;
 //	struct list_head i_clean_buffers;
 	atomic_t         i_clean_buffer_seq;
 } ocfs_inode_private;
@@ -214,48 +219,53 @@
 #define INODE_PRIVATE_OFF    	((unsigned long)(&((struct inode *)0)->u.generic_ip))
 #define GET_INODE_CLEAN_SEQ(i)  (atomic_t *)(((unsigned long)i) + INODE_PRIVATE_OFF + CLEAN_SEQ_OFF)
 
-/* i_flags flag - heh yeah i know it's evil! */
-#define S_OCFS_OIN_VALID          256
-
-#define inode_data_is_oin(i)      (i->i_flags & S_OCFS_OIN_VALID)
-
 #define OCFS_GENERIC_IP(i)        ((ocfs_inode_private *)(&(i->u.generic_ip)))
 
-//#define GET_INODE_CLEAN_LIST(i)   (OCFS_GENERIC_IP(i)->i_clean_buffers)
-//#define EVIL_LIST_HEAD(_inode)    (&(GET_INODE_CLEAN_LIST(_inode)))
+#define inode_data_is_oin(i)      (OCFS_GENERIC_IP(i)->generic_ip != NULL)
 
-//#define GET_INODE_CLEAN_SEQ(i)    (&(OCFS_GENERIC_IP(i)->i_clean_buffer_seq))
+#define SET_INODE_OFFSET(i,o)						      \
+do {									      \
+	OCFS_GENERIC_IP(i)->offset = o;					      \
+} while (0)
 
+#define GET_INODE_OFFSET(i) OCFS_GENERIC_IP(i)->offset
 
-#define GET_INODE_OIN(i)          ((ocfs_inode *)(OCFS_GENERIC_IP(i)->generic_ip))
+#define CLEAR_INODE_OIN(i)	/* store to the field, not to a cast */	      \
+do {									      \
+	OCFS_GENERIC_IP(i)->generic_ip = NULL;				      \
+} while (0)
 
-#define SET_INODE_OFFSET(i,o)     do { \
-                                      i->i_flags     &= ~S_OCFS_OIN_VALID; \
-				      GET_INODE_OIN(i)= (void *)HI(o); \
-                                      i->i_ino        = LO(o); \
-                                  } while (0)
+#define SET_INODE_OIN(i,o)	/* store to the field, not to a cast */	      \
+do {									      \
+	OCFS_GENERIC_IP(i)->generic_ip = (void *)(o);			      \
+} while (0)
 
-#define GET_INODE_OFFSET(i)       (__u64)((((__u64)((unsigned long)i->u.generic_ip))<<32) + \
-                                        ((__u64)i->i_ino))
+#define GET_INODE_OIN(i) ((ocfs_inode *)(OCFS_GENERIC_IP(i)->generic_ip))
 
-#define SET_INODE_OIN(i,o)        do { \
-                                      i->i_flags     |= S_OCFS_OIN_VALID; \
-				      GET_INODE_OIN(i)= (void *)o; \
-                                  } while (0)
-
 #define FIRST_FILE_ENTRY(dir)   ((char *) ((char *)dir)+OCFS_SECTOR_SIZE)
 #define FILEENT(dir,idx)        (ocfs_file_entry *) ( ((char *)dir) + \
                                 ((dir->index[idx]+1) * OCFS_SECTOR_SIZE))
-#define FILEENT_GETBH(dir,bhs,idx)    ({ \
-				          int _i = dir->index[idx]+1; \
-				          ocfs_file_entry *_ret = NULL; \
-					  if (!buffer_locked(bhs[_i])) \
-					    _ret = (ocfs_file_entry *)OCFS_BH_GET_DATA(bhs[_i]); \
-					  _ret; \
-				      })
-#define FILEENT_PUTBH(dir,bhs,idx)    OCFS_BH_PUT_DATA(bhs[(dir->index[idx]+1)])
 
 
+#define FILEENT_GETBH_WRITE(dir,bhs,idx)				      \
+({									      \
+	int _i = dir->index[idx]+1;					      \
+	ocfs_file_entry *_ret = NULL;					      \
+	_ret = (ocfs_file_entry *)OCFS_BH_GET_DATA_WRITE(bhs[_i]);	      \
+	_ret;								      \
+})
+
+#define FILEENT_GETBH(dir,bhs,idx)					      \
+({									      \
+	int _i = dir->index[idx]+1;					      \
+	ocfs_file_entry *_ret = NULL;					      \
+	_ret = (ocfs_file_entry *)OCFS_BH_GET_DATA_READ(bhs[_i]);	      \
+	_ret;								      \
+})
+
+#define FILEENT_PUTBH(dir,bhs,idx)   OCFS_BH_PUT_DATA(bhs[(dir->index[idx]+1)])
+
+
 #define  OCFS_DIR_FILENAME                 "DirFile"
 #define  OCFS_DIR_BITMAP_FILENAME          "DirBitMapFile"
 #define  OCFS_FILE_EXTENT_FILENAME         "ExtentFile"
@@ -430,7 +440,6 @@
 #define  OCFS_OIN_DELETE_ON_CLOSE                (0x00000200)
 #define  OCFS_OIN_NEEDS_DELETION                 (0x00000400)
 #define  OCFS_INITIALIZED_MAIN_RESOURCE          (0x00002000)
-#define  OCFS_INITIALIZED_PAGING_IO_RESOURCE     (0x00004000)
 #define  OCFS_OIN_INVALID                        (0x00008000)
 #define  OCFS_OIN_IN_USE                         (0x00020000)
 #define  OCFS_OIN_OPEN_FOR_DIRECTIO              (0x00100000)
@@ -529,6 +538,11 @@
 #define OCFS_NM_HEARTBEAT_TIME		500	/* in ms */
 #define OCFS_HEARTBEAT_INIT             10      /* number of NM iterations to stabilize the publish map */
 
+
+#define OCFS_BH_SEM_HASH_PRUNE_TRIGGER  50      /* trigger nm to prune the hash when list size is > this */
+#define OCFS_BH_SEM_HASH_PRUNE_MAX      20      /* nm will prune at most this many in one cycle */
+
+	
 #ifndef O_DIRECT
 #warning this depends on the architecture!
 #define O_DIRECT        040000
@@ -715,15 +729,33 @@
 	ocfs_linux_dbg_free(x);					\
 } while (0)
 
-# define ocfs_vmalloc(size)	({ void *p = vmalloc(size); \
-				   if (!p) printk("ERROR: unable to allocate %u bytes of memory\n", (size)); \
-				   p; \
-				})
+# define ocfs_vmalloc(size)						      \
+({ 									      \
+	void *p = vmalloc(size);					      \
+	if (!p)								      \
+		printk("ERROR: unable to allocate %u bytes of memory\n",      \
+			(size));					      \
+	else								      \
+		memset(p, 0, size);					      \
+	p;								      \
+})
 # define ocfs_vfree		vfree
 #elif !defined(OCFS_LINUX_MEM_DEBUG)
-# define ocfs_malloc(size)	kmalloc((size_t)(size), GFP_KERNEL)
+# define ocfs_malloc(size)						      \
+({									      \
+	void *__ptr = kmalloc((size_t)(size), GFP_KERNEL);		      \
+	if (__ptr)							      \
+		memset(__ptr, 0, size);					      \
+	__ptr;								      \
+})
 # define ocfs_free              kfree
-# define ocfs_vmalloc(size)	vmalloc(size)
+# define ocfs_vmalloc(size)						      \
+({									      \
+	void *__ptr = vmalloc((size_t)(size));				      \
+	if (__ptr)							      \
+		memset(__ptr, 0, size);					      \
+	__ptr;								      \
+})
 # define ocfs_vfree		vfree
 #endif				/* ! defined(OCFS_MEM_DBG) */
 
@@ -1173,13 +1205,16 @@
 	__u32 lohi[2];
 } my_timing_t;
 
-#define IO_FUNC_TIMING_DECL		my_timing_t begin, end;  \
-					rdtsc (begin.lohi[0], begin.lohi[1]);
-#define IO_FUNC_TIMING_PRINT(_fn,_ret)	rdtsc (end.lohi[0], end.lohi[1]); \
-					IF_LEVEL_NO_CONTEXT(OCFS_DEBUG_LEVEL_TIMING) \
-						printk("(%d) EXIT : %s() = %d  => [%u.%u]\n",\
-					       		ocfs_getpid(), _fn, \
-							_ret, HILO(end.q-begin.q));
+#define IO_FUNC_TIMING_DECL	my_timing_t begin, end;	rdtsc (begin.lohi[0], begin.lohi[1]); 
+
+#define IO_FUNC_TIMING_PRINT(_fn,_ret)					      \
+	do {								      \
+		rdtsc (end.lohi[0], end.lohi[1]);			      \
+		IF_LEVEL_NO_CONTEXT(OCFS_DEBUG_LEVEL_TIMING)		      \
+			printk("(%d) EXIT : %s() = %d  => [%u.%u]\n",	      \
+				ocfs_getpid(), _fn, _ret,		      \
+				HILO(end.q-begin.q));			      \
+	} while(0)		      
 #else
 #define IO_FUNC_TIMING_DECL
 #define IO_FUNC_TIMING_PRINT(_fn,_ret)
@@ -1502,6 +1537,24 @@
 }
 HASHTABLE;
 
+enum {
+	OCFS_BH_SEM_GOT_LOCK,
+	OCFS_BH_SEM_WAIT_ON_MODIFY,
+};
+
+typedef struct _ocfs_bh_sem
+{
+	struct semaphore s_sem;
+	struct list_head s_list;
+	unsigned long s_blocknr;
+	kdev_t s_dev;
+	atomic_t s_refcnt;
+	struct buffer_head *s_bh;
+	wait_queue_head_t s_wait;
+	pid_t s_pid;
+} ocfs_bh_sem;
+
+
 typedef struct _ocfs_vol_disk_hdr		   // CLASS
 {
 	__u32 minor_version;                       // NUMBER RANGE(0,UINT_MAX)
@@ -1762,26 +1815,24 @@
 struct _ocfs_inode
 {
 	ocfs_obj_id obj_id;
-	__s64 alloc_size;
 	struct inode *inode;
+	struct _ocfs_super *osb;	/* ocfs_inode belongs to this volume */
+	struct list_head needs_flush_list;
+	struct list_head recovery_list;
 	ocfs_sem main_res;
-	ocfs_sem paging_io_res;
 	ocfs_lock_res *lock_res;
+	ocfs_extent_map map;
+	__s64 alloc_size;
 	__u64 file_disk_off;	/* file location on the volume */
 	__u64 dir_disk_off;	/* for dirs, offset to dirnode structure */
 	__u64 chng_seq_num;
 	__u64 parent_dirnode_off;	/* from the start of vol */
-	ocfs_extent_map map;
-	struct _ocfs_super *osb;	/* ocfs_inode belongs to this volume */
+	__u32 open_hndl_cnt;
 	__u32 oin_flags;
-	struct list_head next_ofile;	/* list of all ofile(s) */
-	__u32 open_hndl_cnt;
 	bool needs_verification;
 	bool cache_enabled;
-	struct list_head needs_flush_list;
 	bool in_needs_flush_list;
 	bool journal_inode;    /* is this the journal oin? */
-	struct list_head recovery_list;
 };
 
 typedef enum _ocfs_vol_state
@@ -1846,6 +1897,13 @@
 
 struct _ocfs_bitmap_free_head;
 
+typedef struct _ocfs_inode_hash {
+	spinlock_t        lock;     /* protects the whole hash */
+	int               size;     /* number of lists in the hash */
+	int               num_ents; /* global number of offsets in there */
+	struct list_head  *hash; 
+} ocfs_inode_hash;
+
 /*
  * ocfs_super
  *
@@ -1932,9 +1990,9 @@
 	struct list_head lock_recovery_lists[OCFS_MAXIMUM_NODES];
 	__u64 last_publ_seq_num[OCFS_MAXIMUM_NODES];
 	bool have_local_alloc;
-	/* These two are protected by the trans_lock. */
+	/* Protected by the trans_lock. */
 	struct buffer_head *local_alloc_bh;
-	struct _ocfs_bitmap_free_head *alloc_free_head;
+	ocfs_inode_hash inode_hash;
 };
 
 typedef struct _ocfs_comm_info
@@ -1956,6 +2014,7 @@
 	kmem_cache_t *fe_cache;
 	kmem_cache_t *lockres_cache;
 	kmem_cache_t *extent_cache;
+	kmem_cache_t *bh_sem_cache;
 	__u32 flags;
 	__u32 pref_node_num;		/* preferred... osb has the real one */
 	ocfs_guid guid;			/* uniquely identifies a node */
@@ -1970,6 +2029,10 @@
         struct list_head item_list;
 #endif
 	atomic_t cnt_lockres;		/* count of allocated lockres */
+	struct list_head *bh_sem_hash;
+	spinlock_t bh_sem_hash_lock;
+	int bh_sem_hash_sz;
+	atomic_t bh_sem_hash_target_bucket;
 }
 ocfs_global_ctxt;
 
@@ -2490,7 +2553,7 @@
 #endif				/* !USERSPACE_TOOL */
 
 
-#include "ocfsio.h"
+#include "io.h"
 
 #define OCFS_FE_CACHE_FLAGS(__osb, __fe)				  \
 ({									  \

Modified: trunk/src/inc/proto.h
===================================================================
--- trunk/src/inc/proto.h	2003-12-18 23:28:02 UTC (rev 14)
+++ trunk/src/inc/proto.h	2004-01-24 01:22:15 UTC (rev 15)
@@ -25,7 +25,7 @@
 
 int ocfs_create_log_extent_map (ocfs_super * osb, __u64 diskOffset, __u64 ByteCount);
 int ocfs_write_map_file (ocfs_super * osb);
-int ocfs_extend_system_file (ocfs_super * osb, __u32 FileId, __u64 FileSize, struct buffer_head *fe_bh, ocfs_journal_handle *handle);
+int ocfs_extend_system_file (ocfs_super * osb, __u32 FileId, __u64 FileSize, struct buffer_head *fe_bh, ocfs_journal_handle *handle, bool zero);
 
 
 void ocfs_extent_map_init (ocfs_extent_map * map);
@@ -55,7 +55,7 @@
 
 
 int ocfs_find_inode (struct inode *inode, unsigned long ino, void *opaque);
-void ocfs_populate_inode (struct inode *inode, ocfs_file_entry *fe, umode_t mode, void *genptr);
+void ocfs_populate_inode (struct inode *inode, ocfs_file_entry *fe, umode_t mode, void *genptr, bool create_ino);
 void ocfs_read_locked_inode (struct inode *inode, ocfs_file_entry *entry);
 void ocfs_read_inode2 (struct inode *inode, void *opaque);
 void ocfs_read_inode (struct inode *inode);
@@ -81,6 +81,23 @@
 int ocfs_hash_get (HASHTABLE * ht, void *key, __u32 keylen, void **val, __u32 * vallen);
 int ocfs_hash_add (HASHTABLE * ht, void *key, __u32 keylen, void *val, __u32 vallen, void **found, __u32 *foundlen);
 void ocfs_hash_stat (HASHTABLE * ht, char *data, __u32 datalen);
+int ocfs_bh_sem_hash_init(void);
+int ocfs_bh_sem_hash_destroy(void);
+int ocfs_bh_sem_hash_prune(void);
+int ocfs_bh_sem_hash_prune_all(void);
+int ocfs_bh_sem_lock(struct buffer_head *bh);
+int ocfs_bh_sem_lock_modify(struct buffer_head *bh);
+int ocfs_bh_sem_unlock(struct buffer_head *bh);
+int ocfs_bh_sem_hash_cleanup_pid(pid_t pid);
+void ocfs_bh_sem_up(ocfs_bh_sem *sem);
+void ocfs_bh_sem_down(ocfs_bh_sem *sem);
+void ocfs_bh_sem_put(ocfs_bh_sem *sem);
+void ocfs_bh_sem_get(ocfs_bh_sem *sem);
+void ocfs_bh_sem_free(ocfs_bh_sem *sem);
+ocfs_bh_sem * ocfs_bh_sem_alloc(void);
+ocfs_bh_sem * ocfs_bh_sem_lookup(struct buffer_head *bh);
+void wait_on_buffer_modified(struct buffer_head * bh);
+void ocfs_clear_buffer_modified(struct buffer_head *bh);
 
 
 void ocfs_version_print (void);
@@ -121,7 +138,6 @@
 void ocfs_update_publish_map (ocfs_super * osb, struct buffer_head *bhs[], bool first_time);
 
 
-struct inode * ocfs_get_inode_from_bh(ocfs_super * osb, struct buffer_head *bh);
 int ocfs_recv_thread (void *unused);
 int ocfs_volume_thread (void *arg);
 int ocfs_init_udp_sock (struct socket **send_sock, struct socket **recv_sock);
@@ -148,7 +164,6 @@
 
 int ocfs_init_system_file (ocfs_super * osb, __u32 file_id, char *filename);
 int ocfs_read_system_file (ocfs_super * osb, __u32 FileId, struct buffer_head *bhs[], __u64 Length, __u64 Offset);
-int ocfs_write_system_file (ocfs_super * osb, __u64 FileId, struct buffer_head *bhs[], __u64 Length, __u64 Offset);
 int ocfs_get_system_file_size (ocfs_super * osb, __u32 FileId, __u64 * Length, __u64 * AllocSize);
 __u64 ocfs_file_to_disk_off (ocfs_super * osb, __u32 FileId, __u64 Offset);
 
@@ -204,3 +219,23 @@
 			       struct buffer_head **local_alloc_bh, bool sync);
 int ocfs_find_space(ocfs_super * osb, __u64 file_size, __u64 * cluster_off, __u64 * cluster_count, bool sysfile, ocfs_journal_handle *handle);
 int ocfs_recover_local_alloc(ocfs_super *osb, int node_num);
+
+void ocfs_end_buffer_io_sync(struct buffer_head *bh, int uptodate);
+
+int ocfs_inode_hash_init(ocfs_super *osb);
+void ocfs_inode_hash_destroy(ocfs_inode_hash *h);
+
+unsigned long ocfs_inode_hash_insert(ocfs_super *osb,
+				     __u64 offset,
+				     __u64 fe_off,
+				     unsigned long ino);
+void ocfs_inode_hash_remove(ocfs_inode_hash *h, __u64 off);
+int ocfs_inode_rehash(ocfs_inode_hash *h, 
+		      __u64 oldoff, 
+		      __u64 newoff, 
+		      __u64 new_fe_off);
+struct inode *ocfs_get_inode_from_offset(ocfs_super *osb, __u64 offset, 
+					 struct buffer_head *fe_bh);
+unsigned long ocfs_inode_hash_lookup(ocfs_inode_hash *h, 
+				     __u64 offset, 
+				     __u64 *fe_off);

Modified: trunk/src/inode.c
===================================================================
--- trunk/src/inode.c	2003-12-18 23:28:02 UTC (rev 14)
+++ trunk/src/inode.c	2004-01-24 01:22:15 UTC (rev 15)
@@ -24,7 +24,13 @@
 #endif
 #endif  /* version >= 2.4.10 */
 
+#ifdef AIO_ENABLED
+int ocfs_kvec_read(struct file *file, kvec_cb_t cb, size_t size, loff_t pos); 
+int ocfs_kvec_write(struct file *file, kvec_cb_t cb, size_t size, loff_t pos); 
+int ocfs_kvec_rw(struct file *filp, int rw, kvec_cb_t cb, size_t size, loff_t pos);
+#endif /* AIO_ENABLED */
 
+
 static struct address_space_operations ocfs_aops = {
 	.readpage = ocfs_readpage,
 	.writepage = ocfs_writepage,
@@ -130,7 +136,7 @@
 		goto bail;
 	}
 	
-	fe = (ocfs_file_entry *) OCFS_BH_GET_DATA(args->fe_bh);
+	fe = (ocfs_file_entry *) OCFS_BH_GET_DATA_READ(args->fe_bh); /* read */
 	if (S_ISDIR (inode->i_mode)) {
 		LOG_TRACE_STR ("find_inode -> S_ISDIR");
 		if (fe->extents[0].disk_off != fileOff) {
@@ -179,8 +185,9 @@
 	}
 	oin = NULL;		/* set it back to our current OIN if we have one */
 	if (inode_data_is_oin (inode))
-		oin = ((ocfs_inode *)inode->u.generic_ip);
-	ocfs_populate_inode (inode, fe, mode, oin);
+		oin = GET_INODE_OIN(inode);
+
+	ocfs_populate_inode (inode, fe, mode, oin, false);
 #endif /* REPOPULATE_INODE */
 
 	ret = 1;
@@ -197,18 +204,26 @@
  * ocfs_populate_inode()
  *
  */
-void ocfs_populate_inode (struct inode *inode, ocfs_file_entry *fe, umode_t mode, void *genptr)
+void ocfs_populate_inode (struct inode *inode, ocfs_file_entry *fe, umode_t mode, void *genptr, bool create_ino)
 {
 	struct super_block *sb;
 	ocfs_super *osb;
-	__u64 offset;
+	__u64 offset, fe_off;
+	unsigned long uniq_ino;
 
 	LOG_ENTRY_ARGS ("(0x%08x, %u, size:%u)\n", inode, mode, fe->file_size);
 
 	sb = inode->i_sb;
 	osb = (ocfs_super *) OCFS_GENERIC_SB_P(sb);
+	fe_off = fe->this_sector;
 	offset = S_ISDIR (mode) ? fe->extents[0].disk_off : fe->this_sector;
 
+	if (!IS_VALID_FILE_ENTRY(fe)) {
+		printk("ocfs: invalid file entry!\n");
+
+		BUG();
+	}
+
 	OCFS_SET_INODE_DEV(sb, inode);
 	inode->i_mode = mode;
 	inode->i_uid = fe->uid;
@@ -222,10 +237,22 @@
 	OCFS_SET_INODE_TIME(inode, i_mtime, fe->modify_time);
 	OCFS_SET_INODE_TIME(inode, i_ctime, fe->create_time);
 	if (genptr)
-		SET_INODE_OIN (inode, genptr);
+		SET_INODE_OIN(inode, genptr);
 	else
-		SET_INODE_OFFSET (inode, offset);
+		CLEAR_INODE_OIN(inode);
 
+	SET_INODE_OFFSET(inode, offset);
+
+	if (create_ino) {
+		uniq_ino = iunique(sb, OCFS_ROOT_INODE_NUMBER);
+		uniq_ino = ocfs_inode_hash_insert(osb, offset, fe_off, 
+						  uniq_ino);
+		inode->i_ino = uniq_ino;
+	}
+	LOG_TRACE_ARGS("offset = %u.%u, ino = %lu, create_ino = %s\n",
+		       HILO(offset), inode->i_ino, 
+		       create_ino ? "true" : "false");
+
 	switch (inode->i_mode & S_IFMT) {
 	    case S_IFREG:
 		    atomic_set(GET_INODE_CLEAN_SEQ(inode), atomic_read(&osb->clean_buffer_seq));
@@ -299,6 +326,7 @@
 		inode->i_uid = osb->vol_layout.uid;
 		inode->i_gid = osb->vol_layout.gid;
 		SET_INODE_OIN (inode, osb->oin_root_dir);
+		SET_INODE_OFFSET(inode, osb->vol_layout.root_start_off);
 		goto bail;
 	}
 
@@ -339,7 +367,7 @@
 		    mode |= S_IFREG;
 		    break;
 	}
-	ocfs_populate_inode (inode, entry, mode, newoin);
+	ocfs_populate_inode (inode, entry, mode, newoin, false);
 
 bail:
 	LOG_EXIT ();
@@ -389,6 +417,7 @@
 		inode->i_uid = osb->vol_layout.uid;
 		inode->i_gid = osb->vol_layout.gid;
 		SET_INODE_OIN (inode, osb->oin_root_dir);
+		SET_INODE_OFFSET(inode, osb->vol_layout.root_start_off);
 		goto bail;
 	}
 
@@ -399,7 +428,7 @@
 
 	args = (ocfs_find_inode_args *) opaque;
 	newoin = NULL;
-	fe = (ocfs_file_entry *) OCFS_BH_GET_DATA(args->fe_bh);
+	fe = (ocfs_file_entry *) OCFS_BH_GET_DATA_READ(args->fe_bh); /* read */
 
 	mode = fe->prot_bits;
 
@@ -429,7 +458,7 @@
 		    mode |= S_IFREG;
 		    break;
 	}
-	ocfs_populate_inode (inode, fe, mode, newoin);
+	ocfs_populate_inode (inode, fe, mode, newoin, false);
 
 bail:
 	if (fe)
@@ -487,7 +516,7 @@
 
 	if (S_ISDIR (inode->i_mode)) {
 		LOG_TRACE_STR ("find_actor -> S_ISDIR\n");
-		fe = OCFS_BH_GET_DATA(args->fe_bh);
+		fe = OCFS_BH_GET_DATA_READ(args->fe_bh); /* read */
 		if (fe->extents[0].disk_off != fileOff) {
 			LOG_TRACE_ARGS
 			    ("DIR : inode number same but full offset does not match: %u.%u != %u.%u\n",
@@ -502,7 +531,7 @@
 	}
 
 	ret = 1;
-      bail:
+bail:
 	if (fe)
 		OCFS_BH_PUT_DATA(args->fe_bh);
 	LOG_EXIT_LONG (ret);
@@ -570,7 +599,7 @@
 	if (inode->i_state & I_NEW) {
 		LOG_TRACE_STR("Inode was not in inode cache, reading it.");
 		if (args)
-			fe = OCFS_BH_GET_DATA(args->fe_bh);
+			fe = OCFS_BH_GET_DATA_READ(args->fe_bh); /* read */
 		ocfs_read_locked_inode(inode, fe);
 		if (args)
 			OCFS_BH_PUT_DATA(fe_bh);
@@ -591,7 +620,7 @@
  */
 void ocfs_put_inode (struct inode *inode)
 {
-	LOG_ENTRY_ARGS ("(0x%08x)\n", inode);
+	LOG_ENTRY_ARGS ("(0x%08x, inode_i_ino=%lu)\n", inode, inode->i_ino);
 	LOG_TRACE_ARGS ("put_inode: count=%d\n", inode->i_count);
 	if (inode_data_is_oin(inode) && (atomic_read (&inode->i_count) == 1) ) {
 	     ocfs_inode *oin;
@@ -620,12 +649,16 @@
  */
 void ocfs_clear_inode (struct inode *inode)
 {
+	__u64 offset;
+	ocfs_super *osb;
+
 	LOG_ENTRY();
 
 	if (inode) {
+		ocfs_linux_get_inode_offset(inode, &offset, NULL);
+
 		if (inode_data_is_oin (inode)) {
 			ocfs_inode *oin;
-			ocfs_super *osb;
 
 			LOG_TRACE_STR ("inode with oin : clear inode");
 
@@ -651,7 +684,6 @@
 		} else {
 			__u64 fileOff;
 			ocfs_lock_res *lockres = NULL;
-			ocfs_super *osb;
 
 			osb = (ocfs_super *) OCFS_GENERIC_SB_P(inode->i_sb);
 
@@ -675,8 +707,12 @@
 				LOG_TRACE_STR ("Could not find offset");
 			}
 		}
+		/* we may be called after unmount, in which case
+		 * don't do this. */
+		if (osb->inode_hash.size)
+			ocfs_inode_hash_remove(&osb->inode_hash, offset);
 	}
-      bail:
+bail:
 	LOG_EXIT ();
 	return;
 }				/* ocfs_clear_inode */
@@ -827,7 +863,7 @@
 		LOG_ERROR_STATUS (status);
 		goto bail;
 	}
-	fe = (ocfs_file_entry *)OCFS_BH_GET_DATA(bh);
+	fe = (ocfs_file_entry *)OCFS_BH_GET_DATA_READ(bh); /* read */
 
 	if (!IS_VALID_FILE_ENTRY (fe)) {
 		OCFS_BH_PUT_DATA(bh);
@@ -845,8 +881,6 @@
 	map_bh(bh_result, inode->i_sb, (fe->extents[0].disk_off >> 9) + iblock);
 	OCFS_BH_PUT_DATA(bh);
 
-	if (create)
-		bh_result->b_state |= (1UL << BH_New);
 	err = 0;
 
 bail:
@@ -918,17 +952,8 @@
 		goto bail;
 	}
 
-	if (create) {
-		LOG_TRACE_ARGS ("CREATE: offset: %u -> block#: %d\n", iblock,
-				lbo >> inode->i_sb->s_blocksize_bits);
-		bh_result->b_state |= (1UL << BH_New);
-	}
-
 	map_bh(bh_result, inode->i_sb, lbo >> inode->i_sb->s_blocksize_bits);
 
-	if (create)
-		bh_result->b_state |= (1UL << BH_New);
-
 	err = 0;
 
 	if (bh_result->b_blocknr == 0) {
@@ -1052,7 +1077,7 @@
 				HILO(vbo), HILO(lbo), len, oin->file_disk_off);
 	}
 
-      bail:
+bail:
 	if (err < 0)
 		err = -EIO;
 	LOG_EXIT_LONG (err);
@@ -1532,8 +1557,6 @@
 
 
 #ifdef AIO_ENABLED
-static int ocfs_kvec_rw(struct file *filp, int rw, kvec_cb_t cb, size_t size, loff_t pos);
-
 int ocfs_kvec_read(struct file *file, kvec_cb_t cb, size_t size, loff_t pos) 
 {
 	return ocfs_kvec_rw(file, READ, cb, size, pos);

Modified: trunk/src/ioctl.c
===================================================================
--- trunk/src/ioctl.c	2003-12-18 23:28:02 UTC (rev 14)
+++ trunk/src/ioctl.c	2004-01-24 01:22:15 UTC (rev 15)
@@ -63,7 +63,7 @@
 		    break;
 	}
 
-      exit_ioctl:
+exit_ioctl:
 	LOG_EXIT_LONG (ret);
 	return ret;
 }				/* ocfs_ioctl */

Modified: trunk/src/journal.c
===================================================================
--- trunk/src/journal.c	2003-12-18 23:28:02 UTC (rev 14)
+++ trunk/src/journal.c	2004-01-24 01:22:15 UTC (rev 15)
@@ -1,5 +1,5 @@
 /*
- * ocfsjournal.c
+ * journal.c
  *
  * Defines functions of journalling api
  *
@@ -21,7 +21,7 @@
  * Boston, MA 021110-1307, USA.
  *
  * Authors: Kurt Hackel, Sunil Mushran, Manish Singh, Wim Coekaerts,
- *          Mark Fasheh
+ *          Mark Fasheh, Joel Becker
  */
 
 #include <ocfs.h>
@@ -66,19 +66,31 @@
 
 	retval->buffs = ocfs_malloc(sizeof(struct buffer_head *) * max_buffs);
 	if (!retval->buffs) {
-		LOG_ERROR_STR("Failed to allocate memory for journal buffers!");
+		LOG_ERROR_STR("Failed to allocate memory for journal buffs!");
 		goto done_free;
 	}
 	memset(retval->buffs, 0, sizeof(struct buffer_head *) * max_buffs);
 
+#ifdef OCFS_PARANOID_ABORTS
+	retval->co_buffs = ocfs_malloc(sizeof(ocfs_journal_copyout)*max_buffs);
+	if (!retval->co_buffs) {
+		LOG_ERROR_STR("Failed to allocate memory for journal co_buffs!");
+		goto done_free;
+	}
+	memset(retval->co_buffs, 0, sizeof(ocfs_journal_copyout) * max_buffs);
+#endif
 	spin_lock_init(&(retval->list_lock));
 	INIT_LIST_HEAD(&(retval->h_list));
 	INIT_LIST_HEAD(&(retval->locks));
 	retval->max_buffs = max_buffs;
 	retval->num_buffs = 0;
 	retval->num_locks = 0;
+#ifdef OCFS_PARANOID_ABORTS
+	retval->num_co = 0;
+#endif
 	retval->journal = &osb->journal;
 	retval->osb = osb;
+	retval->commit_bits = retval->abort_bits = NULL;
 
 	/* actually start the transaction now */
 	retval->k_handle = journal_start(journal, max_buffs);
@@ -114,10 +126,12 @@
 	return(NULL);
 }  /*  ocfs_start_trans  */
 
+#define OCFS_JOURNAL_CHECKPOINT_RETRIES 3
 static int ocfs_checkpoint_handle(ocfs_journal_handle *handle) 
 {
 	int retval = 0;
 	ocfs_super *osb = NULL;
+	int i;
 
 	LOG_ENTRY();
 
@@ -126,10 +140,15 @@
 	if (!handle->num_buffs)
 		goto done;
 
-	retval = ocfs_write_bhs(osb, handle->buffs, handle->num_buffs, 
-				OCFS_BH_IGNORE_JBD, NULL);
-	if (retval < 0)
-		LOG_ERROR_STR("Error checkpointing handle.");
+	/* Retry the checkpoint up to OCFS_JOURNAL_CHECKPOINT_RETRIES times */
+	for (i = 0; i < OCFS_JOURNAL_CHECKPOINT_RETRIES; i++) {
+		retval = ocfs_write_bhs(osb, handle->buffs, handle->num_buffs, 
+					OCFS_BH_IGNORE_JBD, NULL);
+		if (retval < 0)
+			LOG_ERROR_STATUS(retval);
+		else
+			break;
+	}
 
 done:
 
@@ -145,7 +164,8 @@
  * 
  * The call to journal_revoke does a brelse. It also winds up removing
  * the journal_head from the buffer, and therefore the JBD bit is no
- * longer set.
+ * longer set. We do a get_bh before calling journal_revoke so that
+ * the count doesn't change.
  */
 static int ocfs_revoke_handle(ocfs_journal_handle *handle) 
 {
@@ -176,6 +196,8 @@
 	for(i = 0; i < handle->num_buffs; i++) {
 		bh = handle->buffs[i];
 
+		get_bh(bh); /* want to keep this around after the revoke */
+
 		retval = journal_revoke(new_handle, bh->b_blocknr, bh);
 		if (retval < 0) {
 			LOG_ERROR_STR("Could not revoke buffer!");
@@ -236,7 +258,36 @@
 	return(status);
 }
 
+/*
+ * ocfs_journal_new_file_search: returns 0 if a handle on the journal's
+ * commited list has new_file_lockid == lockid, -ENOENT otherwise.
+ */
+int ocfs_journal_new_file_search(ocfs_super *osb, __u64 lockid)
+{
+	ocfs_journal_handle *handle = NULL;
+	ocfs_journal *journal = &osb->journal;
+	int status = -ENOENT;
+	struct list_head *p1;
+	LOG_ENTRY_ARGS("(%u.%u)\n", HILO(lockid));
 
+	/* This can be called early in the first mount, so make sure
+	 * that we have a root_start_off before searching. */
+	if (osb->vol_layout.root_start_off != 0 &&
+	    lockid >= osb->vol_layout.root_start_off) {
+		down(&journal->commit_sem);
+		list_for_each(p1, &(journal->commited)) {
+			handle = list_entry(p1, ocfs_journal_handle, h_list);
+			if (handle->new_file_lockid == lockid) {
+				status = 0;
+				break;
+			}
+		}
+		up(&journal->commit_sem);
+	}
+	LOG_EXIT_STATUS(status);
+	return status;
+}
+
 /* This for loop is for debug purposes. Basically we want to check the
  * BH_JBD bit on our buffers. If the handle was checkpointed, then
  * none of them should have that bit set after the revoke
@@ -253,9 +304,8 @@
 } while (0)
 
 /*
-  Should this function also mark the buffers dirty (journal_dirty_*)
-  or should we expect a higher layer to be doing that?
-*/
+ * ocfs_commit_trans
+ */
 int ocfs_commit_trans(ocfs_journal_handle * handle) 
 {
 	ocfs_super *osb;
@@ -263,7 +313,6 @@
 	transaction_t *kern_trans;
 	int retval = 0, i;
 	struct buffer_head *bh;
-	bool revoked = false;
 	bool checkpoint, sync;
 	ocfs_journal *journal;
 
@@ -291,12 +340,16 @@
 	else
 		kern_handle->h_sync = 0;
 
+	for(i = 0; i < handle->num_buffs; i++)
+		check_rootdir_overwrite(handle->buffs[i]);
+
 	/* actually stop the transaction. if we've set h_sync,
 	 * it'll have been commited when we return */
 	retval = journal_stop(kern_handle);
 	if (retval < 0) {
 		LOG_ERROR_STATUS(retval);
-		goto done;
+		LOG_ERROR_STR("Could not commit transaction");
+		BUG();
 	}
 
 	/* for now we manually checkpoint and force out our revoke
@@ -311,7 +364,7 @@
 		retval = ocfs_checkpoint_handle(handle);
 		if (retval < 0) {
 			LOG_ERROR_STR("Could not checkpoint transaction!");
-			goto done;
+			BUG();
 		}
 
 		/* revoke from buffer_head list, commit revoke records */
@@ -319,31 +372,25 @@
 		if (retval < 0) {
 			LOG_ERROR_STR("Could not completely revoke "
 				      "transaction!");
-			goto done;
+			BUG();
 		}
-
-		revoked = true;
-	} else {
-
-		/* we'll want to get rid of the buffers now as
-		 * journal_flush does the other work for us, so leave
-		 * revoked to false. */
-	}
-
-
-done:
-	if (!revoked) {
-		/* usually the journal_revoke in ocfs_revoke_handle
-		 * will brelse the buffers for us, but if we aren't
-		 * checkpointing this handle, or we've gotten here
-		 * because of error then we have to do it manually. */
+	} else { 
+		/* If we're not checkpointing, we have to be careful
+		 * to also clear the modified bits. */
 		for(i = 0; i < handle->num_buffs; i++) {
 			bh = handle->buffs[i];
 			handle->buffs[i] = NULL;
-			brelse(bh);
+
+			ocfs_clear_buffer_modified(bh);
 		}
 	}
 
+/* done: */
+	for(i = 0; i < handle->num_buffs; i++) {
+		bh = handle->buffs[i];
+		brelse(bh);
+	}
+
 	down(&journal->commit_sem);
 	journal->curr = NULL;
 
@@ -355,8 +402,6 @@
 		retval = ocfs_journal_release_locks(handle, 0);
 		if (retval < 0)
 			LOG_ERROR_STATUS(retval);
-
-
 	} else {
 		/* If we're not going to checkpoint the handle on
 		 * commit then we need to add it to our journals list
@@ -366,6 +411,10 @@
 		up(&journal->commit_sem);
 	}
 
+#ifdef OCFS_PARANOID_ABORTS
+	/* At this point, we don't need the copyout buffers. */
+	ocfs_handle_free_all_copyout(handle);
+#endif
 	/* we don't free the kernel handle because jbd has freed it. */
 	if (handle->buffs) {
 		ocfs_free(handle->buffs);
@@ -376,13 +425,21 @@
 	/* This has to happen after we release the other locks. */
 	ocfs_release_trans_lock(osb);
 
+	if (handle->commit_bits && (retval == 0)) {
+		if (!sync)
+			BUG();
+		ocfs_process_bitmap_free_head(osb, handle->commit_bits);
+	}
+	free_bitmap_free_head(handle->commit_bits);
+	free_bitmap_free_head(handle->abort_bits);
+
 	if (checkpoint)
 		ocfs_free(handle);
 
 	LOG_EXIT_STATUS(retval);
 
 	return(retval);
-}
+} /* ocfs_commit_trans */
 
 /*
  * ocfs_abort_trans
@@ -393,10 +450,17 @@
 	ocfs_super *osb = NULL;
 	int i;
 	int retval;
+	ocfs_journal * journal = NULL;
+#ifdef OCFS_PARANOID_ABORTS
+	int j;
+	ocfs_journal_copyout *co = NULL;
+	char *data;
+#endif
 
 	LOG_ENTRY();
 
 	osb = handle->osb;
+	journal = &osb->journal;
 
 	/* There is a potential bug here which we may have to
 	 * resolve. What if you do a get_write_access on a buffer,
@@ -408,6 +472,32 @@
 	 * though it were clean, even though it contains aborted
 	 * data!*/
 
+#ifdef OCFS_PARANOID_ABORTS
+	/* Ok, we're aborting. For all dirtied buffers, copy our old
+	 * data back in. This should reverse what happened during the
+	 * transaction and revert us back.*/
+	for(i = 0; i < handle->num_buffs; i++) {
+		bh = handle->buffs[i];
+		
+		/* find the copyout. */
+		co = NULL;
+		for(j = 0; j < handle->num_co; j++)
+			if (handle->co_buffs[j].blocknr == bh->b_blocknr) {
+				co = &(handle->co_buffs[j]);
+				break;
+			}
+
+		if (co == NULL)
+			BUG();
+		LOG_TRACE_ARGS("Aborting block %lu\n", co->blocknr);
+		data = OCFS_BH_GET_DATA_WRITE(bh);
+		memcpy(data, co->data, bh->b_size);
+		OCFS_BH_PUT_DATA(bh);
+	}
+
+	/* done copying them, free it now. */
+	ocfs_handle_free_all_copyout(handle);
+#else
 	/* take all our dirtied buffers and make sure they can't be
 	 * written to disk */
 	for(i = 0; i < handle->num_buffs; i++) {
@@ -420,34 +510,70 @@
 		/* clear the uptodate and dirty flags so this never
 		 * gets written to disk inadvertantly by someone
 		 * else. */
+
 #ifdef LINUX_2_5
 		clear_buffer_uptodate(bh);
 #else
 		mark_buffer_uptodate(bh, false);
 #endif
 		clear_bit(BH_Dirty, &bh->b_state);
+
 		unlock_buffer(bh);
 		/* journal_forget will bforget the buffers for us too. */
+		get_bh(bh); /* keep a reference around so we can
+			     * reread after our journal_flush */
+
 		journal_forget(handle->k_handle, bh);
 	}
-
+#endif
 	/* want to force our handle to disk in abort case. */
 	handle->k_handle->h_sync = 1;
 
 	retval = journal_stop(handle->k_handle);
 	if (retval < 0) {
 		LOG_ERROR_STR("Could not commit aborted transaction!");
-		goto done;
+		LOG_ERROR_STATUS(retval);
 	}
+
 	handle->k_handle = NULL;
 
 	atomic_dec(&(osb->journal.num_trans));
-done:
 
+/* done: */
+
 	down(&osb->journal.commit_sem);
 	osb->journal.curr = NULL;
 	up(&osb->journal.commit_sem);
 
+	/* Ok, we now want to fill our buffers with the older (but
+	 * valid) data, instead of leaving them with the aborted
+	 * data. To do so we want to first checkpoint the valid
+	 * transactions in the journal so that we know that disk
+	 * reflects the latest correct blocks. After that, we just
+	 * repopulate the buffers from disk. */
+
+	/* journal flush here */
+	journal_lock_updates(journal->k_journal);
+	retval = journal_flush(journal->k_journal);
+	journal_unlock_updates(journal->k_journal);
+	if (retval < 0)
+		LOG_ERROR_STATUS(retval);
+
+	/* reread buffers here and then brelse them */
+	if (handle->num_buffs != 0)
+		retval = ocfs_read_bhs(osb, 
+				       handle->buffs[0]->b_blocknr * 512,
+				       handle->num_buffs * 512, 
+				       handle->buffs, 0, NULL);
+	if (retval < 0)
+		LOG_ERROR_STATUS(retval);
+
+	for(i = 0; i < handle->num_buffs; i++) {
+		ocfs_clear_buffer_modified(handle->buffs[i]);
+		brelse(handle->buffs[i]);
+	}
+
+	/* drop locks associated with the handle here. */
 	retval = ocfs_journal_release_locks(handle, 1);
 	if (retval < 0)
 		LOG_ERROR_STATUS(retval);
@@ -455,6 +581,12 @@
 	/* This has to happen after we release the other locks. */
 	ocfs_release_trans_lock(osb);
 
+	if (handle->abort_bits && (retval == 0))
+		ocfs_process_bitmap_free_head(osb, handle->abort_bits);
+
+	free_bitmap_free_head(handle->commit_bits);
+	free_bitmap_free_head(handle->abort_bits);
+
 	if (handle->buffs)
 		ocfs_free(handle->buffs);
 	ocfs_free(handle);
@@ -463,15 +595,58 @@
 	return;
 } /* ocfs_abort_trans */
 
+/*
+ * ocfs_journal_access
+ */
 int ocfs_journal_access(ocfs_journal_handle *handle, struct buffer_head *bh, int type) 
 {
 	int status = -1;
-
-	LOG_ENTRY_ARGS("(bh->b_blocknr=%lu, type=%d (\"%s\"))\n", bh->b_blocknr, 
-		       type, (type == OCFS_JOURNAL_ACCESS_CREATE) ? 
+	char *data;
+#ifdef OCFS_PARANOID_ABORTS
+	int i;
+	bool found = false;
+#endif
+	LOG_ENTRY_ARGS("(bh->b_blocknr=%lu, type=%d (\"%s\"), "
+		       "bh->b_size = %hu)\n", 
+		       bh->b_blocknr, type, 
+		       (type == OCFS_JOURNAL_ACCESS_CREATE) ? 
 		       "OCFS_JOURNAL_ACCESS_CREATE" : 
-		       "OCFS_JOURNAL_ACCESS_WRITE");
+		       "OCFS_JOURNAL_ACCESS_WRITE", bh->b_size);
 
+	/* by taking a "read" lock, we prevent anyone from doing any
+	 * IO on the buffers while in journal_get_*_access */
+	data = OCFS_BH_GET_DATA_READ(bh);
+
+#ifdef OCFS_PARANOID_ABORTS
+
+	/* search for this buffer in our copyout list. If it's already
+	 * there, we need to do nothing. Otherwise, add it to the
+	 * handle. 
+	 *
+	 * Note that we want to make a copy of the buffer on the 1st access
+	 * call as that's when we know for sure it's clean. */
+	for(i = 0; i < handle->num_co; i++)
+		if (handle->co_buffs[i].blocknr == bh->b_blocknr) {
+			found = true;
+			break;
+		}
+
+	if (!found) {
+		i = handle->num_co;
+
+		LOG_TRACE_ARGS("Copying buffer out to position %d\n", i);
+		/* This malloc should just be a slab. */
+		handle->co_buffs[i].data = ocfs_malloc(bh->b_size);
+		if (handle->co_buffs[i].data == NULL) {
+			status = -ENOMEM;
+			goto done;
+		}
+		memcpy(handle->co_buffs[i].data, data, bh->b_size);
+		handle->co_buffs[i].blocknr = bh->b_blocknr;
+		handle->num_co++;
+	}
+#endif
+
 	switch (type) {
 	case OCFS_JOURNAL_ACCESS_CREATE:
 		status = journal_get_create_access(handle->k_handle, bh);
@@ -495,11 +670,15 @@
 
 	status = 0;
 done:
+	OCFS_BH_PUT_DATA(bh);
+
 	LOG_EXIT_STATUS(status);
 	return(status);
-}
+} /* ocfs_journal_access */
 
 /* 
+ * ocfs_journal_dirty
+ *
  * We also have to add the buffer to our handles list.
  */
 int ocfs_journal_dirty(ocfs_journal_handle *handle, struct buffer_head *bh) 
@@ -535,7 +714,6 @@
 	get_bh(bh);
 	handle->buffs[i] = bh;
 	handle->num_buffs++;
-	LOG_TRACE_ARGS("Dirtied buffer at position %d\n", i);
 
 call_jbd:
 	status = journal_dirty_metadata(handle->k_handle, bh);
@@ -549,12 +727,13 @@
 		goto done;
 	}
 
+	check_rootdir_overwrite(bh);
 
 	status = 0;
 done:
 	LOG_EXIT_STATUS(status);
 	return(status);
-}
+} /* ocfs_journal_dirty */
 
 
 /* We are expecting to be run on the current running transaction, so
@@ -658,6 +837,7 @@
 	__u64 lock_id = 0;
 	ocfs_inode * oin = NULL;
 	struct buffer_head *bh = NULL;
+	__u64 alloc_size;
 
 	LOG_ENTRY();
 
@@ -683,7 +863,7 @@
 			LOG_ERROR_STR("Could not get lock on journal!");
 		goto done;
 	}
-	fe = (ocfs_file_entry *) OCFS_BH_GET_DATA(bh);
+	fe = (ocfs_file_entry *) OCFS_BH_GET_DATA_READ(bh); /* read */
 
 	/* extend the system file if we need to - it should be exactly
 	 * eight megs. */
@@ -692,18 +872,20 @@
 		fe = NULL;
 		status = ocfs_extend_system_file(osb, cleanup_file_id, 
 						 OCFS_JOURNAL_DEFAULT_SIZE, 
-						 bh, NULL);
+						 bh, NULL, false);
 		if (status < 0) {
 			LOG_ERROR_STR("Could not extend journal file!");
 			goto done;
 		}
-		fe = (ocfs_file_entry *) OCFS_BH_GET_DATA(bh);
+		fe = (ocfs_file_entry *) OCFS_BH_GET_DATA_READ(bh); /* read */
 	}
 	LOG_TRACE_ARGS("fe->file_size = %u.%u\n", HI(fe->file_size), 
 		       LO(fe->file_size));
 	LOG_TRACE_ARGS("fe->alloc_size = %u.%u\n", HI(fe->alloc_size), 
 		       LO(fe->alloc_size));
 
+	/* gonna need this later */
+	alloc_size = fe->alloc_size;
 
 	/* Ok, look up the inode for our journal */
 	args.offset = fe->this_sector;
@@ -715,9 +897,7 @@
 #ifdef LINUX_2_5
 	inode = ocfs_iget (sb, &args);
 #else
-	inode =
-		iget4 (osb->sb, LO (args.offset),
-		       (find_inode_t) ocfs_find_inode, (void *) (&args));
+	inode = ocfs_get_inode_from_offset(osb, args.offset, bh);
 #endif
 	if (inode == NULL) {
 		LOG_ERROR_STR("access error");
@@ -732,19 +912,15 @@
 		goto done;
 	}
 	LOG_TRACE_ARGS("inode->i_size = %u\n", inode->i_size);
-	
-	fe = (ocfs_file_entry *) OCFS_BH_GET_DATA(bh);
-	status = ocfs_create_new_oin(&oin, fe->alloc_size, osb);
-	status = ocfs_initialize_oin(oin, osb, 0, fe->this_sector, 0, false, NULL);
+
+	status = ocfs_create_new_oin(&oin, alloc_size, osb);
+	status = ocfs_initialize_oin(oin, osb, 0, lock_id, lock_id, false, NULL);
 	oin->journal_inode = true;
 	oin->open_hndl_cnt++;
 	SET_INODE_OIN(inode, oin);
 	LOG_TRACE_ARGS("oin->alloc_size = %u.%u\n", HI(oin->alloc_size), 
 		       LO(oin->alloc_size));
 
-	OCFS_BH_PUT_DATA(bh);
-	fe = NULL;
-
 	/* call the kernels journal init function now */
 	k_journal = journal_init_inode(inode);
 	if (k_journal == NULL) {
@@ -837,7 +1013,7 @@
 	/* release the oin here. Isn't this racy? */
 	if (inode_data_is_oin(inode)) {
 		oin = GET_INODE_OIN(inode);
-		inode->i_flags &= ~S_OCFS_OIN_VALID;
+		CLEAR_INODE_OIN(inode);
 		oin->open_hndl_cnt--;
 		ocfs_release_oin(oin, true);
 	}
@@ -970,7 +1146,7 @@
 		LOG_ERROR_STATUS (status);
 		goto done;
 	}
-	publish = (ocfs_publish *) OCFS_BH_GET_DATA(publish_bh);
+	publish = (ocfs_publish *) OCFS_BH_GET_DATA_READ(publish_bh); /* read */
 
 	retval = publish->mounted;
 
@@ -1001,7 +1177,7 @@
 		LOG_ERROR_STATUS (status);
 		goto done;
 	}
-	publish = (ocfs_publish *) OCFS_BH_GET_DATA(publish_bh);
+	publish = (ocfs_publish *) OCFS_BH_GET_DATA_WRITE(publish_bh); /* write */
 
 	/* change it */
 	publish->mounted = value;
@@ -1249,6 +1425,7 @@
 	bool recovery_lock = false;
 	struct buffer_head *config_bh = NULL;
 	ocfs_disk_node_config_info *config = NULL;
+	__u64 alloc_size;
 
 	LOG_ENTRY_ARGS("(node_num=%d, osb->node_num = %d)\n", node_num,
 		       osb->node_num);
@@ -1309,7 +1486,7 @@
 		goto done;
 	}
 
-	config = (ocfs_disk_node_config_info *) OCFS_BH_GET_DATA(config_bh);
+	config = (ocfs_disk_node_config_info *) OCFS_BH_GET_DATA_READ(config_bh); /* read */
 	if (config->journal_version < OCFS_JOURNAL_CURRENT_VERSION) {
 		OCFS_BH_PUT_DATA(config_bh);
 		LOG_ERROR_ARGS("Cannot recover node %d, it has an old journal"\
@@ -1318,8 +1495,12 @@
 	}
 	OCFS_BH_PUT_DATA(config_bh);
 
+	fe = (ocfs_file_entry *)OCFS_BH_GET_DATA_READ(bh); /* read */
+
+	/* gonna need this later */
+	alloc_size = fe->alloc_size;
+
 	/* Ok, look up the inode for our journal */
-	fe = (ocfs_file_entry *)OCFS_BH_GET_DATA(bh);
 	args.offset = fe->this_sector;
 	args.fe_bh = bh;
 	OCFS_BH_PUT_DATA(bh);
@@ -1327,9 +1508,7 @@
 #ifdef LINUX_2_5
 	inode = ocfs_iget (sb, &args);
 #else
-	inode =
-		iget4 (osb->sb, LO (args.offset),
-		       (find_inode_t) ocfs_find_inode, (void *) (&args));
+	inode = ocfs_get_inode_from_offset(osb, args.offset, bh);
 #endif
 	if (inode == NULL) {
 		LOG_ERROR_STR("access error");
@@ -1345,17 +1524,15 @@
 	}
 	LOG_TRACE_ARGS("inode->i_size = %u\n", inode->i_size);
 
-	fe = (ocfs_file_entry *) OCFS_BH_GET_DATA(bh);
-	status = ocfs_create_new_oin(&oin, fe->alloc_size, osb);
+	status = ocfs_create_new_oin(&oin, alloc_size, osb);
 	if (status < 0) {
 		LOG_ERROR_STATUS(status);
 		goto done;
 	}
-	status = ocfs_initialize_oin(oin, osb, 0, fe->this_sector, 0, false, 
+	status = ocfs_initialize_oin(oin, osb, 0, lock_id, lock_id, false, 
 				     NULL);
 	oin->journal_inode = true;
 	SET_INODE_OIN(inode, oin);
-	OCFS_BH_PUT_DATA(bh);
 
 	status = ocfs_force_read_journal(osb, inode->i_size, oin);
 	if (status < 0) {
@@ -1413,7 +1590,7 @@
 done:
 	/* close the journal file */
 	if (inode)
-		inode->i_flags &= ~S_OCFS_OIN_VALID;
+		CLEAR_INODE_OIN(inode);
 
 	if (oin)
 		ocfs_release_oin(oin, true);
@@ -1477,7 +1654,7 @@
 		goto finally;
 	}
 	
-	publish = (ocfs_publish *) OCFS_BH_GET_DATA(publish_bh);
+	publish = (ocfs_publish *) OCFS_BH_GET_DATA_WRITE(publish_bh); /* write */
 
 	publish->dirty = false;
 	publish->vote = 0;

Modified: trunk/src/namei.c
===================================================================
--- trunk/src/namei.c	2003-12-18 23:28:02 UTC (rev 14)
+++ trunk/src/namei.c	2004-01-24 01:22:15 UTC (rev 15)
@@ -17,6 +17,10 @@
 			    __u64 id2, __u32 type2, __u32 flags2, 
 			    ocfs_lock_res **res2, struct buffer_head **bh2,
 		     	    struct inode *inode2);
+static int ocfs_fix_extent_pointers(ocfs_super *osb, 
+				    ocfs_journal_handle *handle,
+				    struct buffer_head *fe_bh,
+				    struct inode *inode);
 
 static struct dentry_operations ocfs_dentry_ops = {
 	.d_revalidate = ocfs_dentry_revalidate	// let's test it out!
@@ -33,7 +37,7 @@
 	ocfs_file_entry *fe = NULL;
 	struct buffer_head *fe_bh = NULL;
 	ocfs_inode *parentOin = NULL;
-	__u64 parentOffset;
+	__u64 parentOffset, fe_off;
 	struct inode *inode = NULL;
 	struct super_block *sb = dir->i_sb;
 	struct dentry *ret;
@@ -65,17 +69,35 @@
 
 	status = ocfs_find_files_on_disk (osb, parentOffset, &(dentry->d_name), &fe_bh, NULL, dir);
 	if (status >= 0) {
-		fe = (ocfs_file_entry *) OCFS_BH_GET_DATA(fe_bh);
+		unsigned long ino;
+		__u64 inode_off;
+
+		fe = (ocfs_file_entry *) OCFS_BH_GET_DATA_READ(fe_bh); /* read */
 		args.offset = fe->this_sector;
 //		args.entry = fe;
 		args.fe_bh = fe_bh;
+		if (fe->attribs & OCFS_ATTRIB_DIRECTORY)
+			inode_off = fe->extents[0].disk_off;
+		else
+			inode_off = fe->this_sector;
+
+		fe_off = fe->this_sector;
 		OCFS_BH_PUT_DATA(fe_bh);
+
 		fe = NULL;
+		/* we should put this guy in the hash now... */
+
+		LOG_TRACE_STR("calling iget4");
+		/* alright, allocate a new inode number for this guy
+		 * and insert it into the hash. */
+		ino = iunique(osb->sb, OCFS_ROOT_INODE_NUMBER);
+		ino = ocfs_inode_hash_insert(osb, inode_off, fe_off, ino);
+
 #ifdef LINUX_2_5
 		inode = ocfs_iget (sb, &args);
 #else
 		inode =
-		    iget4 (sb, LO (args.offset),
+		    iget4 (sb, ino,
 			   (find_inode_t) ocfs_find_inode, (void *) (&args));
 #endif
 		if (inode == NULL) {
@@ -185,8 +207,14 @@
 			LOG_ERROR_STATUS(status);
 		ocfs_abort_trans(handle);
 		goto leave;
-	} else if (ocfs_commit_trans(handle) < 0)
-		LOG_ERROR_STR("Could not complete create!");
+	} else {
+		fe = (ocfs_file_entry *) OCFS_BH_GET_DATA_READ(new_fe_bh); /* read */
+		handle->new_file_lockid = fe->this_sector;
+		OCFS_BH_PUT_DATA(new_fe_bh);
+		fe = NULL;
+		if (ocfs_commit_trans(handle) < 0)
+			LOG_ERROR_STR("Could not complete create!");
+	}
 
 	status = ocfs_create_new_oin (&oin, 0ULL, osb);
 	if (status < 0) {
@@ -194,7 +222,7 @@
 		goto leave;
 	}
 
-	fe = (ocfs_file_entry *) OCFS_BH_GET_DATA(new_fe_bh);
+	fe = (ocfs_file_entry *) OCFS_BH_GET_DATA_READ(new_fe_bh); /* read */
 
 	file_off = fe->this_sector;
 	dirnode_off = fe->extents[0].disk_off;
@@ -214,7 +242,7 @@
 	if (ParentOin)
 		OCFS_CLEAR_FLAG (ParentOin->oin_flags, OCFS_OIN_IN_USE);
 
-	fe = (ocfs_file_entry *) OCFS_BH_GET_DATA(new_fe_bh);
+	fe = (ocfs_file_entry *) OCFS_BH_GET_DATA_READ(new_fe_bh); /* read */
 
 	/* is this safe if we no longer have it locked? */
 	if (oin->lock_res != NULL) {
@@ -233,9 +261,8 @@
 
 leave:
 	if (status >= 0 && !IS_ERR (inode)) {
-		inode->i_ino = LO (oin->file_disk_off);
 		oin->inode = inode;
-		ocfs_populate_inode (inode, fe, mode, oin);
+		ocfs_populate_inode (inode, fe, mode, oin, true);
 		insert_inode_hash (inode);
 		d_instantiate (dentry, inode);
 	} else if (status == -ENOSPC)
@@ -345,9 +372,7 @@
 
 	fe->create_time = fe->modify_time = OCFS_CURRENT_TIME;
 
-	pLockNode = (ocfs_dir_node *)OCFS_BH_GET_DATA(lock_bh);
-	/* is this always going to be false, considering we just
-	 * passed OCFS_DLM_EXCLUSIVE_LOCK above? */
+	pLockNode = (ocfs_dir_node *)OCFS_BH_GET_DATA_READ(lock_bh); /* read */
 	cache_lock = (DISK_LOCK_FILE_LOCK (pLockNode) == OCFS_DLM_ENABLE_CACHE_LOCK);
 	OCFS_BH_PUT_DATA(lock_bh);
 
@@ -386,7 +411,7 @@
 
 		if (!cache_lock)
 			DISK_LOCK_FILE_LOCK (fe) = OCFS_DLM_NO_LOCK;
-
+		
 		status = ocfs_read_bhs(osb, bitmapOffset, osb->vol_layout.dir_node_size, dirbhs, OCFS_BH_CACHED, NULL);
 		if (status < 0) {
 			ocfs_safefree (dirbhs);
@@ -396,12 +421,12 @@
 		}
 
 		for (i = 0; i < numblks; i++) {
-			tmp = OCFS_BH_GET_DATA(dirbhs[i]);
+			tmp = OCFS_BH_GET_DATA_WRITE(dirbhs[i]); /* write */
 			memset(tmp, 0, osb->sect_size);
 			OCFS_BH_PUT_DATA(dirbhs[i]);
 		}
 
-		new_dir = (ocfs_dir_node *) OCFS_BH_GET_DATA(dirbhs[0]);
+		new_dir = (ocfs_dir_node *) OCFS_BH_GET_DATA_WRITE(dirbhs[0]); /* write */
 		ocfs_initialize_dir_node (osb, new_dir, bitmapOffset, 
 					  fileOffset, osb->node_num);
 
@@ -705,6 +730,101 @@
 } /* ocfs_double_lock */
 
 /*
+ * ocfs_fix_extent_pointers
+ *
+ * When a file entry moves to another directory its disk offset changes.
+ * For an fe without local extents, journal and rewrite up_hdr_node_ptr in
+ * every extent it lists to fe->this_sector. Returns 0 or a negative status.
+ */
+static int ocfs_fix_extent_pointers(ocfs_super *osb,
+				    ocfs_journal_handle *handle,
+				    struct buffer_head *fe_bh,
+				    struct inode *inode)
+{
+	int status = 0;
+	ocfs_file_entry *fe = NULL;
+	__u64 new_ptr;
+	struct buffer_head *extent_bh = NULL;
+	ocfs_extent_group *extent = NULL;
+	int i;
+
+	LOG_ENTRY();
+
+	fe = (ocfs_file_entry *) OCFS_BH_GET_DATA_READ(fe_bh);
+
+	if (!IS_VALID_FILE_ENTRY(fe)) {
+		LOG_ERROR_STATUS(status = -EINVAL);
+		goto bail;
+	}
+
+	LOG_TRACE_ARGS("fe->this_sector = %u.%u, fe->local_ext = %s, "
+		       "fe->next_free_ext = %u\n",
+		       HILO(fe->this_sector),
+		       (fe->local_ext) ? "true" : "false",
+		       fe->next_free_ext);
+
+	/* If we have local extents, then don't even worry about
+	 * this. Directories, by definition, always have local_ext
+	 * true, so we don't need a separate check for them. */
+	if (fe->local_ext)
+		goto bail;
+
+	new_ptr = fe->this_sector;
+
+	for(i = 0; i < fe->next_free_ext; i++) {
+		status = ocfs_read_bh(osb, fe->extents[i].disk_off, &extent_bh,
+				      OCFS_BH_CACHED, inode);
+		if (status < 0) {
+			LOG_ERROR_STATUS(status);
+			goto bail;
+		}
+
+		status = ocfs_journal_access(handle, extent_bh,
+					     OCFS_JOURNAL_ACCESS_WRITE);
+		if (status < 0) {
+			LOG_ERROR_STATUS(status);
+			goto bail;
+		}
+
+		extent = (ocfs_extent_group *) OCFS_BH_GET_DATA_WRITE(extent_bh);
+		if ((!IS_VALID_EXTENT_HEADER(extent))
+		    && (!IS_VALID_EXTENT_DATA(extent))) {
+			LOG_ERROR_STATUS(status = -EINVAL);
+			OCFS_BH_PUT_DATA(extent_bh);
+			extent = NULL;	/* released above; bail must not put it again */
+			clear_buffer_modified(extent_bh);
+			goto bail;
+		}
+
+		/* this next line does the real work of the function. */
+		extent->up_hdr_node_ptr = new_ptr;
+
+		OCFS_BH_PUT_DATA(extent_bh);
+		extent = NULL;
+
+		status = ocfs_journal_dirty(handle, extent_bh);
+		if (status < 0) {
+			LOG_ERROR_STATUS(status);
+			goto bail;
+		}
+
+		brelse(extent_bh);
+		extent_bh = NULL;
+	}
+bail:
+	if (fe)
+		OCFS_BH_PUT_DATA(fe_bh);
+
+	if (extent_bh) {
+		if (extent)
+			OCFS_BH_PUT_DATA(extent_bh);
+		brelse(extent_bh);
+	}
+	LOG_EXIT_STATUS(status);
+	return(status);
+}
+
+/*
  * ocfs_rename()
  *
  */
@@ -717,12 +837,13 @@
 	ocfs_file_entry *newfe = NULL, *oldfe = NULL;
 	struct buffer_head *oldfe_bh = NULL;
 	struct buffer_head *newfe_bh = NULL;
+	struct buffer_head *insert_bh = NULL;
 	ocfs_file_entry *tmpfe = NULL;
 	ocfs_super *osb = NULL;
 	__u64 oldOffset, newDirOff, oldDirOff, t;
-	bool DeleteTargetOin = false;
 	__u64 tmpoff = 0;
 	bool kill_newfe = false;
+	bool delete_target_oin = false;
 	ocfs_bitmap_free_head *free_head = NULL;
 	ocfs_journal_handle *handle = NULL;
 	__u32 dir_lock_flags = FLAG_FILE_CREATE | FLAG_DIR;
@@ -800,7 +921,7 @@
 			OCFS_SET_FLAG (newOIN->oin_flags, OCFS_OIN_IN_USE);
 			ocfs_up_sem (&(newOIN->main_res));
 			status = ocfs_verify_update_oin (osb, newOIN);
-			DeleteTargetOin = true;
+			delete_target_oin = true;
 		}
 	}
 
@@ -838,7 +959,7 @@
 	/* lock old_fe. we read it ourselves instead of letting
 	 * acquire_lock do it because if it's a directory, we lock the
 	 * dirnode instead. */
-	oldfe = (ocfs_file_entry *) OCFS_BH_GET_DATA(oldfe_bh);
+	oldfe = (ocfs_file_entry *) OCFS_BH_GET_DATA_READ(oldfe_bh); /* read */
 	if (oldfe->attribs & OCFS_ATTRIB_DIRECTORY) {
 		oldfe_lockid = oldfe->extents[0].disk_off;
 		oldfe_flags = FLAG_DIR;
@@ -879,7 +1000,7 @@
 	/* In case we need to overwrite an existing file, we blow it
 	 * away first */
 	if (kill_newfe) {
-		newfe = (ocfs_file_entry *) OCFS_BH_GET_DATA(newfe_bh);
+		newfe = (ocfs_file_entry *) OCFS_BH_GET_DATA_READ(newfe_bh); /* read */
 		if (newfe->attribs & OCFS_ATTRIB_DIRECTORY) {
 			newfe_lockid = newfe->extents[0].disk_off;
 			newfe_flags = FLAG_DIR;
@@ -912,6 +1033,7 @@
 		}
 	}
 
+
 	/* If we're moving to a different directory, all we've gotta
 	 * do is copy the fe information from the old directory to the
 	 * new one. */
@@ -938,7 +1060,7 @@
 			goto finally;
 		}
 
-		oldfe = (ocfs_file_entry *) OCFS_BH_GET_DATA(oldfe_bh);
+		oldfe = (ocfs_file_entry *) OCFS_BH_GET_DATA_READ(oldfe_bh); /* read */
 		memcpy(tmpfe, oldfe, sizeof(ocfs_file_entry));
 		OCFS_BH_PUT_DATA(oldfe_bh);
 		oldfe = NULL;
@@ -957,20 +1079,55 @@
 		DISK_LOCK_WRITER_NODE (tmpfe) = osb->node_num;
 		tmpfe->modify_time = OCFS_CURRENT_TIME;
 
-		status= ocfs_insert_file(osb, tmpfe, new_dir_bh, NULL, handle, new_dir, new_inode);
+		if (tmpfe->attribs & OCFS_ATTRIB_DIRECTORY)
+			tmpoff = tmpfe->extents[0].disk_off;
+		else
+			tmpoff = tmpfe->this_sector;
+
+		down(&old_inode->i_sem);
+
+		status = ocfs_insert_file(osb, tmpfe, new_dir_bh, &insert_bh, 
+					 handle, old_dir, old_inode);
 		if (status < 0) {
+			up(&old_inode->i_sem);
 			LOG_ERROR_STATUS (status);
 			goto finally;
 		}
 
+		status = ocfs_fix_extent_pointers(osb, handle, insert_bh,
+						  old_inode);
+		if (status < 0) {
+			up(&old_inode->i_sem);
+			LOG_ERROR_STATUS (status);
+			goto finally;
+		}
+		LOG_TRACE_ARGS("(after) tmpfe->this_sector = %u.%u\n", 
+			       HILO(tmpfe->this_sector));
+
+		if (oldOIN)
+			ocfs_down_sem(&oldOIN->main_res, true);
+
 		/* move the inode offset over to the new entry */
-		if (S_ISDIR (old_dentry->d_inode->i_mode)) {
-			SET_INODE_OFFSET(old_dentry->d_inode, 
-					 tmpfe->extents[0].disk_off);
+		if (S_ISDIR(old_dentry->d_inode->i_mode)) {
+			/* the vote offset doesn't actually change for
+			 * a directory, but the fe offset does... */
+			ocfs_inode_rehash(&osb->inode_hash, 
+					  tmpoff,
+					  tmpfe->extents[0].disk_off, 
+					  tmpfe->this_sector);
 		} else {
 			SET_INODE_OFFSET(old_dentry->d_inode, 
 					 tmpfe->this_sector);
+			ocfs_inode_rehash(&osb->inode_hash, 
+					  tmpoff,
+					  tmpfe->this_sector,
+					  tmpfe->this_sector);
 		}
+		if (oldOIN) {
+			oldOIN->file_disk_off = tmpfe->this_sector;
+			ocfs_up_sem(&oldOIN->main_res);
+		}
+		up(&old_inode->i_sem);
 	} else {
 		/* Ok, we're moving inside of the same directory --
 		 * this is easy then -- we just change the name on the
@@ -1008,9 +1165,11 @@
 			if (new_dentry->d_inode)
 				fsync_inode_buffers(old_dentry->d_inode);
 		}
-		if (kill_newfe && DeleteTargetOin) {
-			ocfs_release_cached_oin (osb, oldOIN);
-			ocfs_release_oin (oldOIN, true);
+
+		/* delete the targets oin here as we've just blown it away! */
+		if (kill_newfe && newOIN && delete_target_oin) {
+			ocfs_release_cached_oin (osb, newOIN);
+			ocfs_release_oin (newOIN, true);
 		}
 	}
 
@@ -1057,6 +1216,8 @@
 			OCFS_BH_PUT_DATA(newfe_bh);
 		brelse(newfe_bh);
 	}
+	if (insert_bh)
+		brelse(insert_bh);
 	if (old_dir_bh)
 		brelse(old_dir_bh);
 	if (new_dir_bh)
@@ -1144,9 +1305,8 @@
 		goto abort_trans;
 	}
 
-	fe = (ocfs_file_entry *) OCFS_BH_GET_DATA(new_fe_bh);
+	fe = (ocfs_file_entry *) OCFS_BH_GET_DATA_READ(new_fe_bh); /* read */
 	file_off = fe->this_sector;
-	printk("ok in symlink, got the fe, this sector is %u.%u\n", file_off);
 	OCFS_BH_PUT_DATA(new_fe_bh);
 	fe = NULL;
 
@@ -1162,9 +1322,6 @@
 
 abort_trans:
 	if (handle) {
-		ocfs_bitmap_free_head *f = osb->alloc_free_head;
-		osb->alloc_free_head = NULL;
-
 		if (status < 0)
 			ocfs_abort_trans(handle);
 		else {
@@ -1172,11 +1329,6 @@
 			if (status < 0)
 				LOG_ERROR_STATUS(status);
 		}
-
-		if (f) {
-			ocfs_process_bitmap_free_head(osb, f);
-			free_bitmap_free_head(f);
-		}
 	}
 
 	if (lock_res != NULL) {
@@ -1203,11 +1355,10 @@
 #else
 	inode->i_rdev = OCFS_NODEV;
 #endif	
-	inode->i_ino = LO (oin->file_disk_off);
 	oin->inode = inode;
 
-	fe = (ocfs_file_entry *) OCFS_BH_GET_DATA(new_fe_bh);
-	ocfs_populate_inode (inode, fe, S_IFLNK | S_IRWXUGO, oin);
+	fe = (ocfs_file_entry *) OCFS_BH_GET_DATA_READ(new_fe_bh); /* read */
+	ocfs_populate_inode (inode, fe, S_IFLNK | S_IRWXUGO, oin, true);
 	OCFS_BH_PUT_DATA(new_fe_bh);
 	fe = NULL;
 
@@ -1261,22 +1412,16 @@
 		goto leave;
 	}
 
-	status = ocfs_journal_access(handle, fe_bh, 
-					 OCFS_JOURNAL_ACCESS_WRITE);
-	if (status < 0) {
-		LOG_ERROR_STATUS (status);
-		goto leave;
-	}
-
 	/* lock file ent for a dir is out in the 1st extent, this_sector 
 	   for file */
-	fe = (ocfs_file_entry *)OCFS_BH_GET_DATA(fe_bh);
+	fe = (ocfs_file_entry *)OCFS_BH_GET_DATA_READ(fe_bh); /* read */
 	dir_node_ptr = fe->dir_node_ptr;
 	if (fe->attribs & OCFS_ATTRIB_DIRECTORY) {
 		lockId = fe->extents[0].disk_off;
 		lockFlags = (FLAG_DIR | FLAG_FILE_RENAME);
 
-		status = ocfs_read_bh(osb, lockId, &lockbh, OCFS_BH_CACHED, inode);
+		status = ocfs_read_bh(osb, lockId, &lockbh, OCFS_BH_CACHED, 
+				      inode);
 		if (status < 0) {
 			LOG_ERROR_STATUS (status);
 			goto leave;
@@ -1293,32 +1438,40 @@
 		lockFlags = FLAG_FILE_RENAME;
 		lockbh = fe_bh;
 	}
+	OCFS_BH_PUT_DATA(fe_bh);
+	fe = NULL;
 
-	/* Change the name and write it back.... */
-	fe->filename[0] = '\0';
-	strncpy (fe->filename, file_name->name, file_name->len);
-	fe->filename[file_name->len] = '\0';
-
-	DISK_LOCK_SEQNUM (fe) = changeSeqNum;
-
-	/* Set the Valid bit here */
-	SET_VALID_BIT (fe->sync_flags);
-	fe->sync_flags &= ~(OCFS_SYNC_FLAG_CHANGE);
-
-	status = ocfs_read_bh (osb, dir_node_ptr, &dirbh, OCFS_BH_CACHED, inode);
+	status = ocfs_read_bh (osb, dir_node_ptr, &dirbh, OCFS_BH_CACHED, 
+			       inode);
 	if (status < 0) {
-		OCFS_BH_PUT_DATA(fe_bh);
 		LOG_ERROR_STATUS(status);
 		goto leave;
 	}
+
 	status = ocfs_journal_access(handle, dirbh, OCFS_JOURNAL_ACCESS_WRITE);
 	if (status < 0) {
-		OCFS_BH_PUT_DATA(fe_bh);
 		LOG_ERROR_STATUS (status);
 		goto leave;
 	}
+	status = ocfs_journal_access(handle, fe_bh, OCFS_JOURNAL_ACCESS_WRITE);
+	if (status < 0) {
+		LOG_ERROR_STATUS (status);
+		goto leave;
+	}
 
-	pLockNode = (ocfs_dir_node *)OCFS_BH_GET_DATA(dirbh);
+	/* preserve bh lock ordering so grab the write on dirbh 1st. */
+	pLockNode = (ocfs_dir_node *)OCFS_BH_GET_DATA_WRITE(dirbh); /* write */
+	fe = (ocfs_file_entry *) OCFS_BH_GET_DATA_WRITE(fe_bh); /* write */
+
+	/* Change the actual name now */
+	fe->filename[0] = '\0';
+	strncpy (fe->filename, file_name->name, file_name->len);
+	fe->filename[file_name->len] = '\0';
+	DISK_LOCK_SEQNUM (fe) = changeSeqNum;
+	SET_VALID_BIT (fe->sync_flags);
+	fe->sync_flags &= ~(OCFS_SYNC_FLAG_CHANGE);
+
+	/* mark the dirnode as dirty */
 	pLockNode->index_dirty = 1;
 	pLockNode->bad_off = (fe->this_sector - dir_node_ptr) / osb->sect_size;
 	pLockNode->bad_off -= 1;
@@ -1331,20 +1484,21 @@
 	needs_reindex = (index < pLockNode->num_ent_used);
 
 	if (needs_reindex) {
-		memmove (&pLockNode->index[index], &pLockNode->index[index + 1],
+		memmove (&pLockNode->index[index], 
+			 &pLockNode->index[index + 1],
 			 pLockNode->num_ent_used - (index + 1));
 		pLockNode->index[pLockNode->num_ent_used - 1] = pLockNode->bad_off;
 		/* is this a safe cast? */
 		flags = OCFS_FE_CACHE_FLAGS(osb, ((ocfs_file_entry *) pLockNode));
-		OCFS_BH_PUT_DATA(dirbh);
-		status = ocfs_journal_dirty(handle, dirbh);
-		if (status < 0) {
-			OCFS_BH_PUT_DATA(fe_bh);
-			LOG_ERROR_STATUS (status);
-			goto leave;
-		}
 	}
 
+	OCFS_BH_PUT_DATA(dirbh);
+	status = ocfs_journal_dirty(handle, dirbh);
+	if (status < 0) {
+		LOG_ERROR_STATUS (status);
+		goto leave;
+	}
+
 	flags = OCFS_FE_CACHE_FLAGS(osb, fe);
 	OCFS_BH_PUT_DATA(fe_bh);
 	fe = NULL;
@@ -1442,7 +1596,7 @@
 		 * deleting? in that case, we also need to read the
 		 * head of it's first dirnode which would have been
 		 * done implicitely by locking it. */
-		fe = (ocfs_file_entry *) OCFS_BH_GET_DATA(fe_bh);
+		fe = (ocfs_file_entry *) OCFS_BH_GET_DATA_READ(fe_bh); /* read */
 		if (fe->attribs & OCFS_ATTRIB_DIRECTORY) {
 			status = ocfs_read_bh(osb, fe->extents[0].disk_off, 
 					      &lock_bh, OCFS_BH_CACHED, inode);
@@ -1477,7 +1631,7 @@
 	}
 
 	/* lock the file entry */
-	fe = (ocfs_file_entry *)OCFS_BH_GET_DATA(fe_bh);
+	fe = (ocfs_file_entry *)OCFS_BH_GET_DATA_READ(fe_bh); /* read */
 
 	if (fe->attribs & OCFS_ATTRIB_DIRECTORY) {
 		lock_id = fe->extents[0].disk_off;
@@ -1504,19 +1658,43 @@
 		goto leave;
 	}
 
-	fe = (ocfs_file_entry *) OCFS_BH_GET_DATA(fe_bh);
+	/* need to preserve locking order, so take a 'write' lock on
+	 * the dirnode sector first. it won't get passed to
+	 * journal_dirty until ocfs_remove_file so clean up the write
+	 * lock on errors before that */
+	OCFS_BH_GET_DATA_WRITE(lock_node_bh);
+	OCFS_BH_PUT_DATA(lock_node_bh);
+
+	/* we call ocfs_clear_buffer_modified in several error cases
+	 * here if we set the modify bit on this buffer, but haven't
+	 * journal_dirtied it yet. Otherwise, it'll stay modified even
+	 * after the abort_trans. */
+	fe = (ocfs_file_entry *) OCFS_BH_GET_DATA_WRITE(fe_bh); /* write */
 	is_dir = fe->attribs & OCFS_ATTRIB_DIRECTORY;
 	if (is_dir) {
 		__u8 numused;
 		ocfs_dir_node *pLockNode;
 
-		pLockNode = (ocfs_dir_node *)OCFS_BH_GET_DATA(lock_bh);
+		pLockNode = (ocfs_dir_node *)OCFS_BH_GET_DATA_READ(lock_bh);/* read */
+		if (!IS_VALID_DIR_NODE(pLockNode)) {
+			OCFS_BH_PUT_DATA(lock_bh);
+			OCFS_BH_PUT_DATA(fe_bh);
+			ocfs_clear_buffer_modified(fe_bh);
+			ocfs_clear_buffer_modified(lock_node_bh);
+			status = -EIO;
+			LOG_TRACE_STR("Uhoh, invalid dirnode found!");
+			goto leave;
+		}
+
 		numused = pLockNode->num_ent_used;
 		OCFS_BH_PUT_DATA(lock_bh);
 
 		if (numused && !(flags & FLAG_DEL_NAME)) {
 			OCFS_BH_PUT_DATA(fe_bh);
+			ocfs_clear_buffer_modified(fe_bh);
+			ocfs_clear_buffer_modified(lock_node_bh);
 			status = -ENOTEMPTY;
+			LOG_TRACE_ARGS("-ENOTEMPY, numused = %u\n", numused);
 			goto leave;
 		}
 	}
@@ -1526,6 +1704,8 @@
 		 * doing a rename so skip the 1st part of this function. */
 		status = 0;
 		OCFS_BH_PUT_DATA(fe_bh);
+		ocfs_clear_buffer_modified(fe_bh);
+		ocfs_clear_buffer_modified(lock_node_bh);
 		goto delete_entry;
 	}
 
@@ -1543,11 +1723,13 @@
 	if (is_dir) {
 		/* Iterate through all the dir nodes for this
 		 * directory and mark them to be freed */
-		fe = (ocfs_file_entry *) OCFS_BH_GET_DATA(fe_bh);
+		fe = (ocfs_file_entry *) OCFS_BH_GET_DATA_READ(fe_bh); /* read */
 		status = ocfs_free_directory_block (osb, fe, free_head, inode);
 		OCFS_BH_PUT_DATA(fe_bh);
 		if (status < 0) {
 			OCFS_BH_PUT_DATA(fe_bh);
+			ocfs_clear_buffer_modified(fe_bh);
+			ocfs_clear_buffer_modified(lock_node_bh);
 			LOG_ERROR_STATUS (status);
 			goto leave;
 		}
@@ -1556,6 +1738,8 @@
 		 * this file so we can remove them after commit. */
 		status = ocfs_free_file_extents (osb, fe_bh, free_head);
 		if (status < 0) {
+			ocfs_clear_buffer_modified(fe_bh);
+			ocfs_clear_buffer_modified(lock_node_bh);
 			LOG_ERROR_STATUS (status);
 			goto leave;
 		}
@@ -1563,7 +1747,8 @@
 
 delete_entry:
 	/* remove the fe from the dirnode.*/
-	status = ocfs_remove_file(osb, fe_bh, lock_node_bh, handle, parent_inode, inode);
+	status = ocfs_remove_file(osb, fe_bh, lock_node_bh, handle, 
+				  parent_inode, inode);
 	if (status < 0) {
 		LOG_ERROR_STATUS(status);
 		goto leave;

Modified: trunk/src/nm.c
===================================================================
--- trunk/src/nm.c	2003-12-18 23:28:02 UTC (rev 14)
+++ trunk/src/nm.c	2004-01-24 01:22:15 UTC (rev 15)
@@ -29,7 +29,7 @@
 /* Tracing */
 #define OCFS_DEBUG_CONTEXT      OCFS_DEBUG_CONTEXT_NM
 
-static struct inode * ocfs_get_inode_from_offset(ocfs_super * osb, __u64 fileoff);
+static struct inode * ocfs_get_inode_no_bh(ocfs_super * osb, __u64 voteoff);
 static int ocfs_release_dir_cache_lock (ocfs_super *osb, struct buffer_head **dir_bhs, struct inode *inode);
 static inline int get_process_vote_action(ocfs_super * osb, ocfs_lock_res *lockres, __u32 node_num, __u32 flags, int status, bool *master_alive, ocfs_inode **oin);
 static int ocfs_disk_update_resource (ocfs_super * osb, ocfs_lock_res * lock_res, struct buffer_head **bh, __u32 timeout, struct inode *inode);
@@ -163,7 +163,7 @@
 
 	LOG_ENTRY_ARGS("(vote_node = %d, bh = 0x%x)\n", vote_node, bh);
 
-	publish = (ocfs_publish *) OCFS_BH_GET_DATA(bh);
+	publish = (ocfs_publish *) OCFS_BH_GET_DATA_READ(bh); /* read */
 
 	if (osb->last_publ_seq_num[vote_node] == publish->publ_seq_num){
 		LOG_TRACE_ARGS("Already voted on node %d, seqnum (%u.%u)\n", 
@@ -237,7 +237,7 @@
 	ocfs_node_config_hdr *node_cfg_hdr = NULL;
 	__u64 curr_node_map;
 	__u64 cfg_seq_num;
-	int which;
+	int which, pruned;
 	int flush_misses = 0;
 	struct buffer_head *bh = NULL;
 
@@ -301,6 +301,10 @@
 			}
 		}
 
+		/* try to prune some bh_sem hash entries if list is too long */
+		pruned = ocfs_bh_sem_hash_prune();
+		LOG_TRACE_ARGS("pruned %d entries from nm thread\n", pruned);
+
 		/* lock publish to prevent overwrites from vote_req and vote_reset */
 		down (&(osb->publish_lock));
 
@@ -321,7 +325,7 @@
 		up (&(osb->publish_lock));
 
 		/* If another node was added to the config read and update the cfg */
-		node_cfg_hdr = (ocfs_node_config_hdr *) OCFS_BH_GET_DATA(osb->cfg_bhs[1]);
+		node_cfg_hdr = (ocfs_node_config_hdr *) OCFS_BH_GET_DATA_READ(osb->cfg_bhs[1]); /* read */
 		num_nodes = node_cfg_hdr->num_nodes;
 		cfg_seq_num = node_cfg_hdr->cfg_seq_num;
 		OCFS_BH_PUT_DATA(osb->cfg_bhs[1]);
@@ -355,7 +359,7 @@
 
 		/* Check for the highest node looking for a vote, if anybody is looking */
 		for (i = 0, which = OCFS_VOLCFG_NEWCFG_SECTORS; i < num_nodes; i++, which++) {
-			publish = (ocfs_publish *) OCFS_BH_GET_DATA(osb->cfg_bhs[which]);
+			publish = (ocfs_publish *) OCFS_BH_GET_DATA_READ(osb->cfg_bhs[which]); /* read */
 
 			if (publish->time == (__u64) 0)
 				goto loop;
@@ -392,13 +396,19 @@
 
 		if ((vote_node != OCFS_INVALID_NODE_NUM) && 
 		    (vote_node != osb->node_num)) {
+			__s32 voted;
 			LOG_TRACE_ARGS("vote_node = %d\n", vote_node);
 
 			bh = osb->cfg_bhs[OCFS_VOLCFG_NEWCFG_SECTORS 
 					  + osb->node_num];
 			down(&(osb->publish_lock));
-			publish = (ocfs_publish *) OCFS_BH_GET_DATA(bh);
-			if (publish->vote) {
+
+			publish = (ocfs_publish *) OCFS_BH_GET_DATA_READ(bh); /* read */
+			voted = publish->vote;
+			OCFS_BH_PUT_DATA(bh);
+
+			if (voted) {
+				publish = (ocfs_publish *) OCFS_BH_GET_DATA_WRITE(bh); /* write */
 				publish->vote = 0;
 				OCFS_BH_PUT_DATA(bh);
 				status = ocfs_write_bh(osb, bh, 0, NULL);
@@ -406,8 +416,7 @@
 					LOG_ERROR_STATUS (status);
 					goto finally;
 				}
-			} else 
-				OCFS_BH_PUT_DATA(bh);
+			}
 			publish = NULL;
 			up(&(osb->publish_lock));
 
@@ -487,7 +496,7 @@
 		goto finally;
 	}
 
-	fe = (ocfs_file_entry *)OCFS_BH_GET_DATA(*bh);
+	fe = (ocfs_file_entry *)OCFS_BH_GET_DATA_READ(*bh); /* read */
 	lock_res->lock_type = DISK_LOCK_FILE_LOCK (fe);
 	lock_res->master_node_num = DISK_LOCK_CURRENT_MASTER (fe);
 	lock_res->oin_openmap = DISK_LOCK_OIN_MAP (fe);
@@ -587,8 +596,10 @@
 	/* If we found the lockres in the hash and it's asked for, we still
 	 * need to return a buffer_head */
 	if (status >= 0) {
+		int flags = (OCFS_NONCACHED(osb, (*lockres)->sector_num) ? 
+			     0 : OCFS_BH_CACHED);
 		status = ocfs_read_bh(osb, (*lockres)->sector_num, b, 
-				      OCFS_BH_CACHED, NULL);
+				      flags, NULL);
 		if (status < 0) {
 			LOG_ERROR_STATUS(status);
 			goto finally;
@@ -623,62 +634,62 @@
 #define OCFS_DEBUG_CONTEXT	OCFS_DEBUG_CONTEXT_DLM
 
 /*
- * ocfs_get_inode_from_offset()
+ * ocfs_get_inode_no_bh() - look up the inode for a vote offset (NULL if none found)
  *
  */
-struct inode * ocfs_get_inode_from_bh(ocfs_super * osb, struct buffer_head *bh)
+static struct inode * ocfs_get_inode_no_bh(ocfs_super * osb, __u64 voteoff)
 {
+        int status;
         struct inode *inode = NULL;
-        ocfs_file_entry *fe = NULL;
-	ocfs_find_inode_args args;
+	struct buffer_head *fe_bh = NULL;
+	__u64 fe_off = 0;
 
-	LOG_ENTRY ();
-        
-	args.fe_bh = bh;
-	fe = (ocfs_file_entry *)OCFS_BH_GET_DATA(bh);
-	args.offset = fe->this_sector;
-	OCFS_BH_PUT_DATA(bh);
-	fe = NULL;
+	LOG_ENTRY_ARGS("(voteoff = %u.%u)\n", HILO(voteoff));
 
-#ifdef LINUX_2_5
-	inode = ocfs_iget(osb->sb, &args);
-#else
-	inode = iget4 (osb->sb, (__u32) LO (args.offset),
-			(find_inode_t) ocfs_find_inode,
-			(void *) (&args));
-#endif
-	if (inode != NULL && is_bad_inode (inode)) {
-		iput (inode);
-		inode = NULL;
+	if (voteoff == osb->vol_layout.root_start_off) {
+		inode = osb->sb->s_root->d_inode;
+		if (inode)
+			atomic_inc(&inode->i_count);
+		goto bail;
 	}
-	if (inode)
-		SET_BH_SEQNUM(inode, bh);
 
-	LOG_EXIT_PTR (inode);
-        return inode;
-}				/* ocfs_get_inode_from_offset */
+	/* look the vote offset up in the inode hash. A hit means an
+	 * inode exists for it; the lookup also hands back the fe
+	 * offset, which we read in below. */
 
+	/* if it's not in the inode hash, then it can't have an inode
+	 * in memory. */
+	if (ocfs_inode_hash_lookup(&osb->inode_hash, voteoff, &fe_off) == 0)
+		goto bail;
 
+	LOG_TRACE_ARGS("got fe_off = %u.%u\n", HILO(fe_off));
 
-/*
- * ocfs_get_inode_from_offset()
- *
- */
-static struct inode * ocfs_get_inode_from_offset(ocfs_super * osb, __u64 fileoff)
-{
-        int status;
-        struct inode *inode = NULL;
-	struct buffer_head *fe_bh = NULL;
+	/* only the root dir is hashed with fe_off == 0, and we
+	 * should've caught that case above... */
+	if (fe_off == 0) {
+		LOG_ERROR_STATUS(-EFAIL);
+		goto bail;
+	}
 
-	LOG_ENTRY ();
-        
-	status = ocfs_read_bh(osb, fileoff, &fe_bh, OCFS_BH_CACHED, NULL);
-	inode = ocfs_get_inode_from_bh(osb, fe_bh);
-	brelse(fe_bh);
+	/* read the block at fe_off rather than voteoff: the vote offset
+	 * might be for a directory and we actually want to give the FE bh. */
+	status = ocfs_read_bh(osb, fe_off, &fe_bh, OCFS_BH_CACHED, NULL);
+	if (status < 0) {
+		LOG_ERROR_STATUS(status);
+		goto bail;
+	}
 
+	inode = ocfs_get_inode_from_offset(osb, voteoff, fe_bh);
+	if (inode)
+		SET_BH_SEQNUM(inode, fe_bh);
+
+bail:
+	if (fe_bh)
+		brelse(fe_bh);
+
 	LOG_EXIT_PTR (inode);
         return inode;
-}				/* ocfs_get_inode_from_offset */
+}				/* ocfs_get_inode_no_bh */
 
 
 
@@ -851,7 +862,7 @@
 	}
 
 	/* if we're lucky this will not need to do an IO */
-	inode = ocfs_get_inode_from_offset(osb, lock_id);
+	inode = ocfs_get_inode_no_bh(osb, lock_id);
 	status = ocfs_find_update_res (osb, lock_id, &lockres, NULL, NULL,
 					(OCFS_NM_HEARTBEAT_TIME/2), inode);
 	if (status < 0) {
@@ -873,13 +884,13 @@
 	
 	printk("ocfs_process_vote: %s request for lockid: %u.%u, action: %s, type: %s\n",
 	       flags & FLAG_RELEASE_LOCK ? "RELEASE" : 
-	       (flags & FLAG_ACQUIRE_LOCK ? "ACQUIRE" : "INVALID!!!"), lock_id,
+	       (flags & FLAG_ACQUIRE_LOCK ? "ACQUIRE" : "MODIFY"), lock_id,
 	       process_vote_strings[vote_type], disk_vote ? "disk vote" : "net vote" );
 
 
 	if (disk_vote) {
 		/* Zero out the vote for everybody, if any already set and hung */
-		vote = (ocfs_vote *) OCFS_BH_GET_DATA(vote_bh);
+		vote = (ocfs_vote *) OCFS_BH_GET_DATA_WRITE(vote_bh); /* write */
 		for (i = 0; i < num_nodes; i++)
 			vote->vote[i] = 0;
 		OCFS_BH_PUT_DATA(vote_bh);
@@ -975,7 +986,7 @@
 				if (status < 0)
 					LOG_ERROR_STATUS (status);
 				if (status >= 0) {
-					fe = (ocfs_file_entry *) OCFS_BH_GET_DATA(fe_bh);
+					fe = (ocfs_file_entry *) OCFS_BH_GET_DATA_WRITE(fe_bh); /* write */
 					DISK_LOCK_CURRENT_MASTER (fe) = node_num;
 					OCFS_BH_PUT_DATA(fe_bh);
 					status = ocfs_write_bh(osb, fe_bh, 0, inode);
@@ -1107,7 +1118,7 @@
 				break;
 			}
 	
-			fe = (ocfs_file_entry *) OCFS_BH_GET_DATA(fe_bh);
+			fe = (ocfs_file_entry *) OCFS_BH_GET_DATA_WRITE(fe_bh); /* write */
 			is_dir = IS_VALID_DIR_NODE(fe);
 			is_locked = DISK_LOCK_FILE_LOCK (fe) > OCFS_DLM_NO_LOCK;
 			if (vote_type == CHANGE_MASTER) {
@@ -1153,20 +1164,28 @@
 			/* need to do the write only if fe lock values need to change */
 			if (is_locked || vote_type == CHANGE_MASTER) {
 				if (vote_type == RELEASE_CACHE) {
-					fe = (ocfs_file_entry *) OCFS_BH_GET_DATA(fe_bh);
+					fe = (ocfs_file_entry *) OCFS_BH_GET_DATA_WRITE(fe_bh); /* write */
 					DISK_LOCK_FILE_LOCK (fe) = OCFS_DLM_NO_LOCK;
 					OCFS_BH_PUT_DATA(fe_bh);
-				}
-				status = ocfs_write_bh(osb, fe_bh, 0, inode);
-				if (status < 0) {
-					LOG_ERROR_STATUS (status);
-					brelse(fe_bh);
-					break;
-				}
-				if (vote_type == RELEASE_CACHE)
+					status = ocfs_write_bh(osb, fe_bh, 0, inode);
+					if (status < 0) {
+						LOG_ERROR_STATUS (status);
+						brelse(fe_bh);
+						break;
+					}
 					lockres->lock_type = lockres->lock_state = OCFS_DLM_NO_LOCK;
-				else
+				} else {
+					if (!is_dir) {
+						/* fe_bh was written in ocfs_release_dir_cache_lock */
+						status = ocfs_write_bh(osb, fe_bh, 0, inode);
+						if (status < 0) {
+							LOG_ERROR_STATUS (status);
+							brelse(fe_bh);
+							break;
+						}
+					}
 					lockres->master_node_num = node_num;
+				}
 			}
 			brelse(fe_bh);
 			vote_response = FLAG_VOTE_NODE;
@@ -1182,7 +1201,7 @@
 				LOG_ERROR_STATUS (status);
 				break;
 			}
-			fe = (ocfs_file_entry *)OCFS_BH_GET_DATA(fe_bh);
+			fe = (ocfs_file_entry *)OCFS_BH_GET_DATA_WRITE(fe_bh); /* write */
 
 			if ((fe->sync_flags & OCFS_SYNC_FLAG_NAME_DELETED) ||
 			    (!(fe->sync_flags & OCFS_SYNC_FLAG_VALID))) {
@@ -1239,9 +1258,6 @@
 			break;
 	}
 
-	if (inode)
-		iput(inode);
-
 	if (flags & (FLAG_FILE_EXTEND|FLAG_FILE_TRUNCATE) && 
 	    ((flags & FLAG_ACQUIRE_LOCK && vote_response==FLAG_VOTE_NODE) ||
 	    (flags & FLAG_RELEASE_LOCK))) {
@@ -1294,9 +1310,9 @@
 			}
 		}
 	}
-	
+
 	if (disk_vote) {
-		vote = (ocfs_vote *) OCFS_BH_GET_DATA(vote_bh);
+		vote = (ocfs_vote *) OCFS_BH_GET_DATA_WRITE(vote_bh); /* write */
 		vote->dir_ent = lock_id;
 		vote->vote_seq_num = seq_num;
 		vote->open_handle = open_handle;
@@ -1315,8 +1331,10 @@
 		ocfs_put_lockres(lockres);
 	}
 
-	if (inc_inode_seq) {
-		ocfs_inc_inode_seq(osb, inode);
+	if (inode) {
+		if (inc_inode_seq)
+			ocfs_inc_inode_seq(osb, inode);
+		iput(inode);
 	}
 leave:
 	LOG_EXIT_STATUS (status);
@@ -1398,10 +1416,15 @@
 
 	LOG_ENTRY_ARGS ("(osb=0x%08x, dirnd=0x%08x)\n", osb, dirnode);
 
-	dirnode = (ocfs_dir_node *) OCFS_BH_GET_DATA(dir_bhs[0]);
+	/* need to mark ALL buffers in a dir for write before calling write_bhs */
+	for (i = 0; i<256; i++) {
+		OCFS_BH_GET_DATA_WRITE(dir_bhs[i]);
+		OCFS_BH_PUT_DATA(dir_bhs[i]);
+	}
+	dirnode = (ocfs_dir_node *) OCFS_BH_GET_DATA_READ(dir_bhs[0]); /* read */
 
 	for(i = 0; i < dirnode->num_ent_used; i++) {
-		fe = (ocfs_file_entry *) FILEENT_GETBH(dirnode, dir_bhs, i);
+		fe = (ocfs_file_entry *) FILEENT_GETBH_WRITE(dirnode, dir_bhs, i); /* write */
 
 		if (DISK_LOCK_FILE_LOCK(fe) == OCFS_DLM_ENABLE_CACHE_LOCK)
 			DISK_LOCK_FILE_LOCK(fe) = OCFS_DLM_NO_LOCK;
@@ -1426,21 +1449,27 @@
 				brelse(dir_bhs[i]);
 		memset(dir_bhs, 0, dirblks * sizeof(*dir_bhs));
 
-		status = ocfs_read_bhs(osb, dirnode->next_node_ptr, 
-				       dirblks, dir_bhs, OCFS_BH_CACHED, inode);
+		status = ocfs_read_bhs(osb, next_node_ptr, 
+				       osb->vol_layout.dir_node_size, 
+				       dir_bhs, OCFS_BH_CACHED, inode);
 		if (status < 0) {
 			LOG_ERROR_STATUS (status);
 			goto bail;
 		}
 
-		dirnode = (ocfs_dir_node *)OCFS_BH_GET_DATA(dir_bhs[0]);
+		for (i = 0; i<256; i++) {
+			OCFS_BH_GET_DATA_WRITE(dir_bhs[i]);
+			OCFS_BH_PUT_DATA(dir_bhs[i]);
+		}
+
+		dirnode = (ocfs_dir_node *)OCFS_BH_GET_DATA_WRITE(dir_bhs[0]); /* write */
 		if(!IS_VALID_DIR_NODE(dirnode))
 			break;
 
 		DISK_LOCK_FILE_LOCK(dirnode) = OCFS_DLM_NO_LOCK;
 
 		for(i = 0; i < dirnode->num_ent_used; i++) {
-			fe = FILEENT_GETBH(dirnode, dir_bhs, i);
+			fe = FILEENT_GETBH_WRITE(dirnode, dir_bhs, i); /* write */
 			if (DISK_LOCK_FILE_LOCK(fe) == OCFS_DLM_ENABLE_CACHE_LOCK)
 				DISK_LOCK_FILE_LOCK(fe) = OCFS_DLM_NO_LOCK;
 			FILEENT_PUTBH(dirnode, dir_bhs, i);
@@ -1459,7 +1488,7 @@
 		}
 	}
 
-      bail:
+bail:
 	if (dirnode)
 		OCFS_BH_PUT_DATA(dir_bhs[0]);
 

Modified: trunk/src/oin.c
===================================================================
--- trunk/src/oin.c	2003-12-18 23:28:02 UTC (rev 14)
+++ trunk/src/oin.c	2004-01-24 01:22:15 UTC (rev 15)
@@ -32,7 +32,7 @@
 		LOG_ERROR_STATUS (status);
 		goto leave;
 	}
-	fe = (ocfs_file_entry *) OCFS_BH_GET_DATA(fe_bh);
+	fe = (ocfs_file_entry *) OCFS_BH_GET_DATA_READ(fe_bh); /* read */
 
 	/* Make sure that what we found is not a directory. */
 	if (!(oin->oin_flags & OCFS_OIN_DIRECTORY)) {
@@ -179,11 +179,11 @@
 			}
 		}
 
-		pLockRes = oin->lock_res;
-		ocfs_get_lockres (pLockRes);
+		/* ??? we need to the lock resource before updating it */
+		if (oin->lock_res) {
+			ocfs_get_lockres(oin->lock_res);
 
-		/* ??? we need to the lock resource before updating it */
-		if (pLockRes) {
+			pLockRes = oin->lock_res;
 			pLockRes->lock_type = DISK_LOCK_FILE_LOCK (fe);
 			pLockRes->master_node_num = DISK_LOCK_CURRENT_MASTER (fe);
 			pLockRes->oin_openmap = DISK_LOCK_OIN_MAP (fe);
@@ -191,8 +191,9 @@
 			pLockRes->last_read_time = DISK_LOCK_LAST_READ (fe);
 			pLockRes->reader_node_num = DISK_LOCK_READER_NODE (fe);
 			pLockRes->writer_node_num = DISK_LOCK_WRITER_NODE (fe);
+
+			ocfs_put_lockres(oin->lock_res);
 		}
-		ocfs_put_lockres (pLockRes);
 
 		status = 0;
 	} else {
@@ -244,7 +245,7 @@
 		LOG_ERROR_STATUS(status = -ENOMEM);
 		goto leave;
 	}
-	tmp = OCFS_BH_GET_DATA(fe_bh);
+	tmp = OCFS_BH_GET_DATA_READ(fe_bh); /* read */
 	memcpy(fe, tmp, sizeof(ocfs_file_entry));
 	OCFS_BH_PUT_DATA(fe_bh);
 
@@ -321,7 +322,7 @@
 			}
 
 			while (1) {
-				extent = (ocfs_extent_group *) OCFS_BH_GET_DATA(extent_bh);
+				extent = (ocfs_extent_group *) OCFS_BH_GET_DATA_READ(extent_bh); /* read */
 
 				if (!IS_VALID_EXTENT_DATA (extent)) {
 					LOG_ERROR_STATUS(status = -EFAIL);
@@ -435,7 +436,7 @@
 	int status = 0;
 	ocfs_inode *oin = NULL;
 
-	LOG_ENTRY ();
+	LOG_ENTRY_ARGS("(alloc_size = %u.%u)\n", HILO(alloc_size));
 
 	OCFS_ASSERT (osb);
 
@@ -514,7 +515,7 @@
 			goto finally;
 		}
 		
-		volDiskHdr = (ocfs_vol_disk_hdr *) OCFS_BH_GET_DATA(hdr_bh);
+		volDiskHdr = (ocfs_vol_disk_hdr *) OCFS_BH_GET_DATA_READ(hdr_bh); /* read */
 		root_off = volDiskHdr->root_off;
 		int_off = volDiskHdr->internal_off;
 		OCFS_BH_PUT_DATA(hdr_bh);
@@ -553,8 +554,10 @@
 			LOG_ERROR_STATUS (status);
 		goto finally;
 	}
-		
 
+	/* put the offset/inode number in the inode cache thingy. */
+	ocfs_inode_hash_insert(osb, osb->vol_layout.root_start_off, 
+			       0, OCFS_ROOT_INODE_NUMBER);
 	// oin->Parentoin = NULL; /*  Root has no parent */
 
 	/*  Set the Rootdirectories root Dir Node */
@@ -616,7 +619,7 @@
 	if (inode) {
 		__u64 savedOffset = oin->file_disk_off;
 
-		SET_INODE_OIN (inode, NULL);
+		CLEAR_INODE_OIN(inode);
 		SET_INODE_OFFSET (inode, savedOffset);
 		LOG_TRACE_ARGS ("inode oin cleared / flags: %d / offset: %u.%u\n",
 			inode->i_flags, savedOffset);
@@ -629,13 +632,10 @@
 		ocfs_del_sem (&(oin->main_res));
 		OCFS_CLEAR_FLAG (oin->oin_flags, OCFS_INITIALIZED_MAIN_RESOURCE);
 	}
-	if (oin->oin_flags & OCFS_INITIALIZED_PAGING_IO_RESOURCE) {
-		ocfs_del_sem (&(oin->paging_io_res));
-		OCFS_CLEAR_FLAG (oin->oin_flags,
-			       OCFS_INITIALIZED_PAGING_IO_RESOURCE);
-	}
 
 	if (FreeMemory) {
+		/* clean out the oin */
+		memset(oin, 0, sizeof(ocfs_inode));
 #ifdef OCFS_MEM_DBG
 		ocfs_dbg_slab_free (OcfsGlobalCtxt.oin_cache, oin);
 #else
@@ -701,7 +701,10 @@
 
 		lockResource = (ocfs_lock_res *) oin->lock_res;
 		if (lockResource == NULL) {
-			LOG_ERROR_STR ("lockres=null");
+			LOG_ERROR_ARGS("lockres=null, oin->file_disk_off "
+				       "= %u.%u\n", 
+				       HILO(oin->file_disk_off));
+
 			goto bail;
 		}
 

Modified: trunk/src/osb.c
===================================================================
--- trunk/src/osb.c	2003-12-18 23:28:02 UTC (rev 14)
+++ trunk/src/osb.c	2004-01-24 01:22:15 UTC (rev 15)
@@ -33,6 +33,7 @@
 	vol_layout->cluster_size = (__u32) (vdh->cluster_size);
 	osb->obj_id.type = OCFS_TYPE_OSB;
 	osb->obj_id.size = sizeof (ocfs_super);
+	INIT_LIST_HEAD (&(osb->osb_next));
 
 #define HASHBITS	12
 
@@ -41,6 +42,7 @@
 		goto bail;
 	}
 
+
 	ocfs_init_sem (&(osb->osb_res));
 	ocfs_init_sem (&(osb->map_lock));
 	ocfs_init_sem (&(osb->log_lock));
@@ -165,13 +167,11 @@
 	/* Read the Publish Sector of local Node */
 	offset = vol_layout->publ_sect_off + (osb->node_num * osb->sect_size);
 	status = ocfs_read_bh(osb, offset, &publish_bh, 0, NULL);
-/*	status = ocfs_read_force_disk_ex (osb, (void **)&publish, 
-	           osb->sect_size, osb->sect_size, offset);*/
 	if (status < 0) {
 		LOG_ERROR_STATUS (status);
 		goto finally;
 	}
-	publish = (ocfs_publish *) OCFS_BH_GET_DATA(publish_bh);
+	publish = (ocfs_publish *) OCFS_BH_GET_DATA_WRITE(publish_bh); /* write */
 
 	/*  Zero out the time stamp to write a new value */
 	publish->time = 0;
@@ -179,6 +179,7 @@
 
 	OCFS_BH_PUT_DATA(publish_bh);
 	publish = NULL;
+
 	status = ocfs_write_bh (osb, publish_bh, 0, NULL);
 	if (status < 0) {
 		LOG_ERROR_STATUS (status);
@@ -200,6 +201,13 @@
 	for(i = 0; i < OCFS_MAXIMUM_NODES; i++)
 		osb->last_publ_seq_num[i] = (__u64) (-1);
 
+	/* init the inode hash */
+	status = ocfs_inode_hash_init(osb);
+	if (status < 0) {
+		LOG_ERROR_STATUS (status);
+		goto finally;
+	}
+
 	/* We might need to add a variable in Global List of osb to */
 	/* delay any creation, if any other node is already creating a file */
 
@@ -288,7 +296,7 @@
 	/*  Check to see who else is alive. */
 	/*  Kick in the NM i/f to start writing time stamps to the disk */
 
-      bail:
+bail:
 	LOG_EXIT_STATUS (status);
 	return status;
 }				/* ocfs_verify_volume */
@@ -318,7 +326,7 @@
 		goto finally;
 	}
 	
-	publish = (ocfs_publish *) OCFS_BH_GET_DATA(publish_bh);
+	publish = (ocfs_publish *) OCFS_BH_GET_DATA_READ(publish_bh); /* read */
 	/* we copy these two flags out of the publish sector and then unlock
 	 * the bh as other functions will need to modify it. */
 	dirty = publish->dirty;
@@ -440,7 +448,8 @@
 	/* Remove the osb from the global linked list of all osb structures. */
 	/* The Global Link List is mainted for the whole driver */
 	ocfs_down_sem (&(OcfsGlobalCtxt.res), true);
-	list_del (&(osb->osb_next));
+	if (!list_empty(&(osb->osb_next)))
+		list_del (&(osb->osb_next));
 	ocfs_up_sem (&(OcfsGlobalCtxt.res));
 
 	for (i=0; i<32; i++)
@@ -554,12 +563,12 @@
 	}
 
 	for (i = 0; i < OCFS_DEFAULT_DIR_NODE_SECTS; i++) {
-		char *sect = OCFS_BH_GET_DATA(dirnode_bhs[i]);
+		char *sect = OCFS_BH_GET_DATA_WRITE(dirnode_bhs[i]); /* write */
 		memset(sect, 0, osb->sect_size);
 		OCFS_BH_PUT_DATA(dirnode_bhs[i]);
 	}
 
-	NewDirNode = (ocfs_dir_node *) OCFS_BH_GET_DATA(dirnode_bhs[0]);
+	NewDirNode = (ocfs_dir_node *) OCFS_BH_GET_DATA_WRITE(dirnode_bhs[0]); /* write */
 	osb->vol_layout.root_start_off = bitmapOffset;
 	ocfs_initialize_dir_node (osb, NewDirNode, bitmapOffset, fileOffset, osb->node_num);
 	NewDirNode->dir_node_flags |= DIR_NODE_FLAG_ROOT;
@@ -580,7 +589,7 @@
 		goto bail;
 	}
 
-	volDiskHdr = (ocfs_vol_disk_hdr *) OCFS_BH_GET_DATA(hdr_bh);
+	volDiskHdr = (ocfs_vol_disk_hdr *) OCFS_BH_GET_DATA_WRITE(hdr_bh); /* write */
 	volDiskHdr->root_off = osb->vol_layout.root_start_off;
 	volDiskHdr->internal_off = osb->vol_layout.root_int_off;
 	OCFS_BH_PUT_DATA(hdr_bh);

Modified: trunk/src/sem.c
===================================================================
--- trunk/src/sem.c	2003-12-18 23:28:02 UTC (rev 14)
+++ trunk/src/sem.c	2004-01-24 01:22:15 UTC (rev 15)
@@ -38,8 +38,8 @@
 
 	if (res->magic != OCFS_SEM_MAGIC) {
 		LOG_ERROR_ARGS("semaphore magic value is bad!\n");
-		ret = false;
-		goto bail;
+
+		BUG();
 	}
 
 	if (res->pid == 0) {
@@ -68,7 +68,6 @@
 		}
 	}
 
-bail:
 	LOG_EXIT_ULONG (ret);
 	return ret;
 }				/* ocfs_down_sem */
@@ -87,7 +86,8 @@
 
 	if (res->magic != OCFS_SEM_MAGIC) {
 		LOG_ERROR_ARGS("semaphore magic value is bad!\n");
-		goto bail;
+
+		BUG();
 	}
 
 	if (res->count && current->pid == res->pid) {
@@ -98,7 +98,6 @@
 		}
 	}
 
-bail:
 	LOG_EXIT ();
 	return;
 }				/* ocfs_up_sem */

Modified: trunk/src/super.c
===================================================================
--- trunk/src/super.c	2003-12-18 23:28:02 UTC (rev 14)
+++ trunk/src/super.c	2004-01-24 01:22:15 UTC (rev 15)
@@ -69,7 +69,7 @@
 module_param (ip_address, charp, 0);
 module_param (ip_port, ulong, 0);
 module_param (guid, charp, 0);
-module_param (cs, ulong, 0);
+module_param (cs, int, 0);
 module_param (comm_voting, ulong, 0);
 #else /* LINUX_2_5 */
 MODULE_PARM (node_name, "s");
@@ -88,7 +88,7 @@
 MODULE_PARM_DESC(ip_port, "Port number for the network dlm on this node");
 MODULE_PARM (guid, "s");
 MODULE_PARM_DESC(guid, "GUID for this machine");
-MODULE_PARM (cs, "l");
+MODULE_PARM (cs, "i");
 MODULE_PARM_DESC(cs, "Checksum");
 MODULE_PARM (comm_voting, "l");
 MODULE_PARM_DESC(comm_voting, "Enable/Disable network dlm");
@@ -426,6 +426,12 @@
 		LOG_ERROR_STATUS (status);
 		goto leave;
 	}
+	
+	status = ocfs_bh_sem_hash_init();
+	if (status < 0) {
+		LOG_ERROR_STATUS (status);
+		goto leave;
+	}
 
         /* Initialize the DLM */
 	status = ocfs_init_dlm ();
@@ -457,6 +463,9 @@
 
 leave:
 	if (status < 0) {
+		if (OcfsGlobalCtxt.bh_sem_hash && ocfs_bh_sem_hash_destroy() < 0) 
+			LOG_ERROR_STR("failed to destroy bh_sem hashtable");
+
 		/* Free up lookaside lists */
 		if (OcfsGlobalCtxt.flags & OCFS_FLAG_MEM_LISTS_INITIALIZED)
 			ocfs_free_mem_lists ();
@@ -608,6 +617,9 @@
 	ocfs_down_sem (&(OcfsGlobalCtxt.res), true);
 	OCFS_SET_FLAG (OcfsGlobalCtxt.flags, OCFS_FLAG_SHUTDOWN_VOL_THREAD);
 
+	if (ocfs_bh_sem_hash_destroy() < 0) 
+		LOG_ERROR_STR("failed to destroy bh_sem hashtable");
+
 	if (OcfsGlobalCtxt.flags & OCFS_FLAG_MEM_LISTS_INITIALIZED)
 		ocfs_free_mem_lists ();
 
@@ -670,7 +682,7 @@
 		LOG_ERROR_STR("failed to read bitmap data");
 		return -EIO;
 	}
-	bm_lock = (ocfs_bitmap_lock *)OCFS_BH_GET_DATA(bh);	
+	bm_lock = (ocfs_bitmap_lock *)OCFS_BH_GET_DATA_READ(bh); /* read */
 
         if (numbits >= bm_lock->used_bits)
             freebits = numbits - bm_lock->used_bits;
@@ -742,6 +754,10 @@
 	OcfsGlobalCtxt.extent_cache = kmem_cache_create ("extent_cache",
 		sizeof(ocfs_extent) + OCFS_POINTER_SIZE, 0, SLAB_NO_REAP | SLAB_HWCACHE_ALIGN,
 		NULL, NULL);
+	
+	OcfsGlobalCtxt.bh_sem_cache = kmem_cache_create ("bh_sem_cache",
+		sizeof(ocfs_bh_sem), 0, SLAB_NO_REAP | SLAB_HWCACHE_ALIGN,
+		NULL, NULL);
 
 	OCFS_SET_FLAG (OcfsGlobalCtxt.flags, OCFS_FLAG_MEM_LISTS_INITIALIZED);
 
@@ -759,6 +775,7 @@
 	kmem_cache_destroy (OcfsGlobalCtxt.fe_cache);
 	kmem_cache_destroy (OcfsGlobalCtxt.lockres_cache);
 	kmem_cache_destroy (OcfsGlobalCtxt.extent_cache);
+	kmem_cache_destroy (OcfsGlobalCtxt.bh_sem_cache);
 	OCFS_CLEAR_FLAG (OcfsGlobalCtxt.flags, OCFS_FLAG_MEM_LISTS_INITIALIZED);
 }				/* ocfs_free_mem_lists */
 
@@ -800,8 +817,8 @@
 	for (i=0; i<2; i++)
 		wait_on_buffer(bhs[i]);
 
-	vol_header = (ocfs_vol_disk_hdr *) OCFS_BH_GET_DATA(bhs[0]);
-	vol_label = (ocfs_vol_label *) OCFS_BH_GET_DATA(bhs[1]);
+	vol_header = (ocfs_vol_disk_hdr *) OCFS_BH_GET_DATA_READ(bhs[0]); /* read */
+	vol_label = (ocfs_vol_label *) OCFS_BH_GET_DATA_READ(bhs[1]); /* read */
 
 	LOG_TRACE_STR ("ocfs_verify_volume...");
 	status = ocfs_verify_volume (vol_header);
@@ -1085,6 +1102,9 @@
 //    list_del(&osb->osb_next);  /* this has been moved into ocfs_delete_osb */
 	ocfs_up_sem (&(OcfsGlobalCtxt.res));
 
+	/* destroy the inode hash */
+	ocfs_inode_hash_destroy(&osb->inode_hash);
+
 	osb->vol_state = VOLUME_DISMOUNTED;
 	if (AcquiredOSB) {
 		ocfs_up_sem (&(osb->osb_res));

Modified: trunk/src/sysfile.c
===================================================================
--- trunk/src/sysfile.c	2003-12-18 23:28:02 UTC (rev 14)
+++ trunk/src/sysfile.c	2004-01-24 01:22:15 UTC (rev 15)
@@ -90,7 +90,7 @@
 		LOG_ERROR_STATUS(status);
 		goto leave;
 	}
-	fe = (ocfs_file_entry *) OCFS_BH_GET_DATA(fe_bh);
+	fe = (ocfs_file_entry *) OCFS_BH_GET_DATA_WRITE(fe_bh); /* write */
 
 	memset (fe, 0, sizeof (ocfs_file_entry));
 	/*  Set the Flag to use the Local Extents */
@@ -118,11 +118,8 @@
 	}
 
 leave:
-	if (fe_bh) {
-		if (fe)
-			OCFS_BH_PUT_DATA(fe_bh);
+	if (fe_bh)
 		brelse(fe_bh);
-	}
 	LOG_EXIT_STATUS (status);
 	return status;
 }				/* ocfs_init_system_file */
@@ -166,7 +163,7 @@
 		goto leave;
 	}
 
-	fe = (ocfs_file_entry *) OCFS_BH_GET_DATA(fe_bh);
+	fe = (ocfs_file_entry *) OCFS_BH_GET_DATA_READ(fe_bh); /* read */
 
 	if (!IS_VALID_FILE_ENTRY (fe)) {
 		LOG_ERROR_STATUS(status = -EINVAL);
@@ -223,35 +220,7 @@
 	return status;
 }				/* ocfs_read_system_file */
 
-
 /*
- * ocfs_write_system_file()
- * This should disappear actually -- we oughta just use ocfs_write_bhs
- *
- * NOTE: 'Length' and 'Offset' are essentially ignored -- the
- * entire buffer_head array is written out to disk!
- */
-int ocfs_write_system_file (ocfs_super * osb, __u64 FileId, struct buffer_head *bhs[], __u64 Length, __u64 Offset)
-{
-	int status = 0;
-	__u32 numblocks;
-
-	LOG_ENTRY_ARGS ("(FileId = %u)\n", FileId);
-
-	if (Offset != 0)
-		LOG_ERROR_STR("Asked to write at non zero offset, but we" \
-			      " don't support that yet!");
-
-	numblocks = (Length + 511) >> 9;
-	status = ocfs_write_bhs(osb, bhs, numblocks, 0, NULL);
-	if (status < 0)
-		LOG_ERROR_STATUS (status);
-
-	LOG_EXIT_STATUS (status);
-	return status;
-}				/* ocfs_write_system_file */
-
-/*
  * ocfs_file_to_disk_off()
  *
  */
@@ -275,7 +244,7 @@
 		goto leave;
 	}
 
-	fe = (ocfs_file_entry *) OCFS_BH_GET_DATA(fe_bh);
+	fe = (ocfs_file_entry *) OCFS_BH_GET_DATA_READ(fe_bh); /* read */
 
 	if (!IS_VALID_FILE_ENTRY (fe)) {
 		LOG_ERROR_STATUS(status = -EINVAL);
@@ -295,7 +264,7 @@
 	/*  Return the disk offset of first run . */
 	StartOffset = (IoRuns[0].disk_off);
 
-      leave:
+leave:
 	if (fe_bh) {
 		OCFS_BH_PUT_DATA(fe_bh);
 		brelse(fe_bh);
@@ -340,7 +309,7 @@
 		goto leave;
 	}
 
-	fe = (ocfs_file_entry *) OCFS_BH_GET_DATA(fe_bh);
+	fe = (ocfs_file_entry *) OCFS_BH_GET_DATA_READ(fe_bh); /* read */
 
 	if (!IS_VALID_FILE_ENTRY (fe)) {
 		LOG_ERROR_ARGS("offset=%u.%u", HILO (offset));
@@ -369,7 +338,7 @@
  * of course, if you've already read it off disk, then give us fe_bh to avoid
  * an extra read. We always do the write out of the new fe.
  */
-int ocfs_extend_system_file (ocfs_super * osb, __u32 FileId, __u64 FileSize, struct buffer_head *fe_bh, ocfs_journal_handle *handle)
+int ocfs_extend_system_file (ocfs_super * osb, __u32 FileId, __u64 FileSize, struct buffer_head *fe_bh, ocfs_journal_handle *handle, bool zero)
 {
 	int status = 0;
 	__u64 actualDiskOffset = 0, actualLength = 0;
@@ -377,6 +346,10 @@
 	bool local_fe = false;
 	ocfs_file_entry *fe = NULL;
 	int flags = OCFS_BH_COND_CACHED;
+	__u64 alloc_size;
+	int numbhs, i;
+	char *data;
+	struct buffer_head **bhs;
 
 	LOG_ENTRY_ARGS ("(FileId = %u, Size = %u.%u)\n", FileId, HI (FileSize),
 			LO (FileSize));
@@ -397,33 +370,33 @@
 			goto leave;
 		}
 	}
-	fe = (ocfs_file_entry *) OCFS_BH_GET_DATA(fe_bh);
+	fe = (ocfs_file_entry *) OCFS_BH_GET_DATA_READ(fe_bh); /* read */
 
 	if (!IS_VALID_FILE_ENTRY (fe)) {
+		OCFS_BH_PUT_DATA(fe_bh);
 		LOG_ERROR_STATUS (status = -EINVAL);
 		goto leave;
 	}
+	alloc_size = fe->alloc_size;
+	OCFS_BH_PUT_DATA(fe_bh);
+	fe = NULL;
 
 	if (handle) {
-		OCFS_BH_PUT_DATA(fe_bh);
 		status = ocfs_journal_access(handle, fe_bh, 
 					     OCFS_JOURNAL_ACCESS_WRITE);
-		fe = (ocfs_file_entry *) OCFS_BH_GET_DATA(fe_bh);
 		if (status < 0) {
 			LOG_ERROR_STATUS (status);
 			goto leave;
 		}
 	}
 
-	if (FileSize <= fe->alloc_size) {
-		fe->file_size = FileSize;
-	} else {
+	if (FileSize > alloc_size) {
 		/*  We need to allocate from bitmap */
 		__u64 numClusterAlloc = 0, BitmapOffset = 0;
 
 		status =
 		    ocfs_find_contiguous_space_from_bitmap (osb,
-						   FileSize - fe->alloc_size,
+						   FileSize - alloc_size,
 						   &BitmapOffset,
 						   &numClusterAlloc, true, 
 						   NULL);
@@ -438,65 +411,53 @@
 		actualLength =
 		    (__u64) (numClusterAlloc * osb->vol_layout.cluster_size);
 
-#ifdef ZERO_METADATA_BLOCKS
-		/* zero the entire metadata block! */
-		{
-			int nbhs, bufsize, j;
-			__u64 iosize;
-			struct buffer_head **bhs = NULL;
-			char *mem;
+		status = ocfs_allocate_extent (osb, NULL, fe_bh, handle,  
+					       actualDiskOffset, actualLength, NULL);
+		if (status < 0) {
+			LOG_ERROR_STATUS (status);
+			goto leave;
+		}
+		
+		if (zero) {
+			numbhs = actualLength >> 9;
 
-			iosize = OCFS_ALIGN(actualLength, 512);
-			nbhs = (int) (iosize >> 9);
-			bufsize = nbhs * sizeof(struct buffer_head *);
-			LOG_TRACE_ARGS("about to zero out %d new metadata blocks, newlen=%u.%u\n", 
-				       nbhs, iosize);
-
-			bhs = (struct buffer_head **)ocfs_malloc(bufsize);
-			if (bhs == NULL) {
-				LOG_ERROR_STATUS (status = -ENOMEM);
+			bhs = ocfs_malloc(numbhs*sizeof(struct buffer_head *));
+			if (!bhs) {
+				status = -ENOMEM;
+				LOG_ERROR_STATUS(status);
 				goto leave;
 			}
-			memset(bhs, 0, bufsize);
-			status = ocfs_read_bhs(osb, actualDiskOffset, iosize, bhs, 0, NULL);
+			memset(bhs, 0, numbhs * sizeof(struct buffer_head *));
+
+			status = ocfs_read_bhs(osb, actualDiskOffset, 
+					       actualLength, bhs, 0, NULL);
 			if (status < 0) {
-				ocfs_safefree(bhs);
-				LOG_ERROR_STATUS (status);
+				ocfs_free(bhs);
+				LOG_ERROR_STATUS(status);
 				goto leave;
 			}
-			for (j=0; j<nbhs; j++) {
-				mem = OCFS_BH_GET_DATA(bhs[j]);
-				memset(mem, 0, 512);
-				OCFS_BH_PUT_DATA(bhs[j]);
+
+			for(i = 0; i < numbhs; i++) {
+				data = OCFS_BH_GET_DATA_WRITE(bhs[i]);
+				memset(data, 0, 512);
+				OCFS_BH_PUT_DATA(bhs[i]);
 			}
 
-			LOG_TRACE_STR("writing zeroed blocks now");
-
-			status = ocfs_write_bhs(osb, bhs, nbhs, 0, NULL);
-			for (j=0; j<nbhs; j++)
-				brelse(bhs[j]);
-			ocfs_safefree(bhs);
+			status = ocfs_write_bhs(osb, bhs, numbhs, 0, NULL);
+			for(i = 0; i < numbhs; i++)
+				brelse(bhs[i]);
+			ocfs_free(bhs);
 			if (status < 0) {
-				LOG_ERROR_STATUS (status);
+				LOG_ERROR_STATUS(status);
 				goto leave;
 			}
 		}
-#endif
-
-		OCFS_BH_PUT_DATA(fe_bh);
-		fe = NULL;
-
-		status = ocfs_allocate_extent (osb, NULL, fe_bh, handle,  
-					       actualDiskOffset, actualLength, NULL);
-		if (status < 0) {
-			LOG_ERROR_STATUS (status);
-			goto leave;
-		}
+	} else
+		actualLength = 0;
 		
-		fe = (ocfs_file_entry *) OCFS_BH_GET_DATA(fe_bh);
-		fe->alloc_size += actualLength;
-		fe->file_size = FileSize;
-	}
+	fe = (ocfs_file_entry *) OCFS_BH_GET_DATA_WRITE(fe_bh); /* write */
+	fe->alloc_size += actualLength;
+	fe->file_size = FileSize;
 
 	if (!bWriteThru) {
 		DISK_LOCK_CURRENT_MASTER (fe) = osb->node_num;
@@ -508,7 +469,7 @@
 	if (handle)
 		status = ocfs_journal_dirty(handle, fe_bh);
 	else
-		status = ocfs_write_bh(osb, fe_bh, flags, NULL);
+		status = ocfs_write_bh(osb, fe_bh, 0, NULL);
 
 	if (status < 0)
 		LOG_ERROR_STATUS (status);
@@ -655,7 +616,7 @@
 				goto leave;
 			}
 
-			extent = (ocfs_extent_group *) OCFS_BH_GET_DATA(extent_bh);;
+			extent = (ocfs_extent_group *) OCFS_BH_GET_DATA_READ(extent_bh); /* read */
 			while (extent->type != OCFS_EXTENT_DATA) {
 				__u64 diskoffset;
 
@@ -686,7 +647,7 @@
 					LOG_ERROR_STATUS (status);
 					goto leave;
 				}
-				extent = (ocfs_extent_group *) OCFS_BH_GET_DATA(extent_bh);
+				extent = (ocfs_extent_group *) OCFS_BH_GET_DATA_READ(extent_bh); /* read */
 			}
 
 			searchVbo = newOffset;
@@ -818,7 +779,7 @@
 				HILO (allocSize), HILO (neededSize));
 		status = ocfs_extend_system_file (osb,
 				  (OCFS_FILE_VOL_META_DATA + osb->node_num),
-				  neededSize, NULL, NULL);
+				  neededSize, NULL, NULL, false);
 		if (status < 0) {
 			LOG_ERROR_STATUS (status);
 			goto leave;

Modified: trunk/src/util.c
===================================================================
--- trunk/src/util.c	2003-12-18 23:28:02 UTC (rev 14)
+++ trunk/src/util.c	2004-01-24 01:22:15 UTC (rev 15)
@@ -264,24 +264,11 @@
 	if (oin != NULL)
 		*oin = NULL;
 
-	if (inode_data_is_oin (inode)) {
-		ocfs_inode *f = GET_INODE_OIN(inode);
+	if (oin && inode_data_is_oin (inode))
+		*oin = GET_INODE_OIN(inode);
+	*off = GET_INODE_OFFSET (inode);
 
-		if (f == NULL) {
-			LOG_ERROR_STR ("bad inode oin");
-			*off = -1;
-			return false;
-		} else {
-			if (oin != NULL)
-				*oin = f;
-			if (S_ISDIR (inode->i_mode))
-				*off = f->dir_disk_off;
-			else
-				*off = f->file_disk_off;
-		}
-	} else {
-		*off = GET_INODE_OFFSET (inode);
-	}
+	LOG_TRACE_ARGS("offset=%u.%u, i_ino=%u\n", HILO((*off)), inode->i_ino);
 	return (*off != -1);
 }				/* ocfs_linux_get_inode_offset */
 
@@ -303,7 +290,7 @@
 
 	status = ocfs_find_files_on_disk (osb, parentOff, fileName, &ent_bh, NULL, parent_inode);
 	if (status >= 0) {
-		ent = (ocfs_file_entry *) OCFS_BH_GET_DATA(ent_bh);
+		ent = (ocfs_file_entry *) OCFS_BH_GET_DATA_READ(ent_bh); /* read */
 		*off = ent->this_sector;
 		OCFS_BH_PUT_DATA(ent_bh);
 	} else
@@ -360,3 +347,20 @@
 	truncate_inode_pages(&inode->i_data, off);
 #endif
 }				/* ocfs_truncate_inode_pages */
+
+void ocfs_end_buffer_io_sync(struct buffer_head *bh, int uptodate)
+{
+//	LOG_ENTRY_ARGS("(bh->b_blocknr = %u, uptodate = %d)\n", bh->b_blocknr,
+//		       uptodate);
+
+	if (!uptodate)
+		LOG_ERROR_STATUS(-EIO);
+
+	mark_buffer_uptodate(bh, uptodate);
+	unlock_buffer(bh);
+	VERBOSE_UNLOCK_BUFFER_STR(bh);
+
+//	LOG_EXIT();
+	return;
+}
+

Modified: trunk/src/volcfg.c
===================================================================
--- trunk/src/volcfg.c	2003-12-18 23:28:02 UTC (rev 14)
+++ trunk/src/volcfg.c	2004-01-24 01:22:15 UTC (rev 15)
@@ -61,13 +61,14 @@
 
 	/* Obtain the volume for which we need to reiterate the lock */
 	osb = cfg_task->osb;
-	//buffer = cfg_task->buffer;
 	bh = cfg_task->bh;
 	length = osb->sect_size;
 	offset = cfg_task->lock_off;
 
 	/* Write the sector back */
-	status = ocfs_write_bh(osb, bh, 0, NULL);
+	/* NOTE: another thread owns this bh!           */
+	/* we *must* pass OCFS_BH_CONCURRENT_WRITE here */
+	status = ocfs_write_bh(osb, bh, OCFS_BH_CONCURRENT_WRITE, NULL);
 	if (status < 0) {
 		LOG_ERROR_STATUS (status);
 		/* deliberate no exit jump here */
@@ -150,7 +151,7 @@
 	/* Check if preferred node num is available */
 	node_num = OCFS_INVALID_NODE_NUM;
 	if (pref_node_num >= 0 && pref_node_num < OCFS_MAXIMUM_NODES) {
-		p = OCFS_BH_GET_DATA(cfg_bhs[pref_node_num]);
+		p = OCFS_BH_GET_DATA_READ(cfg_bhs[pref_node_num]); /* read */
 		disk_node = (ocfs_disk_node_config_info *)p;
 		if (disk_node->node_name[0] == '\0')
 			node_num = pref_node_num;
@@ -160,7 +161,7 @@
 	/* if not, find the first available empty slot */
 	if (node_num == OCFS_INVALID_NODE_NUM) {
 		for (node_num = 0; node_num < OCFS_MAXIMUM_NODES; node_num++) {
-			p = OCFS_BH_GET_DATA(cfg_bhs[node_num]);
+			p = OCFS_BH_GET_DATA_READ(cfg_bhs[node_num]); /* read */
 			disk_node = (ocfs_disk_node_config_info *) p;
 			if (disk_node->node_name[0] == '\0')
 				done = true;
@@ -179,7 +180,7 @@
 	}
 
 	/* Copy the new nodecfg into the memory buffer */
-	p = OCFS_BH_GET_DATA(cfg_bhs[node_num]);
+	p = OCFS_BH_GET_DATA_WRITE(cfg_bhs[node_num]); /* write */
 	memcpy (p, new_disk_node, sect_size);
 	OCFS_BH_PUT_DATA(cfg_bhs[node_num]);
 
@@ -197,7 +198,7 @@
 		goto finally;
 	}
 
-      finally:
+finally:
 	for (i = 0; i < OCFS_MAXIMUM_NODES; i++)
 		if (cfg_bhs[i])
 			brelse(cfg_bhs[i]);
@@ -236,8 +237,8 @@
 		goto bail;
 	}
 
-	hdr = (ocfs_node_config_hdr *) OCFS_BH_GET_DATA(node_cfg_bhs[0]);
-	hdr_copy = (ocfs_node_config_hdr *) OCFS_BH_GET_DATA(node_cfg_bhs[1]);
+	hdr = (ocfs_node_config_hdr *) OCFS_BH_GET_DATA_WRITE(node_cfg_bhs[0]); /* write */
+	hdr_copy = (ocfs_node_config_hdr *) OCFS_BH_GET_DATA_WRITE(node_cfg_bhs[1]); /* write */
 
 	if (op == OCFS_VOLCFG_ADD)
 		hdr->num_nodes++;
@@ -337,22 +338,15 @@
 	}
 	cfg_task->bh = bh;
 
-	//while (1) {
 	for (i=0; i<50; i++) {
 		/* Read the volcfg lock sector */
-		lock_buffer(bh);
-		if (!buffer_dirty(bh)) {
-#ifdef LINUX_2_5
-			clear_buffer_uptodate(bh);
-#else		
-			mark_buffer_uptodate(bh, false);
-#endif
+		status = ocfs_read_bh(osb, lock_off, &bh, 0, NULL);
+		if (status < 0) {
+			LOG_ERROR_STATUS (status);
+			goto finito;
 		}
-		unlock_buffer(bh);
-		ll_rw_block(READ, 1, &bh);
-		wait_on_buffer(bh);
 
-		lock_buf = OCFS_BH_GET_DATA(bh);
+		lock_buf = OCFS_BH_GET_DATA_WRITE(bh); /* write */
 		bh_locked = true;
 		
 
@@ -390,19 +384,13 @@
 		bh_locked = false;
 
 		/* Read the volcfg lock sector again... */
-		lock_buffer(bh);
-		if (!buffer_dirty(bh)) {
-#ifdef LINUX_2_5
-			clear_buffer_uptodate(bh);
-#else		
-			mark_buffer_uptodate(bh, false);
-#endif
+		status = ocfs_read_bh(osb, lock_off, &bh, 0, NULL);
+		if (status < 0) {
+			LOG_ERROR_STATUS (status);
+			goto finito;
 		}
-		unlock_buffer(bh);
-		ll_rw_block(READ, 1, &bh);
-		wait_on_buffer(bh);
 
-		lock_buf = OCFS_BH_GET_DATA(bh);
+		lock_buf = OCFS_BH_GET_DATA_WRITE(bh); /* write */
 
 		/* If we tried to acquire and we still own it we take it... */
 		if ((tried_acq) && (memcmp (lock_buf, cfg_buf, sect_size) == 0)) {
@@ -454,7 +442,7 @@
 	if (i >= 50)
 		status = -EFAIL;
 
-      finito:
+finito:
 	ocfs_release_disk_lock (osb, lock_off);
 
 	ocfs_safefree (cfg_task);
@@ -499,18 +487,19 @@
 		goto finally;
 	}
 
-	buffer = OCFS_BH_GET_DATA(bh);
+	buffer = OCFS_BH_GET_DATA_WRITE(bh); /* write */
 
 	/* reset lock... */
 	memset (buffer, 0, sect_size);
-#ifdef LINUX_2_5
-		set_buffer_uptodate(bh);
-#else
-		mark_buffer_uptodate(bh, true);
-#endif
-	mark_buffer_dirty(bh);
 	OCFS_BH_PUT_DATA(bh);
 
+	/* Release the lock */
+	status = ocfs_write_bh(osb, bh, 0, NULL);
+	if (status < 0) {
+		LOG_ERROR_STATUS (status);
+		goto finally;
+	}
+
 	/* Cancel the timer so that we don't reiterate the lock anymore */
 	LOG_TRACE_STR ("Waiting for osb->lock_event");
 	atomic_set (&osb->lock_stop, 1);
@@ -518,6 +507,11 @@
 	atomic_set (&osb->lock_event_woken, 0);
 	del_timer_sync(&osb->lock_timer);
 
+	buffer = OCFS_BH_GET_DATA_WRITE(bh); /* write */
+	/* reset lock... */
+	memset (buffer, 0, sect_size);
+	OCFS_BH_PUT_DATA(bh);
+
 	/* Release the lock */
 	status = ocfs_write_bh(osb, bh, 0, NULL);
 	if (status < 0) {
@@ -525,7 +519,7 @@
 		goto finally;
 	}
 
-      finally:
+finally:
 	if (bh)
 		brelse(bh);
 	LOG_EXIT_STATUS (status);
@@ -574,7 +568,7 @@
 		goto bail;
 	}
 
-      bail:
+bail:
 	ocfs_safefree (buffer);
 
 	LOG_EXIT_STATUS (status);
@@ -613,7 +607,7 @@
 
 	(*node)->journal_version = disk->journal_version;
 
-      bail:
+bail:
 	LOG_EXIT_STATUS (status);
 	return status;
 }				/* ocfs_disknode_to_node */
@@ -646,14 +640,8 @@
 		goto finally;
 	}
 
-	buf = OCFS_BH_GET_DATA(bh);
+	buf = OCFS_BH_GET_DATA_WRITE(bh); /* write */
 	memcpy(buf, disk, osb->sect_size);
-#ifdef LINUX_2_5
-	set_buffer_uptodate(bh);
-#else	
-	mark_buffer_uptodate(bh, true);
-#endif	
-	mark_buffer_dirty(bh);
 	OCFS_BH_PUT_DATA(bh);
 
 	status = ocfs_write_bh(osb, bh, 0, NULL);
@@ -668,7 +656,7 @@
 		goto finally;
 	}
 
-      finally:
+finally:
 	if (bh)
 		brelse(bh);
 
@@ -774,7 +762,7 @@
 	}
 
 	/* 1st block in buffer is the NodeCfgHdr */
-	hdr = (ocfs_node_config_hdr *) OCFS_BH_GET_DATA(cfg_bhs[0]);
+	hdr = (ocfs_node_config_hdr *) OCFS_BH_GET_DATA_READ(cfg_bhs[0]); /* read */
 
 	if (strncmp (hdr->signature, NODE_CONFIG_HDR_SIGN,
 		     NODE_CONFIG_SIGN_LEN)) {
@@ -805,7 +793,7 @@
 		int which;
 		which = i + OCFS_VOLCFG_HDR_SECTORS;
 		disk = (ocfs_disk_node_config_info *) 
-			OCFS_BH_GET_DATA(cfg_bhs[which]);
+			OCFS_BH_GET_DATA_READ(cfg_bhs[which]); /* read */
 
 		if (disk->node_name[0] == '\0')
 			goto loop;
@@ -860,7 +848,7 @@
 			status = -EFAIL;
 			goto finally;
 		}
-	loop:
+loop:
 		OCFS_BH_PUT_DATA(cfg_bhs[which]);
 		continue;
 	}
@@ -873,7 +861,7 @@
 
 finally:
 	if (cfg_bhs) {
-		if (cfg_bhs[0] && buffer_locked(cfg_bhs[0]))
+		if (cfg_bhs[0])
 			OCFS_BH_PUT_DATA(cfg_bhs[0]);
 		for (i = 0; i < numblocks; i++)
 			if (cfg_bhs[i])
@@ -925,7 +913,7 @@
 
 	LOG_TRACE_ARGS ("Node Num: %d\n", osb->node_num);
 
-      bail:
+bail:
 	LOG_EXIT_STATUS (status);
 	return status;
 }				/* ocfs_get_config */
@@ -1009,7 +997,7 @@
 		goto bail;
 	}
 
-      bail:
+bail:
 	ocfs_safefree(buffer);
 	LOG_EXIT_STATUS (status);
 	return status;