[Ocfs2-commits] rev 15 - in trunk: . src src/inc
svn-commits at oss.oracle.com
svn-commits at oss.oracle.com
Sat Jan 24 01:22:17 CST 2004
Author: manish
Date: 2004-01-23 19:22:15 -0600 (Fri, 23 Jan 2004)
New Revision: 15
Modified:
trunk/TODO
trunk/config.guess
trunk/config.sub
trunk/configure.in
trunk/install-sh
trunk/mkinstalldirs
trunk/src/Makefile
trunk/src/alloc.c
trunk/src/bitmap.c
trunk/src/dcache.c
trunk/src/dir.c
trunk/src/dlm.c
trunk/src/extmap.c
trunk/src/file.c
trunk/src/hash.c
trunk/src/heartbeat.c
trunk/src/inc/journal.h
trunk/src/inc/ocfs.h
trunk/src/inc/proto.h
trunk/src/inode.c
trunk/src/ioctl.c
trunk/src/journal.c
trunk/src/namei.c
trunk/src/nm.c
trunk/src/oin.c
trunk/src/osb.c
trunk/src/sem.c
trunk/src/super.c
trunk/src/sysfile.c
trunk/src/util.c
trunk/src/volcfg.c
Log:
Sync
Modified: trunk/TODO
===================================================================
--- trunk/TODO 2003-12-18 23:28:02 UTC (rev 14)
+++ trunk/TODO 2004-01-24 01:22:15 UTC (rev 15)
@@ -5,15 +5,11 @@
for the main bitmap. Data writes to the bitmap files can be
writethrough or journalled (with delayed playback).
-* Make bitmap reads/writes only read/write those blocks which we care about
+* Make bitmap reads only read those blocks which we care about
* Make bitmap free functions do their job without relocking the bitmaps for
each record.
-* Investigate whether we should put dirty cached writes into the
- inodes dirty_data_buffers list or not. How does this interact with the
- journalling code?
-
* get rid of osb->curr_trans_id as it was never used (always zero)
* get rid of all the:
Modified: trunk/config.guess
===================================================================
--- trunk/config.guess 2003-12-18 23:28:02 UTC (rev 14)
+++ trunk/config.guess 2004-01-24 01:22:15 UTC (rev 15)
@@ -3,7 +3,7 @@
# Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
# 2000, 2001, 2002, 2003 Free Software Foundation, Inc.
-timestamp='2003-10-07'
+timestamp='2004-01-05'
# This file is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by
@@ -221,6 +221,9 @@
mvmeppc:OpenBSD:*:*)
echo powerpc-unknown-openbsd${UNAME_RELEASE}
exit 0 ;;
+ pegasos:OpenBSD:*:*)
+ echo powerpc-unknown-openbsd${UNAME_RELEASE}
+ exit 0 ;;
pmax:OpenBSD:*:*)
echo mipsel-unknown-openbsd${UNAME_RELEASE}
exit 0 ;;
@@ -307,6 +310,9 @@
*:OS/390:*:*)
echo i370-ibm-openedition
exit 0 ;;
+ *:OS400:*:*)
+ echo powerpc-ibm-os400
+ exit 0 ;;
arm:RISC*:1.[012]*:*|arm:riscix:1.[012]*:*)
echo arm-acorn-riscix${UNAME_RELEASE}
exit 0;;
@@ -742,6 +748,11 @@
FUJITSU_REL=`echo ${UNAME_RELEASE} | sed -e 's/ /_/'`
echo "${FUJITSU_PROC}-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}"
exit 0 ;;
+ 5000:UNIX_System_V:4.*:*)
+ FUJITSU_SYS=`uname -p | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/\///'`
+ FUJITSU_REL=`echo ${UNAME_RELEASE} | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/ /_/'`
+ echo "sparc-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}"
+ exit 0 ;;
i*86:BSD/386:*:* | i*86:BSD/OS:*:* | *:Ascend\ Embedded/OS:*:*)
echo ${UNAME_MACHINE}-pc-bsdi${UNAME_RELEASE}
exit 0 ;;
@@ -986,6 +997,9 @@
i*86:atheos:*:*)
echo ${UNAME_MACHINE}-unknown-atheos
exit 0 ;;
+ i*86:syllable:*:*)
+ echo ${UNAME_MACHINE}-pc-syllable
+ exit 0 ;;
i*86:LynxOS:2.*:* | i*86:LynxOS:3.[01]*:* | i*86:LynxOS:4.0*:*)
echo i386-unknown-lynxos${UNAME_RELEASE}
exit 0 ;;
@@ -1172,7 +1186,7 @@
*:QNX:*:4*)
echo i386-pc-qnx
exit 0 ;;
- NSR-[DGKLNPTVWY]:NONSTOP_KERNEL:*:*)
+ NSR-?:NONSTOP_KERNEL:*:*)
echo nsr-tandem-nsk${UNAME_RELEASE}
exit 0 ;;
*:NonStop-UX:*:*)
@@ -1216,6 +1230,9 @@
SEI:*:*:SEIUX)
echo mips-sei-seiux${UNAME_RELEASE}
exit 0 ;;
+ *:DRAGONFLY:*:*)
+ echo ${UNAME_MACHINE}-unknown-dragonfly${UNAME_RELEASE}
+ exit 0 ;;
esac
#echo '(No uname command or uname output not recognized.)' 1>&2
Modified: trunk/config.sub
===================================================================
--- trunk/config.sub 2003-12-18 23:28:02 UTC (rev 14)
+++ trunk/config.sub 2004-01-24 01:22:15 UTC (rev 15)
@@ -3,7 +3,7 @@
# Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
# 2000, 2001, 2002, 2003 Free Software Foundation, Inc.
-timestamp='2003-10-07'
+timestamp='2004-01-05'
# This file is (in principle) common to ALL GNU software.
# The presence of a machine in this file suggests that SOME GNU software
@@ -118,7 +118,8 @@
# Here we must recognize all the valid KERNEL-OS combinations.
maybe_os=`echo $1 | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\2/'`
case $maybe_os in
- nto-qnx* | linux-gnu* | linux-dietlibc | kfreebsd*-gnu* | knetbsd*-gnu* | netbsd*-gnu* | storm-chaos* | os2-emx* | rtmk-nova*)
+ nto-qnx* | linux-gnu* | linux-dietlibc | linux-uclibc* | uclinux-uclibc* | uclinux-gnu* | \
+ kfreebsd*-gnu* | knetbsd*-gnu* | netbsd*-gnu* | storm-chaos* | os2-emx* | rtmk-nova*)
os=-$maybe_os
basic_machine=`echo $1 | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\1/'`
;;
@@ -379,6 +380,9 @@
amd64)
basic_machine=x86_64-pc
;;
+ amd64-*)
+ basic_machine=x86_64-`echo $basic_machine | sed 's/^[^-]*-//'`
+ ;;
amdahl)
basic_machine=580-amdahl
os=-sysv
@@ -743,6 +747,10 @@
basic_machine=or32-unknown
os=-coff
;;
+ os400)
+ basic_machine=powerpc-ibm
+ os=-os400
+ ;;
OSE68000 | ose68000)
basic_machine=m68000-ericsson
os=-ose
@@ -963,6 +971,10 @@
tower | tower-32)
basic_machine=m68k-ncr
;;
+ tpf)
+ basic_machine=s390x-ibm
+ os=-tpf
+ ;;
udi29k)
basic_machine=a29k-amd
os=-udi
@@ -1137,13 +1149,13 @@
| -udi* | -eabi* | -lites* | -ieee* | -go32* | -aux* \
| -chorusos* | -chorusrdb* \
| -cygwin* | -pe* | -psos* | -moss* | -proelf* | -rtems* \
- | -mingw32* | -linux-gnu* | -uxpv* | -beos* | -mpeix* | -udk* \
+ | -mingw32* | -linux-gnu* | -linux-uclibc* | -uxpv* | -beos* | -mpeix* | -udk* \
| -interix* | -uwin* | -mks* | -rhapsody* | -darwin* | -opened* \
| -openstep* | -oskit* | -conix* | -pw32* | -nonstopux* \
| -storm-chaos* | -tops10* | -tenex* | -tops20* | -its* \
| -os2* | -vos* | -palmos* | -uclinux* | -nucleus* \
| -morphos* | -superux* | -rtmk* | -rtmk-nova* | -windiss* \
- | -powermax* | -dnix* | -nx6 | -nx7 | -sei*)
+ | -powermax* | -dnix* | -nx6 | -nx7 | -sei* | -dragonfly*)
# Remember, each alternative MUST END IN *, to match a version number.
;;
-qnx*)
@@ -1182,6 +1194,9 @@
-opened*)
os=-openedition
;;
+ -os400*)
+ os=-os400
+ ;;
-wince*)
os=-wince
;;
@@ -1203,6 +1218,9 @@
-atheos*)
os=-atheos
;;
+ -syllable*)
+ os=-syllable
+ ;;
-386bsd)
os=-bsd
;;
@@ -1225,6 +1243,9 @@
-sinix*)
os=-sysv4
;;
+ -tpf*)
+ os=-tpf
+ ;;
-triton*)
os=-sysv3
;;
@@ -1473,9 +1494,15 @@
-mvs* | -opened*)
vendor=ibm
;;
+ -os400*)
+ vendor=ibm
+ ;;
-ptx*)
vendor=sequent
;;
+ -tpf*)
+ vendor=ibm
+ ;;
-vxsim* | -vxworks* | -windiss*)
vendor=wrs
;;
Modified: trunk/configure.in
===================================================================
--- trunk/configure.in 2003-12-18 23:28:02 UTC (rev 14)
+++ trunk/configure.in 2004-01-24 01:22:15 UTC (rev 15)
@@ -38,12 +38,19 @@
;;
esac
+KERNEL_CFLAGS=
+
case "$host_cpu" in
+ powerpc64)
+ OCFS_PROCESSOR="ppc64"
+ KERNEL_CFLAGS="-m64"
+ ;;
ia64)
OCFS_PROCESSOR="ia64"
;;
x86_64)
OCFS_PROCESSOR="x86_64"
+ KERNEL_CFLAGS="-m64"
;;
i386|i486|i586|i686|i786|k6|k7)
OCFS_PROCESSOR="i686"
@@ -84,7 +91,7 @@
fi
AC_SUBST(OCFS_AIO)
-AC_ARG_ENABLE(memdebug, [ --enable-mem-debug=[yes/no] Turn on memory debugging [default=no]],,enable_memdebug=no)
+AC_ARG_ENABLE(memdebug, [ --enable-memdebug=[yes/no] Turn on memory debugging [default=no]],,enable_memdebug=no)
OCFS_MEMDEBUG=
if test "x$enable_memdebug" = "xyes"; then
OCFS_MEMDEBUG=yes
@@ -125,7 +132,10 @@
fi
saved_CPPFLAGS="$CPPFLAGS"
+saved_CFLAGS="$CFLAGS"
+
CPPFLAGS="-I$KERNELINC $CPPFLAGS"
+CFLAGS="$KERNEL_CFLAGS $CFLAGS"
AC_MSG_CHECKING(for kernel version)
rm -f conf.kvertest
@@ -259,6 +269,7 @@
AC_SUBST(MODVERSIONS)
CPPFLAGS="$saved_CPPFLAGS"
+CFLAGS="$saved_CFLAGS"
AC_MSG_CHECKING(for directory for kernel modules)
AC_ARG_WITH(moddir, [ --with-moddir=/path Path to where modules should be installed [/lib/modules/<KVER>/fs]], moddir="$withval", moddir="/lib/modules/$kversion/kernel/fs")
Modified: trunk/install-sh
===================================================================
--- trunk/install-sh 2003-12-18 23:28:02 UTC (rev 14)
+++ trunk/install-sh 2004-01-24 01:22:15 UTC (rev 15)
@@ -1,7 +1,8 @@
#!/bin/sh
-#
# install - install a program, script, or datafile
-#
+
+scriptversion=2004-01-12.10
+
# This originates from X11R5 (mit/util/scripts/install.sh), which was
# later released in X11R6 (xc/config/util/install.sh) with the
# following copyright and license.
@@ -41,13 +42,11 @@
# from scratch. It can only install one file at a time, a restriction
# shared with many OS's install programs.
-
# set DOITPROG to echo to test this script
# Don't use :- since 4.3BSD and earlier shells don't like it.
doit="${DOITPROG-}"
-
# put in absolute paths if you don't have them in your path; or use env. vars.
mvprog="${MVPROG-mv}"
@@ -59,236 +58,253 @@
rmprog="${RMPROG-rm}"
mkdirprog="${MKDIRPROG-mkdir}"
-transformbasename=""
-transform_arg=""
+transformbasename=
+transform_arg=
instcmd="$mvprog"
chmodcmd="$chmodprog 0755"
-chowncmd=""
-chgrpcmd=""
-stripcmd=""
+chowncmd=
+chgrpcmd=
+stripcmd=
rmcmd="$rmprog -f"
mvcmd="$mvprog"
-src=""
-dst=""
-dir_arg=""
+src=
+dst=
+dir_arg=
-while [ x"$1" != x ]; do
- case $1 in
- -c) instcmd=$cpprog
- shift
- continue;;
+usage="Usage: $0 [OPTION]... SRCFILE DSTFILE
+ or: $0 [OPTION]... SRCFILES... DIRECTORY
+ or: $0 -d DIRECTORIES...
- -d) dir_arg=true
- shift
- continue;;
+In the first form, install SRCFILE to DSTFILE, removing SRCFILE by default.
+In the second, create the directory path DIR.
- -m) chmodcmd="$chmodprog $2"
- shift
- shift
- continue;;
+Options:
+-b=TRANSFORMBASENAME
+-c copy source (using $cpprog) instead of moving (using $mvprog).
+-d create directories instead of installing files.
+-g GROUP $chgrp installed files to GROUP.
+-m MODE $chmod installed files to MODE.
+-o USER $chown installed files to USER.
+-s strip installed files (using $stripprog).
+-t=TRANSFORM
+--help display this help and exit.
+--version display version info and exit.
- -o) chowncmd="$chownprog $2"
- shift
- shift
- continue;;
+Environment variables override the default commands:
+ CHGRPPROG CHMODPROG CHOWNPROG CPPROG MKDIRPROG MVPROG RMPROG STRIPPROG
+"
- -g) chgrpcmd="$chgrpprog $2"
- shift
- shift
- continue;;
+while test -n "$1"; do
+ case $1 in
+ -b=*) transformbasename=`echo $1 | sed 's/-b=//'`
+ shift
+ continue;;
- -s) stripcmd=$stripprog
- shift
- continue;;
+ -c) instcmd=$cpprog
+ shift
+ continue;;
- -t=*) transformarg=`echo $1 | sed 's/-t=//'`
- shift
- continue;;
+ -d) dir_arg=true
+ shift
+ continue;;
- -b=*) transformbasename=`echo $1 | sed 's/-b=//'`
- shift
- continue;;
+ -g) chgrpcmd="$chgrpprog $2"
+ shift
+ shift
+ continue;;
- *) if [ x"$src" = x ]
- then
- src=$1
- else
- # this colon is to work around a 386BSD /bin/sh bug
- :
- dst=$1
- fi
- shift
- continue;;
- esac
-done
+ --help) echo "$usage"; exit 0;;
-if [ x"$src" = x ]
-then
- echo "$0: no input file specified" >&2
- exit 1
-else
- :
-fi
+ -m) chmodcmd="$chmodprog $2"
+ shift
+ shift
+ continue;;
-if [ x"$dir_arg" != x ]; then
- dst=$src
- src=""
+ -o) chowncmd="$chownprog $2"
+ shift
+ shift
+ continue;;
- if [ -d "$dst" ]; then
- instcmd=:
- chmodcmd=""
- else
- instcmd=$mkdirprog
- fi
-else
+ -s) stripcmd=$stripprog
+ shift
+ continue;;
-# Waiting for this to be detected by the "$instcmd $src $dsttmp" command
-# might cause directories to be created, which would be especially bad
-# if $src (and thus $dsttmp) contains '*'.
+ -t=*) transformarg=`echo $1 | sed 's/-t=//'`
+ shift
+ continue;;
- if [ -f "$src" ] || [ -d "$src" ]
- then
- :
- else
- echo "$0: $src does not exist" >&2
- exit 1
- fi
+ --version) echo "$0 $scriptversion"; exit 0;;
- if [ x"$dst" = x ]
- then
- echo "$0: no destination specified" >&2
- exit 1
- else
- :
- fi
+ *) # When -d is used, all remaining arguments are directories to create.
+ test -n "$dir_arg" && break
+ # Otherwise, the last argument is the destination. Remove it from $@.
+ for arg
+ do
+ if test -n "$dstarg"; then
+ # $@ is not empty: it contains at least $arg.
+ set fnord "$@" "$dstarg"
+ shift # fnord
+ fi
+ shift # arg
+ dstarg=$arg
+ done
+ break;;
+ esac
+done
-# If destination is a directory, append the input filename; if your system
-# does not like double slashes in filenames, you may need to add some logic
-
- if [ -d "$dst" ]
- then
- dst=$dst/`basename "$src"`
- else
- :
- fi
+if test -z "$1"; then
+ if test -z "$dir_arg"; then
+ echo "$0: no input file specified." >&2
+ exit 1
+ fi
+ # It's OK to call `install-sh -d' without argument.
+ # This can happen when creating conditional directories.
+ exit 0
fi
-## this sed command emulates the dirname command
-dstdir=`echo "$dst" | sed -e 's,[^/]*$,,;s,/$,,;s,^$,.,'`
+for src
+do
+ # Protect names starting with `-'.
+ case $src in
+ -*) src=./$src ;;
+ esac
-# Make sure that the destination directory exists.
-# this part is taken from Noah Friedman's mkinstalldirs script
+ if test -n "$dir_arg"; then
+ dst=$src
+ src=
-# Skip lots of stat calls in the usual case.
-if [ ! -d "$dstdir" ]; then
-defaultIFS='
- '
-IFS="${IFS-$defaultIFS}"
+ if test -d "$dst"; then
+ instcmd=:
+ chmodcmd=
+ else
+ instcmd=$mkdirprog
+ fi
+ else
+ # Waiting for this to be detected by the "$instcmd $src $dsttmp" command
+ # might cause directories to be created, which would be especially bad
+ # if $src (and thus $dsttmp) contains '*'.
+ if test ! -f "$src" && test ! -d "$src"; then
+ echo "$0: $src does not exist." >&2
+ exit 1
+ fi
-oIFS=$IFS
-# Some sh's can't handle IFS=/ for some reason.
-IFS='%'
-set - `echo "$dstdir" | sed -e 's@/@%@g' -e 's@^%@/@'`
-IFS=$oIFS
+ if test -z "$dstarg"; then
+ echo "$0: no destination specified." >&2
+ exit 1
+ fi
-pathcomp=''
+ dst=$dstarg
+ # Protect names starting with `-'.
+ case $dst in
+ -*) dst=./$dst ;;
+ esac
-while [ $# -ne 0 ] ; do
- pathcomp=$pathcomp$1
- shift
+ # If destination is a directory, append the input filename; won't work
+ # if double slashes aren't ignored.
+ if test -d "$dst"; then
+ dst=$dst/`basename "$src"`
+ fi
+ fi
- if [ ! -d "$pathcomp" ] ;
- then
- $mkdirprog "$pathcomp"
- else
- :
- fi
+ # This sed command emulates the dirname command.
+ dstdir=`echo "$dst" | sed -e 's,[^/]*$,,;s,/$,,;s,^$,.,'`
- pathcomp=$pathcomp/
-done
-fi
+ # Make sure that the destination directory exists.
-if [ x"$dir_arg" != x ]
-then
- $doit $instcmd "$dst" &&
+ # Skip lots of stat calls in the usual case.
+ if test ! -d "$dstdir"; then
+ defaultIFS='
+ '
+ IFS="${IFS-$defaultIFS}"
- if [ x"$chowncmd" != x ]; then $doit $chowncmd "$dst"; else : ; fi &&
- if [ x"$chgrpcmd" != x ]; then $doit $chgrpcmd "$dst"; else : ; fi &&
- if [ x"$stripcmd" != x ]; then $doit $stripcmd "$dst"; else : ; fi &&
- if [ x"$chmodcmd" != x ]; then $doit $chmodcmd "$dst"; else : ; fi
-else
+ oIFS=$IFS
+ # Some sh's can't handle IFS=/ for some reason.
+ IFS='%'
+ set - `echo "$dstdir" | sed -e 's@/@%@g' -e 's@^%@/@'`
+ IFS=$oIFS
-# If we're going to rename the final executable, determine the name now.
+ pathcomp=
- if [ x"$transformarg" = x ]
- then
- dstfile=`basename "$dst"`
- else
- dstfile=`basename "$dst" $transformbasename |
- sed $transformarg`$transformbasename
- fi
+ while test $# -ne 0 ; do
+ pathcomp=$pathcomp$1
+ shift
+ test -d "$pathcomp" || $mkdirprog "$pathcomp"
+ pathcomp=$pathcomp/
+ done
+ fi
-# don't allow the sed command to completely eliminate the filename
+ if test -n "$dir_arg"; then
+ $doit $instcmd "$dst" \
+ && { test -z "$chowncmd" || $doit $chowncmd "$dst"; } \
+ && { test -z "$chgrpcmd" || $doit $chgrpcmd "$dst"; } \
+ && { test -z "$stripcmd" || $doit $stripcmd "$dst"; } \
+ && { test -z "$chmodcmd" || $doit $chmodcmd "$dst"; }
- if [ x"$dstfile" = x ]
- then
- dstfile=`basename "$dst"`
- else
- :
- fi
+ else
+ # If we're going to rename the final executable, determine the name now.
+ if test -z "$transformarg"; then
+ dstfile=`basename "$dst"`
+ else
+ dstfile=`basename "$dst" $transformbasename \
+ | sed $transformarg`$transformbasename
+ fi
-# Make a couple of temp file names in the proper directory.
+ # don't allow the sed command to completely eliminate the filename.
+ test -z "$dstfile" && dstfile=`basename "$dst"`
- dsttmp=$dstdir/_inst.$$_
- rmtmp=$dstdir/_rm.$$_
+ # Make a couple of temp file names in the proper directory.
+ dsttmp=$dstdir/_inst.$$_
+ rmtmp=$dstdir/_rm.$$_
-# Trap to clean up temp files at exit.
+ # Trap to clean up those temp files at exit.
+ trap 'status=$?; rm -f "$dsttmp" "$rmtmp" && exit $status' 0
+ trap '(exit $?); exit' 1 2 13 15
- trap 'status=$?; rm -f "$dsttmp" "$rmtmp" && exit $status' 0
- trap '(exit $?); exit' 1 2 13 15
+ # Move or copy the file name to the temp name
+ $doit $instcmd "$src" "$dsttmp" &&
-# Move or copy the file name to the temp name
+ # and set any options; do chmod last to preserve setuid bits.
+ #
+ # If any of these fail, we abort the whole thing. If we want to
+ # ignore errors from any of these, just make sure not to ignore
+ # errors from the above "$doit $instcmd $src $dsttmp" command.
+ #
+ { test -z "$chowncmd" || $doit $chowncmd "$dsttmp"; } \
+ && { test -z "$chgrpcmd" || $doit $chgrpcmd "$dsttmp"; } \
+ && { test -z "$stripcmd" || $doit $stripcmd "$dsttmp"; } \
+ && { test -z "$chmodcmd" || $doit $chmodcmd "$dsttmp"; } &&
- $doit $instcmd "$src" "$dsttmp" &&
+ # Now remove or move aside any old file at destination location. We
+ # try this two ways since rm can't unlink itself on some systems and
+ # the destination file might be busy for other reasons. In this case,
+ # the final cleanup might fail but the new file should still install
+ # successfully.
+ {
+ if test -f "$dstdir/$dstfile"; then
+ $doit $rmcmd -f "$dstdir/$dstfile" 2>/dev/null \
+ || $doit $mvcmd -f "$dstdir/$dstfile" "$rmtmp" 2>/dev/null \
+ || {
+ echo "$0: cannot unlink or rename $dstdir/$dstfile" >&2
+ (exit 1); exit
+ }
+ else
+ :
+ fi
+ } &&
-# and set any options; do chmod last to preserve setuid bits
+ # Now rename the file to the real destination.
+ $doit $mvcmd "$dsttmp" "$dstdir/$dstfile"
+ fi || { (exit 1); exit; }
+done
-# If any of these fail, we abort the whole thing. If we want to
-# ignore errors from any of these, just make sure not to ignore
-# errors from the above "$doit $instcmd $src $dsttmp" command.
-
- if [ x"$chowncmd" != x ]; then $doit $chowncmd "$dsttmp"; else :;fi &&
- if [ x"$chgrpcmd" != x ]; then $doit $chgrpcmd "$dsttmp"; else :;fi &&
- if [ x"$stripcmd" != x ]; then $doit $stripcmd "$dsttmp"; else :;fi &&
- if [ x"$chmodcmd" != x ]; then $doit $chmodcmd "$dsttmp"; else :;fi &&
-
-# Now remove or move aside any old file at destination location. We try this
-# two ways since rm can't unlink itself on some systems and the destination
-# file might be busy for other reasons. In this case, the final cleanup
-# might fail but the new file should still install successfully.
-
-{
- if [ -f "$dstdir/$dstfile" ]
- then
- $doit $rmcmd -f "$dstdir/$dstfile" 2>/dev/null ||
- $doit $mvcmd -f "$dstdir/$dstfile" "$rmtmp" 2>/dev/null ||
- {
- echo "$0: cannot unlink or rename $dstdir/$dstfile" >&2
- (exit 1); exit
- }
- else
- :
- fi
-} &&
-
-# Now rename the file to the real destination.
-
- $doit $mvcmd "$dsttmp" "$dstdir/$dstfile"
-
-fi &&
-
# The final little trick to "correctly" pass the exit status to the exit trap.
-
{
- (exit 0); exit
+ (exit 0); exit
}
+
+# Local variables:
+# eval: (add-hook 'write-file-hooks 'time-stamp)
+# time-stamp-start: "scriptversion="
+# time-stamp-format: "%:y-%02m-%02d.%02H"
+# time-stamp-end: "$"
+# End:
Modified: trunk/mkinstalldirs
===================================================================
--- trunk/mkinstalldirs 2003-12-18 23:28:02 UTC (rev 14)
+++ trunk/mkinstalldirs 2004-01-24 01:22:15 UTC (rev 15)
@@ -1,20 +1,32 @@
#! /bin/sh
# mkinstalldirs --- make directory hierarchy
-# Author: Noah Friedman <friedman at prep.ai.mit.edu>
+
+scriptversion=2003-11-08.23
+
+# Original author: Noah Friedman <friedman at prep.ai.mit.edu>
# Created: 1993-05-16
-# Public domain
+# Public domain.
+#
+# This file is maintained in Automake, please report
+# bugs to <bug-automake at gnu.org> or send patches to
+# <automake-patches at gnu.org>.
errstatus=0
dirmode=""
usage="\
-Usage: mkinstalldirs [-h] [--help] [-m mode] dir ..."
+Usage: mkinstalldirs [-h] [--help] [--version] [-m MODE] DIR ...
+Create each directory DIR (with mode MODE, if specified), including all
+leading file name components.
+
+Report bugs to <bug-automake at gnu.org>."
+
# process command line arguments
while test $# -gt 0 ; do
case $1 in
-h | --help | --h*) # -h for help
- echo "$usage" 1>&2
+ echo "$usage"
exit 0
;;
-m) # -m PERM arg
@@ -23,6 +35,10 @@
dirmode=$1
shift
;;
+ --version)
+ echo "$0 $scriptversion"
+ exit 0
+ ;;
--) # stop option processing
shift
break
@@ -55,12 +71,25 @@
if mkdir -p -- . 2>/dev/null; then
echo "mkdir -p -- $*"
exec mkdir -p -- "$@"
+ else
+ # On NextStep and OpenStep, the `mkdir' command does not
+ # recognize any option. It will interpret all options as
+ # directories to create, and then abort because `.' already
+ # exists.
+ test -d ./-p && rmdir ./-p
+ test -d ./-- && rmdir ./--
fi
;;
*)
if mkdir -m "$dirmode" -p -- . 2>/dev/null; then
echo "mkdir -m $dirmode -p -- $*"
exec mkdir -m "$dirmode" -p -- "$@"
+ else
+ # Clean up after NextStep and OpenStep mkdir.
+ for d in ./-m ./-p ./-- "./$dirmode";
+ do
+ test -d $d && rmdir $d
+ done
fi
;;
esac
@@ -84,17 +113,17 @@
mkdir "$pathcomp" || lasterr=$?
if test ! -d "$pathcomp"; then
- errstatus=$lasterr
+ errstatus=$lasterr
else
- if test ! -z "$dirmode"; then
+ if test ! -z "$dirmode"; then
echo "chmod $dirmode $pathcomp"
- lasterr=""
- chmod "$dirmode" "$pathcomp" || lasterr=$?
+ lasterr=""
+ chmod "$dirmode" "$pathcomp" || lasterr=$?
- if test ! -z "$lasterr"; then
- errstatus=$lasterr
- fi
- fi
+ if test ! -z "$lasterr"; then
+ errstatus=$lasterr
+ fi
+ fi
fi
fi
@@ -107,5 +136,8 @@
# Local Variables:
# mode: shell-script
# sh-indentation: 2
+# eval: (add-hook 'write-file-hooks 'time-stamp)
+# time-stamp-start: "scriptversion="
+# time-stamp-format: "%:y-%02m-%02d.%02H"
+# time-stamp-end: "$"
# End:
-# mkinstalldirs ends here
Modified: trunk/src/Makefile
===================================================================
--- trunk/src/Makefile 2003-12-18 23:28:02 UTC (rev 14)
+++ trunk/src/Makefile 2004-01-24 01:22:15 UTC (rev 15)
@@ -2,17 +2,12 @@
include $(TOPDIR)/Preamble.make
-ifeq ($(OCFS_PROCESSOR),x86_64)
- WARNINGS = -Wall -Wstrict-prototypes -Wno-format
-else
- WARNINGS = -Wall -Wstrict-prototypes -Wno-format -Wmissing-prototypes \
- -Wmissing-declarations
+WARNINGS = -Wall -Wstrict-prototypes -Wno-format
+
+ifneq ($(OCFS_PROCESSOR),x86_64)
+WARNINGS += -Wmissing-prototypes -Wmissing-declarations
endif
-#REMOVE THIS NEXT LINE AFTER DONE MOVING PROTOTYPES AROUND
-# WARNINGS = -Wall -Wstrict-prototypes -Wno-format \
-# -Wmissing-declarations
-
ifdef OCFS_DEBUG
OPTS += -g
endif
@@ -44,15 +39,17 @@
DEFINES += -DUSE_JOURNAL_CREATE_REPLACEMENT
endif
+DEFINES += -DVERBOSE_BH_SEM
DEFINES += -DDEBUG_LOCK_BUFFER
DEFINES += -DVERBOSE_BH_JBD_TRACE
DEFINES += -DVERBOSE_LOCKING_TRACE
ifneq ($(OCFS_PROCESSOR),ia64)
-DEFINES += -DOCFS_DBG_TIMING
+#DEFINES += -DOCFS_DBG_TIMING
endif
DEFINES += -DALLOW_NO_HANDLE_SYNCING
+DEFINES += -DOCFS_PARANOID_ABORTS
ifeq ($(KVER),vmware)
KERNELINC = /usr/src/linux-2.4/include
@@ -92,23 +89,27 @@
-fomit-frame-pointer $(MODVERSIONS) $(WARNINGS)
LDADD=-nostdlib
+OPTIMIZE = -O2
+
+ifeq ($(OCFS_PROCESSOR),ppc64)
+ DEFINES += -D__LP64__
+ CFLAGS += -m64 -fsigned-char -fno-builtin -msoft-float -mminimal-toc
+ LDADD += -m elf64ppc
+endif
ifeq ($(OCFS_PROCESSOR),x86_64)
- DEFINES += -D__OPTIMIZE__
- CFLAGS += -mcmodel=kernel
- CFLAGS += -O0 -m64 -finline-functions
+ CFLAGS += -m64 -mcmodel=kernel
endif
ifeq ($(OCFS_PROCESSOR),ia64)
- #DEFINES += -D__OPTIMIZE__
- CFLAGS += -O2
endif
ifeq ($(OCFS_PROCESSOR),i686)
DEFINES += -D__ILP32__
- CFLAGS += -O2
endif
ifeq ($(OCFS_PROCESSOR),i586)
- CFLAGS += -O2
+ DEFINES += -D__ILP32__
endif
+CFLAGS += $(OPTIMIZE)
+
MODULES = ocfs.o
@@ -123,6 +124,7 @@
hash.c \
heartbeat.c \
inode.c \
+ io.c \
ioctl.c \
journal.c \
namei.c \
@@ -158,7 +160,7 @@
HFILES = \
inc/journal.h \
inc/ocfs.h \
- inc/ocfsio.h \
+ inc/io.h \
inc/proto.h
$(CFILES): $(HFILES)
Modified: trunk/src/alloc.c
===================================================================
--- trunk/src/alloc.c 2003-12-18 23:28:02 UTC (rev 14)
+++ trunk/src/alloc.c 2004-01-24 01:22:15 UTC (rev 15)
@@ -22,7 +22,7 @@
__u64 length, struct inode *inode);
static int _squish_extent_entries(ocfs_super *osb, ocfs_alloc_ext *extarr,
- __u32 *freeExtent,
+ __u8 *freeExtent,
ocfs_bitmap_free_head * free_head,
__u64 FileSize, bool flag, struct inode *inode) ;
@@ -44,7 +44,8 @@
static inline int ocfs_free_main_bitmap(ocfs_super *osb,
ocfs_free_rec *freelog);
-static int ocfs_alloc_new_window(ocfs_super *osb, struct buffer_head *lock_bh);
+static int ocfs_alloc_new_window(ocfs_super *osb, struct buffer_head *lock_bh,
+ ocfs_journal_handle *hanlde);
static int ocfs_sync_local_to_main(ocfs_super *osb,
ocfs_bitmap_free_head **f,
struct buffer_head *local_alloc_bh,
@@ -98,6 +99,9 @@
( (type == DISK_ALLOC_EXTENT_NODE) ?
"DISK_ALLOC_EXTENT_NODE" : "DISK_ALLOC_DIR_NODE" ));
+ if (len == 0)
+ BUG();
+
log = f->tail;
/* need a new one? */
@@ -311,12 +315,10 @@
if (free_vol_bits != NULL) {
ocfs_bitmap_lock *bm_lock;
- bm_lock = (ocfs_bitmap_lock *)OCFS_BH_GET_DATA(globalbh);
+ bm_lock = (ocfs_bitmap_lock *)OCFS_BH_GET_DATA_WRITE(globalbh); /* write */
bm_lock->used_bits = ocfs_count_bits(&osb->cluster_bitmap);
OCFS_BH_PUT_DATA(globalbh);
-/* status = ocfs_write_force_disk(osb, bm_lock, OCFS_SECTOR_SIZE,
- OCFS_BITMAP_LOCK_OFFSET);*/
status = ocfs_write_bh(osb, globalbh, 0, NULL);
if (status < 0) {
LOG_ERROR_STATUS (status);
@@ -398,6 +400,8 @@
int status;
__u32 bitmapblocks; /* we only care about the valid blocks */
+ LOG_ENTRY();
+
bitmap = &osb->cluster_bitmap;
bitmapblocks = (OCFS_ALIGN(bitmap->validbits, OCFS_BITS_IN_CHUNK) / OCFS_BITS_IN_CHUNK);
@@ -418,6 +422,13 @@
for (i = 0; i < freelog->num_updates; i++)
ocfs_clear_bits(bitmap, freelog->update[i].file_off, freelog->update[i].length);
+ /* we don't know which blocks we've changed and which
+ * haven't, so just write them all out */
+ for(i = 0; i < bitmapblocks; i++) {
+ OCFS_BH_GET_DATA_WRITE(bitmap->chunk[i]);
+ OCFS_BH_PUT_DATA(bitmap->chunk[i]);
+ }
+
status = ocfs_write_bhs(osb, bitmap->chunk, bitmapblocks, 0, NULL);
if (status < 0) {
LOG_ERROR_STATUS(status);
@@ -426,6 +437,7 @@
status = 0;
bail:
+ LOG_EXIT_STATUS(status);
return(0);
}
@@ -544,9 +556,14 @@
/* ocfs_free_main_bitmap handles all the reads/writes for the
* main bitmap */
if (Type != DISK_ALLOC_VOLUME) {
- status = ocfs_write_system_file(osb, fileId, tmpbitmap->chunk,
- bitmapblocks * osb->sect_size,
- offset);
+ /* we don't know which blocks we've changed and which
+ * haven't, so just write them all out */
+ for(i = 0; i < bitmapblocks; i++) {
+ OCFS_BH_GET_DATA_WRITE(tmpbitmap->chunk[i]);
+ OCFS_BH_PUT_DATA(tmpbitmap->chunk[i]);
+ }
+ status = ocfs_write_bhs(osb, tmpbitmap->chunk, bitmapblocks,
+ 0, NULL);
if (status < 0) {
LOG_ERROR_STATUS (status);
goto leave;
@@ -602,7 +619,7 @@
goto finally;
}
}
- extent_header = (ocfs_extent_group *) OCFS_BH_GET_DATA(extent_header_bh);
+ extent_header = (ocfs_extent_group *) OCFS_BH_GET_DATA_WRITE(extent_header_bh); /* write */
bh_locked = 1;
}
if (extent_header != NULL) {
@@ -651,7 +668,7 @@
goto finally;
}
}
- buff = OCFS_BH_GET_DATA(header_bhs[i]);
+ buff = OCFS_BH_GET_DATA_WRITE(header_bhs[i]); /* write */
memset(buff, 0, osb->sect_size);
/* TODO: Do we really need to do this? */
@@ -687,7 +704,7 @@
/* Fill in all the headers and the leaf */
for (i = 0; i <= depth; i++) {
ocfs_extent_group *ext;
- ext = (ocfs_extent_group *) OCFS_BH_GET_DATA(header_bhs[i]);
+ ext = (ocfs_extent_group *) OCFS_BH_GET_DATA_WRITE(header_bhs[i]); /* write */
ext->last_ext_ptr = lastExtPointer;
ext->up_hdr_node_ptr = upHeaderPtr;
@@ -770,7 +787,7 @@
goto finally;
}
}
- extent_header = (ocfs_extent_group *) OCFS_BH_GET_DATA(bh);
+ extent_header = (ocfs_extent_group *) OCFS_BH_GET_DATA_WRITE(bh); /* write */
if (!IS_VALID_EXTENT_HEADER(extent_header)) {
OCFS_BH_PUT_DATA(bh);
brelse(bh);
@@ -848,7 +865,7 @@
LOG_ERROR_STATUS (status = -ENOMEM);
goto finally;
}
- real_fe = (ocfs_file_entry *)OCFS_BH_GET_DATA(fe_bh);
+ real_fe = (ocfs_file_entry *)OCFS_BH_GET_DATA_READ(fe_bh); /* read */
memcpy(fe, real_fe, 512);
OCFS_BH_PUT_DATA(fe_bh);
real_fe = NULL;
@@ -881,7 +898,7 @@
LOG_ERROR_STATUS(status);
goto finally;
}
- buf = OCFS_BH_GET_DATA(bhs[i]);
+ buf = OCFS_BH_GET_DATA_WRITE(bhs[i]); /* write */
memset(buf, 0, osb->sect_size);
#ifdef LINUX_2_5
set_buffer_uptodate(bhs[i]);
@@ -900,7 +917,7 @@
goto finally;
}
- OcfsExtent = (ocfs_extent_group *) OCFS_BH_GET_DATA(bhs[0]);
+ OcfsExtent = (ocfs_extent_group *) OCFS_BH_GET_DATA_WRITE(bhs[0]); /* write */
/* Copy the File Entry information in to the newly allocated sector */
for (k = 0; k < OCFS_MAX_FILE_ENTRY_EXTENTS; k++) {
OcfsExtent->extents[k].file_off = fe->extents[k].file_off;
@@ -928,7 +945,7 @@
upHeaderPtr = fe->this_sector;
for (i = 0; i < fe->granularity; i++) {
- ExtentHeader = (ocfs_extent_group *) OCFS_BH_GET_DATA(bhs[i]);
+ ExtentHeader = (ocfs_extent_group *) OCFS_BH_GET_DATA_WRITE(bhs[i]); /* write */
ExtentHeader->type = OCFS_EXTENT_HEADER;
ExtentHeader->granularity = (fe->granularity - 1) - i;
@@ -965,7 +982,7 @@
}
/* Update the Data Segment, which is the last one in our array */
- OcfsExtent = (ocfs_extent_group *) OCFS_BH_GET_DATA(bhs[fe->granularity]);
+ OcfsExtent = (ocfs_extent_group *) OCFS_BH_GET_DATA_WRITE(bhs[fe->granularity]); /* write */
i = (fe->granularity) ? 0 : OCFS_MAX_FILE_ENTRY_EXTENTS;
@@ -1022,7 +1039,7 @@
}
}
- ext = (ocfs_extent_group *) OCFS_BH_GET_DATA(bh);
+ ext = (ocfs_extent_group *) OCFS_BH_GET_DATA_WRITE(bh); /* write */
if (!IS_VALID_EXTENT_DATA(ext)) {
OCFS_BH_PUT_DATA(bh);
brelse(bh);
@@ -1030,7 +1047,7 @@
goto finally;
}
- ext->next_data_ext = OcfsExtent->this_ext;
+ ext->next_data_ext = lastExtentPtr;
OCFS_BH_PUT_DATA(bh);
if (handle)
@@ -1069,7 +1086,7 @@
}
}
- ext = (ocfs_extent_group *) OCFS_BH_GET_DATA(bh);
+ ext = (ocfs_extent_group *) OCFS_BH_GET_DATA_WRITE(bh); /* write */
ext->up_hdr_node_ptr = new_up_hdr_ptr;
OCFS_BH_PUT_DATA(bh);
@@ -1104,7 +1121,7 @@
finally:
if (fe) {
- real_fe = (ocfs_file_entry *)OCFS_BH_GET_DATA(fe_bh);
+ real_fe = (ocfs_file_entry *)OCFS_BH_GET_DATA_WRITE(fe_bh); /* write */
memcpy(real_fe, fe, 512);
OCFS_BH_PUT_DATA(fe_bh);
real_fe = NULL;
@@ -1131,7 +1148,7 @@
LOG_ENTRY_ARGS("(actualDiskOffset=%u.%u, actualLength=%u.%u)\n", actualDiskOffset, actualLength);
- FileEntry = (ocfs_file_entry *)OCFS_BH_GET_DATA(fe_bh);
+ FileEntry = (ocfs_file_entry *)OCFS_BH_GET_DATA_WRITE(fe_bh); /* write */
OCFS_ASSERT (FileEntry);
if (!IS_VALID_FILE_ENTRY (FileEntry)) {
@@ -1140,6 +1157,7 @@
}
if (FileEntry->local_ext) {
+ LOG_TRACE_STR("Using local extents");
/* We are still using the local extents of File Entry */
if (FileEntry->next_free_ext > OCFS_MAX_FILE_ENTRY_EXTENTS) {
LOG_ERROR_STATUS(status = -EINVAL);
@@ -1170,6 +1188,7 @@
IncreaseTreeDepth = true;
goto increase_depth;
}
+ LOG_TRACE_STR("Using NON-local extents");
/*** Nonlocal Extents ***/
if (FileEntry->granularity > 3)
@@ -1183,7 +1202,7 @@
LOG_ERROR_STATUS (status = -EINVAL);
goto finally;
}
- extent = (ocfs_extent_group *) OCFS_BH_GET_DATA(extent_bh);
+ extent = (ocfs_extent_group *) OCFS_BH_GET_DATA_WRITE(extent_bh); /* write */ /* but not if journalled */
if (!IS_VALID_EXTENT_DATA(extent)) {
LOG_ERROR_STATUS (status = -EINVAL);
goto finally;
@@ -1201,8 +1220,7 @@
OCFS_BH_PUT_DATA(extent_bh);
ocfs_journal_access(handle, extent_bh,
OCFS_JOURNAL_ACCESS_WRITE);
- extent = (ocfs_extent_group *)
- OCFS_BH_GET_DATA(extent_bh);
+ extent = (ocfs_extent_group *) OCFS_BH_GET_DATA_WRITE(extent_bh); /* write */ /* journal_access */
}
extent->extents[k].num_bytes += actualLength;
status = 0;
@@ -1219,8 +1237,7 @@
OCFS_BH_PUT_DATA(extent_bh);
ocfs_journal_access(handle, extent_bh,
OCFS_JOURNAL_ACCESS_WRITE);
- extent = (ocfs_extent_group *)
- OCFS_BH_GET_DATA(extent_bh);
+ extent = (ocfs_extent_group *) OCFS_BH_GET_DATA_WRITE(extent_bh); /* write */ /* journal access */
}
extent->extents[k].file_off = FileEntry->alloc_size;
extent->extents[k].num_bytes = actualLength;
@@ -1250,7 +1267,7 @@
LOG_ERROR_STATUS (status);
goto finally;
}
- extent_header = (ocfs_extent_group *) OCFS_BH_GET_DATA(extent_header_bh);
+ extent_header = (ocfs_extent_group *) OCFS_BH_GET_DATA_READ(extent_header_bh); /* read */
if (!IS_VALID_EXTENT_HEADER(extent_header)) {
LOG_ERROR_STATUS (status = -EINVAL);
goto finally;
@@ -1299,8 +1316,7 @@
OCFS_BH_PUT_DATA(extent_bh);
ocfs_journal_access(handle, extent_bh,
OCFS_JOURNAL_ACCESS_WRITE);
- extent = (ocfs_extent_group *)
- OCFS_BH_GET_DATA(extent_bh);
+ extent = (ocfs_extent_group *) OCFS_BH_GET_DATA_WRITE(extent_bh); /* write */ /* journal access */
}
extent->next_data_ext = newExtentOff;
@@ -1353,8 +1369,7 @@
}
}
- extent_header = (ocfs_extent_group *)
- OCFS_BH_GET_DATA(extent_header_bh);
+ extent_header = (ocfs_extent_group *) OCFS_BH_GET_DATA_WRITE(extent_header_bh); /* write */
if (!IS_VALID_EXTENT_HEADER(extent_header)) {
LOG_ERROR_STATUS (status = -EINVAL);
goto finally;
@@ -1425,7 +1440,7 @@
* mapping run.So just adding this entry will be
* fine. */
if (FileEntry == NULL)
- FileEntry = (ocfs_file_entry *)OCFS_BH_GET_DATA(fe_bh);
+ FileEntry = (ocfs_file_entry *)OCFS_BH_GET_DATA_READ(fe_bh); /* read */
Vbo = FileEntry->alloc_size;
Lbo = actualDiskOffset;
@@ -1453,6 +1468,7 @@
if (FileEntry) {
OCFS_BH_PUT_DATA(fe_bh);
}
+
LOG_EXIT_STATUS (status);
return (status);
} /* ocfs_allocate_extent */
@@ -1468,7 +1484,7 @@
* 'flag' seems to be an indicator that (if true) tells us that we already know
* we're gonna have to clear out all of extarr.
*/
-int _squish_extent_entries(ocfs_super *osb, ocfs_alloc_ext *extarr, __u32 *freeExtent, ocfs_bitmap_free_head *free_head, __u64 FileSize, bool flag, struct inode *inode)
+int _squish_extent_entries(ocfs_super *osb, ocfs_alloc_ext *extarr, __u8 *freeExtent, ocfs_bitmap_free_head *free_head, __u64 FileSize, bool flag, struct inode *inode)
{
int status = 0;
bool FirstTime = true;
@@ -1484,6 +1500,7 @@
LOG_ENTRY ();
firstfree = *freeExtent;
+
/* loop through the used alloc_extents */
for (i = 0; i < firstfree; i++) {
ext = &(extarr[i]);
@@ -1541,6 +1558,9 @@
* including itself, it's children, and any data blocks they point to.
* Works fine with any granularity (up to 4, in which case we'd need
* more stack space)
+ *
+ * extent_grp_bh will be unchanged, though it will be marked for
+ * deletion in free_head.
*/
/* We can't recurse, so we keep a simple stack of ocfs_extent_groups. */
@@ -1554,20 +1574,22 @@
__u64 tmp_off;
__u32 num_sectors = 0, bitmap_offset = 0;
ocfs_alloc_ext *ext;
- struct buffer_head * bh_stack[OCFS_TREE_STACK_SIZE];
+ struct buffer_head *tmp_bh = NULL;
+ char * stack[OCFS_TREE_STACK_SIZE];
ocfs_extent_group * cur_extent; /* convenience, points to TOS */
int tos = 0;
LOG_ENTRY();
for (i =0; i < OCFS_TREE_STACK_SIZE; i++)
- bh_stack[i] = NULL;
+ stack[i] = NULL;
- bh_stack[tos] = extent_grp_bh;
+ stack[tos] = ocfs_malloc(512);
+ memcpy(stack[tos], OCFS_BH_GET_DATA_READ(extent_grp_bh), 512);
+ OCFS_BH_PUT_DATA(extent_grp_bh);
do {
- cur_extent = (ocfs_extent_group *)
- OCFS_BH_GET_DATA(bh_stack[tos]);
+ cur_extent = (ocfs_extent_group *) stack[tos];
if (!IS_VALID_EXTENT_DATA(cur_extent) &&
!IS_VALID_EXTENT_HEADER(cur_extent)) {
@@ -1576,7 +1598,7 @@
}
if (IS_VALID_EXTENT_DATA(cur_extent)) {
- LOG_PID_PRINTK("found some data to free (%u.%u)\n", HI(cur_extent->this_ext), LO(cur_extent->this_ext));
+ LOG_TRACE_ARGS("found some data to free (%u.%u)\n", HI(cur_extent->this_ext), LO(cur_extent->this_ext));
for(i = 0; i < cur_extent->next_free_ext; i++) {
/* Free the data associated with each header */
ext = &cur_extent->extents[i];
@@ -1594,7 +1616,7 @@
/* Did we already kill all his children, or
* are they already dead? */
if (cur_extent->next_free_ext == 0) {
- LOG_PID_PRINTK("Popping this header (%u.%u)\n", HI(cur_extent->this_ext), LO(cur_extent->this_ext), cur_extent->next_free_ext);
+ LOG_TRACE_ARGS("Popping this header (%u.%u)\n", HI(cur_extent->this_ext), LO(cur_extent->this_ext), cur_extent->next_free_ext);
goto free_meta;
}
@@ -1611,26 +1633,29 @@
tmp_off = cur_extent->extents[victim].disk_off;
cur_extent->next_free_ext--;
- OCFS_BH_PUT_DATA(bh_stack[tos]);
cur_extent = NULL;
tos++;
/* should already be null, but we can do this
* just in case. */
- bh_stack[tos] = NULL;
+ stack[tos] = ocfs_malloc(512);
- status = ocfs_read_bh(osb, tmp_off, &bh_stack[tos],
+ status = ocfs_read_bh(osb, tmp_off, &tmp_bh,
OCFS_BH_COND_CACHED, inode);
if (status < 0) {
LOG_ERROR_STATUS (status);
goto bail;
}
+ memcpy(stack[tos], OCFS_BH_GET_DATA_READ(tmp_bh), 512);
+ OCFS_BH_PUT_DATA(tmp_bh);
+ brelse(tmp_bh);
+ tmp_bh = NULL;
/* We only want to free on our way back up the tree */
continue;
}
- free_meta:
+free_meta:
/* Free the metadata associated with this extent group */
status = ocfs_add_to_bitmap_free_head(osb, free_head, 1, cur_extent->alloc_file_off, cur_extent->alloc_node, DISK_ALLOC_EXTENT_NODE);
if (status < 0) {
@@ -1638,22 +1663,17 @@
goto bail;
}
/* Pop one off the stack */
- OCFS_BH_PUT_DATA(bh_stack[tos]);
- brelse(bh_stack[tos]);
- bh_stack[tos] = NULL;
+ ocfs_free(stack[tos]);
+ stack[tos] = NULL;
cur_extent = NULL;
tos--;
} while (tos >= 0);
status = 0;
bail:
- if (cur_extent)
- OCFS_BH_PUT_DATA(bh_stack[tos]);
- /* brelse the stack. We never brelse the bottom of the stack
- * because we were passed that guy from the caller */
- for(i = 1; i < OCFS_TREE_STACK_SIZE; i++)
- if (bh_stack[i])
- brelse(bh_stack[i]);
+ for(i = 0; i < OCFS_TREE_STACK_SIZE; i++)
+ if (stack[i])
+ ocfs_free(stack[i]);
LOG_EXIT_STATUS (status);
return(status);
@@ -1675,7 +1695,7 @@
goto bail;
}
- group = (ocfs_extent_group *) OCFS_BH_GET_DATA(group_bh);
+ group = (ocfs_extent_group *) OCFS_BH_GET_DATA_WRITE(group_bh); /* write */
if (!IS_VALID_EXTENT_DATA(group) &&
!IS_VALID_EXTENT_HEADER(group)) {
@@ -1737,7 +1757,7 @@
because I can't recreate one. */
if (gran == 3) {
LOG_ERROR_STR("Truncating file with granularity 3, this is not tested and may be unsafe!");
- LOG_PID_STR("Found a granularity 3 tree, trimming it.\n");
+ LOG_TRACE_STR("Found a granularity 3 tree, trimming it.\n");
status = ocfs_journal_access(handle, extent_grp_bh,
OCFS_JOURNAL_ACCESS_WRITE);
@@ -1745,7 +1765,7 @@
LOG_ERROR_STATUS(status);
goto bail;
}
- extent_grp = (ocfs_extent_group *) OCFS_BH_GET_DATA(extent_grp_bh);
+ extent_grp = (ocfs_extent_group *) OCFS_BH_GET_DATA_WRITE(extent_grp_bh); /* write */ /* journal access */
for(i = (extent_grp->next_free_ext - 1); i>=0; i--) {
ext = &extent_grp->extents[i];
@@ -1799,7 +1819,6 @@
goto bail;
}
extent_grp_bh = tmp_bh2;
- LOG_PID_STR("Ok, continuing as if granularity = 2");
/* We want to do the next bit of stuff too */
gran = 2;
@@ -1810,7 +1829,7 @@
/* get rid of everything from the top level HDR that we can, then
proceeed as if we're granularity 1 (which we know works) */
if (gran == 2) {
- LOG_PID_STR("Found a granularity 2 tree, trimming it.\n");
+ LOG_TRACE_STR("Found a granularity 2 tree, trimming it.\n");
status = ocfs_journal_access(handle, extent_grp_bh,
OCFS_JOURNAL_ACCESS_WRITE);
@@ -1819,7 +1838,7 @@
goto bail;
}
- extent_grp = (ocfs_extent_group *) OCFS_BH_GET_DATA(extent_grp_bh);
+ extent_grp = (ocfs_extent_group *) OCFS_BH_GET_DATA_WRITE(extent_grp_bh); /* write */ /* journal access */
for(i = (extent_grp->next_free_ext - 1); i>=0; i--) {
ext = &extent_grp->extents[i];
@@ -1874,7 +1893,6 @@
}
extent_grp_bh = tmp_bh;
- LOG_PID_STR("Ok, continuing as if granularity = 1");
/* Right now, we don't use 'gran' below here, but just
* in case */
@@ -1901,7 +1919,7 @@
goto bail;
}
- AllocExtent = (ocfs_extent_group *) OCFS_BH_GET_DATA(bh_stack[tos]);
+ AllocExtent = (ocfs_extent_group *) OCFS_BH_GET_DATA_WRITE(bh_stack[tos]); /* write */ /* journal access */
if (!IS_VALID_EXTENT_DATA(AllocExtent) &&
!IS_VALID_EXTENT_HEADER(AllocExtent)) {
@@ -1911,10 +1929,8 @@
if (IS_VALID_EXTENT_DATA(AllocExtent)) {
/* shall we just do away with him? */
- LOG_PID_STR("Found a whole data extent!");
- /* changed this from > to >= */
if (AllocExtent->extents[0].file_off >= newsize) {
- LOG_PID_PRINTK("Killing this data extent (%u, %u)\n", HI(AllocExtent->this_ext), LO(AllocExtent->this_ext));
+ LOG_TRACE_ARGS("Killing this data extent (%u, %u)\n", HI(AllocExtent->this_ext), LO(AllocExtent->this_ext));
/* Boundary case - what if this guy is
* the last DAT we should delete
* (i.e., split no more ;) */
@@ -1926,12 +1942,11 @@
goto bail;
}
/* silly, but what to do? */
- AllocExtent = (ocfs_extent_group *)
- OCFS_BH_GET_DATA(bh_stack[tos]);
+ AllocExtent = (ocfs_extent_group *) OCFS_BH_GET_DATA_READ(bh_stack[tos]); /* read */
} else {
/* Alright, we know for sure that
* we're splitting in this guy. */
- LOG_PID_PRINTK("Splitting this data extent (%u, %u)\n", HI(AllocExtent->this_ext), LO(AllocExtent->this_ext));
+ LOG_TRACE_ARGS("Splitting this data extent (%u, %u)\n", HI(AllocExtent->this_ext), LO(AllocExtent->this_ext));
fe->last_ext_ptr = AllocExtent->this_ext;
AllocExtent->next_data_ext = 0;
/* total_bytes is used below to know
@@ -1944,7 +1959,7 @@
* it: */
ext = &AllocExtent->extents[AllocExtent->next_free_ext - 1];
if ((ext->file_off + ext->num_bytes)==newsize){
- LOG_PID_STR("Ok, hit that boundary in the DAT");
+ LOG_TRACE_STR("Ok, hit that boundary in the DAT");
goto fix_headers;
}
@@ -1987,7 +2002,6 @@
}
} /* For loop */
- LOG_PID_PRINTK("Writing that data extent back out to disk now (%u,%u)\n", HI(AllocExtent->this_ext), LO(AllocExtent->this_ext));
/* Either way, we need to write this back out*/
OCFS_BH_PUT_DATA(bh_stack[tos]);
AllocExtent = NULL;
@@ -1998,20 +2012,19 @@
goto bail;
}
- LOG_PID_PRINTK("Fixing the headers above us! (tos=%d)\n", tos);
- fix_headers:
+ LOG_TRACE_ARGS("Fixing the headers above us! (tos=%d)\n", tos);
+fix_headers:
/*And here we should fix the headers above us*/
tos--;
while (tos >= 0) {
- LOG_PID_PRINTK("at top of loop, tos=%d\n", tos);
+ LOG_TRACE_ARGS("at top of loop, tos=%d\n", tos);
status = ocfs_journal_access(handle, bh_stack[tos], OCFS_JOURNAL_ACCESS_WRITE);
if (status < 0) {
LOG_ERROR_STATUS(status);
goto bail;
}
- AllocExtent = (ocfs_extent_group *)
- OCFS_BH_GET_DATA(bh_stack[tos]);
+ AllocExtent = (ocfs_extent_group *) OCFS_BH_GET_DATA_WRITE(bh_stack[tos]); /* write */ /* journal access */
victim = AllocExtent->next_free_ext;
AllocExtent->next_free_ext++;
/* need to also update
@@ -2034,7 +2047,7 @@
}
tos--;
}
- LOG_PID_STR("breaking to end function now!");
+ LOG_TRACE_STR("breaking to end function now!");
/* Ok, done! */
break;
}
@@ -2044,7 +2057,7 @@
* are they already dead? */
if (AllocExtent->next_free_ext == 0) {
/*Ok, we're done with this guy, pop the stack*/
- LOG_PID_PRINTK("Popping this header (%u.%u)\n",
+ LOG_TRACE_ARGS("Popping this header (%u.%u)\n",
HI(AllocExtent->this_ext),
LO(AllocExtent->this_ext),
AllocExtent->next_free_ext);
@@ -2064,7 +2077,7 @@
/* changed this from > to >= */
/* Do we just delete this whole part of the tree? */
if (AllocExtent->extents[0].file_off >= newsize) {
- LOG_PID_PRINTK("whacking this tree: (%u.%u)\n",
+ LOG_TRACE_ARGS("whacking this tree: (%u.%u)\n",
HI(AllocExtent->this_ext),
LO(AllocExtent->this_ext));
@@ -2094,8 +2107,7 @@
goto bail;
}
- AllocExtent = (ocfs_extent_group *)
- OCFS_BH_GET_DATA(bh_stack[tos]);
+ AllocExtent = (ocfs_extent_group *) OCFS_BH_GET_DATA_WRITE(bh_stack[tos]); /* write */ /* journal access */
victim = AllocExtent->next_free_ext;
AllocExtent->extents[victim].file_off = 0;
@@ -2151,8 +2163,7 @@
}
/* need to get the next offset to read */
- AllocExtent = (ocfs_extent_group *)
- OCFS_BH_GET_DATA(bh_stack[tos]);
+ AllocExtent = (ocfs_extent_group *) OCFS_BH_GET_DATA_WRITE(bh_stack[tos]); /* write */
AllocExtent->next_free_ext--;
victim = AllocExtent->next_free_ext;
ext = &AllocExtent->extents[victim];
@@ -2233,7 +2244,7 @@
LOG_ENTRY ();
if (fe->next_free_ext == 0) {
- LOG_PID_STR("setting to zero as there isn't any used extents");
+ LOG_TRACE_STR("setting to zero as there isn't any used extents");
fe->last_ext_ptr = 0;
status = 0;
goto bail;
@@ -2247,7 +2258,7 @@
goto bail;
}
- extent = (ocfs_extent_group *) OCFS_BH_GET_DATA(extent_bh);
+ extent = (ocfs_extent_group *) OCFS_BH_GET_DATA_READ(extent_bh); /* read */
if (!IS_VALID_EXTENT_DATA(extent) &&
!IS_VALID_EXTENT_HEADER(extent)) {
@@ -2274,7 +2285,7 @@
LOG_ERROR_STATUS(status);
goto bail;
}
- extent = (ocfs_extent_group *) OCFS_BH_GET_DATA(extent_bh);
+ extent = (ocfs_extent_group *) OCFS_BH_GET_DATA_READ(extent_bh); /* read */
}
fe->last_ext_ptr = extent->this_ext;
@@ -2317,9 +2328,10 @@
/* local extents */
if (FileEntry->local_ext) {
- LOG_PID_STR("local extents, calling _squish_extent_entries");
- status = _squish_extent_entries(osb, FileEntry->extents, (__u32 *)&FileEntry->next_free_ext, free_head, alloc_size, false, inode);
- LOG_PID_PRINTK("return from _squish_extent_entries, status=%d", status);
+ status = _squish_extent_entries(osb, FileEntry->extents,
+ &FileEntry->next_free_ext,
+ free_head, alloc_size, false,
+ inode);
if (status < 0) {
LOG_ERROR_STATUS (status);
goto finally;
@@ -2327,14 +2339,14 @@
goto finally;
}
- LOG_PID_PRINTK("non-local extents. taking that code path, truncating to alloc_size of (%u.%u)\n", HI(alloc_size), LO(alloc_size));
+ LOG_TRACE_ARGS("non-local extents. taking that code path, truncating to alloc_size of (%u.%u)\n", HI(alloc_size), LO(alloc_size));
/* non-local extents */
updated_lep = false;
/* Loop backwards through only the used free extent headers here */
for (i = (FileEntry->next_free_ext - 1); i >= 0; i--) {
- LOG_PID_PRINTK("at top of loop, i = %d\n", i);
+ LOG_TRACE_ARGS("at top of loop, i = %d\n", i);
/* Go ahead and read that bit of the tree - we'll need it. */
status = ocfs_read_bh(osb, FileEntry->extents[i].disk_off,
&extent_bh, OCFS_BH_CACHED, inode);
@@ -2344,7 +2356,7 @@
}
/* Figure out, do we want to kill this whole tree? */
if (FileEntry->extents[i].file_off >= alloc_size) {
- LOG_PID_PRINTK("Found an entire tree to delete!\n");
+ LOG_TRACE_ARGS("Found an entire tree to delete!\n");
status = ocfs_kill_this_tree(osb, extent_bh, free_head, inode);
if (status < 0) {
@@ -2361,7 +2373,7 @@
* split this tree, but we call this function
* anyways in order to update last_ext_ptr. */
- LOG_PID_PRINTK("Splitting this tree!\n");
+ LOG_TRACE_ARGS("Splitting this tree!\n");
status = ocfs_split_this_tree(osb, extent_bh, free_head, FileEntry, handle, inode);
if (status < 0) {
LOG_ERROR_STATUS(status);
@@ -2369,7 +2381,7 @@
}
/* Ok, update the FileEntry */
- LOG_PID_PRINTK("Alright. num_bytes = (%u,%u), alloc_size = (%u,%u) file_off = (%u,%u)\n", HI(FileEntry->extents[i].num_bytes), LO(FileEntry->extents[i].num_bytes), HI(alloc_size), LO(alloc_size), HI(FileEntry->extents[i].file_off), LO(FileEntry->extents[i].file_off));
+ LOG_TRACE_ARGS("Alright. num_bytes = (%u,%u), alloc_size = (%u,%u) file_off = (%u,%u)\n", HI(FileEntry->extents[i].num_bytes), LO(FileEntry->extents[i].num_bytes), HI(alloc_size), LO(alloc_size), HI(FileEntry->extents[i].file_off), LO(FileEntry->extents[i].file_off));
FileEntry->extents[i].num_bytes = alloc_size;
for (j=0; j < i; j++)
FileEntry->extents[i].num_bytes += FileEntry->extents[j].num_bytes;
@@ -2393,7 +2405,6 @@
}
if (!updated_lep) {
- LOG_PID_STR("Updating FileEntry->last_ext_ptr");
status = ocfs_update_last_ext_ptr(osb, FileEntry, inode);
if (status < 0) {
LOG_ERROR_STATUS(status);
@@ -2401,8 +2412,6 @@
}
}
- LOG_PID_PRINTK("non-local extents, out of loop now, i = %d\n", i);
-
finally:
if (extent_bh)
brelse(extent_bh);
@@ -2461,7 +2470,7 @@
goto finally;
}
- fe = (ocfs_file_entry *) OCFS_BH_GET_DATA(fe_bh);
+ fe = (ocfs_file_entry *) OCFS_BH_GET_DATA_READ(fe_bh); /* read */
if (!IS_VALID_FILE_ENTRY (fe)) {
LOG_ERROR_STATUS (status = -EINVAL);
@@ -2493,7 +2502,7 @@
goto finally;
}
- OcfsExtent = (ocfs_extent_group *) OCFS_BH_GET_DATA(ext_bh);
+ OcfsExtent = (ocfs_extent_group *) OCFS_BH_GET_DATA_READ(ext_bh); /* read */
while (1) {
status = ocfs_update_extent_map (osb, &oin->map, OcfsExtent,
&localVbo, &remainingLength, NONLOCAL_EXT);
@@ -2528,7 +2537,7 @@
LOG_ERROR_STATUS(status);
goto finally;
}
- OcfsExtent = (ocfs_extent_group *) OCFS_BH_GET_DATA(ext_bh);
+ OcfsExtent = (ocfs_extent_group *) OCFS_BH_GET_DATA_READ(ext_bh); /* read */
if (!IS_VALID_EXTENT_DATA(OcfsExtent)) {
LOG_ERROR_STATUS (status = -EINVAL);
goto finally;
@@ -2600,7 +2609,7 @@
goto finally;
}
- ExtentHeader = (ocfs_extent_group *) OCFS_BH_GET_DATA(ext_bh);
+ ExtentHeader = (ocfs_extent_group *) OCFS_BH_GET_DATA_READ(ext_bh); /* read */
if (!IS_VALID_EXTENT_HEADER(ExtentHeader)) {
LOG_ERROR_STATUS (status = -EINVAL);
goto finally;
@@ -2634,7 +2643,7 @@
goto finally;
}
- tmp = (ocfs_extent_group *) OCFS_BH_GET_DATA(*data_extent_bh);
+ tmp = (ocfs_extent_group *) OCFS_BH_GET_DATA_READ(*data_extent_bh); /* read */
if (!IS_VALID_EXTENT_DATA(tmp)) {
LOG_ERROR_STATUS (status = -EINVAL);
OCFS_BH_PUT_DATA(*data_extent_bh);
@@ -2709,7 +2718,7 @@
}
bLockAcquired = true;
}
- bm_lock = (ocfs_bitmap_lock *)OCFS_BH_GET_DATA(bh);
+ bm_lock = (ocfs_bitmap_lock *)OCFS_BH_GET_DATA_WRITE(bh); /* write */
ByteCount = file_size;
@@ -2856,7 +2865,6 @@
return status;
} /* ocfs_find_contiguous_space_from_bitmap */
-
/*
* ocfs_alloc_node_block()
*
@@ -2865,6 +2873,7 @@
{
int status = 0;
int tmpstat = 0;
+ int startbh, numblocks;
__u64 fileSize = 0;
__u64 offset = 0;
__u64 lockId = 0;
@@ -2880,8 +2889,8 @@
__u32 blockSize = 0;
bool bLockAcquired = false;
ocfs_lock_res *pLockResource = NULL;
- __u32 fileId = 0;
- __u32 extendFileId = 0;
+ __u32 bm_file = 0;
+ __u32 alloc_file = 0;
struct buffer_head *bh = NULL;
ocfs_file_entry *fe = NULL;
bool needs_uninit = false;
@@ -2894,12 +2903,12 @@
ocfs_down_sem (&(osb->vol_alloc_lock), true);
if (Type == DISK_ALLOC_DIR_NODE) {
- fileId = OCFS_FILE_DIR_ALLOC_BITMAP + NodeNum;
+ bm_file = OCFS_FILE_DIR_ALLOC_BITMAP + NodeNum;
blockSize = (__u32) osb->vol_layout.dir_node_size;
- extendFileId = OCFS_FILE_DIR_ALLOC + NodeNum;
+ alloc_file = OCFS_FILE_DIR_ALLOC + NodeNum;
} else if (Type == DISK_ALLOC_EXTENT_NODE) {
- fileId = OCFS_FILE_FILE_ALLOC_BITMAP + NodeNum;
- extendFileId = OCFS_FILE_FILE_ALLOC + NodeNum;
+ bm_file = OCFS_FILE_FILE_ALLOC_BITMAP + NodeNum;
+ alloc_file = OCFS_FILE_FILE_ALLOC + NodeNum;
blockSize = (__u32) osb->vol_layout.file_node_size;
}
@@ -2907,7 +2916,7 @@
OCFS_ASSERT (blockSize);
- lockId = (fileId * OCFS_SECTOR_SIZE) + osb->vol_layout.root_int_off;
+ lockId = (bm_file * OCFS_SECTOR_SIZE) + osb->vol_layout.root_int_off;
/* Get a lock on the file */
status = ocfs_acquire_lock (osb, lockId, OCFS_DLM_EXCLUSIVE_LOCK,
@@ -2924,25 +2933,24 @@
/* Read in the bitmap file for the dir alloc and look for the
* required space, if found */
- fe = (ocfs_file_entry *) OCFS_BH_GET_DATA(bh);
+ fe = (ocfs_file_entry *) OCFS_BH_GET_DATA_READ(bh); /* read */
fileSize = fe->file_size;
allocSize = fe->alloc_size;
OCFS_BH_PUT_DATA(bh);
prevFileSize = fileSize;
-
+
if ((fileSize != 0) && (allocSize != 0)) {
/* Round this off to dirnodesize */
ocfs_initialize_bitmap (&bitmap, (__u32) fileSize * 8, (__u32) allocSize * 8);
needs_uninit = true;
- status = ocfs_read_system_file (osb, fileId, bitmap.chunk,
+ status = ocfs_read_system_file (osb, bm_file, bitmap.chunk,
allocSize, offset);
if (status < 0) {
LOG_ERROR_STATUS (status);
goto leave;
}
-
/* Find the requisite number of bits... */
@@ -2958,7 +2966,6 @@
if (foundBit == -1) {
/* if not found add more allocation to the file and try again. */
-
/* Lets get a 1MB chunks every time or clustersize which ever */
/* is greater or the number of bit asked */
extent = ((1 * ONE_MEGA_BYTE) > osb->vol_layout.cluster_size) ?
@@ -2969,7 +2976,7 @@
extent = OCFS_ALIGN (extent, ONE_MEGA_BYTE);
- status = ocfs_get_system_file_size (osb, (extendFileId),
+ status = ocfs_get_system_file_size (osb, alloc_file,
&newFileSize, &allocSize);
if (status < 0) {
LOG_ERROR_STATUS (status);
@@ -2982,8 +2989,9 @@
if (allocSize > 0)
extent *= 2;
- status = ocfs_extend_system_file (osb, (extendFileId),
- newFileSize + extent, NULL, handle);
+ status = ocfs_extend_system_file (osb, alloc_file,
+ newFileSize + extent, NULL,
+ handle, false);
if (status < 0) {
LOG_ERROR_STATUS (status);
goto leave;
@@ -2996,12 +3004,13 @@
* do a put_data first! */
/* Calculate the new bitmap size */
- status = ocfs_extend_system_file (osb, fileId, bitMapSize, bh, handle);
+ status = ocfs_extend_system_file (osb, bm_file, bitMapSize, bh,
+ handle, true);
if (status < 0) {
LOG_ERROR_STATUS (status);
goto leave;
}
- fe = (ocfs_file_entry *) OCFS_BH_GET_DATA(bh);
+ fe = (ocfs_file_entry *) OCFS_BH_GET_DATA_READ(bh); /* read */
/* we wrote it back out in ocfs_extend_system_file so
* we can trust the sizes here */
fileSize = fe->file_size;
@@ -3016,7 +3025,7 @@
allocSize * 8);
needs_uninit = true;
- status = ocfs_read_system_file (osb, fileId, bitmap.chunk,
+ status = ocfs_read_system_file (osb, bm_file, bitmap.chunk,
allocSize, offset);
if (status < 0) {
LOG_ERROR_STATUS (status);
@@ -3034,17 +3043,21 @@
ocfs_set_bits (&bitmap, (__u32) foundBit, (__u32) numBits);
+ /* only write out what has changed... */
+ startbh = OCFS_GLOBAL_OFF_TO_CHUNK(foundBit);
+ numblocks = OCFS_GLOBAL_OFF_TO_CHUNK(foundBit + numBits) - startbh + 1;
+
/* Write the bitmap file back */
- status = ocfs_write_system_file (osb, fileId, bitmap.chunk,
- allocSize, offset);
+ status = ocfs_write_bhs(osb, &bitmap.chunk[startbh], numblocks,
+ 0, NULL);
if (status < 0) {
LOG_ERROR_STATUS(status);
goto leave;
}
- LOG_TRACE_ARGS ("offset=%u.%u, type=%x, blksz=%u, foundbit=%u, fileid=%u\n",
- foundBit * blockSize, Type, blockSize, foundBit, extendFileId);
- *DiskOffset = ocfs_file_to_disk_off (osb, (extendFileId),
+ LOG_TRACE_ARGS ("offset=%u, type=%x, blksz=%u, foundbit=%u, fileid=%u\n",
+ foundBit * blockSize, Type, blockSize, foundBit, alloc_file);
+ *DiskOffset = ocfs_file_to_disk_off (osb, (alloc_file),
(foundBit * blockSize));
if (*DiskOffset == 0) {
LOG_ERROR_STATUS(status = -EFAIL);
@@ -3052,6 +3065,7 @@
}
*file_off = (__u64) ((__u64) foundBit * (__u64) blockSize);
+
/* this can just fall through */
if (*file_off == 0) {
LOG_TRACE_ARGS ("offset=%u.%u, type=%x, blksz=%u, foundbit=%u\n",
@@ -3116,7 +3130,7 @@
goto leave;
}
- dirnode = (ocfs_dir_node *) OCFS_BH_GET_DATA(dir_hdr_bh);
+ dirnode = (ocfs_dir_node *) OCFS_BH_GET_DATA_READ(dir_hdr_bh); /* read */
while ((dirnode->node_disk_off != INVALID_NODE_POINTER) &&
(IS_VALID_DIR_NODE (dirnode))) {
@@ -3138,7 +3152,7 @@
LOG_ERROR_STATUS (status);
goto leave;
}
- dirnode = (ocfs_dir_node *) OCFS_BH_GET_DATA(dir_hdr_bh);
+ dirnode = (ocfs_dir_node *) OCFS_BH_GET_DATA_READ(dir_hdr_bh); /* read */
continue;
} else {
break;
@@ -3170,12 +3184,23 @@
struct buffer_head *extent_bh = NULL;
ocfs_file_entry *fe = NULL;
struct inode *inode = NULL;
+ __u64 offset;
LOG_ENTRY ();
-
- inode = ocfs_get_inode_from_bh(osb, fe_bh);
- fe = (ocfs_file_entry *)OCFS_BH_GET_DATA(fe_bh);
+ fe = (ocfs_file_entry *)OCFS_BH_GET_DATA_READ(fe_bh); /* read */
+ if (fe->attribs & OCFS_ATTRIB_DIRECTORY)
+ offset = fe->extents[0].disk_off;
+ else
+ offset = fe->this_sector;
+ OCFS_BH_PUT_DATA(fe_bh);
+
+ inode = ocfs_get_inode_from_offset(osb, offset, fe_bh);
+ if (inode)
+ SET_BH_SEQNUM(inode, fe_bh);
+
+ fe = (ocfs_file_entry *)OCFS_BH_GET_DATA_READ(fe_bh); /* read */
+
if (fe->local_ext) {
for (i = 0; i < fe->next_free_ext; i++) {
numBitsAllocated = (__u32) (fe->extents[i].num_bytes /
@@ -3199,7 +3224,7 @@
LOG_ERROR_STATUS (status);
goto leave;
}
- extent = (ocfs_extent_group *) OCFS_BH_GET_DATA(extent_bh);
+ extent = (ocfs_extent_group *) OCFS_BH_GET_DATA_READ(extent_bh); /* read */
if ((fe->granularity && (!IS_VALID_EXTENT_HEADER(extent))) || !IS_VALID_EXTENT_DATA(extent)) {
status = -EINVAL;
LOG_ERROR_STATUS(status);
@@ -3346,7 +3371,7 @@
if (!local_alloc_bh)
local_alloc_bh = osb->local_alloc_bh;
- alloc = (ocfs_local_alloc *) OCFS_BH_GET_DATA(local_alloc_bh);
+ alloc = (ocfs_local_alloc *) OCFS_BH_GET_DATA_READ(local_alloc_bh); /* read */
if (alloc->alloc_size == 0) {
OCFS_BH_PUT_DATA(local_alloc_bh);
LOG_TRACE_STR("nothing to sync!");
@@ -3392,7 +3417,7 @@
}
}
- alloc = (ocfs_local_alloc *) OCFS_BH_GET_DATA(local_alloc_bh);
+ alloc = (ocfs_local_alloc *) OCFS_BH_GET_DATA_READ(local_alloc_bh); /* read */
LOG_TRACE_ARGS("alloc->alloc_size = %u, COUNT = %u, num_used = %u\n",
alloc->alloc_size, ocfs_alloc_count_bits(alloc),
@@ -3446,7 +3471,8 @@
*
* pass it the bitmap lock in lock_bh if you have it.
*/
-static int ocfs_alloc_new_window(ocfs_super *osb, struct buffer_head *lock_bh)
+static int ocfs_alloc_new_window(ocfs_super *osb, struct buffer_head *lock_bh,
+ ocfs_journal_handle *handle)
{
int status = 0;
__u64 alloc_bytes, cluster_off, cluster_count;
@@ -3455,7 +3481,7 @@
LOG_ENTRY();
- alloc = (ocfs_local_alloc *) OCFS_BH_GET_DATA(osb->local_alloc_bh);
+ alloc = (ocfs_local_alloc *) OCFS_BH_GET_DATA_READ(osb->local_alloc_bh); /* read */
if (alloc->alloc_size != 0)
LOG_TRACE_STR("asking me to alloc a new window over a"
" non-empty one");
@@ -3476,7 +3502,7 @@
goto bail;
}
- alloc = (ocfs_local_alloc *) OCFS_BH_GET_DATA(osb->local_alloc_bh);
+ alloc = (ocfs_local_alloc *) OCFS_BH_GET_DATA_WRITE(osb->local_alloc_bh); /* write */
alloc->bitmap_start = cluster_off;
alloc->alloc_size = cluster_count;
@@ -3492,6 +3518,25 @@
LOG_TRACE_ARGS("window alloc_size = %u\n", alloc->alloc_size);
OCFS_BH_PUT_DATA(osb->local_alloc_bh);
+ if (handle->abort_bits)
+ LOG_ERROR_STR("Multiple window allocations in a transaction "
+ "-- this is illegal!");
+ else
+ handle->abort_bits = alloc_bitmap_free_head();
+
+ status = ocfs_add_to_bitmap_free_head(osb, handle->abort_bits,
+ cluster_count,
+ alloc->bitmap_start, -1,
+ DISK_ALLOC_VOLUME);
+ if (status < 0) {
+ LOG_ERROR_STATUS(status);
+
+ /* In case of this error, we want to shutdown the
+ * local alloc bitmap. We'll let shutdown handling
+ * deal with freeing newly allocated bits. */
+ free_bitmap_free_head(handle->abort_bits);
+ handle->abort_bits = NULL;
+ }
bail:
LOG_EXIT_STATUS(status);
return(status);
@@ -3569,7 +3614,6 @@
int status = 0, tmpstat;
int startoff, tmpoff;
__u32 tmpwanted;
- bool dontdirty = false;
/* main bitmap variables. */
struct buffer_head *main_bm_bh = NULL;
ocfs_lock_res *bm_lock_res = NULL;
@@ -3592,7 +3636,7 @@
goto bail;
}
- alloc = (ocfs_local_alloc *) OCFS_BH_GET_DATA(osb->local_alloc_bh);
+ alloc = (ocfs_local_alloc *) OCFS_BH_GET_DATA_WRITE(osb->local_alloc_bh); /* write */ /* journal access */
tryagain:
/* If we need to initialize a new window, do so now. */
if (alloc->alloc_size == 0) {
@@ -3600,23 +3644,31 @@
alloc = NULL;
LOG_TRACE_STR("Allocating a new window...");
- status = ocfs_alloc_new_window(osb, main_bm_bh);
+ status = ocfs_alloc_new_window(osb, main_bm_bh, handle);
if (status < 0) {
if (status == -ENOSPC) {
+ /* TODO: Remove this printk */
printk("ocfs: disabling local alloc "
"bitmap for this mount.\n");
- /* at this point, we shouldn't have
- * anything allocated for the local
- * alloc, so shutting it down won't
- * wind up free'ing anything... */
- ocfs_shutdown_local_alloc(osb, NULL, true);
+
+ ocfs_shutdown_local_alloc(osb, NULL, false);
+
+ /* we want to make sure an empty alloc
+ * hits disk. */
+ ocfs_handle_set_sync(handle, true);
+
+ /* the bh might not have been dirtied to
+ * the journal yet. */
+ tmpstat = ocfs_journal_dirty(handle,
+ osb->local_alloc_bh);
+ if (tmpstat < 0)
+ LOG_ERROR_STATUS(tmpstat);
goto bail;
}
LOG_ERROR_STATUS(status);
goto bail;
}
- alloc = (ocfs_local_alloc *)
- OCFS_BH_GET_DATA(osb->local_alloc_bh);
+ alloc = (ocfs_local_alloc *) OCFS_BH_GET_DATA_WRITE(osb->local_alloc_bh); /* write */ /* journal access */
}
/* Alright, try to satisfy the request. */
@@ -3644,15 +3696,14 @@
goto bail;
}
- status = ocfs_sync_local_to_main(osb, &(osb->alloc_free_head),
+ status = ocfs_sync_local_to_main(osb, &(handle->commit_bits),
NULL, main_bm_bh);
if (status < 0) {
LOG_ERROR_STATUS(status);
goto bail;
}
- alloc = (ocfs_local_alloc *)
- OCFS_BH_GET_DATA(osb->local_alloc_bh);
+ alloc = (ocfs_local_alloc *) OCFS_BH_GET_DATA_WRITE(osb->local_alloc_bh); /* write */ /* journal access */
ocfs_clear_local_alloc(alloc);
@@ -3666,16 +3717,7 @@
* alloc put back! */
ocfs_handle_set_sync(handle, true);
- /* skip our own abort handling. */
- status = journal_dirty_metadata(handle->k_handle,
- osb->local_alloc_bh);
- dontdirty = true;
- if (status < 0) {
- LOG_ERROR_STATUS(status);
- goto bail;
- }
- alloc = (ocfs_local_alloc *)
- OCFS_BH_GET_DATA(osb->local_alloc_bh);
+ alloc = (ocfs_local_alloc *) OCFS_BH_GET_DATA_WRITE(osb->local_alloc_bh); /* write */ /* journal access */
goto tryagain;
}
@@ -3698,12 +3740,10 @@
OCFS_BH_PUT_DATA(osb->local_alloc_bh);
alloc = NULL;
- if (!dontdirty) {
- status = ocfs_journal_dirty(handle, osb->local_alloc_bh);
- if (status < 0) {
- LOG_ERROR_STATUS(status);
- goto bail;
- }
+ status = ocfs_journal_dirty(handle, osb->local_alloc_bh);
+ if (status < 0) {
+ LOG_ERROR_STATUS(status);
+ goto bail;
}
bail:
/* if we locked the main bitmap, cleanup after ourselves. */
@@ -3820,7 +3860,7 @@
goto leave;
}
- alloc = (ocfs_local_alloc *) OCFS_BH_GET_DATA(alloc_bh);
+ alloc = (ocfs_local_alloc *) OCFS_BH_GET_DATA_WRITE(alloc_bh); /* write */
memset(alloc, 0, sizeof(ocfs_local_alloc));
strcpy (alloc->signature, OCFS_LOCAL_ALLOC_SIGNATURE);
@@ -3864,7 +3904,7 @@
goto bail;
}
- alloc = (ocfs_local_alloc *) OCFS_BH_GET_DATA(alloc_bh);
+ alloc = (ocfs_local_alloc *) OCFS_BH_GET_DATA_READ(alloc_bh); /* read */
/* do a little verification. */
num_used = ocfs_alloc_count_bits(alloc);
@@ -3918,8 +3958,6 @@
else
bh = osb->local_alloc_bh;
- if (osb->alloc_free_head)
- LOG_TRACE_STR("Shutting down with a pending bitmap_free_head!");
status = ocfs_sync_local_to_main(osb, &f, NULL, NULL);
if (status < 0)
LOG_ERROR_STATUS(status);
@@ -3931,7 +3969,7 @@
f = NULL;
}
- alloc = (ocfs_local_alloc *) OCFS_BH_GET_DATA(bh);
+ alloc = (ocfs_local_alloc *) OCFS_BH_GET_DATA_WRITE(bh); /* write */
ocfs_clear_local_alloc(alloc);
OCFS_BH_PUT_DATA(bh);
Modified: trunk/src/bitmap.c
===================================================================
--- trunk/src/bitmap.c 2003-12-18 23:28:02 UTC (rev 14)
+++ trunk/src/bitmap.c 2004-01-24 01:22:15 UTC (rev 15)
@@ -160,13 +160,14 @@
globalsize, bitmap->validbits, sysonly);*/
/*LOG_TRACE_ARGS("before loop: c=%u, lastbh=%u, size=%u, "
"localstart=%u\n", c, lastbh, size, localstart);*/
- buffer = OCFS_BH_GET_DATA(currbh);
+ buffer = OCFS_BH_GET_DATA_READ(currbh); /* read */
while ((bitoff = find_next_zero_bit(buffer, OCFS_BITS_IN_CHUNK,
localstart)) != -1) {
/*LOG_TRACE_ARGS("c=%u, globaloff=%u, bitoff=%u, "
"localstart=%u\n", c, globaloff, bitoff,
localstart);*/
+
/* find_next_zero_bit returns:
>= size passed in: if no zero bits in here.
some number < size: at the next zero bit
@@ -188,9 +189,10 @@
localstart = bitoff = 0;
c++;
currbh = bitmap->chunk[c];
- buffer = OCFS_BH_GET_DATA(currbh);
+ buffer = OCFS_BH_GET_DATA_READ(currbh); /* read */
if (c == lastbh)
size = globalsize-(OCFS_BITS_IN_CHUNK*lastbh);
+ globaloff = c * OCFS_BITS_IN_CHUNK;
continue;
}
@@ -234,24 +236,21 @@
*/
int ocfs_count_bits (ocfs_alloc_bm * bitmap)
{
- __u32 size, count = 0, i, j;
+ __u32 count = 0, i, j;
struct buffer_head *currbh;
unsigned char tmp;
__u8 *buffer;
- __u32 validbh;
+ int validbytes, size;
LOG_ENTRY ();
size = (bitmap->validbits >> 3);
- validbh = OCFS_ALIGN(bitmap->validbits, OCFS_BITS_IN_CHUNK) /
- OCFS_BITS_IN_CHUNK;
-
- for (i = 0; i < validbh; i++) {
+
+ for (i = 0, validbytes = (size >= OCFS_BITMAP_CHUNK ? OCFS_BITMAP_CHUNK : size);
+ size > 0; size -= validbytes, i++) {
currbh = bitmap->chunk[i];
- buffer = OCFS_BH_GET_DATA(currbh);
- for (j = 0; j < (size % OCFS_BITMAP_CHUNK ?
- size % OCFS_BITMAP_CHUNK :
- OCFS_BITMAP_CHUNK); j++) {
+ buffer = OCFS_BH_GET_DATA_READ(currbh); /* read */
+ for (j = 0; j < validbytes; j++) {
memcpy (&tmp, buffer, 1);
count += BITCOUNT (tmp);
buffer++;
@@ -290,7 +289,7 @@
local = OCFS_GLOBAL_OFF_TO_LOCAL(start);
currbh = bitmap->chunk[i];
- buff = OCFS_BH_GET_DATA(currbh);
+ buff = OCFS_BH_GET_DATA_WRITE(currbh); /* write */
while (num--) {
set_bit (local++, buff);
@@ -299,7 +298,7 @@
OCFS_BH_PUT_DATA(currbh);
i++;
currbh = bitmap->chunk[i];
- buff = OCFS_BH_GET_DATA(currbh);
+ buff = OCFS_BH_GET_DATA_WRITE(currbh); /* write */
}
}
@@ -331,7 +330,7 @@
local = OCFS_GLOBAL_OFF_TO_LOCAL(start);
currbh = bitmap->chunk[i];
- buff = OCFS_BH_GET_DATA(currbh);
+ buff = OCFS_BH_GET_DATA_WRITE(currbh); /* write */
while (num--) {
clear_bit (local++, buff);
@@ -340,7 +339,7 @@
OCFS_BH_PUT_DATA(currbh);
i++;
currbh = bitmap->chunk[i];
- buff = OCFS_BH_GET_DATA(currbh);
+ buff = OCFS_BH_GET_DATA_WRITE(currbh); /* write */
}
}
Modified: trunk/src/dcache.c
===================================================================
--- trunk/src/dcache.c 2003-12-18 23:28:02 UTC (rev 14)
+++ trunk/src/dcache.c 2004-01-24 01:22:15 UTC (rev 15)
@@ -82,7 +82,7 @@
goto bail;
}
- fe = (ocfs_file_entry *) OCFS_BH_GET_DATA(fe_bh);
+ fe = (ocfs_file_entry *) OCFS_BH_GET_DATA_READ(fe_bh); /* read */
/* we now have a file entry to call read_inode */
q.name = fe->filename;
@@ -145,7 +145,7 @@
spin_unlock (&dcache_lock);
ret = 1;
- bail:
+bail:
LOG_EXIT_LONG (ret);
return ret;
} /* ocfs_empty */
Modified: trunk/src/dir.c
===================================================================
--- trunk/src/dir.c 2003-12-18 23:28:02 UTC (rev 14)
+++ trunk/src/dir.c 2004-01-24 01:22:15 UTC (rev 15)
@@ -100,7 +100,7 @@
} else {
if (ocfs_find_files_on_disk (osb, rootOff, NULL, &entry_bh, ofile, inode) < 0)
break;
- entry = (ocfs_file_entry *) OCFS_BH_GET_DATA(entry_bh);
+ entry = (ocfs_file_entry *) OCFS_BH_GET_DATA_READ(entry_bh); /* read */
r=filldir (dirent, entry->filename, strlen (entry->filename), filp->f_pos,
LO (entry->this_sector), DT_UNKNOWN);
if (r < 0) {
@@ -303,25 +303,18 @@
__u64 offset;
ocfs_dir_node *DirNode = NULL;
ocfs_file_entry *fe = NULL;
- bool sync_hdr_write = false, sync_fe_write = false;
- bool cached_hdr_write = false, cached_fe_write = false;
+ bool sync_fe_write = false;
LOG_ENTRY ();
- DirNode = (ocfs_dir_node *)OCFS_BH_GET_DATA(bhs[0]);
+ DirNode = (ocfs_dir_node *)OCFS_BH_GET_DATA_READ(bhs[0]); /* read */
offset = DirNode->node_disk_off + ((idx + 1) * 512);
- if ((DISK_LOCK_CURRENT_MASTER (DirNode) == osb->node_num) &&
- (DISK_LOCK_FILE_LOCK (DirNode) == OCFS_DLM_ENABLE_CACHE_LOCK)) {
- cached_hdr_write = true;
- } else
- sync_hdr_write = true;
-
OCFS_BH_PUT_DATA(bhs[0]);
if (idx != -1) {
- fe = (ocfs_file_entry *)OCFS_BH_GET_DATA(bhs[idx+1]);
+ fe = (ocfs_file_entry *)OCFS_BH_GET_DATA_READ(bhs[idx+1]); /* read */
if(!IS_VALID_FILE_ENTRY(fe)) {
OCFS_BH_PUT_DATA(bhs[idx+1]);
@@ -329,29 +322,16 @@
goto bail;
}
- if ((DISK_LOCK_CURRENT_MASTER (fe) == osb->node_num) &&
- (DISK_LOCK_FILE_LOCK (fe) == OCFS_DLM_ENABLE_CACHE_LOCK)) {
- cached_fe_write = true;
- if (!cached_hdr_write)
- sync_fe_write = true;
- } else {
- ocfs_down_sem(&(osb->map_lock), true);
- ocfs_remove_extent_map_entry(osb, &osb->metadata_map, offset, 512);
- ocfs_remove_extent_map_entry(osb, &osb->trans_map, offset, 512);
- ocfs_up_sem(&(osb->map_lock));
- sync_fe_write = true;
- sync_hdr_write = true;
- }
+ ocfs_down_sem(&(osb->map_lock), true);
+ ocfs_remove_extent_map_entry(osb, &osb->metadata_map, offset, 512);
+ ocfs_remove_extent_map_entry(osb, &osb->trans_map, offset, 512);
+ ocfs_up_sem(&(osb->map_lock));
+ sync_fe_write = true;
OCFS_BH_PUT_DATA(bhs[idx+1]);
}
/* Write the file entry at idx, if given */
- if (cached_fe_write) {
- status = ocfs_write_bh (osb, bhs[idx+1], OCFS_BH_CACHED, file_inode);
- if (status < 0)
- LOG_ERROR_STATUS (status);
- }
if (sync_fe_write) {
status = ocfs_write_bh (osb, bhs[idx+1], 0, file_inode);
if (status < 0)
@@ -359,18 +339,10 @@
}
/* Write the first sector last */
- if (cached_hdr_write) {
- status = ocfs_write_bh (osb, bhs[0], OCFS_BH_CACHED, dir_inode);
- if (status < 0)
- LOG_ERROR_STATUS (status);
- }
- if (sync_hdr_write) {
- status = ocfs_write_bh (osb, bhs[0], 0, dir_inode);
- if (status < 0)
- LOG_ERROR_STATUS (status);
- }
+ status = ocfs_write_bh (osb, bhs[0], 0, dir_inode);
+ if (status < 0)
+ LOG_ERROR_STATUS (status);
- //IF_TRACE (ocfs_print_dir_node (osb, DirNode));
bail:
LOG_EXIT_STATUS (status);
return status;
@@ -399,7 +371,7 @@
goto bail;
}
- tmp = (ocfs_dir_node *)OCFS_BH_GET_DATA(bhs[0]);
+ tmp = (ocfs_dir_node *)OCFS_BH_GET_DATA_READ(bhs[0]); /* read */
memcpy(DirNode, tmp, 512);
OCFS_BH_PUT_DATA(bhs[0]);
@@ -410,6 +382,7 @@
if (!IS_VALID_DIR_NODE (DirNode)) {
+ LOG_TRACE_STR("Invalid Dir Node!\n");
bRet = false;
goto bail;
}
@@ -424,11 +397,7 @@
}
if (found) {
- fe = FILEENT_GETBH(DirNode, bhs, i);
- if (fe == NULL) {
- // fe is locked by this kernel thread
- continue;
- }
+ fe = FILEENT_GETBH(DirNode, bhs, i); /* read */
if (fe->sync_flags & OCFS_SYNC_FLAG_NAME_DELETED ||
!(fe->sync_flags & OCFS_SYNC_FLAG_VALID)) {
@@ -471,11 +440,12 @@
goto bail;
}
- tmp = (ocfs_dir_node *)OCFS_BH_GET_DATA(bhs[0]);
+ tmp = (ocfs_dir_node *)OCFS_BH_GET_DATA_READ(bhs[0]); /* read */
memcpy(DirNode, tmp, 512);
OCFS_BH_PUT_DATA(bhs[0]);
if (!IS_VALID_DIR_NODE (DirNode)) {
+ LOG_TRACE_STR("Invalid Dir Node!\n");
bRet = false;
goto bail;
}
@@ -492,7 +462,7 @@
OFile->curr_byte_off = i + 1;
}
- bail:
+bail:
if (DirNode)
ocfs_safefree(DirNode);
@@ -504,6 +474,7 @@
/*
* ocfs_find_index()
*
+ * Locks the dirnode bh, and then only one fe at a time.
*/
static bool ocfs_find_index (ocfs_super * osb, struct buffer_head *bhs[], struct qstr * FileName, int *Index)
{
@@ -516,7 +487,7 @@
LOG_ENTRY ();
- DirNode = (ocfs_dir_node *)OCFS_BH_GET_DATA(bhs[0]);
+ DirNode = (ocfs_dir_node *)OCFS_BH_GET_DATA_READ(bhs[0]); /* read */
if (!IS_VALID_DIR_NODE (DirNode) || FileName==NULL) {
ret = false;
goto bail;
@@ -527,15 +498,11 @@
if (DirNode->index_dirty) {
for (index = start; index < DirNode->num_ent_used; index++) {
- fe = FILEENT_GETBH(DirNode, bhs, index);
- if (fe == NULL) {
- // fe is locked by this kernel thread
- continue;
- }
+ fe = FILEENT_GETBH(DirNode, bhs, index); /* read */
if ((fe->sync_flags & OCFS_SYNC_FLAG_NAME_DELETED) ||
(!(fe->sync_flags & OCFS_SYNC_FLAG_VALID))) {
- OCFS_BH_PUT_DATA(bhs[index+1]);
+ FILEENT_PUTBH(DirNode, bhs, index);
continue;
}
q.name = fe->filename;
@@ -556,11 +523,7 @@
for (lowBnd = start, upBnd = (DirNode->num_ent_used - start); upBnd; upBnd >>= 1) {
index = lowBnd + (upBnd >> 1);
- fe = FILEENT_GETBH(DirNode, bhs, index);
- if (fe == NULL) {
- // fe is locked by this kernel thread
- continue;
- }
+ fe = FILEENT_GETBH(DirNode, bhs, index); /* read */
if ((fe->sync_flags & OCFS_SYNC_FLAG_NAME_DELETED) ||
(!(fe->sync_flags & OCFS_SYNC_FLAG_VALID))) {
@@ -568,11 +531,7 @@
FILEENT_PUTBH(DirNode, bhs, index);
for (index = lowBnd; index < (lowBnd + upBnd); index++) {
- fe = FILEENT_GETBH(DirNode, bhs, index);
- if (fe == NULL) {
- // fe is locked by this kernel thread
- continue;
- }
+ fe = FILEENT_GETBH(DirNode, bhs, index); /* read */
if ((fe->sync_flags & OCFS_SYNC_FLAG_NAME_DELETED) ||
(!(fe->sync_flags & OCFS_SYNC_FLAG_VALID))) {
FILEENT_PUTBH(DirNode, bhs, index);
@@ -634,7 +593,7 @@
{
int status = 0;
ocfs_dir_node *dir = NULL;
- ocfs_file_entry *target;
+ ocfs_file_entry *target = NULL;
ocfs_file_entry *fe;
__u32 i;
__u8 offset = 0;
@@ -652,6 +611,7 @@
LOG_ERROR_STATUS (status = -ENOMEM);
goto leave;
}
+ memset(arr, 0, bufsz);
status = ocfs_read_bhs (osb, DirNodeOffset,
osb->vol_layout.dir_node_size, arr,
@@ -663,7 +623,7 @@
} else
arr = bhs;
- dir = (ocfs_dir_node *)OCFS_BH_GET_DATA(arr[0]);
+ dir = (ocfs_dir_node *)OCFS_BH_GET_DATA_WRITE(arr[0]); /* write */
if (!IS_VALID_DIR_NODE (dir)) {
OCFS_BH_PUT_DATA(arr[0]);
LOG_ERROR_STATUS(status = -EINVAL);
@@ -673,18 +633,25 @@
if (dir->index_dirty) {
offset = dir->bad_off;
- target = (ocfs_file_entry *)OCFS_BH_GET_DATA(arr[offset+1]);
+ /* To preserve locking order, (we only want to lock 1
+ * fe at a time, in incremental order), we copy this
+ * one off. */
+ target = ocfs_allocate_file_entry();
+ if (target == NULL) {
+ OCFS_BH_PUT_DATA(arr[0]);
+ LOG_ERROR_STATUS(status = -ENOMEM);
+ goto leave;
+ }
+ memcpy(target, OCFS_BH_GET_DATA_READ(arr[offset+1]),
+ sizeof(ocfs_file_entry)); /* read */
+ OCFS_BH_PUT_DATA(arr[offset+1]);
for (i = 0; i < dir->num_ent_used; i++) {
/* don't need to check ourselves */
if (dir->index[i] == offset)
continue;
- fe = FILEENT_GETBH(dir, arr, i);
- if (fe == NULL) {
- // buffer is locked, like the rename case
- continue;
- }
+ fe = FILEENT_GETBH(dir, arr, i); /* read */
if ((fe->sync_flags & OCFS_SYNC_FLAG_NAME_DELETED) ||
(!(fe->sync_flags & OCFS_SYNC_FLAG_VALID))) {
@@ -701,8 +668,6 @@
FILEENT_PUTBH(dir, arr, i);
}
- OCFS_BH_PUT_DATA(arr[offset+1]);
-
if (i < dir->num_ent_used - 1) {
memmove (&dir->index[i+1], &dir->index[i],
dir->num_ent_used - i);
@@ -712,17 +677,22 @@
dir->index_dirty = 0;
OCFS_BH_PUT_DATA(arr[0]);
- status = ocfs_write_dir_node (osb, arr, -1, dir_inode, NULL);
- if (status < 0) {
- LOG_ERROR_STATUS (status);
- goto leave;
+ if (!handle) {
+ status = ocfs_write_dir_node (osb, arr, -1, dir_inode,
+ NULL);
+ if (status < 0) {
+ LOG_ERROR_STATUS (status);
+ goto leave;
+ }
}
} else
OCFS_BH_PUT_DATA(arr[0]);
- leave:
+leave:
if (bhs == NULL)
ocfs_safefree (arr);
+ if (target)
+ ocfs_release_file_entry(target);
LOG_EXIT_STATUS (status);
return status;
@@ -745,7 +715,7 @@
LOG_ENTRY ();
- DirNode = (ocfs_dir_node *)OCFS_BH_GET_DATA(bhs[0]);
+ DirNode = (ocfs_dir_node *)OCFS_BH_GET_DATA_WRITE(bhs[0]); /* write */
if (!IS_VALID_DIR_NODE (DirNode)) {
LOG_ERROR_STATUS(status = -EINVAL);
@@ -762,7 +732,7 @@
LOG_ERROR_STATUS (status);
goto bail;
}
- DirNode = (ocfs_dir_node *)OCFS_BH_GET_DATA(bhs[0]);
+ DirNode = (ocfs_dir_node *)OCFS_BH_GET_DATA_WRITE(bhs[0]); /* write */
}
/* Should status be updated here? */
@@ -784,21 +754,17 @@
status = -EEXIST;
goto bail;
}
- DirNode = (ocfs_dir_node *)OCFS_BH_GET_DATA(bhs[0]);
+ DirNode = (ocfs_dir_node *)OCFS_BH_GET_DATA_WRITE(bhs[0]); /* write */
if (index < DirNode->num_ent_used) {
- fe = FILEENT_GETBH(DirNode, bhs, index);
- if (fe == NULL) {
- // fe is locked by this kernel thread
- index = 0;
- } else {
- res = strcmp (fe->filename, InsertEntry->filename);
- FILEENT_PUTBH(DirNode, bhs, index);
- if (res > 0) {
- /* We are greater than the entry in question we
- * should be less than the one next to it */
- index++;
- }
+ fe = FILEENT_GETBH(DirNode, bhs, index); /* read */
+
+ res = strcmp (fe->filename, InsertEntry->filename);
+ FILEENT_PUTBH(DirNode, bhs, index);
+ if (res > 0) {
+ /* We are greater than the entry in question we
+ * should be less than the one next to it */
+ index++;
}
}
} else {
@@ -819,7 +785,7 @@
freeOffset = DirNode->first_del;
DirNode->num_del--;
if (DirNode->num_del) {
- lastEntry = (ocfs_file_entry *)OCFS_BH_GET_DATA(bhs[freeOffset+1]);
+ lastEntry = (ocfs_file_entry *)OCFS_BH_GET_DATA_READ(bhs[freeOffset+1]); /* read */
DirNode->first_del = lastEntry->next_del;
OCFS_BH_PUT_DATA(bhs[freeOffset+1]);
}
@@ -849,7 +815,7 @@
}
/* Put the entry at the end */
- lastEntry = (ocfs_file_entry *)OCFS_BH_GET_DATA(bhs[freeOffset+1]);
+ lastEntry = (ocfs_file_entry *)OCFS_BH_GET_DATA_WRITE(bhs[freeOffset+1]); /* write */ /* journal access */
InsertEntry->dir_node_ptr = DirNode->node_disk_off;
memcpy (lastEntry, InsertEntry, osb->sect_size);
OCFS_SET_FLAG (lastEntry->sync_flags, OCFS_SYNC_FLAG_VALID);
@@ -898,7 +864,6 @@
struct buffer_head *dirbh = NULL;
struct buffer_head **dirbhs = NULL;
__u64 lock_off, head_del, parent_off;
- bool journal_lockbh = false;
const int numbhs = 256;
const int length = numbhs * sizeof(struct buffer_head *);
@@ -912,10 +877,8 @@
}
memset(dirbhs, 0, length);
- EntryToDel = (ocfs_file_entry *) OCFS_BH_GET_DATA(febh);
-
- /* briefly grab LockNode and get useful bits of info */
- LockNode = (ocfs_dir_node *) OCFS_BH_GET_DATA(lockbh);
+ /* briefly grab LockNode and get useful bits of info. */
+ LockNode = (ocfs_dir_node *) OCFS_BH_GET_DATA_READ(lockbh); /* read */
lock_off = LockNode->node_disk_off;
head_del = LockNode->head_del_ent_node;
OCFS_BH_PUT_DATA(lockbh);
@@ -927,13 +890,13 @@
goto leave;
}
+ EntryToDel = (ocfs_file_entry *) OCFS_BH_GET_DATA_WRITE(febh); /* write */
+
/* if fe comes from lower down in the dir chain, get the ocfs_dir_node
* for that chain. otherwise, use the lockbh (toplevel) */
if (EntryToDel->dir_node_ptr == lock_off) {
- journal_lockbh = false;
dirbhs[0] = lockbh;
} else {
- journal_lockbh = true;
status = ocfs_read_bh(osb, EntryToDel->dir_node_ptr,
&dirbh, OCFS_BH_CACHED, dir_inode);
if (status < 0) {
@@ -942,14 +905,14 @@
}
dirbhs[0] = dirbh;
- status= ocfs_journal_access(handle, dirbh,
+ status= ocfs_journal_access(handle, dirbhs[0],
OCFS_JOURNAL_ACCESS_WRITE);
if (status < 0) {
LOG_ERROR_STATUS (status);
goto leave;
}
}
- PDirNode = (ocfs_dir_node *) OCFS_BH_GET_DATA(dirbhs[0]);
+ PDirNode = (ocfs_dir_node *) OCFS_BH_GET_DATA_WRITE(dirbhs[0]); /* write */ /* journal access */
parent_off = PDirNode->node_disk_off;
offset= ((EntryToDel->this_sector - parent_off) >> 9) - 1;
for (index = 0; index < PDirNode->num_ent_used; index++)
@@ -996,12 +959,10 @@
EntryToDel = NULL;
PDirNode = NULL;
- LockNode = (ocfs_dir_node *) OCFS_BH_GET_DATA(lockbh);
- if (LockNode->head_del_ent_node == INVALID_NODE_POINTER) {
- if (lock_off != parent_off)
- journal_lockbh = true;
+ LockNode = (ocfs_dir_node *) OCFS_BH_GET_DATA_WRITE(lockbh); /* write */ /* journal access */
+ if (LockNode->head_del_ent_node == INVALID_NODE_POINTER)
LockNode->head_del_ent_node = parent_off;
- }
+
OCFS_BH_PUT_DATA(lockbh);
LockNode = NULL;
@@ -1010,12 +971,10 @@
LOG_ERROR_STATUS (status);
goto leave;
}
- if (journal_lockbh) {
- status = ocfs_journal_dirty(handle, lockbh);
- if (status < 0) {
- LOG_ERROR_STATUS (status);
- goto leave;
- }
+ status = ocfs_journal_dirty(handle, lockbh);
+ if (status < 0) {
+ LOG_ERROR_STATUS (status);
+ goto leave;
}
leave:
@@ -1093,7 +1052,7 @@
DISK_LOCK_READER_NODE (InsertEntry) = osb->node_num;
/* route the new file entry to the proper dir_off */
- LockNode = (ocfs_dir_node *)OCFS_BH_GET_DATA(lock_bh);
+ LockNode = (ocfs_dir_node *)OCFS_BH_GET_DATA_READ(lock_bh); /* read */
locknode_off = LockNode->node_disk_off;
locknode_head_del = LockNode->head_del_ent_node;
@@ -1123,7 +1082,7 @@
}
/* see if it fits at dir_off */
- DirNode = (ocfs_dir_node *)OCFS_BH_GET_DATA(bhs[0]);
+ DirNode = (ocfs_dir_node *)OCFS_BH_GET_DATA_READ(bhs[0]); /* read */
dir_num_ent_used = DirNode->num_ent_used;
dir_next_node = DirNode->next_node_ptr;
dir_cache_lock = ((DISK_LOCK_FILE_LOCK(DirNode) == OCFS_DLM_ENABLE_CACHE_LOCK) &&
@@ -1158,7 +1117,7 @@
goto leave;
}
- DirNode = (ocfs_dir_node *)OCFS_BH_GET_DATA(bhs[0]);
+ DirNode = (ocfs_dir_node *)OCFS_BH_GET_DATA_READ(bhs[0]); /* read */
dir_num_ent_used = DirNode->num_ent_used;
dir_next_node = DirNode->next_node_ptr;
OCFS_BH_PUT_DATA(bhs[0]);
@@ -1182,7 +1141,7 @@
}
}
- DirNode = (ocfs_dir_node *)OCFS_BH_GET_DATA(bhs[0]);
+ DirNode = (ocfs_dir_node *)OCFS_BH_GET_DATA_READ(bhs[0]); /* read */
dir_num_ent_used = DirNode->num_ent_used;
dir_next_node = DirNode->next_node_ptr;
dir_cache_lock = ((DISK_LOCK_FILE_LOCK(DirNode) == OCFS_DLM_ENABLE_CACHE_LOCK) &&
@@ -1204,12 +1163,14 @@
goto leave;
}
- if (new_head_del != 0) {
- LockNode = (ocfs_dir_node *)OCFS_BH_GET_DATA(lock_bh);
+ /* we always want to mark lock_bh at least once as it's going
+ * to the journal, so leave this get_data_write outside of the
+ * if statement. */
+ LockNode = (ocfs_dir_node *)OCFS_BH_GET_DATA_WRITE(lock_bh); /* write */ /* journal access */
+ if (new_head_del != 0)
LockNode->head_del_ent_node = new_head_del;
- OCFS_BH_PUT_DATA(lock_bh);
- LockNode = NULL;
- }
+ OCFS_BH_PUT_DATA(lock_bh);
+ LockNode = NULL;
/* If we have a list of dir nodes go to the last dirnode */
/* and insert in that. */
@@ -1296,11 +1257,11 @@
LOG_ERROR_STATUS (status);
goto leave;
}
- buf = OCFS_BH_GET_DATA(newbhs[i]);
+ buf = OCFS_BH_GET_DATA_WRITE(newbhs[i]); /* write */
memset(buf, 0, 512);
OCFS_BH_PUT_DATA(newbhs[i]);
}
- pNewDirNode = (ocfs_dir_node *)OCFS_BH_GET_DATA(newbhs[0]);
+ pNewDirNode = (ocfs_dir_node *)OCFS_BH_GET_DATA_WRITE(newbhs[0]); /* write */
ocfs_initialize_dir_node (osb, pNewDirNode,
bitmapOffset, fileOffset,
osb->node_num);
@@ -1308,7 +1269,7 @@
pNewDirNode = NULL;
}
- pNewDirNode = (ocfs_dir_node *)OCFS_BH_GET_DATA(newbhs[0]);
+ pNewDirNode = (ocfs_dir_node *)OCFS_BH_GET_DATA_WRITE(newbhs[0]); /* write */
new_disk_off = pNewDirNode->node_disk_off;
if (dir_cache_lock) {
DISK_LOCK_CURRENT_MASTER (pNewDirNode) = osb->node_num;
@@ -1323,13 +1284,13 @@
file_inode);
if (status >= 0) {
- LockNode = (ocfs_dir_node *)OCFS_BH_GET_DATA(lock_bh);
+ LockNode = (ocfs_dir_node *)OCFS_BH_GET_DATA_WRITE(lock_bh); /* write */
LockNode->free_node_ptr = new_disk_off;
OCFS_BH_PUT_DATA(lock_bh);
LockNode = NULL;
/* Setup the pointer to this new directory block */
- DirNode = (ocfs_dir_node *)OCFS_BH_GET_DATA(bhs[0]);
+ DirNode = (ocfs_dir_node *)OCFS_BH_GET_DATA_WRITE(bhs[0]); /* write */
DirNode->next_node_ptr = new_disk_off;
OCFS_BH_PUT_DATA(bhs[0]);
DirNode = NULL;
Modified: trunk/src/dlm.c
===================================================================
--- trunk/src/dlm.c 2003-12-18 23:28:02 UTC (rev 14)
+++ trunk/src/dlm.c 2004-01-24 01:22:15 UTC (rev 15)
@@ -189,7 +189,7 @@
}
for (i = 0; i < numnodes; i++) {
- p = OCFS_BH_GET_DATA(bhs[i]);
+ p = OCFS_BH_GET_DATA_READ(bhs[i]); /* read */
pubsect = (ocfs_publish *) p;
if (pubsect->time == (__u64) 0 || pubsect->publ_seq_num <= largestseqno) {
OCFS_BH_PUT_DATA(bhs[i]);
@@ -220,7 +220,7 @@
/* Increment the largest sequence number by one & */
/* write it in its own Publish Sector and set the Dirty Bit */
- p = OCFS_BH_GET_DATA(bhs[osb->node_num]);
+ p = OCFS_BH_GET_DATA_WRITE(bhs[osb->node_num]); /* write */
pubsect = (ocfs_publish *)p;
largestseqno++;
LOG_TRACE_ARGS ("largestseqno : %u.%u\n", HILO (largestseqno));
@@ -277,6 +277,8 @@
__u32 timewaited = 0;
ocfs_file_entry *fe = NULL;
struct buffer_head *bh = NULL;
+ __u32 curr_master;
+ __u8 lock_level;
LOG_ENTRY ();
@@ -292,42 +294,39 @@
LOG_ERROR_STATUS (status = tmpstat);
goto finally;
}
- fe = (ocfs_file_entry *)OCFS_BH_GET_DATA(bh);
+ fe = (ocfs_file_entry *)OCFS_BH_GET_DATA_READ(bh); /* read */
+ curr_master = DISK_LOCK_CURRENT_MASTER (fe);
+ lock_level = DISK_LOCK_FILE_LOCK (fe);
+ OCFS_BH_PUT_DATA(bh);
/* This will always be zero when the first Node comes up after reboot */
/* (for volume lock) */
- if ((DISK_LOCK_CURRENT_MASTER (fe) == OCFS_INVALID_NODE_NUM) ||
- (DISK_LOCK_CURRENT_MASTER (fe) == osb->node_num)) {
+ if ((curr_master == OCFS_INVALID_NODE_NUM) ||
+ (curr_master == osb->node_num)) {
goto got_it;
}
- if (!IS_NODE_ALIVE (osb->publ_map,
- DISK_LOCK_CURRENT_MASTER (fe),
- OCFS_MAXIMUM_NODES)) {
-// LOG_TRACE_ARGS ("old_ocfs_recover_vol(%u)\n",
-// DISK_LOCK_CURRENT_MASTER (fe));
-// old_ocfs_recover_vol(osb, DISK_LOCK_CURRENT_MASTER(fe));
-
+ if (!IS_NODE_ALIVE (osb->publ_map, curr_master, OCFS_MAXIMUM_NODES)) {
/* Reset the lock as not owned and return success?? */
/* This needs to be under some sort of cluster wide lock */
+ fe = (ocfs_file_entry *)OCFS_BH_GET_DATA_WRITE(bh); /* write */
DISK_LOCK_CURRENT_MASTER (fe) = OCFS_INVALID_NODE_NUM;
DISK_LOCK_FILE_LOCK (fe) = OCFS_DLM_NO_LOCK;
+ OCFS_BH_PUT_DATA(bh);
goto got_it;
}
/* If we are here in the code it means the local node is not the master */
- if (DISK_LOCK_FILE_LOCK (fe) <= lock_type)
+ if (lock_level <= lock_type)
goto got_it;
- OCFS_BH_PUT_DATA(bh);
brelse(bh);
ocfs_sleep (WAIT_FOR_VOTE_INCREMENT);
timewaited += WAIT_FOR_VOTE_INCREMENT;
continue;
got_it:
+ brelse(bh);
status = 0;
- OCFS_BH_PUT_DATA(bh);
- brelse(bh);
break;
}
@@ -347,6 +346,8 @@
__u32 timewaited = 0;
ocfs_file_entry *fe = NULL;
struct buffer_head *bh = NULL;
+ __u32 curr_master;
+ __u8 lock_level;
LOG_ENTRY_ARGS ("(0x%08x, %u.%u, %u, 0x%08x, %u)\n", osb,
HI (offset), LO (offset), time_to_wait,
@@ -361,54 +362,53 @@
LOG_ERROR_STATUS (status = tmpstat);
goto finally;
}
- fe = (ocfs_file_entry *)OCFS_BH_GET_DATA(bh);
+ fe = (ocfs_file_entry *)OCFS_BH_GET_DATA_READ(bh); /* read */
+ curr_master = DISK_LOCK_CURRENT_MASTER (fe);
+ lock_level = DISK_LOCK_FILE_LOCK (fe);
+ OCFS_BH_PUT_DATA(bh);
- if ((DISK_LOCK_CURRENT_MASTER (fe) == OCFS_INVALID_NODE_NUM) ||
- (DISK_LOCK_CURRENT_MASTER (fe) == osb->node_num)) {
+ if ((curr_master == OCFS_INVALID_NODE_NUM) ||
+ (curr_master == osb->node_num)) {
goto got_it;
}
- if (!IS_NODE_ALIVE (osb->publ_map, DISK_LOCK_CURRENT_MASTER(fe),
- OCFS_MAXIMUM_NODES)) {
-// LOG_ERROR_ARGS ("old_ocfs_recover_vol(%u)",
-// DISK_LOCK_CURRENT_MASTER (fe));
-// old_ocfs_recover_vol(osb, DISK_LOCK_CURRENT_MASTER(fe));
-
+ if (!IS_NODE_ALIVE (osb->publ_map, curr_master, OCFS_MAXIMUM_NODES)) {
/* Reset the lock as not owned and return success?? */
/* This needs to be under some sort of cluster wide lock, */
+ fe = (ocfs_file_entry *)OCFS_BH_GET_DATA_WRITE(bh); /* write */
DISK_LOCK_CURRENT_MASTER (fe) = OCFS_INVALID_NODE_NUM;
DISK_LOCK_FILE_LOCK (fe) = OCFS_DLM_NO_LOCK;
+ OCFS_BH_PUT_DATA(bh);
goto got_it;
}
/* The local node is not the master */
- if (DISK_LOCK_FILE_LOCK (fe) >= OCFS_DLM_ENABLE_CACHE_LOCK) {
+ if (lock_level == OCFS_DLM_ENABLE_CACHE_LOCK) {
int tmpstat;
- lockres->lock_type = DISK_LOCK_FILE_LOCK (fe);
- lockres->master_node_num = DISK_LOCK_CURRENT_MASTER (fe);
+ lockres->lock_type = lock_level;
+ lockres->master_node_num = curr_master;
status = ocfs_break_cache_lock (osb, lockres, inode);
if (status < 0) {
if (status != -EINTR)
LOG_ERROR_STATUS (status);
goto finally;
}
- OCFS_BH_PUT_DATA(bh);
- brelse(bh);
tmpstat = ocfs_read_bh (osb, offset, &bh, 0, inode);
if (tmpstat < 0) {
LOG_ERROR_STATUS (tmpstat);
status = tmpstat;
goto finally;
}
- fe = (ocfs_file_entry *)OCFS_BH_GET_DATA(bh);
+ fe = (ocfs_file_entry *)OCFS_BH_GET_DATA_WRITE(bh); /* write */
DISK_LOCK_FILE_LOCK (fe) = OCFS_DLM_NO_LOCK;
+ lock_level = OCFS_DLM_NO_LOCK;
+ OCFS_BH_PUT_DATA(bh);
}
- if (DISK_LOCK_FILE_LOCK (fe) <= lock_type)
+ if (lock_level <= lock_type)
goto got_it;
- OCFS_BH_PUT_DATA(bh);
brelse(bh);
ocfs_sleep (WAIT_FOR_VOTE_INCREMENT);
timewaited += WAIT_FOR_VOTE_INCREMENT;
@@ -421,18 +421,18 @@
finally:
if (lockres && status >= 0) {
ocfs_acquire_lockres (lockres);
- if (fe) {
- lockres->lock_type = DISK_LOCK_FILE_LOCK (fe);
- lockres->master_node_num = DISK_LOCK_CURRENT_MASTER (fe);
- lockres->oin_openmap = DISK_LOCK_OIN_MAP (fe);
- lockres->last_lock_upd = DISK_LOCK_LAST_WRITE (fe);
- } else
- LOG_ERROR_STR("fe was null!");
+ fe = (ocfs_file_entry *)OCFS_BH_GET_DATA_READ(bh); /* read */
+ lockres->lock_type = DISK_LOCK_FILE_LOCK (fe);
+ lockres->master_node_num = DISK_LOCK_CURRENT_MASTER (fe);
+ lockres->oin_openmap = DISK_LOCK_OIN_MAP (fe);
+ lockres->last_lock_upd = DISK_LOCK_LAST_WRITE (fe);
ocfs_release_lockres (lockres);
OCFS_BH_PUT_DATA(bh);
- brelse(bh);
}
+ if (bh)
+ brelse(bh);
+
LOG_EXIT_STATUS (status);
return status;
} /* ocfs_wait_for_lock_release */
@@ -520,7 +520,7 @@
bool node_in_map = (IS_NODE_ALIVE (vote_map, i, numnodes));
status = 0;
- p = OCFS_BH_GET_DATA(bhs[i]);
+ p = OCFS_BH_GET_DATA_READ(bhs[i]); /* read */
vote = (ocfs_vote *) p;
/* A node we were asking to vote is dead */
@@ -586,7 +586,7 @@
LOG_ERROR_STATUS (status);
goto finally;
}
- pubsect = (ocfs_publish *)OCFS_BH_GET_DATA(bh);
+ pubsect = (ocfs_publish *)OCFS_BH_GET_DATA_WRITE(bh); /* write */
pubsect->dirty = false;
pubsect->vote = 0;
@@ -607,7 +607,7 @@
atomic_set (&osb->node_req_vote, 0);
- finally:
+finally:
if (bh != NULL)
brelse(bh);
up (&(osb->publish_lock));
@@ -778,7 +778,7 @@
atomic_read (&lockres->voted_event_woken), 1000);
atomic_set (&lockres->voted_event_woken, 0);
- finally:
+finally:
ocfs_safefree (dlm_msg);
LOG_EXIT_STATUS (status);
return status;
@@ -800,7 +800,7 @@
LOG_ENTRY ();
ocfs_acquire_lockres (lockres);
- fe = (ocfs_file_entry *)OCFS_BH_GET_DATA(bh);
+ fe = (ocfs_file_entry *)OCFS_BH_GET_DATA_READ(bh); /* read */
vote_map = osb->publ_map;
if (((flags & FLAG_FILE_DELETE) || (flags & FLAG_FILE_RENAME)) &&
@@ -951,7 +951,7 @@
}
}
- bail:
+bail:
LOG_EXIT_STATUS (status);
return status;
} /* ocfs_acquire_lockres_ex */
@@ -1001,7 +1001,7 @@
LOG_ERROR_STATUS (status);
goto finally;
}
- fe = (ocfs_file_entry *)OCFS_BH_GET_DATA(*bh);
+ fe = (ocfs_file_entry *)OCFS_BH_GET_DATA_WRITE(*bh); /* write */
if (flags & DLOCK_FLAG_MASTER)
DISK_LOCK_CURRENT_MASTER (fe) = lockres->master_node_num;
@@ -1090,7 +1090,7 @@
}
}
- bail:
+bail:
LOG_EXIT_STATUS (status);
return status;
} /* ocfs_update_master_on_open */
@@ -1272,7 +1272,7 @@
ocfs_release_lockres (lockres);
- bail:
+bail:
ocfs_put_lockres(lockres);
LOG_EXIT_STATUS (status);
return status;
@@ -1368,8 +1368,9 @@
LOG_ERROR_STATUS (status);
goto finally;
}
- disklock = (ocfs_file_entry *)OCFS_BH_GET_DATA(*b);
+ disklock = (ocfs_file_entry *)OCFS_BH_GET_DATA_READ(*b); /* read */
+#ifdef SUSPICIOUS_CODE
// This code is added to avoid the case when fileentry is not yet updated
// but the lockresource is updated by NMthread and needsflush is set to FALSE.
if (lockres->master_node_num != osb->node_num &&
@@ -1379,6 +1380,7 @@
ocfs_sleep (1000);
goto again;
}
+#endif
if (lockres->master_node_num != osb->node_num ||
lockres->master_node_num != DISK_LOCK_CURRENT_MASTER (disklock)) {
@@ -1417,19 +1419,26 @@
fast_path = true;
if (local_lock && truncate_extend) {
#ifdef VERBOSE_LOCKING_TRACE
- printk("local_lock but an extend or truncate request! will do a master_request.\n");
+ LOG_TRACE_ARGS("local_lock but an extend or truncate request! will do a master_request.\n");
#endif
become_master = true;
}
if (!(fast_path || become_master || get_x || wait_for_release))
master_request = true;
+ /* hack upon hack... if the cachelock is still sitting around, skip voting */
+ if (!fast_path && (become_master || get_x)) {
+ if (ocfs_journal_new_file_search(osb, lock_id)==0) {
+ fast_path = true;
+ }
+ }
+
/* possible locking paths: */
/* fast_path, become_master, get_x, */
/* wait_for_release, master_request */
#ifdef VERBOSE_LOCKING_TRACE
- printk("acquire_lock: lock path is %s\n",
+ LOG_TRACE_ARGS("acquire_lock: lock path is %s\n",
fast_path ? "fast_path" :
(become_master ? "become_master" :
(get_x ? "get_x" :
@@ -1441,7 +1450,7 @@
if (fast_path) {
/* specifically keep an exclusive if we already have one on */
/* this node even if we are asking for a cache lock */
- disklock = (ocfs_file_entry *)OCFS_BH_GET_DATA(*b);
+ disklock = (ocfs_file_entry *)OCFS_BH_GET_DATA_READ(*b); /* read */
keep_exclusive = (DISK_LOCK_FILE_LOCK (disklock) == OCFS_DLM_EXCLUSIVE_LOCK);
OCFS_BH_PUT_DATA(*b);
goto got_lock;
@@ -1546,40 +1555,32 @@
got_lock:
- disklock = (ocfs_file_entry *)OCFS_BH_GET_DATA(*b);
+ disklock = (ocfs_file_entry *)OCFS_BH_GET_DATA_READ(*b); /* read */
have_cache_already = (DISK_LOCK_CURRENT_MASTER (disklock) == osb->node_num &&
DISK_LOCK_FILE_LOCK (disklock) == OCFS_DLM_ENABLE_CACHE_LOCK);
- DISK_LOCK_CURRENT_MASTER (disklock) = osb->node_num;
+ OCFS_BH_PUT_DATA(*b);
- if (!keep_exclusive) {
+ if (!keep_exclusive && !have_cache_already) {
+ disklock = (ocfs_file_entry *)OCFS_BH_GET_DATA_WRITE(*b); /* write */
+ DISK_LOCK_CURRENT_MASTER (disklock) = osb->node_num;
DISK_LOCK_FILE_LOCK (disklock) = lock_type;
OCFS_BH_PUT_DATA(*b);
- if (have_cache_already)
- LOG_TRACE_STR("have cachelock already... skip the write");
- else if (lock_type == OCFS_DLM_ENABLE_CACHE_LOCK) {
- status = ocfs_write_bh (osb, *b, 0, inode);
- if (status < 0) {
- LOG_ERROR_STATUS (status);
- goto finally;
- }
+ status = ocfs_write_bh (osb, *b, 0, inode);
+ if (status < 0) {
+ LOG_ERROR_STATUS (status);
+ goto finally;
}
- } else
- OCFS_BH_PUT_DATA(*b);
-
- status = ocfs_write_bh (osb, *b, lockflags, inode);
- if (status < 0) {
- LOG_ERROR_STATUS (status);
- } else {
- /* We got the lock */
- disklock = (ocfs_file_entry *)OCFS_BH_GET_DATA(*b);
- lockres->lock_type = lock_type;
- lockres->master_node_num = osb->node_num;
- lockres->oin_openmap = DISK_LOCK_OIN_MAP (disklock);
- OCFS_BH_PUT_DATA(*b);
- status = 0;
}
+ /* We got the lock */
+ disklock = (ocfs_file_entry *)OCFS_BH_GET_DATA_READ(*b); /* read */
+ lockres->lock_type = lock_type;
+ lockres->master_node_num = osb->node_num;
+ lockres->oin_openmap = DISK_LOCK_OIN_MAP (disklock);
+ OCFS_BH_PUT_DATA(*b);
+ status = 0;
+
skip_lock_write:
lockres->lock_holders++;
LOG_TRACE_ARGS("lockres->lock_holders = %u\n", lockres->lock_holders);
@@ -1617,15 +1618,18 @@
__u64 oin_node_map;
__u32 curr_master;
int lockflags = (lock_id >= osb->vol_layout.bitmap_off ? OCFS_BH_CACHED : 0);
+ bool clear_tmp = false;
LOG_ENTRY_ARGS ("(0x%08x, %u.%u, %u, %u, 0x%08x)\n", osb, HI (lock_id),
LO (lock_id), lock_type, flags, lockres);
if (bh != NULL)
b = &bh;
- else
+ else {
b = &tmpbh;
-
+ clear_tmp = true;
+ }
+
if (bh == NULL) {
status = ocfs_read_bh (osb, lock_id, b, lockflags, inode);
if (status < 0) {
@@ -1633,7 +1637,7 @@
goto finito;
}
}
- fe = (ocfs_file_entry *)OCFS_BH_GET_DATA(*b);
+ fe = (ocfs_file_entry *)OCFS_BH_GET_DATA_READ(*b); /* read */
oin_node_map = DISK_LOCK_OIN_MAP (fe);
curr_master = DISK_LOCK_CURRENT_MASTER (fe);
OCFS_BH_PUT_DATA(*b);
@@ -1757,7 +1761,7 @@
LOG_ERROR_STATUS (tmpstat);
}
- fe = (ocfs_file_entry *)OCFS_BH_GET_DATA(*b);
+ fe = (ocfs_file_entry *)OCFS_BH_GET_DATA_WRITE(*b); /* write */
if (flags & FLAG_FILE_RELEASE_MASTER)
DISK_LOCK_CURRENT_MASTER (fe) = OCFS_INVALID_NODE_NUM;
@@ -1774,15 +1778,18 @@
/* Reset the lock on the disk */
if (!cachelock) {
- tmpstat = ocfs_write_bh (osb, *b, lockflags, inode);
+ tmpstat = ocfs_write_bh (osb, *b, 0, inode);
if (tmpstat < 0)
LOG_ERROR_STATUS (tmpstat);
+ clear_tmp = false;
}
finito:
- if (tmpbh)
+ if (tmpbh) {
+ if (clear_tmp)
+ ocfs_clear_buffer_modified(tmpbh);
brelse(tmpbh);
-
+ }
LOG_EXIT_STATUS (status);
return status;
} /* ocfs_disk_release_lock */
@@ -1802,6 +1809,14 @@
ocfs_acquire_lockres (lockres);
+ if (bh) {
+ /* always get the write lock on the bh */
+ /* make sure to do this AFTER the lockres acquire */
+ OCFS_BH_GET_DATA_WRITE(bh);
+ OCFS_BH_PUT_DATA(bh);
+ }
+
+
if (lock_type == OCFS_DLM_SHARED_LOCK) {
if (atomic_dec_and_test (&lockres->lr_share_cnt)) {
if (lockres->lock_type == OCFS_DLM_SHARED_LOCK)
@@ -1837,6 +1852,8 @@
}
finally:
+ if (bh)
+ ocfs_clear_buffer_modified(bh);
lockres->lock_holders--;
LOG_TRACE_ARGS("lockres->lock_holders = %u\n", lockres->lock_holders);
ocfs_release_lockres (lockres);
@@ -2006,7 +2023,8 @@
LOG_TRACE_STR ("Network vote");
jif = jiffies;
status = ocfs_send_dlm_request_msg (osb, lockres->sector_num,
- lockres->lock_type, FLAG_FILE_RELEASE_CACHE,
+ lockres->lock_type,
+ FLAG_ACQUIRE_LOCK|FLAG_FILE_RELEASE_CACHE,
lockres, votemap);
if (status >= 0) {
status = lockres->vote_status;
@@ -2031,8 +2049,8 @@
disk_reset = false;
status = ocfs_request_vote (osb, lockres->sector_num,
lockres->lock_type,
- FLAG_FILE_RELEASE_CACHE, votemap,
- &lockseqno, inode);
+ FLAG_ACQUIRE_LOCK|FLAG_FILE_RELEASE_CACHE,
+ votemap, &lockseqno, inode);
if (status < 0) {
if (status == -EAGAIN) {
retry = true;
@@ -2044,8 +2062,8 @@
status = ocfs_wait_for_vote (osb, lockres->sector_num,
lockres->lock_type,
- FLAG_FILE_RELEASE_CACHE, votemap,
- 15000, lockseqno, lockres);
+ FLAG_ACQUIRE_LOCK|FLAG_FILE_RELEASE_CACHE,
+ votemap, 15000, lockseqno, lockres);
if (status < 0) {
if (status == -EAGAIN) {
retry = true;
Modified: trunk/src/extmap.c
===================================================================
--- trunk/src/extmap.c 2003-12-18 23:28:02 UTC (rev 14)
+++ trunk/src/extmap.c 2004-01-24 01:22:15 UTC (rev 15)
@@ -217,7 +217,7 @@
victim->sectors = sectors;
}
- bail:
+bail:
LOG_EXIT_ULONG (ret);
return ret;
@@ -628,7 +628,7 @@
status = 0;
}
- bail:
+bail:
LOG_EXIT_STATUS (status);
return status;
} /* ocfs_update_extent_map */
Modified: trunk/src/file.c
===================================================================
--- trunk/src/file.c 2003-12-18 23:28:02 UTC (rev 14)
+++ trunk/src/file.c 2004-01-24 01:22:15 UTC (rev 15)
@@ -41,7 +41,6 @@
__u64 parent_off;
ocfs_sem *oin_sem = NULL;
-
LOG_ENTRY_ARGS ("(0x%08x, 0x%08x, '%*s')\n", inode, file,
file->f_dentry->d_name.len, file->f_dentry->d_name.name);
atomic_inc (&parent->i_count);
@@ -66,6 +65,7 @@
if (inode_data_is_oin (inode))
oin = GET_INODE_OIN(inode);
status = -EFAIL;
+
if (oin != NULL) {
if (!(oin->oin_flags & OCFS_OIN_IN_TEARDOWN) &&
!(oin->oin_flags & OCFS_OIN_DELETE_ON_CLOSE)) {
@@ -98,6 +98,8 @@
oin = NULL;
ocfs_down_sem (&(osb->osb_res), true);
status = ocfs_create_oin_from_entry (osb, fe_bh, &oin, parent_off, inode);
+ if (status < 0)
+ LOG_ERROR_STATUS(status);
new_oin = true;
ocfs_up_sem (&(osb->osb_res));
}
@@ -105,7 +107,10 @@
if (status < 0) { /* not found on disk or in mem */
if (status != -EINTR) {
- LOG_ERROR_STR ("Open request made for nonexistent file!");
+ LOG_ERROR_ARGS("Open request made for nonexistent "
+ "file! ('%*s')",
+ file->f_dentry->d_name.len,
+ file->f_dentry->d_name.name);
status = -ENOENT;
}
goto leave;
@@ -156,6 +161,7 @@
goto leave;
}
}
+
if (oin->open_hndl_cnt > 0) {
/* The OIN is currently in use by some thread. */
/* We must check whether the requested access/share access */
@@ -431,7 +437,6 @@
int tmpstat;
ocfs_file_entry *fileEntry = NULL;
__u64 dirOffset = 0;
- __u32 size;
bool bAcquiredLock = false;
ocfs_lock_res *pLockResource = NULL;
__u64 changeSeqNum = 0;
@@ -452,7 +457,7 @@
goto leave;
}
- fileEntry = (ocfs_file_entry *)OCFS_BH_GET_DATA(bh);
+ fileEntry = (ocfs_file_entry *)OCFS_BH_GET_DATA_READ(bh); /* read */
if (!IS_VALID_FILE_ENTRY(fileEntry)) {
LOG_ERROR_ARGS ("Invalid fe at offset %u.%u", HILO (*file_off));
@@ -500,7 +505,7 @@
goto leave;
}
- fileEntry = (ocfs_file_entry *)OCFS_BH_GET_DATA(bh);
+ fileEntry = (ocfs_file_entry *)OCFS_BH_GET_DATA_WRITE(bh); /* write */
if (bCacheLock) {
DISK_LOCK_FILE_LOCK (fileEntry) = OCFS_DLM_ENABLE_CACHE_LOCK;
@@ -517,7 +522,6 @@
fileEntry->sync_flags &= ~(OCFS_SYNC_FLAG_CHANGE);
dirOffset = fileEntry->this_sector;
- size = (__u32) OCFS_SECTOR_ALIGN (sizeof (ocfs_file_entry));
flags = OCFS_FE_CACHE_FLAGS(osb, fileEntry);
OCFS_BH_PUT_DATA(bh);
@@ -551,7 +555,7 @@
LOG_ERROR_STATUS (tmpstat);
ocfs_put_lockres (pLockResource);
}
-
+
if (bh != NULL)
brelse(bh);
@@ -617,11 +621,22 @@
ret = -EIO;
goto bail;
}
+
+ if (filp->f_flags & O_APPEND) {
+ LOG_TRACE_ARGS("O_APPEND: inode->i_size=%u, ppos was %u\n",
+ inode->i_size, *ppos);
+ *ppos = inode->i_size;
+ }
if (filp->f_flags & O_DIRECT) {
/* anything special for o_direct? */
LOG_TRACE_STR ("O_DIRECT");
- } else {
+ if (((*ppos) & 511) || (count & 511) ||
+ ((unsigned long)buf & 511) || (inode->i_size & 511)) {
+ filp->f_flags &= ~O_DIRECT;
+ }
+ }
+ if (!(filp->f_flags & O_DIRECT)) {
/* FIXME: is the down_sem supposed to be here?! */
LOG_TRACE_ARGS ("non O_DIRECT write, fileopencount=%d\n",
oin->open_hndl_cnt);
@@ -668,13 +683,12 @@
HI (*ppos), LO (*ppos), HI (newsize), LO (newsize),
HI (inode->i_size), LO (inode->i_size));
- if (newsize > oin->alloc_size) {
+ if (writingAtEOF) {
LOG_TRACE_ARGS
("Will need more allocation: have=%u.%u, need=%u.%u\n",
HI (oin->alloc_size), LO (oin->alloc_size), HI (newsize),
LO (newsize));
-
status = ocfs_extend_file (osb, oin->parent_dirnode_off, oin, newsize, &oin->file_disk_off, NULL, inode, NULL);
if (status < 0) {
if (status != -EINTR && status != -ENOSPC) {
@@ -747,6 +761,11 @@
if (filp->f_flags & O_DIRECT) {
/* anything special for o_direct? */
LOG_TRACE_STR ("O_DIRECT");
+
+ if (((*ppos) & 511) || (count & 511) ||
+ ((unsigned long)buf & 511) || (inode->i_size & 511)) {
+ filp->f_flags &= ~O_DIRECT;
+ }
}
if (OIN_NEEDS_VERIFICATION (oin)) {
@@ -796,7 +815,8 @@
ocfs_bitmap_free_head *free_head = NULL;
ocfs_journal_handle *handle = NULL;
- LOG_ENTRY ();
+ LOG_ENTRY_ARGS ("(file_off = %u.%u, file_size = %u.%u\n",
+ HILO(file_off), HILO(file_size));
changeSeqNum = osb->curr_trans_id;
@@ -815,7 +835,7 @@
goto leave;
}
- fe = (ocfs_file_entry *)OCFS_BH_GET_DATA(bh);
+ fe = (ocfs_file_entry *)OCFS_BH_GET_DATA_READ(bh); /* read */
if (!IS_VALID_FILE_ENTRY(fe)) {
LOG_ERROR_ARGS ("Invalid fe at offset %u.%u", HILO (file_off));
status = -EFAIL;
@@ -858,7 +878,34 @@
goto leave;
}
- fe = (ocfs_file_entry *)OCFS_BH_GET_DATA(bh);
+ /* if oin {
+ take oin->main_res
+ take fe bh lock
+ make file and alloc _size changes
+ release bh lock
+ drop oin->main_res
+ } else {
+ take fe bh lock
+ make file and alloc _size changes
+ release bh lock
+ recheck inode to see if we have oin now
+ if we have it now {
+ take oin->main_res
+ drop oin->main_res
+ }
+ }
+ */
+
+ /* alright, we're going to try to get the oin at least twice
+ * in this function if it hasn't already been passed to
+ * us.. This is our first try... */
+ if (!oin && inode_data_is_oin(inode))
+ oin = GET_INODE_OIN(inode);
+
+ if (oin)
+ ocfs_down_sem(&oin->main_res, true);
+
+ fe = (ocfs_file_entry *)OCFS_BH_GET_DATA_WRITE(bh); /* write */
/* the file entry might have changed underneath us (while
* waiting on the lock). make sure the size is still a valid
@@ -869,6 +916,8 @@
"to size (%u.%u)!\n", HILO(fe->file_size),
HILO(file_size));
OCFS_BH_PUT_DATA(bh);
+ if (oin)
+ ocfs_up_sem(&oin->main_res);
status = -EINVAL;
LOG_ERROR_STATUS(status);
goto leave;
@@ -880,6 +929,8 @@
status = ocfs_free_extents_for_truncate (osb, fe, handle, free_head, inode);
if (status < 0) {
OCFS_BH_PUT_DATA(bh);
+ if (oin)
+ ocfs_up_sem(&oin->main_res);
LOG_ERROR_STATUS (status);
goto leave;
}
@@ -899,13 +950,20 @@
status = ocfs_journal_dirty(handle, bh);
if (status < 0) {
+ if (oin)
+ ocfs_up_sem(&oin->main_res);
LOG_ERROR_STATUS (status);
goto leave;
}
+ /* second try for the oin... */
+ if (!oin && inode_data_is_oin(inode)) {
+ oin = GET_INODE_OIN(inode);
+ ocfs_down_sem(&oin->main_res, true);
+ }
+
if (oin) {
- /* if we updated correctly then we can update the OIN */
- ocfs_down_sem (&(oin->main_res), true);
+ /* if we updated correctly then we can update the OIN */
oin->alloc_size = new_alloc_size;
ocfs_up_sem (&(oin->main_res));
}
@@ -984,7 +1042,7 @@
goto leave;
}
- fileEntry = (ocfs_file_entry *)OCFS_BH_GET_DATA(bh);
+ fileEntry = (ocfs_file_entry *)OCFS_BH_GET_DATA_READ(bh); /* read */
if (!IS_VALID_FILE_ENTRY(fileEntry)) {
printk("fe->signature=%8s\n", fileEntry->signature);
@@ -994,11 +1052,12 @@
OCFS_BH_PUT_DATA(bh);
goto leave;
}
+ OCFS_BH_PUT_DATA(bh);
+ fileEntry = NULL;
+
if (passed_handle == NULL) {
/* cannot call start_trans with a locked buffer head. */
- OCFS_BH_PUT_DATA(bh);
-
handle = ocfs_start_trans(osb, OCFS_FILE_EXTEND_CREDITS);
if (handle == NULL) {
LOG_ERROR_STATUS(status = -ENOMEM);
@@ -1012,14 +1071,15 @@
#endif
/* Grab a lock on the entry found if we have more than
* 1 extents and also make this node the master*/
- fileEntry = (ocfs_file_entry *)OCFS_BH_GET_DATA(bh);
+ fileEntry = (ocfs_file_entry *)OCFS_BH_GET_DATA_READ(bh); /* read */
/* now we always take an EXTEND lock */
lockId = fileEntry->this_sector;
lockFlags = FLAG_FILE_EXTEND;
bFileLockAcquired = true;
- if ((DISK_LOCK_FILE_LOCK (fileEntry) == OCFS_DLM_ENABLE_CACHE_LOCK) && (DISK_LOCK_CURRENT_MASTER (fileEntry) == osb->node_num)) {
+ if ((DISK_LOCK_FILE_LOCK (fileEntry) == OCFS_DLM_ENABLE_CACHE_LOCK) &&
+ (DISK_LOCK_CURRENT_MASTER (fileEntry) == osb->node_num)) {
bCacheLock = true;
}
OCFS_BH_PUT_DATA(bh);
@@ -1037,8 +1097,6 @@
}
bAcquiredLock = true;
} else {
- OCFS_BH_PUT_DATA(bh);
- fileEntry = NULL;
handle = passed_handle;
}
@@ -1048,7 +1106,7 @@
goto leave;
}
- fileEntry = (ocfs_file_entry *)OCFS_BH_GET_DATA(bh);
+ fileEntry = (ocfs_file_entry *)OCFS_BH_GET_DATA_WRITE(bh); /* write */ /* journal access */
if (bCacheLock) {
DISK_LOCK_FILE_LOCK (fileEntry) = OCFS_DLM_ENABLE_CACHE_LOCK;
@@ -1093,6 +1151,21 @@
OCFS_BH_PUT_DATA(bh);
fileEntry = NULL;
+ {
+ struct buffer_head *alloc_bh;
+ unsigned long block;
+ struct super_block *sb = osb->sb;
+
+ for (block = actualDiskOffset >> 9;
+ block < (actualDiskOffset+actualLength) >> 9;
+ block++) {
+ LOG_TRACE_ARGS("setting block %lu as new!\n", block);
+ alloc_bh = getblk(OCFS_GET_BLOCKDEV(sb), block, sb->s_blocksize);
+ alloc_bh->b_state |= (1UL << BH_New);
+ brelse(alloc_bh);
+ }
+ }
+
/* note: ok if oin is null here, not used in
* ocfs_allocate_extent */
status = ocfs_allocate_extent (osb, oin, bh, handle,
@@ -1102,7 +1175,7 @@
LOG_ERROR_STATUS (status);
goto leave;
}
- fileEntry = (ocfs_file_entry *)OCFS_BH_GET_DATA(bh);
+ fileEntry = (ocfs_file_entry *)OCFS_BH_GET_DATA_WRITE(bh); /* write */ /* journal access */
/* update the total allocation size here */
fileEntry->alloc_size += actualLength;
@@ -1149,9 +1222,6 @@
leave:
if (passed_handle == NULL) {
if (handle) {
- ocfs_bitmap_free_head *f = osb->alloc_free_head;
- osb->alloc_free_head = NULL;
-
if (status < 0) {
ocfs_abort_trans(handle);
} else {
@@ -1167,11 +1237,6 @@
if (status < 0)
LOG_ERROR_STATUS(status);
}
-
- if (f) {
- ocfs_process_bitmap_free_head(osb, f);
- free_bitmap_free_head(f);
- }
}
}
@@ -1192,6 +1257,7 @@
if (bh != NULL)
brelse(bh);
+
LOG_EXIT_STATUS (status);
return status;
} /* ocfs_extend_file */
Modified: trunk/src/hash.c
===================================================================
--- trunk/src/hash.c 2003-12-18 23:28:02 UTC (rev 14)
+++ trunk/src/hash.c 2004-01-24 01:22:15 UTC (rev 15)
@@ -27,10 +27,21 @@
#include <ocfs.h>
+
/* Tracing */
#define OCFS_DEBUG_CONTEXT OCFS_DEBUG_CONTEXT_HASH
+extern int atomic_dec_and_lock(atomic_t *atomic, spinlock_t *lock);
+
+struct _ocfs_inode_num;
+
+static int ocfs_inode_hash_prune_all(ocfs_inode_hash *h);
+static struct _ocfs_inode_num * __ocfs_inode_hash_lookup(ocfs_inode_hash *h,
+ __u64 off);
+static inline struct _ocfs_inode_num * __ocfs_hash_remove(ocfs_inode_hash *h,
+ __u64 off);
+
/*
* ocfs_insert_sector_node()
*
@@ -73,7 +84,7 @@
HILO(lock_res->sector_num));
}
- bail:
+bail:
LOG_EXIT_STATUS (status);
return status;
} /* ocfs_insert_sector_node */
@@ -115,7 +126,7 @@
} else
status = -ENOENT;
- bail:
+bail:
LOG_EXIT_STATUS (status);
return status;
} /* ocfs_lookup_sector_node */
@@ -147,7 +158,7 @@
ocfs_put_lockres (lock_res);
- bail:
+bail:
LOG_EXIT ();
return ;
} /* ocfs_remove_sector_node */
@@ -190,7 +201,7 @@
memset (ht->buckets, 0, (ht->size * sizeof (HASHBUCKET)));
ret = 1;
- bail:
+bail:
LOG_EXIT_LONG (ret);
return ret;
} /* ocfs_hash_create */
@@ -242,7 +253,7 @@
ocfs_safefree (ht->buckets);
ht->buckets = NULL;
- bail:
+bail:
LOG_EXIT ();
return;
} /* ocfs_hash_destroy */
@@ -356,7 +367,7 @@
/* Increment the number of entries */
ht->entries++;
- bail:
+bail:
/* Release Lock */
if (lockacqrd)
ocfs_up_sem (&(ht->hashlock));
@@ -427,7 +438,7 @@
bucket = bucket->next;
}
- bail:
+bail:
/* Release Lock */
if (lockacqrd)
ocfs_up_sem (&(ht->hashlock));
@@ -472,7 +483,7 @@
bucket = bucket->next;
}
- bail:
+bail:
/* Release Lock */
if (lockacqrd)
ocfs_up_sem (&(ht->hashlock));
@@ -530,7 +541,7 @@
data[datalen - 1] = '\0';
- bail:
+bail:
/* Release Lock */
if (lockacqrd)
ocfs_up_sem (&(ht->hashlock));
@@ -621,3 +632,1063 @@
/*-------------------------------------------- report the result */
return c;
} /* hash */
+
+
+
+
+/* bh semaphore hashtable stuff */
+
+
+ocfs_bh_sem * ocfs_bh_sem_alloc()
+{
+ return kmem_cache_alloc(OcfsGlobalCtxt.bh_sem_cache, GFP_NOFS);
+}
+
+void ocfs_bh_sem_free(ocfs_bh_sem *sem)
+{
+ kmem_cache_free(OcfsGlobalCtxt.bh_sem_cache, sem);
+}
+
+void ocfs_bh_sem_get(ocfs_bh_sem *sem)
+{
+ atomic_inc(&sem->s_refcnt);
+}
+
+void ocfs_bh_sem_put(ocfs_bh_sem *sem)
+{
+ if (atomic_dec_and_lock(&sem->s_refcnt, &OcfsGlobalCtxt.bh_sem_hash_lock)) {
+ if (buffer_modified(sem->s_bh)) {
+ LOG_ERROR_ARGS("putting last refcount of a modified buffer! block %d\n",
+ sem->s_bh->b_blocknr);
+ }
+ put_bh(sem->s_bh);
+ sem->s_bh = NULL;
+ spin_unlock(&OcfsGlobalCtxt.bh_sem_hash_lock);
+ }
+}
+
+void ocfs_bh_sem_down(ocfs_bh_sem *sem)
+{
+ down(&sem->s_sem);
+}
+
+void ocfs_bh_sem_up(ocfs_bh_sem *sem)
+{
+ up(&sem->s_sem);
+}
+
+/* ripped right out of inode.c */
+#define ocfs_bh_hash_shift (OcfsGlobalCtxt.bh_sem_hash_sz-1)
+#define _hashfn(dev,block) \
+ ((((dev)<<(ocfs_bh_hash_shift - 6)) ^ ((dev)<<(ocfs_bh_hash_shift - 9))) ^ \
+ (((block)<<(ocfs_bh_hash_shift - 6)) ^ ((block) >> 13) ^ \
+ ((block) << (ocfs_bh_hash_shift - 12))))
+#define ocfs_bh_sem_hash_fn(_b) \
+ (_hashfn((unsigned int)((_b)->b_dev), (_b)->b_blocknr) & ocfs_bh_hash_shift)
+
+int ocfs_bh_sem_hash_init()
+{
+ int i, ret;
+
+ LOG_ENTRY();
+
+ spin_lock_init (&OcfsGlobalCtxt.bh_sem_hash_lock);
+ OcfsGlobalCtxt.bh_sem_hash = (struct list_head *)__get_free_pages(GFP_KERNEL, 2);
+ if (!OcfsGlobalCtxt.bh_sem_hash) {
+ LOG_ERROR_STR("ENOMEM allocating ocfs_bh_sem_hash");
+ ret = -ENOMEM;
+ goto bail;
+ }
+ OcfsGlobalCtxt.bh_sem_hash_sz = (PAGE_SIZE * 4) / sizeof(struct list_head);
+
+ for (i=OcfsGlobalCtxt.bh_sem_hash_sz-1; i>=0; i--)
+ INIT_LIST_HEAD(&OcfsGlobalCtxt.bh_sem_hash[i]);
+
+ atomic_set(&OcfsGlobalCtxt.bh_sem_hash_target_bucket, -1);
+ ret = 0;
+bail:
+ LOG_EXIT();
+ return ret;
+}
+
+int ocfs_bh_sem_hash_destroy()
+{
+ int missed;
+
+ LOG_ENTRY();
+
+ while (1) {
+ missed = ocfs_bh_sem_hash_prune_all();
+ if (missed == 0)
+ break;
+ LOG_TRACE_ARGS("still have %d entries in use in hashtable\n", missed);
+ }
+
+ spin_lock (&OcfsGlobalCtxt.bh_sem_hash_lock);
+ free_pages((unsigned long)OcfsGlobalCtxt.bh_sem_hash, 2);
+ OcfsGlobalCtxt.bh_sem_hash = NULL;
+
+ LOG_EXIT();
+ return 0;
+}
+
+
+ocfs_bh_sem * ocfs_bh_sem_lookup(struct buffer_head *bh)
+{
+ int depth, bucket;
+ struct list_head *head, *iter = NULL;
+ ocfs_bh_sem *sem = NULL, *newsem = NULL;
+
+#ifdef VERBOSE_BH_SEM
+ LOG_ENTRY();
+#endif
+ bucket = ocfs_bh_sem_hash_fn(bh);
+ head = &OcfsGlobalCtxt.bh_sem_hash[bucket];
+again:
+ depth = 0;
+ spin_lock (&OcfsGlobalCtxt.bh_sem_hash_lock);
+
+ list_for_each(iter, head) {
+ if (++depth > OCFS_BH_SEM_HASH_PRUNE_TRIGGER) {
+ /* Grandma, what a long list you have? */
+ atomic_set(&OcfsGlobalCtxt.bh_sem_hash_target_bucket, bucket);
+ }
+ sem = list_entry (iter, ocfs_bh_sem, s_list);
+ if (sem->s_blocknr == bh->b_blocknr &&
+ sem->s_dev == bh->b_dev) {
+ if (atomic_read(&sem->s_refcnt)==0) {
+ if (sem->s_bh) {
+ LOG_ERROR_STR("refcount was zero but s_bh not NULL!");
+ BUG();
+ }
+ get_bh(bh);
+ sem->s_bh = bh;
+ }
+ if (sem->s_bh != bh) {
+ LOG_ERROR_STR("ocfs_bh_sem bufferhead does not match!");
+ BUG();
+ }
+ break;
+ }
+ sem = NULL;
+ }
+
+ if (newsem && !sem) {
+ /* second pass, we are first to insert */
+ sem = newsem;
+ list_add(&sem->s_list, head);
+ get_bh(bh);
+ sem->s_bh = bh;
+ }
+
+ if (sem) {
+ /* found something on first or second pass */
+ ocfs_bh_sem_get(sem);
+ if (newsem != sem) {
+ /* if not just added, mru to front */
+ list_del(&sem->s_list);
+ list_add(&sem->s_list, head);
+ }
+ //LOG_TRACE_ARGS("found bh_sem for %d, modified=%s, pid=%d\n",
+ // sem->s_bh->b_blocknr,
+ // buffer_modified(sem->s_bh) ? "true" : "false",
+ // sem->s_pid);
+
+ if (buffer_modified(sem->s_bh) && sem->s_pid == 0) {
+ LOG_ERROR_ARGS("found a%s sem with a modified bh but no pid!!! (block=%d)\n",
+ newsem != sem ? "n old" : " new",
+ sem->s_bh->b_blocknr);
+ }
+ } else {
+ /* first pass. not found. do alloc */
+ spin_unlock (&OcfsGlobalCtxt.bh_sem_hash_lock);
+ newsem = ocfs_bh_sem_alloc();
+ if (newsem) {
+ newsem->s_bh = NULL;
+ atomic_set(&newsem->s_refcnt, 0);
+ newsem->s_blocknr = bh->b_blocknr;
+ newsem->s_dev = bh->b_dev;
+ init_MUTEX (&newsem->s_sem);
+ init_waitqueue_head(&newsem->s_wait);
+ newsem->s_pid = 0;
+ goto again;
+ }
+ sem = NULL;
+ goto bail;
+ }
+
+ spin_unlock (&OcfsGlobalCtxt.bh_sem_hash_lock);
+
+ if (newsem && newsem != sem) {
+ /* another thread inserted while we were sleeping */
+ ocfs_bh_sem_free(newsem);
+ }
+
+bail:
+#ifdef VERBOSE_BH_SEM
+ LOG_EXIT_PTR(sem);
+#endif
+ return sem;
+}
+
+
+/* look up the semaphore for this blocknum, and lock it too */
+/* this can obviously block if someone else already has the bh */
+int ocfs_bh_sem_lock(struct buffer_head *bh)
+{
+ ocfs_bh_sem *sem;
+ int ret;
+
+#ifdef VERBOSE_BH_SEM
+ LOG_ENTRY_ARGS("(blocknr=%u)\n", bh->b_blocknr);
+#endif
+ sem = ocfs_bh_sem_lookup(bh);
+ if (!sem)
+ BUG();
+
+ ocfs_bh_sem_down(sem);
+ if (buffer_modified(bh) && sem->s_pid != current->pid) {
+#ifdef VERBOSE_BH_SEM
+ LOG_TRACE_ARGS("need to wait... modified and pid is %d\n", sem->s_pid);
+#endif
+ ret = OCFS_BH_SEM_WAIT_ON_MODIFY;
+ } else {
+#ifdef VERBOSE_BH_SEM
+ LOG_TRACE_ARGS("got the lock\n");
+#endif
+ ret = OCFS_BH_SEM_GOT_LOCK;
+ }
+
+#ifdef VERBOSE_BH_SEM
+ LOG_EXIT_ULONG(ret);
+#endif
+ return ret;
+}
+
+int ocfs_bh_sem_lock_modify(struct buffer_head *bh)
+{
+ ocfs_bh_sem *sem;
+ int ret;
+
+#ifdef VERBOSE_BH_SEM
+ LOG_ENTRY_ARGS("(blocknr=%u)\n", bh->b_blocknr);
+#endif
+ sem = ocfs_bh_sem_lookup(bh);
+ if (!sem)
+ BUG();
+
+ ocfs_bh_sem_down(sem);
+ ret = OCFS_BH_SEM_GOT_LOCK;
+ if (buffer_modified(bh)) {
+ //LOG_TRACE_ARGS("buffer modified\n");
+ if (sem->s_pid == 0) {
+ LOG_ERROR_ARGS("modified, but pid is 0!\n");
+ // BUG();
+ sem->s_pid = current->pid;
+
+ /* this should really be a bug, but for now, up the */
+ /* refcount as if it weren't modified */
+ ocfs_bh_sem_get(sem);
+ } else if (sem->s_pid != current->pid) {
+ LOG_TRACE_ARGS("need to wait... modified and pid is %d\n", sem->s_pid);
+ ret = OCFS_BH_SEM_WAIT_ON_MODIFY;
+ }
+ } else {
+ //LOG_TRACE_ARGS("buffer NOT modified\n");
+ /* this is the first call to modify it */
+ if (sem->s_pid != 0)
+ LOG_ERROR_ARGS("first to modify, but pid is NOT 0!\n");
+ sem->s_pid = current->pid;
+
+ /* extra refcount for the modified bh */
+ ocfs_bh_sem_get(sem);
+
+ set_buffer_modified(bh);
+ }
+
+#ifdef VERBOSE_BH_SEM
+ LOG_EXIT_ULONG(ret);
+#endif
+ return ret;
+}
+
+
+
+int ocfs_bh_sem_unlock(struct buffer_head *bh)
+{
+ ocfs_bh_sem *sem;
+
+#ifdef VERBOSE_BH_SEM
+ LOG_ENTRY();
+#endif
+ sem = ocfs_bh_sem_lookup(bh);
+ if (!sem)
+ BUG();
+
+ /* take away one ref from this lookup */
+ ocfs_bh_sem_put(sem);
+
+ ocfs_bh_sem_up(sem);
+
+ /* take away another ref from the lock lookup */
+ ocfs_bh_sem_put(sem);
+
+#ifdef VERBOSE_BH_SEM
+ LOG_EXIT();
+#endif
+ return 0;
+}
+
+
+/* returns number of pruned entries */
+int ocfs_bh_sem_hash_prune()
+{
+ int bucket, pruned;
+ struct list_head *head, *iter = NULL, *tmpiter = NULL;
+ ocfs_bh_sem *sem = NULL;
+ LIST_HEAD(tmp);
+
+ LOG_ENTRY();
+
+ /* The better to prune you with, my dear! */
+ bucket = atomic_read(&OcfsGlobalCtxt.bh_sem_hash_target_bucket);
+ if (bucket == -1) {
+ pruned = 0;
+ goto bail;
+ }
+
+ spin_lock(&OcfsGlobalCtxt.bh_sem_hash_lock);
+
+ bucket = atomic_read(&OcfsGlobalCtxt.bh_sem_hash_target_bucket);
+ if (bucket == -1) {
+ spin_unlock (&OcfsGlobalCtxt.bh_sem_hash_lock);
+ pruned = 0;
+ goto bail;
+ }
+
+ head = &OcfsGlobalCtxt.bh_sem_hash[bucket];
+ pruned = 0;
+
+ /* run in lru order */
+ list_for_each_prev_safe(iter, tmpiter, head) {
+ sem = list_entry (iter, ocfs_bh_sem, s_list);
+ if (atomic_read(&sem->s_refcnt) < 1) {
+ list_del(&sem->s_list);
+ list_add(&sem->s_list, &tmp);
+ pruned++;
+ }
+ if (pruned >= OCFS_BH_SEM_HASH_PRUNE_MAX)
+ break;
+ }
+ atomic_set(&OcfsGlobalCtxt.bh_sem_hash_target_bucket, -1);
+
+ spin_unlock (&OcfsGlobalCtxt.bh_sem_hash_lock);
+
+ list_for_each_safe(iter, tmpiter, &tmp) {
+ sem = list_entry (iter, ocfs_bh_sem, s_list);
+ if (sem->s_bh) {
+ LOG_ERROR_STR("s_bh is NOT NULL");
+ BUG();
+ }
+ list_del(&sem->s_list);
+ ocfs_bh_sem_free(sem);
+ }
+bail:
+ LOG_EXIT_ULONG(pruned);
+ return pruned;
+}
+
+int ocfs_bh_sem_hash_cleanup_pid(pid_t pid)
+{
+ int bucket, found = 0;
+ struct list_head *head, *iter = NULL;
+ ocfs_bh_sem *sem = NULL;
+
+ LOG_ENTRY();
+
+ bucket = 0;
+again:
+ spin_lock(&OcfsGlobalCtxt.bh_sem_hash_lock);
+
+ head = &OcfsGlobalCtxt.bh_sem_hash[bucket];
+
+ list_for_each(iter, head) {
+ sem = list_entry (iter, ocfs_bh_sem, s_list);
+
+ if (sem->s_bh &&
+ buffer_modified(sem->s_bh) &&
+ sem->s_pid == pid) {
+ found++;
+
+ /* only do one buffer at a time. */
+ spin_unlock(&OcfsGlobalCtxt.bh_sem_hash_lock);
+
+ ocfs_bh_sem_down(sem);
+ sem->s_pid = 0;
+ clear_buffer_modified(sem->s_bh);
+ ocfs_bh_sem_up(sem);
+
+ /* remove ref from ocfs_bh_sem_lock_modify */
+ ocfs_bh_sem_put(sem);
+
+ goto again;
+ }
+ }
+
+ spin_unlock (&OcfsGlobalCtxt.bh_sem_hash_lock);
+
+ if (++bucket < OcfsGlobalCtxt.bh_sem_hash_sz)
+ goto again;
+
+ if (found)
+ LOG_ERROR_ARGS("Found %d modified buffers!\n", found);
+
+ LOG_EXIT_ULONG(found);
+ return found;
+}
+
+/* returns number of missed entries */
+int ocfs_bh_sem_hash_prune_all()
+{
+ int bucket, missed;
+ struct list_head *head, *iter = NULL, *tmpiter = NULL;
+ ocfs_bh_sem *sem = NULL;
+ LIST_HEAD(tmp);
+
+ LOG_ENTRY();
+
+ missed = 0;
+ bucket = 0;
+ spin_lock(&OcfsGlobalCtxt.bh_sem_hash_lock);
+ atomic_set(&OcfsGlobalCtxt.bh_sem_hash_target_bucket, -1);
+again:
+ head = &OcfsGlobalCtxt.bh_sem_hash[bucket];
+
+ /* run in lru order */
+ list_for_each_prev_safe(iter, tmpiter, head) {
+ sem = list_entry (iter, ocfs_bh_sem, s_list);
+ if (atomic_read(&sem->s_refcnt) < 1) {
+ list_del(&sem->s_list);
+ list_add(&sem->s_list, &tmp);
+ } else {
+ missed++;
+ LOG_TRACE_ARGS("missed block %lu, refcount %u, "
+ "pid = %u\n",
+ sem->s_blocknr,
+ sem->s_refcnt,
+ sem->s_pid);
+ }
+ }
+
+ if (++bucket < OcfsGlobalCtxt.bh_sem_hash_sz)
+ goto again;
+
+ LOG_TRACE_ARGS("finished pruning, missed %d entries\n", missed);
+
+ spin_unlock (&OcfsGlobalCtxt.bh_sem_hash_lock);
+
+ list_for_each_safe(iter, tmpiter, &tmp) {
+ sem = list_entry (iter, ocfs_bh_sem, s_list);
+ if (sem->s_bh) {
+ LOG_ERROR_STR("s_bh is NOT NULL");
+ BUG();
+ }
+ list_del(&sem->s_list);
+ ocfs_bh_sem_free(sem);
+ }
+
+ LOG_EXIT_ULONG(missed);
+ return missed;
+}
+
+void wait_on_buffer_modified(struct buffer_head * bh)
+{
+ ocfs_bh_sem *sem = ocfs_bh_sem_lookup(bh);
+ struct task_struct *tsk = current;
+ DECLARE_WAITQUEUE(wait, tsk);
+
+ LOG_ENTRY_ARGS("(block=%lu, sem->s_pid=%d)\n", bh->b_blocknr,
+ sem->s_pid );
+
+ add_wait_queue(&sem->s_wait, &wait);
+ do {
+ run_task_queue(&tq_disk);
+ set_task_state(tsk, TASK_UNINTERRUPTIBLE);
+ if (!buffer_modified(bh) ||
+ sem->s_pid == current->pid)
+ break;
+ schedule();
+ } while (buffer_modified(bh) && sem->s_pid != current->pid);
+ tsk->state = TASK_RUNNING;
+ remove_wait_queue(&sem->s_wait, &wait);
+ ocfs_bh_sem_put(sem);
+
+ LOG_EXIT();
+}
+
+void ocfs_clear_buffer_modified(struct buffer_head *bh)
+{
+ ocfs_bh_sem *sem = NULL;
+
+ LOG_ENTRY();
+ if (buffer_modified(bh)) {
+ sem = ocfs_bh_sem_lookup(bh);
+ if (ocfs_bh_sem_lock_modify(bh) != OCFS_BH_SEM_GOT_LOCK) {
+ printk("ocfs: sem->s_pid=%d, my pid=%d\n", sem->s_pid, current->pid);
+ BUG();
+ }
+ clear_buffer_modified(bh);
+ sem->s_pid = 0;
+ ocfs_bh_sem_unlock(bh);
+ if (waitqueue_active(&sem->s_wait))
+ wake_up(&sem->s_wait);
+ /* remove ref from lookup above */
+ ocfs_bh_sem_put(sem);
+
+ /* remove additional ref from ocfs_bh_sem_lock_modify */
+ ocfs_bh_sem_put(sem);
+ }
+ LOG_EXIT();
+}
+
+typedef struct _ocfs_inode_num {
+ struct list_head i_list;
+ unsigned long i_ino;
+ __u64 i_off; /* fe->this_sector OR
+ * fe->extents[0].disk_off */
+ __u64 i_fe_off; /* used only for directory inodes,
+ * points to parent fe of
+ * dirnode. for files i_off == i_fe_off,
+ * for root directory this is 0 */
+ atomic_t i_refcnt; /* this is very short lived */
+} ocfs_inode_num;
+
+static inline ocfs_inode_num *ocfs_create_inode_num(void)
+{
+ ocfs_inode_num *inum = NULL;
+
+ inum = ocfs_malloc(sizeof(ocfs_inode_num));
+ if (inum == NULL) {
+ LOG_ERROR_STATUS(-ENOMEM);
+ goto bail;
+ }
+ memset(inum, 0, sizeof(ocfs_inode_num));
+
+ INIT_LIST_HEAD(&inum->i_list);
+ atomic_set(&inum->i_refcnt, 1);
+
+bail:
+ return(inum);
+}
+#define ocfs_free_inode_num(inum) (ocfs_free(inum))
+
+/*
+ * ocfs_inode_hash_init()
+ *
+ * init the inode hash off an osb
+ */
+int ocfs_inode_hash_init(ocfs_super *osb)
+{
+ ocfs_inode_hash *h = &osb->inode_hash;
+ int i, status = 0;
+
+ LOG_ENTRY();
+
+ spin_lock_init(&h->lock);
+ h->num_ents = 0;
+
+ /* we only want one page for the hash. */
+ h->hash = (struct list_head *)__get_free_pages(GFP_KERNEL, 0);
+ if (!h->hash) {
+ LOG_ERROR_STATUS(status = -ENOMEM);
+ goto bail;
+ }
+
+ h->size = PAGE_SIZE / sizeof(struct list_head);
+
+ LOG_TRACE_ARGS("h->size = %d\n", h->size);
+
+ for (i = 0; i < h->size; i++)
+ INIT_LIST_HEAD(&(h->hash[i]));
+
+bail:
+ LOG_EXIT_STATUS(status);
+ return status;
+} /* ocfs_inode_hash_init */
+
+/*
+ * ocfs_inode_hash_prune_all
+ *
+ * forcefully prunes the hash -- anything left with ANY refcount will
+ * be deleted. Returns the number of hash items we had to prune.
+ */
+static int ocfs_inode_hash_prune_all(ocfs_inode_hash *h)
+{
+ int retval = 0;
+ int i = 0;
+ struct list_head *head;
+ struct list_head *iter = NULL;
+ struct list_head *tmpiter = NULL;
+ ocfs_inode_num *inum;
+
+ LOG_ENTRY();
+
+ spin_lock(&h->lock);
+
+ for(i = 0; i < h->size; i++) {
+ head = &h->hash[i];
+
+ if (list_empty(head))
+ continue;
+
+ list_for_each_safe(iter, tmpiter, head) {
+ inum = list_entry(iter, ocfs_inode_num, i_list);
+ if (atomic_read(&inum->i_refcnt) != 1)
+ LOG_TRACE_ARGS("inum %lu has refcount %u "
+ "(offset = %u.%u)\n",
+ inum->i_ino,
+ atomic_read(&inum->i_refcnt),
+ HILO(inum->i_off));
+
+ list_del(&inum->i_list);
+ ocfs_free_inode_num(inum);
+ retval++;
+ h->num_ents--;
+ }
+ }
+
+ spin_unlock(&h->lock);
+
+ LOG_EXIT_STATUS(retval);
+ return retval;
+} /* ocfs_inode_hash_prune_all */
+
+/*
+ * ocfs_inode_hash_destroy()
+ */
+void ocfs_inode_hash_destroy(ocfs_inode_hash *h)
+{
+ int n;
+
+ LOG_ENTRY();
+
+ /* by shutdown, we shouldn't have anything left in the hash. */
+ n = ocfs_inode_hash_prune_all(h);
+ if (n)
+ LOG_TRACE_ARGS("%d items pruned from inode hash.\n", n);
+
+ spin_lock(&h->lock);
+ free_pages((unsigned long) h->hash, 0);
+ h->hash = NULL;
+ h->num_ents = 0;
+ h->size = 0;
+
+ LOG_EXIT();
+ return;
+} /* ocfs_inode_hash_destroy */
+
+#define OCFS_INODE_HASH(h, off) ((off / 512) % (h->size))
+
+/*
+ * __ocfs_inode_hash_lookup()
+ *
+ * You MUST be holding the inode hash lock before calling this!
+ */
+static ocfs_inode_num * __ocfs_inode_hash_lookup(ocfs_inode_hash *h,
+ __u64 off)
+{
+ ocfs_inode_num *inum = NULL;
+ int bucket;
+ struct list_head *head;
+ struct list_head *iter = NULL;
+
+ if (spin_trylock(&h->lock))
+ BUG();
+
+ bucket = OCFS_INODE_HASH(h, off);
+ LOG_TRACE_ARGS("off = %u.%u, bucket = %d\n", HILO(off), bucket);
+
+ head = &h->hash[bucket];
+
+ if (list_empty(head))
+ goto bail;
+
+ list_for_each(iter, head) {
+ inum = list_entry(iter, ocfs_inode_num, i_list);
+
+ if (inum->i_off == off)
+ break;
+ inum = NULL;
+ }
+
+bail:
+ return(inum);
+} /* __ocfs_inode_hash_lookup */
+
+/*
+ * ocfs_inode_hash_lookup()
+ *
+ * lookup an offset in the hash. return '0' if it doesn't exist or error,
+ * otherwise return the inode number and (optionally) the fe_off. You
+ * have no guarantee that it will stay in the hash after this call, or
+ * that it won't get inserted either! */
+unsigned long ocfs_inode_hash_lookup(ocfs_inode_hash *h,
+ __u64 offset,
+ __u64 *fe_off)
+{
+ unsigned long ino = 0;
+ ocfs_inode_num *inum = NULL;
+
+ LOG_ENTRY_ARGS("(offset=%u.%u)\n", HILO(offset));
+
+ spin_lock(&h->lock);
+
+ inum = __ocfs_inode_hash_lookup(h, offset);
+ if (inum) {
+ ino = inum->i_ino;
+ if (fe_off)
+ *fe_off = inum->i_fe_off;
+ }
+
+ spin_unlock(&h->lock);
+
+ LOG_EXIT_ULONG(ino);
+ return(ino);
+} /* ocfs_inode_hash_lookup */
+
+/*
+ * ocfs_inode_hash_insert()
+ *
+ * returns the inode number for that offset if it already exists in
+ * the hash, otherwise inserts a new inode and returns the inode
+ * number passed in.
+ */
+unsigned long ocfs_inode_hash_insert(ocfs_super *osb,
+ __u64 offset,
+ __u64 fe_off,
+ unsigned long ino)
+{
+ ocfs_inode_hash *h = &osb->inode_hash;
+ ocfs_inode_num *inum = NULL;
+ ocfs_inode_num *new_inum = NULL;
+ struct list_head *head;
+ int bucket;
+ unsigned long retval = ino;
+
+ LOG_ENTRY_ARGS("(offset = %u.%u, ino = %lu, fe_off = %u.%u)\n",
+ HILO(offset), ino, HILO(fe_off));
+
+again:
+ spin_lock(&h->lock);
+
+ inum = __ocfs_inode_hash_lookup(h, offset);
+
+ /* whoa, offset better be the same! */
+ if (inum && (inum->i_off != offset))
+ BUG();
+
+ if (inum && (inum->i_fe_off != fe_off))
+ BUG();
+
+ /* (second pass) we didn't find anything, insert new one. */
+ if (inum == NULL && new_inum) {
+ bucket = OCFS_INODE_HASH(h, offset);
+ head = &h->hash[bucket];
+
+ list_add(&new_inum->i_list, head);
+ h->num_ents++;
+ }
+
+ spin_unlock(&h->lock);
+
+ /* if this is our first pass and we haven't found anything,
+ * create it now and go back up to try an insert. */
+ if (inum == NULL && new_inum == NULL) {
+ new_inum = ocfs_create_inode_num();
+ new_inum->i_ino = ino;
+ new_inum->i_off = offset;
+ new_inum->i_fe_off = fe_off;
+
+ goto again;
+ }
+
+ /* we created a new one to add, but someone added it before we
+ * could start our second pass, so just clean up. */
+ if (inum && new_inum) {
+ ocfs_free(new_inum);
+ new_inum = NULL;
+ }
+
+ if (inum)
+ retval = inum->i_ino;
+
+ if (retval != ino)
+ LOG_TRACE_ARGS("Returning a different i_ino! "
+ "(offset = %u.%u, passed ino = %lu, "
+ "returned = %lu\n", HILO(offset), ino, retval);
+
+ LOG_EXIT_ULONG(retval);
+
+ return retval;
+} /* ocfs_inode_hash_insert */
+
+/*
+ * __ocfs_hash_remove()
+ *
+ * only return inum if we're supposed to free it, otherwise return NULL.
+ */
+static inline ocfs_inode_num * __ocfs_hash_remove(ocfs_inode_hash *h,
+ __u64 off)
+{
+ ocfs_inode_num *inum;
+
+ inum = __ocfs_inode_hash_lookup(h, off);
+
+ if (inum == NULL) {
+ printk("Cannot remove a nonexistent inum from hash! (%u.%u)\n",
+ HILO(off));
+
+ BUG();
+ }
+
+ if (atomic_dec_and_test(&inum->i_refcnt)) {
+ list_del(&inum->i_list);
+ h->num_ents--;
+ } else {
+ inum = NULL;
+ }
+
+ return(inum);
+} /* __ocfs_hash_remove */
+
+/*
+ * ocfs_inode_hash_remove()
+ */
+void ocfs_inode_hash_remove(ocfs_inode_hash *h, __u64 off)
+{
+ ocfs_inode_num *inum = NULL;
+
+ LOG_ENTRY_ARGS("(off = %u.%u)\n", HILO(off));
+
+ spin_lock(&h->lock);
+
+ inum = __ocfs_hash_remove(h, off);
+
+ spin_unlock(&h->lock);
+
+ if (inum)
+ ocfs_free(inum);
+
+ LOG_EXIT();
+ return;
+} /* ocfs_inode_hash_remove */
+
+/*
+ * ocfs_inode_rehash()
+ *
+ * update the offset value returned by a lookup on this key. Used
+ * during rename.
+ *
+ * TODO: This should also take an inode argument and reset
+ * the offset on that while holding the hash lock.
+ */
+int ocfs_inode_rehash(ocfs_inode_hash *h,
+ __u64 oldoff,
+ __u64 newoff,
+ __u64 new_fe_off)
+{
+ int status = 0;
+ ocfs_inode_num *inum = NULL;
+ struct list_head *head;
+ int bucket;
+
+ LOG_ENTRY_ARGS("(oldoff = %u.%u, newoff = %u.%u, "
+ "new_fe_off = %u.%u)\n",
+ HILO(oldoff), HILO(newoff), HILO(new_fe_off));
+
+ spin_lock(&h->lock);
+
+ inum = __ocfs_inode_hash_lookup(h, oldoff);
+ if (inum == NULL)
+ BUG();
+
+ list_del(&inum->i_list);
+
+ inum->i_off = newoff;
+ inum->i_fe_off = new_fe_off;
+
+ bucket = OCFS_INODE_HASH(h, newoff);
+ head = &h->hash[bucket];
+ list_add(&inum->i_list, head);
+
+ spin_unlock(&h->lock);
+
+ LOG_EXIT_STATUS(status);
+ return status;
+} /* ocfs_inode_rehash */
+
+#if 0
+/* NOTE: After a put, you can't trust 'inum', as it may have been
+ * freed. Returns true if we freed it from memory. */
+static inline int __put_inum(ocfs_inode_hash *h, ocfs_inode_num *inum)
+{
+ int free = 0;
+
+ free = atomic_dec_and_lock(&inum->i_refcnt, &h->lock);
+
+ if (free) {
+ list_del(&inum->i_list);
+ h->num_ents--;
+ spin_unlock(&h->lock);
+ ocfs_free(inum);
+ }
+
+ return(free);
+}
+#endif
+
+/*
+ * ocfs_get_inode_from_offset()
+ *
+ * Ok, because we don't have inode->i_sem when going into this, things
+ * are a bit tricky. Basically the kernel can call clear_inode on it
+ * while we're looking up the inode number. clear_inode will call
+ * remove, and though we've got a number, it'll have been deleted from
+ * the hash. So we up a refcount on the inode_num to avoid it being
+ * deleted during remove. This doesn't prevent the inode itself from
+ * being removed however, and we might have to recreate it.
+ */
+struct inode *ocfs_get_inode_from_offset(ocfs_super *osb,
+ __u64 offset,
+ struct buffer_head *fe_bh)
+{
+ struct inode *inode = NULL;
+ ocfs_inode_num *inum = NULL;
+ ocfs_inode_num *new_inum = NULL;
+ struct super_block *sb = osb->sb;
+ unsigned long new_ino = 0;
+ ocfs_inode_hash *h = &(osb->inode_hash);
+ int bucket;
+ struct list_head *head;
+ ocfs_find_inode_args args;
+ __u64 fe_off;
+ ocfs_file_entry *fe;
+
+ LOG_ENTRY_ARGS("(offset = %u.%u)\n", HILO(offset));
+
+ /* This is ugly, but...
+ * There are several cases where we may not want an inode:
+ * 1) any time during 1st mount (root_start_off will be 0)
+ * 2) any system file, EXCEPT the journal as JBD requires one
+ */
+ if (osb->vol_layout.root_start_off == 0
+ || offset < osb->vol_layout.root_start_off) {
+ /* OHMYGODTHISISTHEUGLIESTIFEVER */
+ if (offset < (JOURNAL_FILE_BASE_ID * osb->sect_size
+ + osb->vol_layout.root_int_off)
+ ||
+ offset >= ((JOURNAL_FILE_BASE_ID + OCFS_MAXIMUM_NODES)
+ * osb->sect_size
+ + osb->vol_layout.root_int_off)) {
+ printk("skipping inode create for %u.%u\n",
+ HILO(offset));
+ goto bail;
+ }
+ }
+
+ /* if they ask for the root dirnode, just return it. */
+ if (offset == osb->vol_layout.root_start_off) {
+ LOG_TRACE_ARGS("Asked for root dirnode (%u.%u)\n",
+ HILO(offset));
+
+ inode = osb->sb->s_root->d_inode;
+
+ /* should we iget it or not? i suppose if you're in
+ * here and you've asked for the root inode you don't
+ * know what it is and will prolly iput it later... */
+ if (inode)
+ atomic_inc(&inode->i_count);
+ goto bail;
+ }
+
+ /* if it's a directory, we want the parent fe off so get it here. */
+ fe = (ocfs_file_entry *) OCFS_BH_GET_DATA_READ(fe_bh);
+ if (!IS_VALID_FILE_ENTRY(fe)) {
+ OCFS_BH_PUT_DATA(fe_bh);
+ LOG_ERROR_STATUS(-EINVAL);
+ goto bail;
+ }
+ fe_off = fe->this_sector;
+ OCFS_BH_PUT_DATA(fe_bh);
+
+ /* this is allowed to be slow. Create the inode num 1st to
+ * simplify stuff.*/
+ new_inum = ocfs_create_inode_num();
+ if (new_inum == NULL) {
+ LOG_ERROR_STATUS(-ENOMEM);
+ goto bail;
+ }
+
+ spin_lock(&h->lock);
+
+ inum = __ocfs_inode_hash_lookup(h, offset);
+ LOG_TRACE_ARGS("return from lookup, inum=0x%x\n", inum);
+
+ /* if not found, insert it into hash (create new one) and inc
+ * refcount */
+ if (!inum) {
+ inum = new_inum;
+
+ new_ino = iunique(sb, OCFS_ROOT_INODE_NUMBER);
+ inum->i_ino = new_ino;
+ inum->i_off = offset;
+ inum->i_fe_off = fe_off;
+
+ LOG_TRACE_ARGS("Allocating a new inode number, "
+ "(offset = %u.%u, i_ino = %lu\n",
+ HILO(offset), new_ino);
+
+ bucket = OCFS_INODE_HASH(h, offset);
+ head = &h->hash[bucket];
+ list_add(&inum->i_list, head);
+ h->num_ents++;
+ }
+
+ atomic_inc(&inum->i_refcnt);
+
+ spin_unlock(&h->lock);
+
+ if (inum != new_inum)
+ ocfs_free(new_inum);
+
+ /* call iget4, return inode */
+ args.offset = offset;
+ args.fe_bh = fe_bh;
+ inode = iget4(sb, inum->i_ino, (find_inode_t) ocfs_find_inode, &args);
+ if (!inode || is_bad_inode (inode)) {
+ LOG_ERROR_STATUS(-EINVAL);
+ if (inode) {
+ iput(inode);
+ inode = NULL;
+ }
+ /* we want to cleanup after ourselves. */
+ atomic_dec(&inum->i_refcnt);
+ ocfs_inode_hash_remove(h, inum->i_off);
+ inum = NULL;
+ }
+
+bail:
+ /* We don't have to worry about freeing the inum after this
+ * dec because the inode cannot have been destroyed yet (it's
+ * still got a refcount of at least 1) */
+ if (inum)
+ atomic_dec(&inum->i_refcnt);
+
+ if (inode)
+ LOG_TRACE_ARGS("returning inode with number %lu\n",
+ inode->i_ino);
+
+ LOG_EXIT_PTR(inode);
+
+ return(inode);
+} /* ocfs_get_inode_from_offset */
Modified: trunk/src/heartbeat.c
===================================================================
--- trunk/src/heartbeat.c 2003-12-18 23:28:02 UTC (rev 14)
+++ trunk/src/heartbeat.c 2004-01-24 01:22:15 UTC (rev 15)
@@ -71,7 +71,7 @@
}
}
- publish = (ocfs_publish *) OCFS_BH_GET_DATA(*pub_bh);
+ publish = (ocfs_publish *) OCFS_BH_GET_DATA_WRITE(*pub_bh); /* write */
if ((publish->dirty) && (!osb->publish_dirty)) {
LOG_TRACE_STR(("NMThread reads the bit as dirty"));
publish->dirty = false;
@@ -136,7 +136,7 @@
if (first_time) {
#if !defined(USERSPACE_TOOL)
/* Read the last comm_seq_num */
- publish = (ocfs_publish *) OCFS_BH_GET_DATA(bhs[osb->node_num]);
+ publish = (ocfs_publish *) OCFS_BH_GET_DATA_READ(bhs[osb->node_num]); /* read */
spin_lock (&OcfsGlobalCtxt.comm_seq_lock);
OcfsGlobalCtxt.comm_seq_num = publish->comm_seq_num + 10;
spin_unlock (&OcfsGlobalCtxt.comm_seq_lock);
@@ -144,7 +144,7 @@
#endif
/* Refresh local buffers */
for (i = 0; i < num_nodes; i++) {
- publish = (ocfs_publish *) OCFS_BH_GET_DATA(bhs[i]);
+ publish = (ocfs_publish *) OCFS_BH_GET_DATA_READ(bhs[i]); /* read */
node_map->time[i] = publish->time;
node_map->scan_rate[i] = publish->hbm[i];
node_map->scan_time[i] = curr_time;
@@ -166,7 +166,7 @@
}
for (i = 0; i < num_nodes; i++) {
- publish = (ocfs_publish *) OCFS_BH_GET_DATA(bhs[i]);
+ publish = (ocfs_publish *) OCFS_BH_GET_DATA_READ(bhs[i]); /* read */
/* Loop if slot is unused */
if (publish->time == (__u64) 0) {
Modified: trunk/src/inc/journal.h
===================================================================
--- trunk/src/inc/journal.h 2003-12-18 23:28:02 UTC (rev 14)
+++ trunk/src/inc/journal.h 2004-01-24 01:22:15 UTC (rev 15)
@@ -102,6 +102,25 @@
struct list_head lock_list;
};
+#ifdef OCFS_PARANOID_ABORTS
+/*
+ * One saved copy of a journalled block. The handle keeps an array of these
+ * (co_buffs) so that the original contents can be copied back into the
+ * buffer if the transaction is aborted.
+ */
+typedef struct _ocfs_journal_copyout ocfs_journal_copyout;
+struct _ocfs_journal_copyout {
+	unsigned long blocknr; /* what block is this for? */
+	char *data; /* the actual data */
+};
+
+/*
+ * Free every copy-out data buffer recorded on the handle, then the co_buffs
+ * array itself, leaving num_co == 0 and co_buffs == NULL.
+ *
+ * The argument is parenthesised so any pointer expression can be passed.
+ * It is still expanded several times, so it must have no side effects.
+ */
+#define ocfs_handle_free_all_copyout(handle) \
+do { \
+	while ((handle)->num_co) { \
+		(handle)->num_co--; \
+		if ((handle)->co_buffs[(handle)->num_co].data) \
+			ocfs_free((handle)->co_buffs[(handle)->num_co].data); \
+	} \
+	ocfs_free((handle)->co_buffs); \
+	(handle)->co_buffs = NULL; \
+} while (0)
+#endif
+
typedef struct _ocfs_journal_handle ocfs_journal_handle;
struct _ocfs_journal_handle {
handle_t *k_handle; /* kernel handle. */
@@ -130,6 +149,32 @@
struct list_head locks; /* A bunch of locks to
* release on commit/abort. This
* should be a list_head */
+
+#ifdef OCFS_PARANOID_ABORTS
+ int num_co;
+ ocfs_journal_copyout *co_buffs; /* Copy-out buffers. On 1st
+ * journal_access of a buffer
+ * we make a copy of it into
+ * one of these. That way if we
+ * abort we can place the
+ * original copy back into the
+ * buffer. */
+#endif
+ /* The next two structures are ONLY to be used for local alloc
+ * code. It's very, very ugly. */
+ struct _ocfs_bitmap_free_head *commit_bits; /* bits to be
+ * freed ONLY if
+ * we commit the
+ * handle. */
+ struct _ocfs_bitmap_free_head *abort_bits; /* bits to be
+ * freed ONLY if
+ * we abort the
+ * handle. */
+ __u64 new_file_lockid; /* offset for the
+ * most recently
+ * created file
+ * sitting on this
+ * journal handle */
};
/* should we checkpoint this handle on commit? */
@@ -184,6 +229,7 @@
int ocfs_journal_wipe(ocfs_journal *journal, int full);
int ocfs_journal_load(ocfs_journal *journal);
void ocfs_recovery_thread(struct _ocfs_super *osb, int node_num);
+int ocfs_journal_new_file_search(struct _ocfs_super *osb, __u64 lockid);
/*
* Transaction Handling:
@@ -311,7 +357,9 @@
- OCFS_JOURNAL_FUZZ_CREDITS)
/* fe change, locknode change, dirnode head, times two plus a possible
- * delete, and fuzz */
+ * delete, three to fix the up_node_hdr_ptr values of any extents
+ * below the moved fe, and fuzz */
#define OCFS_FILE_RENAME_CREDITS (2 * (1 + 1 + 1) + OCFS_FILE_DELETE_CREDITS \
- + OCFS_JOURNAL_FUZZ_CREDITS)
+ + OCFS_MAX_FILE_ENTRY_EXTENTS \
+ + OCFS_JOURNAL_FUZZ_CREDITS)
#endif /* _OCFSJOURNAL_H_ */
Modified: trunk/src/inc/ocfs.h
===================================================================
--- trunk/src/inc/ocfs.h 2003-12-18 23:28:02 UTC (rev 14)
+++ trunk/src/inc/ocfs.h 2004-01-24 01:22:15 UTC (rev 15)
@@ -84,8 +84,7 @@
#define OCFS_GCC_ATTR_PACKED __attribute__ ((packed))
#define OCFS_GCC_ATTR_ALIGNED __attribute__ ((aligned(4)))
#define OCFS_GCC_ATTR_PACKALGN __attribute__ ((aligned(4), packed))
-#endif
-#ifdef __i386__
+#else
#define OCFS_GCC_ATTR_PACKED
#define OCFS_GCC_ATTR_ALIGNED
#define OCFS_GCC_ATTR_PACKALGN
@@ -173,7 +172,13 @@
for (pos = (head)->next, n = pos->next; pos != (head); pos = n, n = pos->next)
#endif
+#ifndef list_for_each_prev_safe
+/* Walk a list backwards through ->prev. n is loaded with the following
+ * entry of the walk before the body runs, and pos is advanced from n, so
+ * the body may unlink pos. Only defined when the kernel headers have not
+ * already provided it. */
+#define list_for_each_prev_safe(pos, n, head) \
+ for (pos = (head)->prev, n = pos->prev; pos != (head); \
+ pos = n, n = pos->prev)
+#endif
+
#ifdef LINUX_2_5
#define OcfsQuerySystemTime(t) \
do { \
@@ -205,7 +210,7 @@
typedef struct _ocfs_inode_private
{
void * generic_ip;
- __u8 pad[8];
+ __u64 offset;
// struct list_head i_clean_buffers;
atomic_t i_clean_buffer_seq;
} ocfs_inode_private;
@@ -214,48 +219,53 @@
#define INODE_PRIVATE_OFF ((unsigned long)(&((struct inode *)0)->u.generic_ip))
#define GET_INODE_CLEAN_SEQ(i) (atomic_t *)(((unsigned long)i) + INODE_PRIVATE_OFF + CLEAN_SEQ_OFF)
-/* i_flags flag - heh yeah i know it's evil! */
-#define S_OCFS_OIN_VALID 256
-
-#define inode_data_is_oin(i) (i->i_flags & S_OCFS_OIN_VALID)
-
#define OCFS_GENERIC_IP(i) ((ocfs_inode_private *)(&(i->u.generic_ip)))
-//#define GET_INODE_CLEAN_LIST(i) (OCFS_GENERIC_IP(i)->i_clean_buffers)
-//#define EVIL_LIST_HEAD(_inode) (&(GET_INODE_CLEAN_LIST(_inode)))
+#define inode_data_is_oin(i) (OCFS_GENERIC_IP(i)->generic_ip != NULL)
-//#define GET_INODE_CLEAN_SEQ(i) (&(OCFS_GENERIC_IP(i)->i_clean_buffer_seq))
+/* Store the 64-bit offset in the inode's private area. */
+#define SET_INODE_OFFSET(i,o) \
+do { \
+	OCFS_GENERIC_IP(i)->offset = (o); \
+} while (0)
+#define GET_INODE_OFFSET(i) (OCFS_GENERIC_IP(i)->offset)
-#define GET_INODE_OIN(i) ((ocfs_inode *)(OCFS_GENERIC_IP(i)->generic_ip))
+/* Forget the oin pointer; inode_data_is_oin() tests generic_ip against
+ * NULL, so it reports false afterwards. The store goes to the generic_ip
+ * field itself rather than through the cast in GET_INODE_OIN(), because a
+ * cast expression is not an lvalue in standard C. */
+#define CLEAR_INODE_OIN(i) \
+do { \
+	OCFS_GENERIC_IP(i)->generic_ip = NULL; \
+} while (0)
-#define SET_INODE_OFFSET(i,o) do { \
- i->i_flags &= ~S_OCFS_OIN_VALID; \
- GET_INODE_OIN(i)= (void *)HI(o); \
- i->i_ino = LO(o); \
- } while (0)
+/* Attach an oin pointer to the inode's private area (same direct field
+ * store as CLEAR_INODE_OIN, for the same reason). */
+#define SET_INODE_OIN(i,o) \
+do { \
+	OCFS_GENERIC_IP(i)->generic_ip = (void *)(o); \
+} while (0)
-#define GET_INODE_OFFSET(i) (__u64)((((__u64)((unsigned long)i->u.generic_ip))<<32) + \
- ((__u64)i->i_ino))
+/* Read back the oin pointer, typed as ocfs_inode *. */
+#define GET_INODE_OIN(i) ((ocfs_inode *)(OCFS_GENERIC_IP(i)->generic_ip))
-#define SET_INODE_OIN(i,o) do { \
- i->i_flags |= S_OCFS_OIN_VALID; \
- GET_INODE_OIN(i)= (void *)o; \
- } while (0)
-
#define FIRST_FILE_ENTRY(dir) ((char *) ((char *)dir)+OCFS_SECTOR_SIZE)
#define FILEENT(dir,idx) (ocfs_file_entry *) ( ((char *)dir) + \
((dir->index[idx]+1) * OCFS_SECTOR_SIZE))
-#define FILEENT_GETBH(dir,bhs,idx) ({ \
- int _i = dir->index[idx]+1; \
- ocfs_file_entry *_ret = NULL; \
- if (!buffer_locked(bhs[_i])) \
- _ret = (ocfs_file_entry *)OCFS_BH_GET_DATA(bhs[_i]); \
- _ret; \
- })
-#define FILEENT_PUTBH(dir,bhs,idx) OCFS_BH_PUT_DATA(bhs[(dir->index[idx]+1)])
+/*
+ * Buffer-head based accessors for the file entry in directory slot idx.
+ * The entry lives in bhs[dir->index[idx] + 1]; the +1 matches FILEENT(),
+ * presumably because the first sector holds the dirnode header (confirm).
+ * Macro parameters are parenthesised so expression arguments expand
+ * correctly; dir and bhs must still be free of side effects.
+ */
+
+/* Return the file entry obtained through OCFS_BH_GET_DATA_WRITE. */
+#define FILEENT_GETBH_WRITE(dir,bhs,idx) \
+({ \
+	int _i = (dir)->index[(idx)]+1; \
+	ocfs_file_entry *_ret = \
+		(ocfs_file_entry *)OCFS_BH_GET_DATA_WRITE((bhs)[_i]); \
+	_ret; \
+})
+
+/* Return the file entry obtained through OCFS_BH_GET_DATA_READ. */
+#define FILEENT_GETBH(dir,bhs,idx) \
+({ \
+	int _i = (dir)->index[(idx)]+1; \
+	ocfs_file_entry *_ret = \
+		(ocfs_file_entry *)OCFS_BH_GET_DATA_READ((bhs)[_i]); \
+	_ret; \
+})
+
+/* Hand the same buffer head to OCFS_BH_PUT_DATA. */
+#define FILEENT_PUTBH(dir,bhs,idx) OCFS_BH_PUT_DATA((bhs)[((dir)->index[(idx)]+1)])
+
+
#define OCFS_DIR_FILENAME "DirFile"
#define OCFS_DIR_BITMAP_FILENAME "DirBitMapFile"
#define OCFS_FILE_EXTENT_FILENAME "ExtentFile"
@@ -430,7 +440,6 @@
#define OCFS_OIN_DELETE_ON_CLOSE (0x00000200)
#define OCFS_OIN_NEEDS_DELETION (0x00000400)
#define OCFS_INITIALIZED_MAIN_RESOURCE (0x00002000)
-#define OCFS_INITIALIZED_PAGING_IO_RESOURCE (0x00004000)
#define OCFS_OIN_INVALID (0x00008000)
#define OCFS_OIN_IN_USE (0x00020000)
#define OCFS_OIN_OPEN_FOR_DIRECTIO (0x00100000)
@@ -529,6 +538,11 @@
#define OCFS_NM_HEARTBEAT_TIME 500 /* in ms */
#define OCFS_HEARTBEAT_INIT 10 /* number of NM iterations to stabilize the publish map */
+
+#define OCFS_BH_SEM_HASH_PRUNE_TRIGGER 50 /* trigger nm to prune the hash when list size is > this */
+#define OCFS_BH_SEM_HASH_PRUNE_MAX 20 /* nm will prune at most this many in one cycle */
+
+
#ifndef O_DIRECT
#warning this depends on the architecture!
#define O_DIRECT 040000
@@ -715,15 +729,33 @@
ocfs_linux_dbg_free(x); \
} while (0)
-# define ocfs_vmalloc(size) ({ void *p = vmalloc(size); \
- if (!p) printk("ERROR: unable to allocate %u bytes of memory\n", (size)); \
- p; \
- })
+/* vmalloc() wrapper that zero-fills the allocation, or logs the requested
+ * size via printk when vmalloc returns NULL. The size argument is captured
+ * once in a local, and the locals use reserved-style names so they cannot
+ * shadow an identifier used inside the caller's size expression. */
+# define ocfs_vmalloc(size) \
+({ \
+	__typeof__(size) __sz = (size); \
+	void *__ptr = vmalloc(__sz); \
+	if (!__ptr) \
+		printk("ERROR: unable to allocate %u bytes of memory\n", \
+			__sz); \
+	else \
+		memset(__ptr, 0, __sz); \
+	__ptr; \
+})
# define ocfs_vfree vfree
#elif !defined(OCFS_LINUX_MEM_DEBUG)
-# define ocfs_malloc(size) kmalloc((size_t)(size), GFP_KERNEL)
+/* kmalloc(GFP_KERNEL) wrapper that zero-fills a successful allocation and
+ * yields NULL on failure. The size expression is evaluated exactly once. */
+# define ocfs_malloc(size) \
+({ \
+	size_t __sz = (size_t)(size); \
+	void *__ptr = kmalloc(__sz, GFP_KERNEL); \
+	if (__ptr) \
+		memset(__ptr, 0, __sz); \
+	__ptr; \
+})
# define ocfs_free kfree
-# define ocfs_vmalloc(size) vmalloc(size)
+/* vmalloc() wrapper that zero-fills a successful allocation and yields NULL
+ * on failure. The size expression is evaluated exactly once. */
+# define ocfs_vmalloc(size) \
+({ \
+	size_t __sz = (size_t)(size); \
+	void *__ptr = vmalloc(__sz); \
+	if (__ptr) \
+		memset(__ptr, 0, __sz); \
+	__ptr; \
+})
# define ocfs_vfree vfree
#endif /* ! defined(OCFS_MEM_DBG) */
@@ -1173,13 +1205,16 @@
__u32 lohi[2];
} my_timing_t;
-#define IO_FUNC_TIMING_DECL my_timing_t begin, end; \
- rdtsc (begin.lohi[0], begin.lohi[1]);
-#define IO_FUNC_TIMING_PRINT(_fn,_ret) rdtsc (end.lohi[0], end.lohi[1]); \
- IF_LEVEL_NO_CONTEXT(OCFS_DEBUG_LEVEL_TIMING) \
- printk("(%d) EXIT : %s() = %d => [%u.%u]\n",\
- ocfs_getpid(), _fn, \
- _ret, HILO(end.q-begin.q));
+#define IO_FUNC_TIMING_DECL my_timing_t begin, end; rdtsc (begin.lohi[0], begin.lohi[1]);
+
+/* Sample rdtsc into the `end` local and printk the pid, the supplied
+ * function name, its return value and end.q - begin.q. The printk is
+ * governed by IF_LEVEL_NO_CONTEXT(OCFS_DEBUG_LEVEL_TIMING), presumably a
+ * debug-level conditional (confirm). Needs the begin/end locals that
+ * IO_FUNC_TIMING_DECL declares; wrapped in do/while so it acts as a single
+ * statement. */
+#define IO_FUNC_TIMING_PRINT(_fn,_ret) \
+ do { \
+ rdtsc (end.lohi[0], end.lohi[1]); \
+ IF_LEVEL_NO_CONTEXT(OCFS_DEBUG_LEVEL_TIMING) \
+ printk("(%d) EXIT : %s() = %d => [%u.%u]\n", \
+ ocfs_getpid(), _fn, _ret, \
+ HILO(end.q-begin.q)); \
+ } while(0)
#else
#define IO_FUNC_TIMING_DECL
#define IO_FUNC_TIMING_PRINT(_fn,_ret)
@@ -1502,6 +1537,24 @@
}
HASHTABLE;
+/* NOTE(review): presumably the result codes of the ocfs_bh_sem_lock*()
+ * helpers declared in proto.h -- confirm against their definitions. */
+enum {
+ OCFS_BH_SEM_GOT_LOCK,
+ OCFS_BH_SEM_WAIT_ON_MODIFY,
+};
+
+/* Semaphore record associated with a buffer head. It carries a block
+ * number, device and buffer_head pointer, a refcount, a wait queue and a
+ * pid, and is chained through s_list. NOTE(review): presumably linked into
+ * the global bh_sem_hash and identified by (s_dev, s_blocknr) -- confirm. */
+typedef struct _ocfs_bh_sem
+{
+ struct semaphore s_sem;
+ struct list_head s_list;
+ unsigned long s_blocknr;
+ kdev_t s_dev;
+ atomic_t s_refcnt;
+ struct buffer_head *s_bh;
+ wait_queue_head_t s_wait;
+ pid_t s_pid;
+} ocfs_bh_sem;
+
+
typedef struct _ocfs_vol_disk_hdr // CLASS
{
__u32 minor_version; // NUMBER RANGE(0,UINT_MAX)
@@ -1762,26 +1815,24 @@
struct _ocfs_inode
{
ocfs_obj_id obj_id;
- __s64 alloc_size;
struct inode *inode;
+ struct _ocfs_super *osb; /* ocfs_inode belongs to this volume */
+ struct list_head needs_flush_list;
+ struct list_head recovery_list;
ocfs_sem main_res;
- ocfs_sem paging_io_res;
ocfs_lock_res *lock_res;
+ ocfs_extent_map map;
+ __s64 alloc_size;
__u64 file_disk_off; /* file location on the volume */
__u64 dir_disk_off; /* for dirs, offset to dirnode structure */
__u64 chng_seq_num;
__u64 parent_dirnode_off; /* from the start of vol */
- ocfs_extent_map map;
- struct _ocfs_super *osb; /* ocfs_inode belongs to this volume */
+ __u32 open_hndl_cnt;
__u32 oin_flags;
- struct list_head next_ofile; /* list of all ofile(s) */
- __u32 open_hndl_cnt;
bool needs_verification;
bool cache_enabled;
- struct list_head needs_flush_list;
bool in_needs_flush_list;
bool journal_inode; /* is this the journal oin? */
- struct list_head recovery_list;
};
typedef enum _ocfs_vol_state
@@ -1846,6 +1897,13 @@
struct _ocfs_bitmap_free_head;
+/* Per-volume hash of inode numbers keyed by 64-bit disk offset. Entries
+ * are linked onto the bucket list selected by OCFS_INODE_HASH(h, offset),
+ * and num_ents is bumped on insert, all under `lock`. */
+typedef struct _ocfs_inode_hash {
+ spinlock_t lock; /* protects the whole hash */
+ int size; /* number of lists in the hash */
+ int num_ents; /* global number of offsets in there */
+ struct list_head *hash; /* array of bucket list heads */
+} ocfs_inode_hash;
+
/*
* ocfs_super
*
@@ -1932,9 +1990,9 @@
struct list_head lock_recovery_lists[OCFS_MAXIMUM_NODES];
__u64 last_publ_seq_num[OCFS_MAXIMUM_NODES];
bool have_local_alloc;
- /* These two are protected by the trans_lock. */
+ /* Protected by the trans_lock. */
struct buffer_head *local_alloc_bh;
- struct _ocfs_bitmap_free_head *alloc_free_head;
+ ocfs_inode_hash inode_hash;
};
typedef struct _ocfs_comm_info
@@ -1956,6 +2014,7 @@
kmem_cache_t *fe_cache;
kmem_cache_t *lockres_cache;
kmem_cache_t *extent_cache;
+ kmem_cache_t *bh_sem_cache;
__u32 flags;
__u32 pref_node_num; /* preferred... osb has the real one */
ocfs_guid guid; /* uniquely identifies a node */
@@ -1970,6 +2029,10 @@
struct list_head item_list;
#endif
atomic_t cnt_lockres; /* count of allocated lockres */
+ struct list_head *bh_sem_hash;
+ spinlock_t bh_sem_hash_lock;
+ int bh_sem_hash_sz;
+ atomic_t bh_sem_hash_target_bucket;
}
ocfs_global_ctxt;
@@ -2490,7 +2553,7 @@
#endif /* !USERSPACE_TOOL */
-#include "ocfsio.h"
+#include "io.h"
#define OCFS_FE_CACHE_FLAGS(__osb, __fe) \
({ \
Modified: trunk/src/inc/proto.h
===================================================================
--- trunk/src/inc/proto.h 2003-12-18 23:28:02 UTC (rev 14)
+++ trunk/src/inc/proto.h 2004-01-24 01:22:15 UTC (rev 15)
@@ -25,7 +25,7 @@
int ocfs_create_log_extent_map (ocfs_super * osb, __u64 diskOffset, __u64 ByteCount);
int ocfs_write_map_file (ocfs_super * osb);
-int ocfs_extend_system_file (ocfs_super * osb, __u32 FileId, __u64 FileSize, struct buffer_head *fe_bh, ocfs_journal_handle *handle);
+int ocfs_extend_system_file (ocfs_super * osb, __u32 FileId, __u64 FileSize, struct buffer_head *fe_bh, ocfs_journal_handle *handle, bool zero);
void ocfs_extent_map_init (ocfs_extent_map * map);
@@ -55,7 +55,7 @@
int ocfs_find_inode (struct inode *inode, unsigned long ino, void *opaque);
-void ocfs_populate_inode (struct inode *inode, ocfs_file_entry *fe, umode_t mode, void *genptr);
+void ocfs_populate_inode (struct inode *inode, ocfs_file_entry *fe, umode_t mode, void *genptr, bool create_ino);
void ocfs_read_locked_inode (struct inode *inode, ocfs_file_entry *entry);
void ocfs_read_inode2 (struct inode *inode, void *opaque);
void ocfs_read_inode (struct inode *inode);
@@ -81,6 +81,23 @@
int ocfs_hash_get (HASHTABLE * ht, void *key, __u32 keylen, void **val, __u32 * vallen);
int ocfs_hash_add (HASHTABLE * ht, void *key, __u32 keylen, void *val, __u32 vallen, void **found, __u32 *foundlen);
void ocfs_hash_stat (HASHTABLE * ht, char *data, __u32 datalen);
+int ocfs_bh_sem_hash_init(void);
+int ocfs_bh_sem_hash_destroy(void);
+int ocfs_bh_sem_hash_prune(void);
+int ocfs_bh_sem_hash_prune_all(void);
+int ocfs_bh_sem_lock(struct buffer_head *bh);
+int ocfs_bh_sem_lock_modify(struct buffer_head *bh);
+int ocfs_bh_sem_unlock(struct buffer_head *bh);
+int ocfs_bh_sem_hash_cleanup_pid(pid_t pid);
+void ocfs_bh_sem_up(ocfs_bh_sem *sem);
+void ocfs_bh_sem_down(ocfs_bh_sem *sem);
+void ocfs_bh_sem_put(ocfs_bh_sem *sem);
+void ocfs_bh_sem_get(ocfs_bh_sem *sem);
+void ocfs_bh_sem_free(ocfs_bh_sem *sem);
+ocfs_bh_sem * ocfs_bh_sem_alloc(void);
+ocfs_bh_sem * ocfs_bh_sem_lookup(struct buffer_head *bh);
+void wait_on_buffer_modified(struct buffer_head * bh);
+void ocfs_clear_buffer_modified(struct buffer_head *bh);
void ocfs_version_print (void);
@@ -121,7 +138,6 @@
void ocfs_update_publish_map (ocfs_super * osb, struct buffer_head *bhs[], bool first_time);
-struct inode * ocfs_get_inode_from_bh(ocfs_super * osb, struct buffer_head *bh);
int ocfs_recv_thread (void *unused);
int ocfs_volume_thread (void *arg);
int ocfs_init_udp_sock (struct socket **send_sock, struct socket **recv_sock);
@@ -148,7 +164,6 @@
int ocfs_init_system_file (ocfs_super * osb, __u32 file_id, char *filename);
int ocfs_read_system_file (ocfs_super * osb, __u32 FileId, struct buffer_head *bhs[], __u64 Length, __u64 Offset);
-int ocfs_write_system_file (ocfs_super * osb, __u64 FileId, struct buffer_head *bhs[], __u64 Length, __u64 Offset);
int ocfs_get_system_file_size (ocfs_super * osb, __u32 FileId, __u64 * Length, __u64 * AllocSize);
__u64 ocfs_file_to_disk_off (ocfs_super * osb, __u32 FileId, __u64 Offset);
@@ -204,3 +219,23 @@
struct buffer_head **local_alloc_bh, bool sync);
int ocfs_find_space(ocfs_super * osb, __u64 file_size, __u64 * cluster_off, __u64 * cluster_count, bool sysfile, ocfs_journal_handle *handle);
int ocfs_recover_local_alloc(ocfs_super *osb, int node_num);
+
+void ocfs_end_buffer_io_sync(struct buffer_head *bh, int uptodate);
+
+int ocfs_inode_hash_init(ocfs_super *osb);
+void ocfs_inode_hash_destroy(ocfs_inode_hash *h);
+
+unsigned long ocfs_inode_hash_insert(ocfs_super *osb,
+ __u64 offset,
+ __u64 fe_off,
+ unsigned long ino);
+void ocfs_inode_hash_remove(ocfs_inode_hash *h, __u64 off);
+int ocfs_inode_rehash(ocfs_inode_hash *h,
+ __u64 oldoff,
+ __u64 newoff,
+ __u64 new_fe_off);
+struct inode *ocfs_get_inode_from_offset(ocfs_super *osb, __u64 offset,
+ struct buffer_head *fe_bh);
+unsigned long ocfs_inode_hash_lookup(ocfs_inode_hash *h,
+ __u64 offset,
+ __u64 *fe_off);
Modified: trunk/src/inode.c
===================================================================
--- trunk/src/inode.c 2003-12-18 23:28:02 UTC (rev 14)
+++ trunk/src/inode.c 2004-01-24 01:22:15 UTC (rev 15)
@@ -24,7 +24,13 @@
#endif
#endif /* version >= 2.4.10 */
+#ifdef AIO_ENABLED
+int ocfs_kvec_read(struct file *file, kvec_cb_t cb, size_t size, loff_t pos);
+int ocfs_kvec_write(struct file *file, kvec_cb_t cb, size_t size, loff_t pos);
+int ocfs_kvec_rw(struct file *filp, int rw, kvec_cb_t cb, size_t size, loff_t pos);
+#endif /* AIO_ENABLED */
+
static struct address_space_operations ocfs_aops = {
.readpage = ocfs_readpage,
.writepage = ocfs_writepage,
@@ -130,7 +136,7 @@
goto bail;
}
- fe = (ocfs_file_entry *) OCFS_BH_GET_DATA(args->fe_bh);
+ fe = (ocfs_file_entry *) OCFS_BH_GET_DATA_READ(args->fe_bh); /* read */
if (S_ISDIR (inode->i_mode)) {
LOG_TRACE_STR ("find_inode -> S_ISDIR");
if (fe->extents[0].disk_off != fileOff) {
@@ -179,8 +185,9 @@
}
oin = NULL; /* set it back to our current OIN if we have one */
if (inode_data_is_oin (inode))
- oin = ((ocfs_inode *)inode->u.generic_ip);
- ocfs_populate_inode (inode, fe, mode, oin);
+ oin = GET_INODE_OIN(inode);
+
+ ocfs_populate_inode (inode, fe, mode, oin, false);
#endif /* REPOPULATE_INODE */
ret = 1;
@@ -197,18 +204,26 @@
* ocfs_populate_inode()
*
*/
-void ocfs_populate_inode (struct inode *inode, ocfs_file_entry *fe, umode_t mode, void *genptr)
+void ocfs_populate_inode (struct inode *inode, ocfs_file_entry *fe, umode_t mode, void *genptr, bool create_ino)
{
struct super_block *sb;
ocfs_super *osb;
- __u64 offset;
+ __u64 offset, fe_off;
+ unsigned long uniq_ino;
LOG_ENTRY_ARGS ("(0x%08x, %u, size:%u)\n", inode, mode, fe->file_size);
sb = inode->i_sb;
osb = (ocfs_super *) OCFS_GENERIC_SB_P(sb);
+ fe_off = fe->this_sector;
offset = S_ISDIR (mode) ? fe->extents[0].disk_off : fe->this_sector;
+ if (!IS_VALID_FILE_ENTRY(fe)) {
+ printk("ocfs: invalid file entry!\n");
+
+ BUG();
+ }
+
OCFS_SET_INODE_DEV(sb, inode);
inode->i_mode = mode;
inode->i_uid = fe->uid;
@@ -222,10 +237,22 @@
OCFS_SET_INODE_TIME(inode, i_mtime, fe->modify_time);
OCFS_SET_INODE_TIME(inode, i_ctime, fe->create_time);
if (genptr)
- SET_INODE_OIN (inode, genptr);
+ SET_INODE_OIN(inode, genptr);
else
- SET_INODE_OFFSET (inode, offset);
+ CLEAR_INODE_OIN(inode);
+ SET_INODE_OFFSET(inode, offset);
+
+ if (create_ino) {
+ uniq_ino = iunique(sb, OCFS_ROOT_INODE_NUMBER);
+ uniq_ino = ocfs_inode_hash_insert(osb, offset, fe_off,
+ uniq_ino);
+ inode->i_ino = uniq_ino;
+ }
+ LOG_TRACE_ARGS("offset = %u.%u, ino = %lu, create_ino = %s\n",
+ HILO(offset), inode->i_ino,
+ create_ino ? "true" : "false");
+
switch (inode->i_mode & S_IFMT) {
case S_IFREG:
atomic_set(GET_INODE_CLEAN_SEQ(inode), atomic_read(&osb->clean_buffer_seq));
@@ -299,6 +326,7 @@
inode->i_uid = osb->vol_layout.uid;
inode->i_gid = osb->vol_layout.gid;
SET_INODE_OIN (inode, osb->oin_root_dir);
+ SET_INODE_OFFSET(inode, osb->vol_layout.root_start_off);
goto bail;
}
@@ -339,7 +367,7 @@
mode |= S_IFREG;
break;
}
- ocfs_populate_inode (inode, entry, mode, newoin);
+ ocfs_populate_inode (inode, entry, mode, newoin, false);
bail:
LOG_EXIT ();
@@ -389,6 +417,7 @@
inode->i_uid = osb->vol_layout.uid;
inode->i_gid = osb->vol_layout.gid;
SET_INODE_OIN (inode, osb->oin_root_dir);
+ SET_INODE_OFFSET(inode, osb->vol_layout.root_start_off);
goto bail;
}
@@ -399,7 +428,7 @@
args = (ocfs_find_inode_args *) opaque;
newoin = NULL;
- fe = (ocfs_file_entry *) OCFS_BH_GET_DATA(args->fe_bh);
+ fe = (ocfs_file_entry *) OCFS_BH_GET_DATA_READ(args->fe_bh); /* read */
mode = fe->prot_bits;
@@ -429,7 +458,7 @@
mode |= S_IFREG;
break;
}
- ocfs_populate_inode (inode, fe, mode, newoin);
+ ocfs_populate_inode (inode, fe, mode, newoin, false);
bail:
if (fe)
@@ -487,7 +516,7 @@
if (S_ISDIR (inode->i_mode)) {
LOG_TRACE_STR ("find_actor -> S_ISDIR\n");
- fe = OCFS_BH_GET_DATA(args->fe_bh);
+ fe = OCFS_BH_GET_DATA_READ(args->fe_bh); /* read */
if (fe->extents[0].disk_off != fileOff) {
LOG_TRACE_ARGS
("DIR : inode number same but full offset does not match: %u.%u != %u.%u\n",
@@ -502,7 +531,7 @@
}
ret = 1;
- bail:
+bail:
if (fe)
OCFS_BH_PUT_DATA(args->fe_bh);
LOG_EXIT_LONG (ret);
@@ -570,7 +599,7 @@
if (inode->i_state & I_NEW) {
LOG_TRACE_STR("Inode was not in inode cache, reading it.");
if (args)
- fe = OCFS_BH_GET_DATA(args->fe_bh);
+ fe = OCFS_BH_GET_DATA_READ(args->fe_bh); /* read */
ocfs_read_locked_inode(inode, fe);
if (args)
OCFS_BH_PUT_DATA(fe_bh);
@@ -591,7 +620,7 @@
*/
void ocfs_put_inode (struct inode *inode)
{
- LOG_ENTRY_ARGS ("(0x%08x)\n", inode);
+ LOG_ENTRY_ARGS ("(0x%08x, inode_i_ino=%lu)\n", inode, inode->i_ino);
LOG_TRACE_ARGS ("put_inode: count=%d\n", inode->i_count);
if (inode_data_is_oin(inode) && (atomic_read (&inode->i_count) == 1) ) {
ocfs_inode *oin;
@@ -620,12 +649,16 @@
*/
void ocfs_clear_inode (struct inode *inode)
{
+ __u64 offset;
+ ocfs_super *osb;
+
LOG_ENTRY();
if (inode) {
+ ocfs_linux_get_inode_offset(inode, &offset, NULL);
+
if (inode_data_is_oin (inode)) {
ocfs_inode *oin;
- ocfs_super *osb;
LOG_TRACE_STR ("inode with oin : clear inode");
@@ -651,7 +684,6 @@
} else {
__u64 fileOff;
ocfs_lock_res *lockres = NULL;
- ocfs_super *osb;
osb = (ocfs_super *) OCFS_GENERIC_SB_P(inode->i_sb);
@@ -675,8 +707,12 @@
LOG_TRACE_STR ("Could not find offset");
}
}
+ /* we may be called after unmount, in which case
+ * don't do this. */
+ if (osb->inode_hash.size)
+ ocfs_inode_hash_remove(&osb->inode_hash, offset);
}
- bail:
+bail:
LOG_EXIT ();
return;
} /* ocfs_clear_inode */
@@ -827,7 +863,7 @@
LOG_ERROR_STATUS (status);
goto bail;
}
- fe = (ocfs_file_entry *)OCFS_BH_GET_DATA(bh);
+ fe = (ocfs_file_entry *)OCFS_BH_GET_DATA_READ(bh); /* read */
if (!IS_VALID_FILE_ENTRY (fe)) {
OCFS_BH_PUT_DATA(bh);
@@ -845,8 +881,6 @@
map_bh(bh_result, inode->i_sb, (fe->extents[0].disk_off >> 9) + iblock);
OCFS_BH_PUT_DATA(bh);
- if (create)
- bh_result->b_state |= (1UL << BH_New);
err = 0;
bail:
@@ -918,17 +952,8 @@
goto bail;
}
- if (create) {
- LOG_TRACE_ARGS ("CREATE: offset: %u -> block#: %d\n", iblock,
- lbo >> inode->i_sb->s_blocksize_bits);
- bh_result->b_state |= (1UL << BH_New);
- }
-
map_bh(bh_result, inode->i_sb, lbo >> inode->i_sb->s_blocksize_bits);
- if (create)
- bh_result->b_state |= (1UL << BH_New);
-
err = 0;
if (bh_result->b_blocknr == 0) {
@@ -1052,7 +1077,7 @@
HILO(vbo), HILO(lbo), len, oin->file_disk_off);
}
- bail:
+bail:
if (err < 0)
err = -EIO;
LOG_EXIT_LONG (err);
@@ -1532,8 +1557,6 @@
#ifdef AIO_ENABLED
-static int ocfs_kvec_rw(struct file *filp, int rw, kvec_cb_t cb, size_t size, loff_t pos);
-
int ocfs_kvec_read(struct file *file, kvec_cb_t cb, size_t size, loff_t pos)
{
return ocfs_kvec_rw(file, READ, cb, size, pos);
Modified: trunk/src/ioctl.c
===================================================================
--- trunk/src/ioctl.c 2003-12-18 23:28:02 UTC (rev 14)
+++ trunk/src/ioctl.c 2004-01-24 01:22:15 UTC (rev 15)
@@ -63,7 +63,7 @@
break;
}
- exit_ioctl:
+exit_ioctl:
LOG_EXIT_LONG (ret);
return ret;
} /* ocfs_ioctl */
Modified: trunk/src/journal.c
===================================================================
--- trunk/src/journal.c 2003-12-18 23:28:02 UTC (rev 14)
+++ trunk/src/journal.c 2004-01-24 01:22:15 UTC (rev 15)
@@ -1,5 +1,5 @@
/*
- * ocfsjournal.c
+ * journal.c
*
* Defines functions of journalling api
*
@@ -21,7 +21,7 @@
* Boston, MA 021110-1307, USA.
*
* Authors: Kurt Hackel, Sunil Mushran, Manish Singh, Wim Coekaerts,
- * Mark Fasheh
+ * Mark Fasheh, Joel Becker
*/
#include <ocfs.h>
@@ -66,19 +66,31 @@
retval->buffs = ocfs_malloc(sizeof(struct buffer_head *) * max_buffs);
if (!retval->buffs) {
- LOG_ERROR_STR("Failed to allocate memory for journal buffers!");
+ LOG_ERROR_STR("Failed to allocate memory for journal buffs!");
goto done_free;
}
memset(retval->buffs, 0, sizeof(struct buffer_head *) * max_buffs);
+#ifdef OCFS_PARANOID_ABORTS
+ retval->co_buffs = ocfs_malloc(sizeof(ocfs_journal_copyout)*max_buffs);
+ if (!retval->co_buffs) {
+ LOG_ERROR_STR("Failed to allocate memory for journal co_buffs!");
+ goto done_free;
+ }
+ memset(retval->co_buffs, 0, sizeof(ocfs_journal_copyout) * max_buffs);
+#endif
spin_lock_init(&(retval->list_lock));
INIT_LIST_HEAD(&(retval->h_list));
INIT_LIST_HEAD(&(retval->locks));
retval->max_buffs = max_buffs;
retval->num_buffs = 0;
retval->num_locks = 0;
+#ifdef OCFS_PARANOID_ABORTS
+ retval->num_co = 0;
+#endif
retval->journal = &osb->journal;
retval->osb = osb;
+ retval->commit_bits = retval->abort_bits = NULL;
/* actually start the transaction now */
retval->k_handle = journal_start(journal, max_buffs);
@@ -114,10 +126,12 @@
return(NULL);
} /* ocfs_start_trans */
+#define OCFS_JOURNAL_CHECKPOINT_RETRIES 3
static int ocfs_checkpoint_handle(ocfs_journal_handle *handle)
{
int retval = 0;
ocfs_super *osb = NULL;
+ int i;
LOG_ENTRY();
@@ -126,10 +140,15 @@
if (!handle->num_buffs)
goto done;
- retval = ocfs_write_bhs(osb, handle->buffs, handle->num_buffs,
- OCFS_BH_IGNORE_JBD, NULL);
- if (retval < 0)
- LOG_ERROR_STR("Error checkpointing handle.");
+ /* Try up to 3 times to checkpoint the handle */
+ for (i = 0; i < OCFS_JOURNAL_CHECKPOINT_RETRIES; i++) {
+ retval = ocfs_write_bhs(osb, handle->buffs, handle->num_buffs,
+ OCFS_BH_IGNORE_JBD, NULL);
+ if (retval < 0)
+ LOG_ERROR_STATUS(retval);
+ else
+ break;
+ }
done:
@@ -145,7 +164,8 @@
*
* The call to journal_revoke does a brelse. It also winds up removing
* the journal_head from the buffer, and therefore the JBD bit is no
- * longer set.
+ * longer set. We do a get_bh before calling journal_revoke so that
+ * the count doesn't change.
*/
static int ocfs_revoke_handle(ocfs_journal_handle *handle)
{
@@ -176,6 +196,8 @@
for(i = 0; i < handle->num_buffs; i++) {
bh = handle->buffs[i];
+ get_bh(bh); /* want to keep this around after the revoke */
+
retval = journal_revoke(new_handle, bh->b_blocknr, bh);
if (retval < 0) {
LOG_ERROR_STR("Could not revoke buffer!");
@@ -236,7 +258,36 @@
return(status);
}
+/*
+ * ocfs_journal_new_file_search()
+ *
+ * Walk the journal's list of committed handles, holding commit_sem, and
+ * look for one whose new_file_lockid equals lockid.
+ *
+ * Returns 0 when such a handle exists, -ENOENT otherwise. The list is not
+ * searched at all (and -ENOENT is returned) while root_start_off is still
+ * zero or when lockid lies below it.
+ */
+int ocfs_journal_new_file_search(ocfs_super *osb, __u64 lockid)
+{
+	ocfs_journal_handle *handle=NULL;
+	ocfs_journal *journal=NULL;
+	int status = -ENOENT;
+	struct list_head *p1;
+
+	/* lockid is 64 bits wide: split it with HILO so that both %u
+	 * conversions receive an argument. */
+	LOG_ENTRY_ARGS("(%u.%u)\n", HILO(lockid));
+	/* make sure that we have a root_start off */
+	/* this can be called early in the first mount */
+	if (osb->vol_layout.root_start_off != 0 &&
+	    lockid >= osb->vol_layout.root_start_off) {
+		journal = &osb->journal;
+		down(&journal->commit_sem);
+		list_for_each(p1, &(journal->commited)) {
+			handle = list_entry(p1, ocfs_journal_handle, h_list);
+			if (handle->new_file_lockid == lockid) {
+				status = 0;
+				break;
+			}
+		}
+		up(&journal->commit_sem);
+	}
+
+	LOG_EXIT_STATUS(status);
+	return status;
+}
+
+
/* This for loop is for debug purposes. Basically we want to check the
* BH_JBD bit on our buffers. If the handle was checkpointed, then
* none of them should have that bit set after the revoke
@@ -253,9 +304,8 @@
} while (0)
/*
- Should this function also mark the buffers dirty (journal_dirty_*)
- or should we expect a higher layer to be doing that?
-*/
+ * ocfs_commit_trans
+ */
int ocfs_commit_trans(ocfs_journal_handle * handle)
{
ocfs_super *osb;
@@ -263,7 +313,6 @@
transaction_t *kern_trans;
int retval = 0, i;
struct buffer_head *bh;
- bool revoked = false;
bool checkpoint, sync;
ocfs_journal *journal;
@@ -291,12 +340,16 @@
else
kern_handle->h_sync = 0;
+ for(i = 0; i < handle->num_buffs; i++)
+ check_rootdir_overwrite(handle->buffs[i]);
+
/* actually stop the transaction. if we've set h_sync,
* it'll have been commited when we return */
retval = journal_stop(kern_handle);
if (retval < 0) {
LOG_ERROR_STATUS(retval);
- goto done;
+ LOG_ERROR_STR("Could not commit transaction");
+ BUG();
}
/* for now we manually checkpoint and force out our revoke
@@ -311,7 +364,7 @@
retval = ocfs_checkpoint_handle(handle);
if (retval < 0) {
LOG_ERROR_STR("Could not checkpoint transaction!");
- goto done;
+ BUG();
}
/* revoke from buffer_head list, commit revoke records */
@@ -319,31 +372,25 @@
if (retval < 0) {
LOG_ERROR_STR("Could not completely revoke "
"transaction!");
- goto done;
+ BUG();
}
-
- revoked = true;
- } else {
-
- /* we'll want to get rid of the buffers now as
- * journal_flush does the other work for us, so leave
- * revoked to false. */
- }
-
-
-done:
- if (!revoked) {
- /* usually the journal_revoke in ocfs_revoke_handle
- * will brelse the buffers for us, but if we aren't
- * checkpointing this handle, or we've gotten here
- * because of error then we have to do it manually. */
+ } else {
+ /* If we're not checkpointing, we have to be careful
+ * to also clear the modified bits. */
for(i = 0; i < handle->num_buffs; i++) {
bh = handle->buffs[i];
handle->buffs[i] = NULL;
- brelse(bh);
+
+ ocfs_clear_buffer_modified(bh);
}
}
+/* done: */
+ for(i = 0; i < handle->num_buffs; i++) {
+ bh = handle->buffs[i];
+ brelse(bh);
+ }
+
down(&journal->commit_sem);
journal->curr = NULL;
@@ -355,8 +402,6 @@
retval = ocfs_journal_release_locks(handle, 0);
if (retval < 0)
LOG_ERROR_STATUS(retval);
-
-
} else {
/* If we're not going to checkpoint the handle on
* commit then we need to add it to our journals list
@@ -366,6 +411,10 @@
up(&journal->commit_sem);
}
+#ifdef OCFS_PARANOID_ABORTS
+ /* At this point, we don't need the copyout buffers. */
+ ocfs_handle_free_all_copyout(handle);
+#endif
/* we don't free the kernel handle because jbd has freed it. */
if (handle->buffs) {
ocfs_free(handle->buffs);
@@ -376,13 +425,21 @@
/* This has to happen after we release the other locks. */
ocfs_release_trans_lock(osb);
+ if (handle->commit_bits && (retval == 0)) {
+ if (!sync)
+ BUG();
+ ocfs_process_bitmap_free_head(osb, handle->commit_bits);
+ }
+ free_bitmap_free_head(handle->commit_bits);
+ free_bitmap_free_head(handle->abort_bits);
+
if (checkpoint)
ocfs_free(handle);
LOG_EXIT_STATUS(retval);
return(retval);
-}
+} /* ocfs_commit_trans */
/*
* ocfs_abort_trans
@@ -393,10 +450,17 @@
ocfs_super *osb = NULL;
int i;
int retval;
+ ocfs_journal * journal = NULL;
+#ifdef OCFS_PARANOID_ABORTS
+ int j;
+ ocfs_journal_copyout *co = NULL;
+ char *data;
+#endif
LOG_ENTRY();
osb = handle->osb;
+ journal = &osb->journal;
/* There is a potential bug here which we may have to
* resolve. What if you do a get_write_access on a buffer,
@@ -408,6 +472,32 @@
* though it were clean, even though it contains aborted
* data!*/
+#ifdef OCFS_PARANOID_ABORTS
+ /* Ok, we're aborting. For all dirtied buffers, copy our old
+ * data back in. This should reverse what happened during the
+ * transaction and revert us back.*/
+ for(i = 0; i < handle->num_buffs; i++) {
+ bh = handle->buffs[i];
+
+ /* find the copyout. */
+ co = NULL;
+ for(j = 0; j < handle->num_co; j++)
+ if (handle->co_buffs[j].blocknr == bh->b_blocknr) {
+ co = &(handle->co_buffs[j]);
+ break;
+ }
+
+ if (co == NULL)
+ BUG();
+ LOG_TRACE_ARGS("Aborting block %lu\n", co->blocknr);
+ data = OCFS_BH_GET_DATA_WRITE(bh);
+ memcpy(data, co->data, bh->b_size);
+ OCFS_BH_PUT_DATA(bh);
+ }
+
+ /* done copying them, free it now. */
+ ocfs_handle_free_all_copyout(handle);
+#else
/* take all our dirtied buffers and make sure they can't be
* written to disk */
for(i = 0; i < handle->num_buffs; i++) {
@@ -420,34 +510,70 @@
/* clear the uptodate and dirty flags so this never
* gets written to disk inadvertantly by someone
* else. */
+
#ifdef LINUX_2_5
clear_buffer_uptodate(bh);
#else
mark_buffer_uptodate(bh, false);
#endif
clear_bit(BH_Dirty, &bh->b_state);
+
unlock_buffer(bh);
/* journal_forget will bforget the buffers for us too. */
+ get_bh(bh); /* keep a reference around so we can
+ * reread after our journal_flush */
+
journal_forget(handle->k_handle, bh);
}
-
+#endif
/* want to force our handle to disk in abort case. */
handle->k_handle->h_sync = 1;
retval = journal_stop(handle->k_handle);
if (retval < 0) {
LOG_ERROR_STR("Could not commit aborted transaction!");
- goto done;
+ LOG_ERROR_STATUS(retval);
}
+
handle->k_handle = NULL;
atomic_dec(&(osb->journal.num_trans));
-done:
+/* done: */
+
down(&osb->journal.commit_sem);
osb->journal.curr = NULL;
up(&osb->journal.commit_sem);
+ /* Ok, we now want to fill our buffers with the older (but
+ * valid) data, instead of leaving them with the aborted
+ * data. To do so we want to first checkpoint the valid
+ * transactions in the journal so that we know that disk
+ * reflects the latest correct blocks. After that, we just
+ * repopulate the buffers from disk. */
+
+ /* journal flush here */
+ journal_lock_updates(journal->k_journal);
+ retval = journal_flush(journal->k_journal);
+ journal_unlock_updates(journal->k_journal);
+ if (retval < 0)
+ LOG_ERROR_STATUS(retval);
+
+ /* reread buffers here and then brelse them */
+ if (handle->num_buffs != 0)
+ retval = ocfs_read_bhs(osb,
+ handle->buffs[0]->b_blocknr * 512,
+ handle->num_buffs * 512,
+ handle->buffs, 0, NULL);
+ if (retval < 0)
+ LOG_ERROR_STATUS(retval);
+
+ for(i = 0; i < handle->num_buffs; i++) {
+ ocfs_clear_buffer_modified(handle->buffs[i]);
+ brelse(handle->buffs[i]);
+ }
+
+ /* drop locks associated with the handle here. */
retval = ocfs_journal_release_locks(handle, 1);
if (retval < 0)
LOG_ERROR_STATUS(retval);
@@ -455,6 +581,12 @@
/* This has to happen after we release the other locks. */
ocfs_release_trans_lock(osb);
+ if (handle->abort_bits && (retval == 0))
+ ocfs_process_bitmap_free_head(osb, handle->abort_bits);
+
+ free_bitmap_free_head(handle->commit_bits);
+ free_bitmap_free_head(handle->abort_bits);
+
if (handle->buffs)
ocfs_free(handle->buffs);
ocfs_free(handle);
@@ -463,15 +595,58 @@
return;
} /* ocfs_abort_trans */
+/*
+ * ocfs_journal_access
+ */
int ocfs_journal_access(ocfs_journal_handle *handle, struct buffer_head *bh, int type)
{
int status = -1;
-
- LOG_ENTRY_ARGS("(bh->b_blocknr=%lu, type=%d (\"%s\"))\n", bh->b_blocknr,
- type, (type == OCFS_JOURNAL_ACCESS_CREATE) ?
+ char *data;
+#ifdef OCFS_PARANOID_ABORTS
+ int i;
+ bool found = false;
+#endif
+ LOG_ENTRY_ARGS("(bh->b_blocknr=%lu, type=%d (\"%s\"), "
+ "bh->b_size = %hu)\n",
+ bh->b_blocknr, type,
+ (type == OCFS_JOURNAL_ACCESS_CREATE) ?
"OCFS_JOURNAL_ACCESS_CREATE" :
- "OCFS_JOURNAL_ACCESS_WRITE");
+ "OCFS_JOURNAL_ACCESS_WRITE", bh->b_size);
+ /* by taking a "read" lock, we prevent anyone from doing any
+ * IO on the buffers while in journal_get_*_access */
+ data = OCFS_BH_GET_DATA_READ(bh);
+
+#ifdef OCFS_PARANOID_ABORTS
+
+ /* search for this buffer in our copyout list. If it's already
+ * there, we need to do nothing. Otherwise, add it to the
+ * handle.
+ *
+ * Note that we want to make a copy of the buffer on the 1st access
+ * call, as that is when we know for sure it's clean. */
+ for(i = 0; i < handle->num_co; i++)
+ if (handle->co_buffs[i].blocknr == bh->b_blocknr) {
+ found = true;
+ break;
+ }
+
+ if (!found) {
+ i = handle->num_co;
+
+ LOG_TRACE_ARGS("Copying buffer out to position %d\n", i);
+ /* This malloc should just be a slab. */
+ handle->co_buffs[i].data = ocfs_malloc(bh->b_size);
+ if (handle->co_buffs[i].data == NULL) {
+ status = -ENOMEM;
+ goto done;
+ }
+ memcpy(handle->co_buffs[i].data, data, bh->b_size);
+ handle->co_buffs[i].blocknr = bh->b_blocknr;
+ handle->num_co++;
+ }
+#endif
+
switch (type) {
case OCFS_JOURNAL_ACCESS_CREATE:
status = journal_get_create_access(handle->k_handle, bh);
@@ -495,11 +670,15 @@
status = 0;
done:
+ OCFS_BH_PUT_DATA(bh);
+
LOG_EXIT_STATUS(status);
return(status);
-}
+} /* ocfs_journal_access */
/*
+ * ocfs_journal_dirty
+ *
* We also have to add the buffer to our handles list.
*/
int ocfs_journal_dirty(ocfs_journal_handle *handle, struct buffer_head *bh)
@@ -535,7 +714,6 @@
get_bh(bh);
handle->buffs[i] = bh;
handle->num_buffs++;
- LOG_TRACE_ARGS("Dirtied buffer at position %d\n", i);
call_jbd:
status = journal_dirty_metadata(handle->k_handle, bh);
@@ -549,12 +727,13 @@
goto done;
}
+ check_rootdir_overwrite(bh);
status = 0;
done:
LOG_EXIT_STATUS(status);
return(status);
-}
+} /* ocfs_journal_dirty */
/* We are expecting to be run on the current running transaction, so
@@ -658,6 +837,7 @@
__u64 lock_id = 0;
ocfs_inode * oin = NULL;
struct buffer_head *bh = NULL;
+ __u64 alloc_size;
LOG_ENTRY();
@@ -683,7 +863,7 @@
LOG_ERROR_STR("Could not get lock on journal!");
goto done;
}
- fe = (ocfs_file_entry *) OCFS_BH_GET_DATA(bh);
+ fe = (ocfs_file_entry *) OCFS_BH_GET_DATA_READ(bh); /* read */
/* extend the system file if we need to - it should be exactly
* eight megs. */
@@ -692,18 +872,20 @@
fe = NULL;
status = ocfs_extend_system_file(osb, cleanup_file_id,
OCFS_JOURNAL_DEFAULT_SIZE,
- bh, NULL);
+ bh, NULL, false);
if (status < 0) {
LOG_ERROR_STR("Could not extend journal file!");
goto done;
}
- fe = (ocfs_file_entry *) OCFS_BH_GET_DATA(bh);
+ fe = (ocfs_file_entry *) OCFS_BH_GET_DATA_READ(bh); /* read */
}
LOG_TRACE_ARGS("fe->file_size = %u.%u\n", HI(fe->file_size),
LO(fe->file_size));
LOG_TRACE_ARGS("fe->alloc_size = %u.%u\n", HI(fe->alloc_size),
LO(fe->alloc_size));
+ /* gonna need this later */
+ alloc_size = fe->alloc_size;
/* Ok, look up the inode for our journal */
args.offset = fe->this_sector;
@@ -715,9 +897,7 @@
#ifdef LINUX_2_5
inode = ocfs_iget (sb, &args);
#else
- inode =
- iget4 (osb->sb, LO (args.offset),
- (find_inode_t) ocfs_find_inode, (void *) (&args));
+ inode = ocfs_get_inode_from_offset(osb, args.offset, bh);
#endif
if (inode == NULL) {
LOG_ERROR_STR("access error");
@@ -732,19 +912,15 @@
goto done;
}
LOG_TRACE_ARGS("inode->i_size = %u\n", inode->i_size);
-
- fe = (ocfs_file_entry *) OCFS_BH_GET_DATA(bh);
- status = ocfs_create_new_oin(&oin, fe->alloc_size, osb);
- status = ocfs_initialize_oin(oin, osb, 0, fe->this_sector, 0, false, NULL);
+
+ status = ocfs_create_new_oin(&oin, alloc_size, osb);
+ status = ocfs_initialize_oin(oin, osb, 0, lock_id, lock_id, false, NULL);
oin->journal_inode = true;
oin->open_hndl_cnt++;
SET_INODE_OIN(inode, oin);
LOG_TRACE_ARGS("oin->alloc_size = %u.%u\n", HI(oin->alloc_size),
LO(oin->alloc_size));
- OCFS_BH_PUT_DATA(bh);
- fe = NULL;
-
/* call the kernels journal init function now */
k_journal = journal_init_inode(inode);
if (k_journal == NULL) {
@@ -837,7 +1013,7 @@
/* release the oin here. Isn't this racy? */
if (inode_data_is_oin(inode)) {
oin = GET_INODE_OIN(inode);
- inode->i_flags &= ~S_OCFS_OIN_VALID;
+ CLEAR_INODE_OIN(inode);
oin->open_hndl_cnt--;
ocfs_release_oin(oin, true);
}
@@ -970,7 +1146,7 @@
LOG_ERROR_STATUS (status);
goto done;
}
- publish = (ocfs_publish *) OCFS_BH_GET_DATA(publish_bh);
+ publish = (ocfs_publish *) OCFS_BH_GET_DATA_READ(publish_bh); /* read */
retval = publish->mounted;
@@ -1001,7 +1177,7 @@
LOG_ERROR_STATUS (status);
goto done;
}
- publish = (ocfs_publish *) OCFS_BH_GET_DATA(publish_bh);
+ publish = (ocfs_publish *) OCFS_BH_GET_DATA_WRITE(publish_bh); /* write */
/* change it */
publish->mounted = value;
@@ -1249,6 +1425,7 @@
bool recovery_lock = false;
struct buffer_head *config_bh = NULL;
ocfs_disk_node_config_info *config = NULL;
+ __u64 alloc_size;
LOG_ENTRY_ARGS("(node_num=%d, osb->node_num = %d)\n", node_num,
osb->node_num);
@@ -1309,7 +1486,7 @@
goto done;
}
- config = (ocfs_disk_node_config_info *) OCFS_BH_GET_DATA(config_bh);
+ config = (ocfs_disk_node_config_info *) OCFS_BH_GET_DATA_READ(config_bh); /* read */
if (config->journal_version < OCFS_JOURNAL_CURRENT_VERSION) {
OCFS_BH_PUT_DATA(config_bh);
LOG_ERROR_ARGS("Cannot recover node %d, it has an old journal"\
@@ -1318,8 +1495,12 @@
}
OCFS_BH_PUT_DATA(config_bh);
+ fe = (ocfs_file_entry *)OCFS_BH_GET_DATA_READ(bh); /* read */
+
+ /* gonna need this later */
+ alloc_size = fe->alloc_size;
+
/* Ok, look up the inode for our journal */
- fe = (ocfs_file_entry *)OCFS_BH_GET_DATA(bh);
args.offset = fe->this_sector;
args.fe_bh = bh;
OCFS_BH_PUT_DATA(bh);
@@ -1327,9 +1508,7 @@
#ifdef LINUX_2_5
inode = ocfs_iget (sb, &args);
#else
- inode =
- iget4 (osb->sb, LO (args.offset),
- (find_inode_t) ocfs_find_inode, (void *) (&args));
+ inode = ocfs_get_inode_from_offset(osb, args.offset, bh);
#endif
if (inode == NULL) {
LOG_ERROR_STR("access error");
@@ -1345,17 +1524,15 @@
}
LOG_TRACE_ARGS("inode->i_size = %u\n", inode->i_size);
- fe = (ocfs_file_entry *) OCFS_BH_GET_DATA(bh);
- status = ocfs_create_new_oin(&oin, fe->alloc_size, osb);
+ status = ocfs_create_new_oin(&oin, alloc_size, osb);
if (status < 0) {
LOG_ERROR_STATUS(status);
goto done;
}
- status = ocfs_initialize_oin(oin, osb, 0, fe->this_sector, 0, false,
+ status = ocfs_initialize_oin(oin, osb, 0, lock_id, lock_id, false,
NULL);
oin->journal_inode = true;
SET_INODE_OIN(inode, oin);
- OCFS_BH_PUT_DATA(bh);
status = ocfs_force_read_journal(osb, inode->i_size, oin);
if (status < 0) {
@@ -1413,7 +1590,7 @@
done:
/* close the journal file */
if (inode)
- inode->i_flags &= ~S_OCFS_OIN_VALID;
+ CLEAR_INODE_OIN(inode);
if (oin)
ocfs_release_oin(oin, true);
@@ -1477,7 +1654,7 @@
goto finally;
}
- publish = (ocfs_publish *) OCFS_BH_GET_DATA(publish_bh);
+ publish = (ocfs_publish *) OCFS_BH_GET_DATA_WRITE(publish_bh); /* write */
publish->dirty = false;
publish->vote = 0;
Modified: trunk/src/namei.c
===================================================================
--- trunk/src/namei.c 2003-12-18 23:28:02 UTC (rev 14)
+++ trunk/src/namei.c 2004-01-24 01:22:15 UTC (rev 15)
@@ -17,6 +17,10 @@
__u64 id2, __u32 type2, __u32 flags2,
ocfs_lock_res **res2, struct buffer_head **bh2,
struct inode *inode2);
+static int ocfs_fix_extent_pointers(ocfs_super *osb,
+ ocfs_journal_handle *handle,
+ struct buffer_head *fe_bh,
+ struct inode *inode);
static struct dentry_operations ocfs_dentry_ops = {
.d_revalidate = ocfs_dentry_revalidate // let's test it out!
@@ -33,7 +37,7 @@
ocfs_file_entry *fe = NULL;
struct buffer_head *fe_bh = NULL;
ocfs_inode *parentOin = NULL;
- __u64 parentOffset;
+ __u64 parentOffset, fe_off;
struct inode *inode = NULL;
struct super_block *sb = dir->i_sb;
struct dentry *ret;
@@ -65,17 +69,35 @@
status = ocfs_find_files_on_disk (osb, parentOffset, &(dentry->d_name), &fe_bh, NULL, dir);
if (status >= 0) {
- fe = (ocfs_file_entry *) OCFS_BH_GET_DATA(fe_bh);
+ unsigned long ino;
+ __u64 inode_off;
+
+ fe = (ocfs_file_entry *) OCFS_BH_GET_DATA_READ(fe_bh); /* read */
args.offset = fe->this_sector;
// args.entry = fe;
args.fe_bh = fe_bh;
+ if (fe->attribs & OCFS_ATTRIB_DIRECTORY)
+ inode_off = fe->extents[0].disk_off;
+ else
+ inode_off = fe->this_sector;
+
+ fe_off = fe->this_sector;
OCFS_BH_PUT_DATA(fe_bh);
+
fe = NULL;
+ /* we should put this guy in the hash now... */
+
+ LOG_TRACE_STR("calling iget4");
+ /* alright, allocate a new inode number for this guy
+ * and insert it into the hash. */
+ ino = iunique(osb->sb, OCFS_ROOT_INODE_NUMBER);
+ ino = ocfs_inode_hash_insert(osb, inode_off, fe_off, ino);
+
#ifdef LINUX_2_5
inode = ocfs_iget (sb, &args);
#else
inode =
- iget4 (sb, LO (args.offset),
+ iget4 (sb, ino,
(find_inode_t) ocfs_find_inode, (void *) (&args));
#endif
if (inode == NULL) {
@@ -185,8 +207,14 @@
LOG_ERROR_STATUS(status);
ocfs_abort_trans(handle);
goto leave;
- } else if (ocfs_commit_trans(handle) < 0)
- LOG_ERROR_STR("Could not complete create!");
+ } else {
+ fe = (ocfs_file_entry *) OCFS_BH_GET_DATA_READ(new_fe_bh); /* read */
+ handle->new_file_lockid = fe->this_sector;
+ OCFS_BH_PUT_DATA(new_fe_bh);
+ fe = NULL;
+ if (ocfs_commit_trans(handle) < 0)
+ LOG_ERROR_STR("Could not complete create!");
+ }
status = ocfs_create_new_oin (&oin, 0ULL, osb);
if (status < 0) {
@@ -194,7 +222,7 @@
goto leave;
}
- fe = (ocfs_file_entry *) OCFS_BH_GET_DATA(new_fe_bh);
+ fe = (ocfs_file_entry *) OCFS_BH_GET_DATA_READ(new_fe_bh); /* read */
file_off = fe->this_sector;
dirnode_off = fe->extents[0].disk_off;
@@ -214,7 +242,7 @@
if (ParentOin)
OCFS_CLEAR_FLAG (ParentOin->oin_flags, OCFS_OIN_IN_USE);
- fe = (ocfs_file_entry *) OCFS_BH_GET_DATA(new_fe_bh);
+ fe = (ocfs_file_entry *) OCFS_BH_GET_DATA_READ(new_fe_bh); /* read */
/* is this safe if we no longer have it locked? */
if (oin->lock_res != NULL) {
@@ -233,9 +261,8 @@
leave:
if (status >= 0 && !IS_ERR (inode)) {
- inode->i_ino = LO (oin->file_disk_off);
oin->inode = inode;
- ocfs_populate_inode (inode, fe, mode, oin);
+ ocfs_populate_inode (inode, fe, mode, oin, true);
insert_inode_hash (inode);
d_instantiate (dentry, inode);
} else if (status == -ENOSPC)
@@ -345,9 +372,7 @@
fe->create_time = fe->modify_time = OCFS_CURRENT_TIME;
- pLockNode = (ocfs_dir_node *)OCFS_BH_GET_DATA(lock_bh);
- /* is this always going to be false, considering we just
- * passed OCFS_DLM_EXCLUSIVE_LOCK above? */
+ pLockNode = (ocfs_dir_node *)OCFS_BH_GET_DATA_READ(lock_bh); /* read */
cache_lock = (DISK_LOCK_FILE_LOCK (pLockNode) == OCFS_DLM_ENABLE_CACHE_LOCK);
OCFS_BH_PUT_DATA(lock_bh);
@@ -386,7 +411,7 @@
if (!cache_lock)
DISK_LOCK_FILE_LOCK (fe) = OCFS_DLM_NO_LOCK;
-
+
status = ocfs_read_bhs(osb, bitmapOffset, osb->vol_layout.dir_node_size, dirbhs, OCFS_BH_CACHED, NULL);
if (status < 0) {
ocfs_safefree (dirbhs);
@@ -396,12 +421,12 @@
}
for (i = 0; i < numblks; i++) {
- tmp = OCFS_BH_GET_DATA(dirbhs[i]);
+ tmp = OCFS_BH_GET_DATA_WRITE(dirbhs[i]); /* write */
memset(tmp, 0, osb->sect_size);
OCFS_BH_PUT_DATA(dirbhs[i]);
}
- new_dir = (ocfs_dir_node *) OCFS_BH_GET_DATA(dirbhs[0]);
+ new_dir = (ocfs_dir_node *) OCFS_BH_GET_DATA_WRITE(dirbhs[0]); /* write */
ocfs_initialize_dir_node (osb, new_dir, bitmapOffset,
fileOffset, osb->node_num);
@@ -705,6 +730,101 @@
} /* ocfs_double_lock */
/*
+ * ocfs_fix_extent_pointers
+ *
+ * If you move a file entry from one directory to another, the file's
+ * offset changes (obviously). This function rewrites up_hdr_node_ptr
+ * in each extent block listed in fe->extents[0 .. next_free_ext) so
+ * that it holds the entry's current fe->this_sector.
+ *
+ * osb:    passed through to ocfs_read_bh()
+ * handle: journal handle; each extent buffer is passed to
+ *         ocfs_journal_access() before and ocfs_journal_dirty() after
+ *         the update
+ * fe_bh:  buffer holding the (already moved) file entry
+ * inode:  passed through to ocfs_read_bh()
+ *
+ * Returns 0 on success or when the entry uses local extents (nothing
+ * to do), -EINVAL if the file entry or an extent block fails its
+ * validity check, or the negative status from ocfs_read_bh(),
+ * ocfs_journal_access() or ocfs_journal_dirty().
+ */
+static int ocfs_fix_extent_pointers(ocfs_super *osb,
+				    ocfs_journal_handle *handle,
+				    struct buffer_head *fe_bh,
+				    struct inode *inode)
+{
+	int status = 0;
+	ocfs_file_entry *fe = NULL;
+	__u64 new_ptr;
+	struct buffer_head *extent_bh = NULL;
+	ocfs_extent_group *extent = NULL;
+	int i;
+
+	LOG_ENTRY();
+
+	/* held until bail: the loop below reads fe->extents[] */
+	fe = (ocfs_file_entry *) OCFS_BH_GET_DATA_READ(fe_bh);
+
+	if (!IS_VALID_FILE_ENTRY(fe)) {
+		LOG_ERROR_STATUS(status = -EINVAL);
+		goto bail;
+	}
+
+	LOG_TRACE_ARGS("fe->this_sector = %u.%u, fe->local_ext = %s, "
+		       "fe->next_free_ext = %u\n",
+		       HILO(fe->this_sector),
+		       (fe->local_ext) ? "true" : "false",
+		       fe->next_free_ext);
+
+	/* If we have local extents, then don't even worry about
+	 * this. Directories, by definition, always have local_ext
+	 * true, so we don't need a separate check for them. */
+	if (fe->local_ext)
+		goto bail;
+
+	new_ptr = fe->this_sector;
+
+	for(i = 0; i < fe->next_free_ext; i++) {
+		status = ocfs_read_bh(osb, fe->extents[i].disk_off, &extent_bh,
+				      OCFS_BH_CACHED, inode);
+		if (status < 0) {
+			LOG_ERROR_STATUS(status);
+			goto bail;
+		}
+
+		status = ocfs_journal_access(handle, extent_bh,
+					     OCFS_JOURNAL_ACCESS_WRITE);
+		if (status < 0) {
+			LOG_ERROR_STATUS(status);
+			goto bail;
+		}
+
+		extent = (ocfs_extent_group *) OCFS_BH_GET_DATA_WRITE(extent_bh);
+		if ((!IS_VALID_EXTENT_HEADER(extent))
+		    && (!IS_VALID_EXTENT_DATA(extent))) {
+			LOG_ERROR_STATUS(status = -EINVAL);
+			OCFS_BH_PUT_DATA(extent_bh);
+			/* The data reference is already dropped; clear
+			 * 'extent' so the bail path does not PUT_DATA
+			 * this buffer a second time. */
+			extent = NULL;
+			/* NOTE(review): other error paths in this change
+			 * call ocfs_clear_buffer_modified() -- confirm the
+			 * unprefixed name is intended here. */
+			clear_buffer_modified(extent_bh);
+			goto bail;
+		}
+
+		/* this next line does the real work of the function. */
+		extent->up_hdr_node_ptr = new_ptr;
+
+		OCFS_BH_PUT_DATA(extent_bh);
+		extent = NULL;
+
+		status = ocfs_journal_dirty(handle, extent_bh);
+		if (status < 0) {
+			LOG_ERROR_STATUS(status);
+			goto bail;
+		}
+
+		brelse(extent_bh);
+		extent_bh = NULL;
+	}
+bail:
+	if (fe)
+		OCFS_BH_PUT_DATA(fe_bh);
+
+	/* extent_bh is non-NULL only if we left the loop early; 'extent'
+	 * is non-NULL only while its data reference is still held. */
+	if (extent_bh) {
+		if (extent)
+			OCFS_BH_PUT_DATA(extent_bh);
+		brelse(extent_bh);
+	}
+	LOG_EXIT_STATUS(status);
+	return(status);
+}
+
+/*
* ocfs_rename()
*
*/
@@ -717,12 +837,13 @@
ocfs_file_entry *newfe = NULL, *oldfe = NULL;
struct buffer_head *oldfe_bh = NULL;
struct buffer_head *newfe_bh = NULL;
+ struct buffer_head *insert_bh = NULL;
ocfs_file_entry *tmpfe = NULL;
ocfs_super *osb = NULL;
__u64 oldOffset, newDirOff, oldDirOff, t;
- bool DeleteTargetOin = false;
__u64 tmpoff = 0;
bool kill_newfe = false;
+ bool delete_target_oin = false;
ocfs_bitmap_free_head *free_head = NULL;
ocfs_journal_handle *handle = NULL;
__u32 dir_lock_flags = FLAG_FILE_CREATE | FLAG_DIR;
@@ -800,7 +921,7 @@
OCFS_SET_FLAG (newOIN->oin_flags, OCFS_OIN_IN_USE);
ocfs_up_sem (&(newOIN->main_res));
status = ocfs_verify_update_oin (osb, newOIN);
- DeleteTargetOin = true;
+ delete_target_oin = true;
}
}
@@ -838,7 +959,7 @@
/* lock old_fe. we read it ourselves instead of letting
* acquire_lock do it because if it's a directory, we lock the
* dirnode instead. */
- oldfe = (ocfs_file_entry *) OCFS_BH_GET_DATA(oldfe_bh);
+ oldfe = (ocfs_file_entry *) OCFS_BH_GET_DATA_READ(oldfe_bh); /* read */
if (oldfe->attribs & OCFS_ATTRIB_DIRECTORY) {
oldfe_lockid = oldfe->extents[0].disk_off;
oldfe_flags = FLAG_DIR;
@@ -879,7 +1000,7 @@
/* In case we need to overwrite an existing file, we blow it
* away first */
if (kill_newfe) {
- newfe = (ocfs_file_entry *) OCFS_BH_GET_DATA(newfe_bh);
+ newfe = (ocfs_file_entry *) OCFS_BH_GET_DATA_READ(newfe_bh); /* read */
if (newfe->attribs & OCFS_ATTRIB_DIRECTORY) {
newfe_lockid = newfe->extents[0].disk_off;
newfe_flags = FLAG_DIR;
@@ -912,6 +1033,7 @@
}
}
+
/* If we're moving to a different directory, all we've gotta
* do is copy the fe information from the old directory to the
* new one. */
@@ -938,7 +1060,7 @@
goto finally;
}
- oldfe = (ocfs_file_entry *) OCFS_BH_GET_DATA(oldfe_bh);
+ oldfe = (ocfs_file_entry *) OCFS_BH_GET_DATA_READ(oldfe_bh); /* read */
memcpy(tmpfe, oldfe, sizeof(ocfs_file_entry));
OCFS_BH_PUT_DATA(oldfe_bh);
oldfe = NULL;
@@ -957,20 +1079,55 @@
DISK_LOCK_WRITER_NODE (tmpfe) = osb->node_num;
tmpfe->modify_time = OCFS_CURRENT_TIME;
- status= ocfs_insert_file(osb, tmpfe, new_dir_bh, NULL, handle, new_dir, new_inode);
+ if (tmpfe->attribs & OCFS_ATTRIB_DIRECTORY)
+ tmpoff = tmpfe->extents[0].disk_off;
+ else
+ tmpoff = tmpfe->this_sector;
+
+ down(&old_inode->i_sem);
+
+ status = ocfs_insert_file(osb, tmpfe, new_dir_bh, &insert_bh,
+ handle, old_dir, old_inode);
if (status < 0) {
+ up(&old_inode->i_sem);
LOG_ERROR_STATUS (status);
goto finally;
}
+ status = ocfs_fix_extent_pointers(osb, handle, insert_bh,
+ old_inode);
+ if (status < 0) {
+ up(&old_inode->i_sem);
+ LOG_ERROR_STATUS (status);
+ goto finally;
+ }
+ LOG_TRACE_ARGS("(after) tmpfe->this_sector = %u.%u\n",
+ HILO(tmpfe->this_sector));
+
+ if (oldOIN)
+ ocfs_down_sem(&oldOIN->main_res, true);
+
/* move the inode offset over to the new entry */
- if (S_ISDIR (old_dentry->d_inode->i_mode)) {
- SET_INODE_OFFSET(old_dentry->d_inode,
- tmpfe->extents[0].disk_off);
+ if (S_ISDIR(old_dentry->d_inode->i_mode)) {
+ /* the vote offset doesn't actually change for
+ * a directory, but the fe offset does... */
+ ocfs_inode_rehash(&osb->inode_hash,
+ tmpoff,
+ tmpfe->extents[0].disk_off,
+ tmpfe->this_sector);
} else {
SET_INODE_OFFSET(old_dentry->d_inode,
tmpfe->this_sector);
+ ocfs_inode_rehash(&osb->inode_hash,
+ tmpoff,
+ tmpfe->this_sector,
+ tmpfe->this_sector);
}
+ if (oldOIN) {
+ oldOIN->file_disk_off = tmpfe->this_sector;
+ ocfs_up_sem(&oldOIN->main_res);
+ }
+ up(&old_inode->i_sem);
} else {
/* Ok, we're moving inside of the same directory --
* this is easy then -- we just change the name on the
@@ -1008,9 +1165,11 @@
if (new_dentry->d_inode)
fsync_inode_buffers(old_dentry->d_inode);
}
- if (kill_newfe && DeleteTargetOin) {
- ocfs_release_cached_oin (osb, oldOIN);
- ocfs_release_oin (oldOIN, true);
+
+ /* delete the targets oin here as we've just blown it away! */
+ if (kill_newfe && newOIN && delete_target_oin) {
+ ocfs_release_cached_oin (osb, newOIN);
+ ocfs_release_oin (newOIN, true);
}
}
@@ -1057,6 +1216,8 @@
OCFS_BH_PUT_DATA(newfe_bh);
brelse(newfe_bh);
}
+ if (insert_bh)
+ brelse(insert_bh);
if (old_dir_bh)
brelse(old_dir_bh);
if (new_dir_bh)
@@ -1144,9 +1305,8 @@
goto abort_trans;
}
- fe = (ocfs_file_entry *) OCFS_BH_GET_DATA(new_fe_bh);
+ fe = (ocfs_file_entry *) OCFS_BH_GET_DATA_READ(new_fe_bh); /* read */
file_off = fe->this_sector;
- printk("ok in symlink, got the fe, this sector is %u.%u\n", file_off);
OCFS_BH_PUT_DATA(new_fe_bh);
fe = NULL;
@@ -1162,9 +1322,6 @@
abort_trans:
if (handle) {
- ocfs_bitmap_free_head *f = osb->alloc_free_head;
- osb->alloc_free_head = NULL;
-
if (status < 0)
ocfs_abort_trans(handle);
else {
@@ -1172,11 +1329,6 @@
if (status < 0)
LOG_ERROR_STATUS(status);
}
-
- if (f) {
- ocfs_process_bitmap_free_head(osb, f);
- free_bitmap_free_head(f);
- }
}
if (lock_res != NULL) {
@@ -1203,11 +1355,10 @@
#else
inode->i_rdev = OCFS_NODEV;
#endif
- inode->i_ino = LO (oin->file_disk_off);
oin->inode = inode;
- fe = (ocfs_file_entry *) OCFS_BH_GET_DATA(new_fe_bh);
- ocfs_populate_inode (inode, fe, S_IFLNK | S_IRWXUGO, oin);
+ fe = (ocfs_file_entry *) OCFS_BH_GET_DATA_READ(new_fe_bh); /* read */
+ ocfs_populate_inode (inode, fe, S_IFLNK | S_IRWXUGO, oin, true);
OCFS_BH_PUT_DATA(new_fe_bh);
fe = NULL;
@@ -1261,22 +1412,16 @@
goto leave;
}
- status = ocfs_journal_access(handle, fe_bh,
- OCFS_JOURNAL_ACCESS_WRITE);
- if (status < 0) {
- LOG_ERROR_STATUS (status);
- goto leave;
- }
-
/* lock file ent for a dir is out in the 1st extent, this_sector
for file */
- fe = (ocfs_file_entry *)OCFS_BH_GET_DATA(fe_bh);
+ fe = (ocfs_file_entry *)OCFS_BH_GET_DATA_READ(fe_bh); /* read */
dir_node_ptr = fe->dir_node_ptr;
if (fe->attribs & OCFS_ATTRIB_DIRECTORY) {
lockId = fe->extents[0].disk_off;
lockFlags = (FLAG_DIR | FLAG_FILE_RENAME);
- status = ocfs_read_bh(osb, lockId, &lockbh, OCFS_BH_CACHED, inode);
+ status = ocfs_read_bh(osb, lockId, &lockbh, OCFS_BH_CACHED,
+ inode);
if (status < 0) {
LOG_ERROR_STATUS (status);
goto leave;
@@ -1293,32 +1438,40 @@
lockFlags = FLAG_FILE_RENAME;
lockbh = fe_bh;
}
+ OCFS_BH_PUT_DATA(fe_bh);
+ fe = NULL;
- /* Change the name and write it back.... */
- fe->filename[0] = '\0';
- strncpy (fe->filename, file_name->name, file_name->len);
- fe->filename[file_name->len] = '\0';
-
- DISK_LOCK_SEQNUM (fe) = changeSeqNum;
-
- /* Set the Valid bit here */
- SET_VALID_BIT (fe->sync_flags);
- fe->sync_flags &= ~(OCFS_SYNC_FLAG_CHANGE);
-
- status = ocfs_read_bh (osb, dir_node_ptr, &dirbh, OCFS_BH_CACHED, inode);
+ status = ocfs_read_bh (osb, dir_node_ptr, &dirbh, OCFS_BH_CACHED,
+ inode);
if (status < 0) {
- OCFS_BH_PUT_DATA(fe_bh);
LOG_ERROR_STATUS(status);
goto leave;
}
+
status = ocfs_journal_access(handle, dirbh, OCFS_JOURNAL_ACCESS_WRITE);
if (status < 0) {
- OCFS_BH_PUT_DATA(fe_bh);
LOG_ERROR_STATUS (status);
goto leave;
}
+ status = ocfs_journal_access(handle, fe_bh, OCFS_JOURNAL_ACCESS_WRITE);
+ if (status < 0) {
+ LOG_ERROR_STATUS (status);
+ goto leave;
+ }
- pLockNode = (ocfs_dir_node *)OCFS_BH_GET_DATA(dirbh);
+ /* preserve bh lock ordering so grab the write on dirbh 1st. */
+ pLockNode = (ocfs_dir_node *)OCFS_BH_GET_DATA_WRITE(dirbh); /* write */
+ fe = (ocfs_file_entry *) OCFS_BH_GET_DATA_WRITE(fe_bh); /* write */
+
+ /* Change the actual name now */
+ fe->filename[0] = '\0';
+ strncpy (fe->filename, file_name->name, file_name->len);
+ fe->filename[file_name->len] = '\0';
+ DISK_LOCK_SEQNUM (fe) = changeSeqNum;
+ SET_VALID_BIT (fe->sync_flags);
+ fe->sync_flags &= ~(OCFS_SYNC_FLAG_CHANGE);
+
+ /* mark the dirnode as dirty */
pLockNode->index_dirty = 1;
pLockNode->bad_off = (fe->this_sector - dir_node_ptr) / osb->sect_size;
pLockNode->bad_off -= 1;
@@ -1331,20 +1484,21 @@
needs_reindex = (index < pLockNode->num_ent_used);
if (needs_reindex) {
- memmove (&pLockNode->index[index], &pLockNode->index[index + 1],
+ memmove (&pLockNode->index[index],
+ &pLockNode->index[index + 1],
pLockNode->num_ent_used - (index + 1));
pLockNode->index[pLockNode->num_ent_used - 1] = pLockNode->bad_off;
/* is this a safe cast? */
flags = OCFS_FE_CACHE_FLAGS(osb, ((ocfs_file_entry *) pLockNode));
- OCFS_BH_PUT_DATA(dirbh);
- status = ocfs_journal_dirty(handle, dirbh);
- if (status < 0) {
- OCFS_BH_PUT_DATA(fe_bh);
- LOG_ERROR_STATUS (status);
- goto leave;
- }
}
+ OCFS_BH_PUT_DATA(dirbh);
+ status = ocfs_journal_dirty(handle, dirbh);
+ if (status < 0) {
+ LOG_ERROR_STATUS (status);
+ goto leave;
+ }
+
flags = OCFS_FE_CACHE_FLAGS(osb, fe);
OCFS_BH_PUT_DATA(fe_bh);
fe = NULL;
@@ -1442,7 +1596,7 @@
* deleting? in that case, we also need to read the
* head of it's first dirnode which would have been
* done implicitely by locking it. */
- fe = (ocfs_file_entry *) OCFS_BH_GET_DATA(fe_bh);
+ fe = (ocfs_file_entry *) OCFS_BH_GET_DATA_READ(fe_bh); /* read */
if (fe->attribs & OCFS_ATTRIB_DIRECTORY) {
status = ocfs_read_bh(osb, fe->extents[0].disk_off,
&lock_bh, OCFS_BH_CACHED, inode);
@@ -1477,7 +1631,7 @@
}
/* lock the file entry */
- fe = (ocfs_file_entry *)OCFS_BH_GET_DATA(fe_bh);
+ fe = (ocfs_file_entry *)OCFS_BH_GET_DATA_READ(fe_bh); /* read */
if (fe->attribs & OCFS_ATTRIB_DIRECTORY) {
lock_id = fe->extents[0].disk_off;
@@ -1504,19 +1658,43 @@
goto leave;
}
- fe = (ocfs_file_entry *) OCFS_BH_GET_DATA(fe_bh);
+ /* need to preserve locking order, so take a 'write' lock on
+ * the dirnode sector first. it won't get passed to
+ * journal_dirty until ocfs_remove_file so clean up the write
+ * lock on errors before that */
+ OCFS_BH_GET_DATA_WRITE(lock_node_bh);
+ OCFS_BH_PUT_DATA(lock_node_bh);
+
+ /* we call ocfs_clear_buffer_modified in several error cases
+ * here if we set the modify bit on this buffer, but haven't
+ * journal_dirtied it yet. Otherwise, it'll stay modified even
+ * after the abort_trans. */
+ fe = (ocfs_file_entry *) OCFS_BH_GET_DATA_WRITE(fe_bh); /* write */
is_dir = fe->attribs & OCFS_ATTRIB_DIRECTORY;
if (is_dir) {
__u8 numused;
ocfs_dir_node *pLockNode;
- pLockNode = (ocfs_dir_node *)OCFS_BH_GET_DATA(lock_bh);
+ pLockNode = (ocfs_dir_node *)OCFS_BH_GET_DATA_READ(lock_bh);/* read */
+ if (!IS_VALID_DIR_NODE(pLockNode)) {
+ OCFS_BH_PUT_DATA(lock_bh);
+ OCFS_BH_PUT_DATA(fe_bh);
+ ocfs_clear_buffer_modified(fe_bh);
+ ocfs_clear_buffer_modified(lock_node_bh);
+ status = -EIO;
+ LOG_TRACE_STR("Uhoh, invalid dirnode found!");
+ goto leave;
+ }
+
numused = pLockNode->num_ent_used;
OCFS_BH_PUT_DATA(lock_bh);
if (numused && !(flags & FLAG_DEL_NAME)) {
OCFS_BH_PUT_DATA(fe_bh);
+ ocfs_clear_buffer_modified(fe_bh);
+ ocfs_clear_buffer_modified(lock_node_bh);
status = -ENOTEMPTY;
+ LOG_TRACE_ARGS("-ENOTEMPY, numused = %u\n", numused);
goto leave;
}
}
@@ -1526,6 +1704,8 @@
* doing a rename so skip the 1st part of this function. */
status = 0;
OCFS_BH_PUT_DATA(fe_bh);
+ ocfs_clear_buffer_modified(fe_bh);
+ ocfs_clear_buffer_modified(lock_node_bh);
goto delete_entry;
}
@@ -1543,11 +1723,13 @@
if (is_dir) {
/* Iterate through all the dir nodes for this
* directory and mark them to be freed */
- fe = (ocfs_file_entry *) OCFS_BH_GET_DATA(fe_bh);
+ fe = (ocfs_file_entry *) OCFS_BH_GET_DATA_READ(fe_bh); /* read */
status = ocfs_free_directory_block (osb, fe, free_head, inode);
OCFS_BH_PUT_DATA(fe_bh);
if (status < 0) {
OCFS_BH_PUT_DATA(fe_bh);
+ ocfs_clear_buffer_modified(fe_bh);
+ ocfs_clear_buffer_modified(lock_node_bh);
LOG_ERROR_STATUS (status);
goto leave;
}
@@ -1556,6 +1738,8 @@
* this file so we can remove them after commit. */
status = ocfs_free_file_extents (osb, fe_bh, free_head);
if (status < 0) {
+ ocfs_clear_buffer_modified(fe_bh);
+ ocfs_clear_buffer_modified(lock_node_bh);
LOG_ERROR_STATUS (status);
goto leave;
}
@@ -1563,7 +1747,8 @@
delete_entry:
/* remove the fe from the dirnode.*/
- status = ocfs_remove_file(osb, fe_bh, lock_node_bh, handle, parent_inode, inode);
+ status = ocfs_remove_file(osb, fe_bh, lock_node_bh, handle,
+ parent_inode, inode);
if (status < 0) {
LOG_ERROR_STATUS(status);
goto leave;
Modified: trunk/src/nm.c
===================================================================
--- trunk/src/nm.c 2003-12-18 23:28:02 UTC (rev 14)
+++ trunk/src/nm.c 2004-01-24 01:22:15 UTC (rev 15)
@@ -29,7 +29,7 @@
/* Tracing */
#define OCFS_DEBUG_CONTEXT OCFS_DEBUG_CONTEXT_NM
-static struct inode * ocfs_get_inode_from_offset(ocfs_super * osb, __u64 fileoff);
+static struct inode * ocfs_get_inode_no_bh(ocfs_super * osb, __u64 voteoff);
static int ocfs_release_dir_cache_lock (ocfs_super *osb, struct buffer_head **dir_bhs, struct inode *inode);
static inline int get_process_vote_action(ocfs_super * osb, ocfs_lock_res *lockres, __u32 node_num, __u32 flags, int status, bool *master_alive, ocfs_inode **oin);
static int ocfs_disk_update_resource (ocfs_super * osb, ocfs_lock_res * lock_res, struct buffer_head **bh, __u32 timeout, struct inode *inode);
@@ -163,7 +163,7 @@
LOG_ENTRY_ARGS("(vote_node = %d, bh = 0x%x)\n", vote_node, bh);
- publish = (ocfs_publish *) OCFS_BH_GET_DATA(bh);
+ publish = (ocfs_publish *) OCFS_BH_GET_DATA_READ(bh); /* read */
if (osb->last_publ_seq_num[vote_node] == publish->publ_seq_num){
LOG_TRACE_ARGS("Already voted on node %d, seqnum (%u.%u)\n",
@@ -237,7 +237,7 @@
ocfs_node_config_hdr *node_cfg_hdr = NULL;
__u64 curr_node_map;
__u64 cfg_seq_num;
- int which;
+ int which, pruned;
int flush_misses = 0;
struct buffer_head *bh = NULL;
@@ -301,6 +301,10 @@
}
}
+ /* try to prune some bh_sem hash entries if list is too long */
+ pruned = ocfs_bh_sem_hash_prune();
+ LOG_TRACE_ARGS("pruned %d entries from nm thread\n", pruned);
+
/* lock publish to prevent overwrites from vote_req and vote_reset */
down (&(osb->publish_lock));
@@ -321,7 +325,7 @@
up (&(osb->publish_lock));
/* If another node was added to the config read and update the cfg */
- node_cfg_hdr = (ocfs_node_config_hdr *) OCFS_BH_GET_DATA(osb->cfg_bhs[1]);
+ node_cfg_hdr = (ocfs_node_config_hdr *) OCFS_BH_GET_DATA_READ(osb->cfg_bhs[1]); /* read */
num_nodes = node_cfg_hdr->num_nodes;
cfg_seq_num = node_cfg_hdr->cfg_seq_num;
OCFS_BH_PUT_DATA(osb->cfg_bhs[1]);
@@ -355,7 +359,7 @@
/* Check for the highest node looking for a vote, if anybody is looking */
for (i = 0, which = OCFS_VOLCFG_NEWCFG_SECTORS; i < num_nodes; i++, which++) {
- publish = (ocfs_publish *) OCFS_BH_GET_DATA(osb->cfg_bhs[which]);
+ publish = (ocfs_publish *) OCFS_BH_GET_DATA_READ(osb->cfg_bhs[which]); /* read */
if (publish->time == (__u64) 0)
goto loop;
@@ -392,13 +396,19 @@
if ((vote_node != OCFS_INVALID_NODE_NUM) &&
(vote_node != osb->node_num)) {
+ __s32 voted;
LOG_TRACE_ARGS("vote_node = %d\n", vote_node);
bh = osb->cfg_bhs[OCFS_VOLCFG_NEWCFG_SECTORS
+ osb->node_num];
down(&(osb->publish_lock));
- publish = (ocfs_publish *) OCFS_BH_GET_DATA(bh);
- if (publish->vote) {
+
+ publish = (ocfs_publish *) OCFS_BH_GET_DATA_READ(bh); /* read */
+ voted = publish->vote;
+ OCFS_BH_PUT_DATA(bh);
+
+ if (voted) {
+ publish = (ocfs_publish *) OCFS_BH_GET_DATA_WRITE(bh); /* write */
publish->vote = 0;
OCFS_BH_PUT_DATA(bh);
status = ocfs_write_bh(osb, bh, 0, NULL);
@@ -406,8 +416,7 @@
LOG_ERROR_STATUS (status);
goto finally;
}
- } else
- OCFS_BH_PUT_DATA(bh);
+ }
publish = NULL;
up(&(osb->publish_lock));
@@ -487,7 +496,7 @@
goto finally;
}
- fe = (ocfs_file_entry *)OCFS_BH_GET_DATA(*bh);
+ fe = (ocfs_file_entry *)OCFS_BH_GET_DATA_READ(*bh); /* read */
lock_res->lock_type = DISK_LOCK_FILE_LOCK (fe);
lock_res->master_node_num = DISK_LOCK_CURRENT_MASTER (fe);
lock_res->oin_openmap = DISK_LOCK_OIN_MAP (fe);
@@ -587,8 +596,10 @@
/* If we found the lockres in the hash and it's asked for, we still
* need to return a buffer_head */
if (status >= 0) {
+ int flags = (OCFS_NONCACHED(osb, (*lockres)->sector_num) ?
+ 0 : OCFS_BH_CACHED);
status = ocfs_read_bh(osb, (*lockres)->sector_num, b,
- OCFS_BH_CACHED, NULL);
+ flags, NULL);
if (status < 0) {
LOG_ERROR_STATUS(status);
goto finally;
@@ -623,62 +634,62 @@
#define OCFS_DEBUG_CONTEXT OCFS_DEBUG_CONTEXT_DLM
/*
- * ocfs_get_inode_from_offset()
+ * ocfs_get_inode_no_bh() - return the in-memory inode for a vote offset, or NULL
*
*/
-struct inode * ocfs_get_inode_from_bh(ocfs_super * osb, struct buffer_head *bh)
+static struct inode * ocfs_get_inode_no_bh(ocfs_super * osb, __u64 voteoff)
{
+ int status;
struct inode *inode = NULL;
- ocfs_file_entry *fe = NULL;
- ocfs_find_inode_args args;
+ struct buffer_head *fe_bh = NULL;
+ __u64 fe_off = 0;
- LOG_ENTRY ();
-
- args.fe_bh = bh;
- fe = (ocfs_file_entry *)OCFS_BH_GET_DATA(bh);
- args.offset = fe->this_sector;
- OCFS_BH_PUT_DATA(bh);
- fe = NULL;
+ LOG_ENTRY_ARGS("(voteoff = %u.%u)\n", HILO(voteoff));
-#ifdef LINUX_2_5
- inode = ocfs_iget(osb->sb, &args);
-#else
- inode = iget4 (osb->sb, (__u32) LO (args.offset),
- (find_inode_t) ocfs_find_inode,
- (void *) (&args));
-#endif
- if (inode != NULL && is_bad_inode (inode)) {
- iput (inode);
- inode = NULL;
+ if (voteoff == osb->vol_layout.root_start_off) {
+ inode = osb->sb->s_root->d_inode;
+ if (inode)
+ atomic_inc(&inode->i_count);
+ goto bail;
}
- if (inode)
- SET_BH_SEQNUM(inode, bh);
- LOG_EXIT_PTR (inode);
- return inode;
-} /* ocfs_get_inode_from_offset */
+ /* try to lookup the offset in the hash. If it's in there,
+ * then we have an inode and we should continue. Get the fe
+ * offset and read that in. */
+ /* if it's not in the inode hash, then it can't have an inode
+ * in memory. */
+ if (ocfs_inode_hash_lookup(&osb->inode_hash, voteoff, &fe_off) == 0)
+ goto bail;
+ LOG_TRACE_ARGS("got fe_off = %u.%u\n", HILO(fe_off));
-/*
- * ocfs_get_inode_from_offset()
- *
- */
-static struct inode * ocfs_get_inode_from_offset(ocfs_super * osb, __u64 fileoff)
-{
- int status;
- struct inode *inode = NULL;
- struct buffer_head *fe_bh = NULL;
+ /* only root dir has that fe_off in inode hash and we
+ * should've caught that case above... */
+ if (fe_off == 0) {
+ LOG_ERROR_STATUS(-EFAIL);
+ goto bail;
+ }
- LOG_ENTRY ();
-
- status = ocfs_read_bh(osb, fileoff, &fe_bh, OCFS_BH_CACHED, NULL);
- inode = ocfs_get_inode_from_bh(osb, fe_bh);
- brelse(fe_bh);
+ /* use the fe_off passed back as the offset might be for a
+ * directory and we actually want to give the FE bh. */
+ status = ocfs_read_bh(osb, fe_off, &fe_bh, OCFS_BH_CACHED, NULL);
+ if (status < 0) {
+ LOG_ERROR_STATUS(status);
+ goto bail;
+ }
+ inode = ocfs_get_inode_from_offset(osb, voteoff, fe_bh);
+ if (inode)
+ SET_BH_SEQNUM(inode, fe_bh);
+
+bail:
+ if (fe_bh)
+ brelse(fe_bh);
+
LOG_EXIT_PTR (inode);
return inode;
-} /* ocfs_get_inode_from_offset */
+} /* ocfs_get_inode_no_bh */
@@ -851,7 +862,7 @@
}
/* if we're lucky this will not need to do an IO */
- inode = ocfs_get_inode_from_offset(osb, lock_id);
+ inode = ocfs_get_inode_no_bh(osb, lock_id);
status = ocfs_find_update_res (osb, lock_id, &lockres, NULL, NULL,
(OCFS_NM_HEARTBEAT_TIME/2), inode);
if (status < 0) {
@@ -873,13 +884,13 @@
printk("ocfs_process_vote: %s request for lockid: %u.%u, action: %s, type: %s\n",
flags & FLAG_RELEASE_LOCK ? "RELEASE" :
- (flags & FLAG_ACQUIRE_LOCK ? "ACQUIRE" : "INVALID!!!"), lock_id,
+ (flags & FLAG_ACQUIRE_LOCK ? "ACQUIRE" : "MODIFY"), HILO(lock_id), /* %u.%u takes the hi and lo halves */
process_vote_strings[vote_type], disk_vote ? "disk vote" : "net vote" );
if (disk_vote) {
/* Zero out the vote for everybody, if any already set and hung */
- vote = (ocfs_vote *) OCFS_BH_GET_DATA(vote_bh);
+ vote = (ocfs_vote *) OCFS_BH_GET_DATA_WRITE(vote_bh); /* write */
for (i = 0; i < num_nodes; i++)
vote->vote[i] = 0;
OCFS_BH_PUT_DATA(vote_bh);
@@ -975,7 +986,7 @@
if (status < 0)
LOG_ERROR_STATUS (status);
if (status >= 0) {
- fe = (ocfs_file_entry *) OCFS_BH_GET_DATA(fe_bh);
+ fe = (ocfs_file_entry *) OCFS_BH_GET_DATA_WRITE(fe_bh); /* write */
DISK_LOCK_CURRENT_MASTER (fe) = node_num;
OCFS_BH_PUT_DATA(fe_bh);
status = ocfs_write_bh(osb, fe_bh, 0, inode);
@@ -1107,7 +1118,7 @@
break;
}
- fe = (ocfs_file_entry *) OCFS_BH_GET_DATA(fe_bh);
+ fe = (ocfs_file_entry *) OCFS_BH_GET_DATA_WRITE(fe_bh); /* write */
is_dir = IS_VALID_DIR_NODE(fe);
is_locked = DISK_LOCK_FILE_LOCK (fe) > OCFS_DLM_NO_LOCK;
if (vote_type == CHANGE_MASTER) {
@@ -1153,20 +1164,28 @@
/* need to do the write only if fe lock values need to change */
if (is_locked || vote_type == CHANGE_MASTER) {
if (vote_type == RELEASE_CACHE) {
- fe = (ocfs_file_entry *) OCFS_BH_GET_DATA(fe_bh);
+ fe = (ocfs_file_entry *) OCFS_BH_GET_DATA_WRITE(fe_bh); /* write */
DISK_LOCK_FILE_LOCK (fe) = OCFS_DLM_NO_LOCK;
OCFS_BH_PUT_DATA(fe_bh);
- }
- status = ocfs_write_bh(osb, fe_bh, 0, inode);
- if (status < 0) {
- LOG_ERROR_STATUS (status);
- brelse(fe_bh);
- break;
- }
- if (vote_type == RELEASE_CACHE)
+ status = ocfs_write_bh(osb, fe_bh, 0, inode);
+ if (status < 0) {
+ LOG_ERROR_STATUS (status);
+ brelse(fe_bh);
+ break;
+ }
lockres->lock_type = lockres->lock_state = OCFS_DLM_NO_LOCK;
- else
+ } else {
+ if (!is_dir) {
+ /* for a dir, fe_bh was presumably already written in ocfs_release_dir_cache_lock */
+ status = ocfs_write_bh(osb, fe_bh, 0, inode);
+ if (status < 0) {
+ LOG_ERROR_STATUS (status);
+ brelse(fe_bh);
+ break;
+ }
+ }
lockres->master_node_num = node_num;
+ }
}
brelse(fe_bh);
vote_response = FLAG_VOTE_NODE;
@@ -1182,7 +1201,7 @@
LOG_ERROR_STATUS (status);
break;
}
- fe = (ocfs_file_entry *)OCFS_BH_GET_DATA(fe_bh);
+ fe = (ocfs_file_entry *)OCFS_BH_GET_DATA_WRITE(fe_bh); /* write */
if ((fe->sync_flags & OCFS_SYNC_FLAG_NAME_DELETED) ||
(!(fe->sync_flags & OCFS_SYNC_FLAG_VALID))) {
@@ -1239,9 +1258,6 @@
break;
}
- if (inode)
- iput(inode);
-
if (flags & (FLAG_FILE_EXTEND|FLAG_FILE_TRUNCATE) &&
((flags & FLAG_ACQUIRE_LOCK && vote_response==FLAG_VOTE_NODE) ||
(flags & FLAG_RELEASE_LOCK))) {
@@ -1294,9 +1310,9 @@
}
}
}
-
+
if (disk_vote) {
- vote = (ocfs_vote *) OCFS_BH_GET_DATA(vote_bh);
+ vote = (ocfs_vote *) OCFS_BH_GET_DATA_WRITE(vote_bh); /* write */
vote->dir_ent = lock_id;
vote->vote_seq_num = seq_num;
vote->open_handle = open_handle;
@@ -1315,8 +1331,10 @@
ocfs_put_lockres(lockres);
}
- if (inc_inode_seq) {
- ocfs_inc_inode_seq(osb, inode);
+ if (inode) {
+ if (inc_inode_seq)
+ ocfs_inc_inode_seq(osb, inode);
+ iput(inode);
}
leave:
LOG_EXIT_STATUS (status);
@@ -1398,10 +1416,15 @@
LOG_ENTRY_ARGS ("(osb=0x%08x, dirnd=0x%08x)\n", osb, dirnode);
- dirnode = (ocfs_dir_node *) OCFS_BH_GET_DATA(dir_bhs[0]);
+ /* need to mark ALL buffers in a dir for write before calling write_bhs */
+ for (i = 0; i < OCFS_DEFAULT_DIR_NODE_SECTS; i++) { /* NOTE(review): assumes this constant is 256 -- confirm */
+ OCFS_BH_GET_DATA_WRITE(dir_bhs[i]);
+ OCFS_BH_PUT_DATA(dir_bhs[i]);
+ }
+ dirnode = (ocfs_dir_node *) OCFS_BH_GET_DATA_READ(dir_bhs[0]); /* read */
for(i = 0; i < dirnode->num_ent_used; i++) {
- fe = (ocfs_file_entry *) FILEENT_GETBH(dirnode, dir_bhs, i);
+ fe = (ocfs_file_entry *) FILEENT_GETBH_WRITE(dirnode, dir_bhs, i); /* write */
if (DISK_LOCK_FILE_LOCK(fe) == OCFS_DLM_ENABLE_CACHE_LOCK)
DISK_LOCK_FILE_LOCK(fe) = OCFS_DLM_NO_LOCK;
@@ -1426,21 +1449,27 @@
brelse(dir_bhs[i]);
memset(dir_bhs, 0, dirblks * sizeof(*dir_bhs));
- status = ocfs_read_bhs(osb, dirnode->next_node_ptr,
- dirblks, dir_bhs, OCFS_BH_CACHED, inode);
+ status = ocfs_read_bhs(osb, next_node_ptr,
+ osb->vol_layout.dir_node_size,
+ dir_bhs, OCFS_BH_CACHED, inode);
if (status < 0) {
LOG_ERROR_STATUS (status);
goto bail;
}
- dirnode = (ocfs_dir_node *)OCFS_BH_GET_DATA(dir_bhs[0]);
+ for (i = 0; i < OCFS_DEFAULT_DIR_NODE_SECTS; i++) { /* mark every buffer for write; NOTE(review): assumes this constant is 256 */
+ OCFS_BH_GET_DATA_WRITE(dir_bhs[i]);
+ OCFS_BH_PUT_DATA(dir_bhs[i]);
+ }
+
+ dirnode = (ocfs_dir_node *)OCFS_BH_GET_DATA_WRITE(dir_bhs[0]); /* write */
if(!IS_VALID_DIR_NODE(dirnode))
break;
DISK_LOCK_FILE_LOCK(dirnode) = OCFS_DLM_NO_LOCK;
for(i = 0; i < dirnode->num_ent_used; i++) {
- fe = FILEENT_GETBH(dirnode, dir_bhs, i);
+ fe = FILEENT_GETBH_WRITE(dirnode, dir_bhs, i); /* write */
if (DISK_LOCK_FILE_LOCK(fe) == OCFS_DLM_ENABLE_CACHE_LOCK)
DISK_LOCK_FILE_LOCK(fe) = OCFS_DLM_NO_LOCK;
FILEENT_PUTBH(dirnode, dir_bhs, i);
@@ -1459,7 +1488,7 @@
}
}
- bail:
+bail:
if (dirnode)
OCFS_BH_PUT_DATA(dir_bhs[0]);
Modified: trunk/src/oin.c
===================================================================
--- trunk/src/oin.c 2003-12-18 23:28:02 UTC (rev 14)
+++ trunk/src/oin.c 2004-01-24 01:22:15 UTC (rev 15)
@@ -32,7 +32,7 @@
LOG_ERROR_STATUS (status);
goto leave;
}
- fe = (ocfs_file_entry *) OCFS_BH_GET_DATA(fe_bh);
+ fe = (ocfs_file_entry *) OCFS_BH_GET_DATA_READ(fe_bh); /* read */
/* Make sure that what we found is not a directory. */
if (!(oin->oin_flags & OCFS_OIN_DIRECTORY)) {
@@ -179,11 +179,11 @@
}
}
- pLockRes = oin->lock_res;
- ocfs_get_lockres (pLockRes);
+ /* ??? we need to get the lock resource before updating it */
+ if (oin->lock_res) {
+ ocfs_get_lockres(oin->lock_res);
- /* ??? we need to the lock resource before updating it */
- if (pLockRes) {
+ pLockRes = oin->lock_res;
pLockRes->lock_type = DISK_LOCK_FILE_LOCK (fe);
pLockRes->master_node_num = DISK_LOCK_CURRENT_MASTER (fe);
pLockRes->oin_openmap = DISK_LOCK_OIN_MAP (fe);
@@ -191,8 +191,9 @@
pLockRes->last_read_time = DISK_LOCK_LAST_READ (fe);
pLockRes->reader_node_num = DISK_LOCK_READER_NODE (fe);
pLockRes->writer_node_num = DISK_LOCK_WRITER_NODE (fe);
+
+ ocfs_put_lockres(oin->lock_res);
}
- ocfs_put_lockres (pLockRes);
status = 0;
} else {
@@ -244,7 +245,7 @@
LOG_ERROR_STATUS(status = -ENOMEM);
goto leave;
}
- tmp = OCFS_BH_GET_DATA(fe_bh);
+ tmp = OCFS_BH_GET_DATA_READ(fe_bh); /* read */
memcpy(fe, tmp, sizeof(ocfs_file_entry));
OCFS_BH_PUT_DATA(fe_bh);
@@ -321,7 +322,7 @@
}
while (1) {
- extent = (ocfs_extent_group *) OCFS_BH_GET_DATA(extent_bh);
+ extent = (ocfs_extent_group *) OCFS_BH_GET_DATA_READ(extent_bh); /* read */
if (!IS_VALID_EXTENT_DATA (extent)) {
LOG_ERROR_STATUS(status = -EFAIL);
@@ -435,7 +436,7 @@
int status = 0;
ocfs_inode *oin = NULL;
- LOG_ENTRY ();
+ LOG_ENTRY_ARGS("(alloc_size = %u.%u)\n", HILO(alloc_size));
OCFS_ASSERT (osb);
@@ -514,7 +515,7 @@
goto finally;
}
- volDiskHdr = (ocfs_vol_disk_hdr *) OCFS_BH_GET_DATA(hdr_bh);
+ volDiskHdr = (ocfs_vol_disk_hdr *) OCFS_BH_GET_DATA_READ(hdr_bh); /* read */
root_off = volDiskHdr->root_off;
int_off = volDiskHdr->internal_off;
OCFS_BH_PUT_DATA(hdr_bh);
@@ -553,8 +554,10 @@
LOG_ERROR_STATUS (status);
goto finally;
}
-
+ /* put the offset/inode number in the inode cache thingy. */
+ ocfs_inode_hash_insert(osb, osb->vol_layout.root_start_off,
+ 0, OCFS_ROOT_INODE_NUMBER);
// oin->Parentoin = NULL; /* Root has no parent */
/* Set the Rootdirectories root Dir Node */
@@ -616,7 +619,7 @@
if (inode) {
__u64 savedOffset = oin->file_disk_off;
- SET_INODE_OIN (inode, NULL);
+ CLEAR_INODE_OIN(inode);
SET_INODE_OFFSET (inode, savedOffset);
LOG_TRACE_ARGS ("inode oin cleared / flags: %d / offset: %u.%u\n",
inode->i_flags, savedOffset);
@@ -629,13 +632,10 @@
ocfs_del_sem (&(oin->main_res));
OCFS_CLEAR_FLAG (oin->oin_flags, OCFS_INITIALIZED_MAIN_RESOURCE);
}
- if (oin->oin_flags & OCFS_INITIALIZED_PAGING_IO_RESOURCE) {
- ocfs_del_sem (&(oin->paging_io_res));
- OCFS_CLEAR_FLAG (oin->oin_flags,
- OCFS_INITIALIZED_PAGING_IO_RESOURCE);
- }
if (FreeMemory) {
+ /* clean out the oin */
+ memset(oin, 0, sizeof(ocfs_inode));
#ifdef OCFS_MEM_DBG
ocfs_dbg_slab_free (OcfsGlobalCtxt.oin_cache, oin);
#else
@@ -701,7 +701,10 @@
lockResource = (ocfs_lock_res *) oin->lock_res;
if (lockResource == NULL) {
- LOG_ERROR_STR ("lockres=null");
+ LOG_ERROR_ARGS("lockres=null, oin->file_disk_off "
+ "= %u.%u\n",
+ HILO(oin->file_disk_off));
+
goto bail;
}
Modified: trunk/src/osb.c
===================================================================
--- trunk/src/osb.c 2003-12-18 23:28:02 UTC (rev 14)
+++ trunk/src/osb.c 2004-01-24 01:22:15 UTC (rev 15)
@@ -33,6 +33,7 @@
vol_layout->cluster_size = (__u32) (vdh->cluster_size);
osb->obj_id.type = OCFS_TYPE_OSB;
osb->obj_id.size = sizeof (ocfs_super);
+ INIT_LIST_HEAD (&(osb->osb_next));
#define HASHBITS 12
@@ -41,6 +42,7 @@
goto bail;
}
+
ocfs_init_sem (&(osb->osb_res));
ocfs_init_sem (&(osb->map_lock));
ocfs_init_sem (&(osb->log_lock));
@@ -165,13 +167,11 @@
/* Read the Publish Sector of local Node */
offset = vol_layout->publ_sect_off + (osb->node_num * osb->sect_size);
status = ocfs_read_bh(osb, offset, &publish_bh, 0, NULL);
-/* status = ocfs_read_force_disk_ex (osb, (void **)&publish,
- osb->sect_size, osb->sect_size, offset);*/
if (status < 0) {
LOG_ERROR_STATUS (status);
goto finally;
}
- publish = (ocfs_publish *) OCFS_BH_GET_DATA(publish_bh);
+ publish = (ocfs_publish *) OCFS_BH_GET_DATA_WRITE(publish_bh); /* write */
/* Zero out the time stamp to write a new value */
publish->time = 0;
@@ -179,6 +179,7 @@
OCFS_BH_PUT_DATA(publish_bh);
publish = NULL;
+
status = ocfs_write_bh (osb, publish_bh, 0, NULL);
if (status < 0) {
LOG_ERROR_STATUS (status);
@@ -200,6 +201,13 @@
for(i = 0; i < OCFS_MAXIMUM_NODES; i++)
osb->last_publ_seq_num[i] = (__u64) (-1);
+ /* init the inode hash */
+ status = ocfs_inode_hash_init(osb);
+ if (status < 0) {
+ LOG_ERROR_STATUS (status);
+ goto finally;
+ }
+
/* We might need to add a variable in Global List of osb to */
/* delay any creation, if any other node is already creating a file */
@@ -288,7 +296,7 @@
/* Check to see who else is alive. */
/* Kick in the NM i/f to start writing time stamps to the disk */
- bail:
+bail:
LOG_EXIT_STATUS (status);
return status;
} /* ocfs_verify_volume */
@@ -318,7 +326,7 @@
goto finally;
}
- publish = (ocfs_publish *) OCFS_BH_GET_DATA(publish_bh);
+ publish = (ocfs_publish *) OCFS_BH_GET_DATA_READ(publish_bh); /* read */
/* we copy these two flags out of the publish sector and then unlock
* the bh as other functions will need to modify it. */
dirty = publish->dirty;
@@ -440,7 +448,8 @@
/* Remove the osb from the global linked list of all osb structures. */
/* The Global Link List is mainted for the whole driver */
ocfs_down_sem (&(OcfsGlobalCtxt.res), true);
- list_del (&(osb->osb_next));
+ if (!list_empty(&(osb->osb_next)))
+ list_del (&(osb->osb_next));
ocfs_up_sem (&(OcfsGlobalCtxt.res));
for (i=0; i<32; i++)
@@ -554,12 +563,12 @@
}
for (i = 0; i < OCFS_DEFAULT_DIR_NODE_SECTS; i++) {
- char *sect = OCFS_BH_GET_DATA(dirnode_bhs[i]);
+ char *sect = OCFS_BH_GET_DATA_WRITE(dirnode_bhs[i]); /* write */
memset(sect, 0, osb->sect_size);
OCFS_BH_PUT_DATA(dirnode_bhs[i]);
}
- NewDirNode = (ocfs_dir_node *) OCFS_BH_GET_DATA(dirnode_bhs[0]);
+ NewDirNode = (ocfs_dir_node *) OCFS_BH_GET_DATA_WRITE(dirnode_bhs[0]); /* write */
osb->vol_layout.root_start_off = bitmapOffset;
ocfs_initialize_dir_node (osb, NewDirNode, bitmapOffset, fileOffset, osb->node_num);
NewDirNode->dir_node_flags |= DIR_NODE_FLAG_ROOT;
@@ -580,7 +589,7 @@
goto bail;
}
- volDiskHdr = (ocfs_vol_disk_hdr *) OCFS_BH_GET_DATA(hdr_bh);
+ volDiskHdr = (ocfs_vol_disk_hdr *) OCFS_BH_GET_DATA_WRITE(hdr_bh); /* write */
volDiskHdr->root_off = osb->vol_layout.root_start_off;
volDiskHdr->internal_off = osb->vol_layout.root_int_off;
OCFS_BH_PUT_DATA(hdr_bh);
Modified: trunk/src/sem.c
===================================================================
--- trunk/src/sem.c 2003-12-18 23:28:02 UTC (rev 14)
+++ trunk/src/sem.c 2004-01-24 01:22:15 UTC (rev 15)
@@ -38,8 +38,8 @@
if (res->magic != OCFS_SEM_MAGIC) {
LOG_ERROR_ARGS("semaphore magic value is bad!\n");
- ret = false;
- goto bail;
+
+ BUG();
}
if (res->pid == 0) {
@@ -68,7 +68,6 @@
}
}
-bail:
LOG_EXIT_ULONG (ret);
return ret;
} /* ocfs_down_sem */
@@ -87,7 +86,8 @@
if (res->magic != OCFS_SEM_MAGIC) {
LOG_ERROR_ARGS("semaphore magic value is bad!\n");
- goto bail;
+
+ BUG();
}
if (res->count && current->pid == res->pid) {
@@ -98,7 +98,6 @@
}
}
-bail:
LOG_EXIT ();
return;
} /* ocfs_up_sem */
Modified: trunk/src/super.c
===================================================================
--- trunk/src/super.c 2003-12-18 23:28:02 UTC (rev 14)
+++ trunk/src/super.c 2004-01-24 01:22:15 UTC (rev 15)
@@ -69,7 +69,7 @@
module_param (ip_address, charp, 0);
module_param (ip_port, ulong, 0);
module_param (guid, charp, 0);
-module_param (cs, ulong, 0);
+module_param (cs, int, 0);
module_param (comm_voting, ulong, 0);
#else /* LINUX_2_5 */
MODULE_PARM (node_name, "s");
@@ -88,7 +88,7 @@
MODULE_PARM_DESC(ip_port, "Port number for the network dlm on this node");
MODULE_PARM (guid, "s");
MODULE_PARM_DESC(guid, "GUID for this machine");
-MODULE_PARM (cs, "l");
+MODULE_PARM (cs, "i");
MODULE_PARM_DESC(cs, "Checksum");
MODULE_PARM (comm_voting, "l");
MODULE_PARM_DESC(comm_voting, "Enable/Disable network dlm");
@@ -426,6 +426,12 @@
LOG_ERROR_STATUS (status);
goto leave;
}
+
+ status = ocfs_bh_sem_hash_init();
+ if (status < 0) {
+ LOG_ERROR_STATUS (status);
+ goto leave;
+ }
/* Initialize the DLM */
status = ocfs_init_dlm ();
@@ -457,6 +463,9 @@
leave:
if (status < 0) {
+ if (OcfsGlobalCtxt.bh_sem_hash && ocfs_bh_sem_hash_destroy() < 0)
+ LOG_ERROR_STR("failed to destroy bh_sem hashtable");
+
/* Free up lookaside lists */
if (OcfsGlobalCtxt.flags & OCFS_FLAG_MEM_LISTS_INITIALIZED)
ocfs_free_mem_lists ();
@@ -608,6 +617,9 @@
ocfs_down_sem (&(OcfsGlobalCtxt.res), true);
OCFS_SET_FLAG (OcfsGlobalCtxt.flags, OCFS_FLAG_SHUTDOWN_VOL_THREAD);
+ if (ocfs_bh_sem_hash_destroy() < 0)
+ LOG_ERROR_STR("failed to destroy bh_sem hashtable");
+
if (OcfsGlobalCtxt.flags & OCFS_FLAG_MEM_LISTS_INITIALIZED)
ocfs_free_mem_lists ();
@@ -670,7 +682,7 @@
LOG_ERROR_STR("failed to read bitmap data");
return -EIO;
}
- bm_lock = (ocfs_bitmap_lock *)OCFS_BH_GET_DATA(bh);
+ bm_lock = (ocfs_bitmap_lock *)OCFS_BH_GET_DATA_READ(bh); /* read */
if (numbits >= bm_lock->used_bits)
freebits = numbits - bm_lock->used_bits;
@@ -742,6 +754,10 @@
OcfsGlobalCtxt.extent_cache = kmem_cache_create ("extent_cache",
sizeof(ocfs_extent) + OCFS_POINTER_SIZE, 0, SLAB_NO_REAP | SLAB_HWCACHE_ALIGN,
NULL, NULL);
+
+ OcfsGlobalCtxt.bh_sem_cache = kmem_cache_create ("bh_sem_cache",
+ sizeof(ocfs_bh_sem), 0, SLAB_NO_REAP | SLAB_HWCACHE_ALIGN,
+ NULL, NULL);
OCFS_SET_FLAG (OcfsGlobalCtxt.flags, OCFS_FLAG_MEM_LISTS_INITIALIZED);
@@ -759,6 +775,7 @@
kmem_cache_destroy (OcfsGlobalCtxt.fe_cache);
kmem_cache_destroy (OcfsGlobalCtxt.lockres_cache);
kmem_cache_destroy (OcfsGlobalCtxt.extent_cache);
+ kmem_cache_destroy (OcfsGlobalCtxt.bh_sem_cache);
OCFS_CLEAR_FLAG (OcfsGlobalCtxt.flags, OCFS_FLAG_MEM_LISTS_INITIALIZED);
} /* ocfs_free_mem_lists */
@@ -800,8 +817,8 @@
for (i=0; i<2; i++)
wait_on_buffer(bhs[i]);
- vol_header = (ocfs_vol_disk_hdr *) OCFS_BH_GET_DATA(bhs[0]);
- vol_label = (ocfs_vol_label *) OCFS_BH_GET_DATA(bhs[1]);
+ vol_header = (ocfs_vol_disk_hdr *) OCFS_BH_GET_DATA_READ(bhs[0]); /* read */
+ vol_label = (ocfs_vol_label *) OCFS_BH_GET_DATA_READ(bhs[1]); /* read */
LOG_TRACE_STR ("ocfs_verify_volume...");
status = ocfs_verify_volume (vol_header);
@@ -1085,6 +1102,9 @@
// list_del(&osb->osb_next); /* this has been moved into ocfs_delete_osb */
ocfs_up_sem (&(OcfsGlobalCtxt.res));
+ /* destroy the inode hash */
+ ocfs_inode_hash_destroy(&osb->inode_hash);
+
osb->vol_state = VOLUME_DISMOUNTED;
if (AcquiredOSB) {
ocfs_up_sem (&(osb->osb_res));
Modified: trunk/src/sysfile.c
===================================================================
--- trunk/src/sysfile.c 2003-12-18 23:28:02 UTC (rev 14)
+++ trunk/src/sysfile.c 2004-01-24 01:22:15 UTC (rev 15)
@@ -90,7 +90,7 @@
LOG_ERROR_STATUS(status);
goto leave;
}
- fe = (ocfs_file_entry *) OCFS_BH_GET_DATA(fe_bh);
+ fe = (ocfs_file_entry *) OCFS_BH_GET_DATA_WRITE(fe_bh); /* write */
memset (fe, 0, sizeof (ocfs_file_entry));
/* Set the Flag to use the Local Extents */
@@ -118,11 +118,8 @@
}
leave:
- if (fe_bh) {
- if (fe)
- OCFS_BH_PUT_DATA(fe_bh);
+ if (fe_bh)
brelse(fe_bh);
- }
LOG_EXIT_STATUS (status);
return status;
} /* ocfs_init_system_file */
@@ -166,7 +163,7 @@
goto leave;
}
- fe = (ocfs_file_entry *) OCFS_BH_GET_DATA(fe_bh);
+ fe = (ocfs_file_entry *) OCFS_BH_GET_DATA_READ(fe_bh); /* read */
if (!IS_VALID_FILE_ENTRY (fe)) {
LOG_ERROR_STATUS(status = -EINVAL);
@@ -223,35 +220,7 @@
return status;
} /* ocfs_read_system_file */
-
/*
- * ocfs_write_system_file()
- * This should disappear actually -- we oughta just use ocfs_write_bhs
- *
- * NOTE: 'Length' and 'Offset' are essentially ignored -- the
- * entire buffer_head array is written out to disk!
- */
-int ocfs_write_system_file (ocfs_super * osb, __u64 FileId, struct buffer_head *bhs[], __u64 Length, __u64 Offset)
-{
- int status = 0;
- __u32 numblocks;
-
- LOG_ENTRY_ARGS ("(FileId = %u)\n", FileId);
-
- if (Offset != 0)
- LOG_ERROR_STR("Asked to write at non zero offset, but we" \
- " don't support that yet!");
-
- numblocks = (Length + 511) >> 9;
- status = ocfs_write_bhs(osb, bhs, numblocks, 0, NULL);
- if (status < 0)
- LOG_ERROR_STATUS (status);
-
- LOG_EXIT_STATUS (status);
- return status;
-} /* ocfs_write_system_file */
-
-/*
* ocfs_file_to_disk_off()
*
*/
@@ -275,7 +244,7 @@
goto leave;
}
- fe = (ocfs_file_entry *) OCFS_BH_GET_DATA(fe_bh);
+ fe = (ocfs_file_entry *) OCFS_BH_GET_DATA_READ(fe_bh); /* read */
if (!IS_VALID_FILE_ENTRY (fe)) {
LOG_ERROR_STATUS(status = -EINVAL);
@@ -295,7 +264,7 @@
/* Return the disk offset of first run . */
StartOffset = (IoRuns[0].disk_off);
- leave:
+leave:
if (fe_bh) {
OCFS_BH_PUT_DATA(fe_bh);
brelse(fe_bh);
@@ -340,7 +309,7 @@
goto leave;
}
- fe = (ocfs_file_entry *) OCFS_BH_GET_DATA(fe_bh);
+ fe = (ocfs_file_entry *) OCFS_BH_GET_DATA_READ(fe_bh); /* read */
if (!IS_VALID_FILE_ENTRY (fe)) {
LOG_ERROR_ARGS("offset=%u.%u", HILO (offset));
@@ -369,7 +338,7 @@
* of course, if you've already read it off disk, then give us fe_bh to avoid
* an extra read. We always do the write out of the new fe.
*/
-int ocfs_extend_system_file (ocfs_super * osb, __u32 FileId, __u64 FileSize, struct buffer_head *fe_bh, ocfs_journal_handle *handle)
+int ocfs_extend_system_file (ocfs_super * osb, __u32 FileId, __u64 FileSize, struct buffer_head *fe_bh, ocfs_journal_handle *handle, bool zero)
{
int status = 0;
__u64 actualDiskOffset = 0, actualLength = 0;
@@ -377,6 +346,10 @@
bool local_fe = false;
ocfs_file_entry *fe = NULL;
int flags = OCFS_BH_COND_CACHED;
+ __u64 alloc_size;
+ int numbhs, i;
+ char *data;
+ struct buffer_head **bhs;
LOG_ENTRY_ARGS ("(FileId = %u, Size = %u.%u)\n", FileId, HI (FileSize),
LO (FileSize));
@@ -397,33 +370,33 @@
goto leave;
}
}
- fe = (ocfs_file_entry *) OCFS_BH_GET_DATA(fe_bh);
+ fe = (ocfs_file_entry *) OCFS_BH_GET_DATA_READ(fe_bh); /* read */
if (!IS_VALID_FILE_ENTRY (fe)) {
+ OCFS_BH_PUT_DATA(fe_bh);
LOG_ERROR_STATUS (status = -EINVAL);
goto leave;
}
+ alloc_size = fe->alloc_size;
+ OCFS_BH_PUT_DATA(fe_bh);
+ fe = NULL;
if (handle) {
- OCFS_BH_PUT_DATA(fe_bh);
status = ocfs_journal_access(handle, fe_bh,
OCFS_JOURNAL_ACCESS_WRITE);
- fe = (ocfs_file_entry *) OCFS_BH_GET_DATA(fe_bh);
if (status < 0) {
LOG_ERROR_STATUS (status);
goto leave;
}
}
- if (FileSize <= fe->alloc_size) {
- fe->file_size = FileSize;
- } else {
+ if (FileSize > alloc_size) {
/* We need to allocate from bitmap */
__u64 numClusterAlloc = 0, BitmapOffset = 0;
status =
ocfs_find_contiguous_space_from_bitmap (osb,
- FileSize - fe->alloc_size,
+ FileSize - alloc_size,
&BitmapOffset,
&numClusterAlloc, true,
NULL);
@@ -438,65 +411,53 @@
actualLength =
(__u64) (numClusterAlloc * osb->vol_layout.cluster_size);
-#ifdef ZERO_METADATA_BLOCKS
- /* zero the entire metadata block! */
- {
- int nbhs, bufsize, j;
- __u64 iosize;
- struct buffer_head **bhs = NULL;
- char *mem;
+ status = ocfs_allocate_extent (osb, NULL, fe_bh, handle,
+ actualDiskOffset, actualLength, NULL);
+ if (status < 0) {
+ LOG_ERROR_STATUS (status);
+ goto leave;
+ }
+
+ if (zero) {
+ numbhs = actualLength >> 9;
- iosize = OCFS_ALIGN(actualLength, 512);
- nbhs = (int) (iosize >> 9);
- bufsize = nbhs * sizeof(struct buffer_head *);
- LOG_TRACE_ARGS("about to zero out %d new metadata blocks, newlen=%u.%u\n",
- nbhs, iosize);
-
- bhs = (struct buffer_head **)ocfs_malloc(bufsize);
- if (bhs == NULL) {
- LOG_ERROR_STATUS (status = -ENOMEM);
+ bhs = ocfs_malloc(numbhs*sizeof(struct buffer_head *));
+ if (!bhs) {
+ status = -ENOMEM;
+ LOG_ERROR_STATUS(status);
goto leave;
}
- memset(bhs, 0, bufsize);
- status = ocfs_read_bhs(osb, actualDiskOffset, iosize, bhs, 0, NULL);
+ memset(bhs, 0, numbhs * sizeof(struct buffer_head *));
+
+ status = ocfs_read_bhs(osb, actualDiskOffset,
+ actualLength, bhs, 0, NULL);
if (status < 0) {
- ocfs_safefree(bhs);
- LOG_ERROR_STATUS (status);
+ ocfs_free(bhs);
+ LOG_ERROR_STATUS(status);
goto leave;
}
- for (j=0; j<nbhs; j++) {
- mem = OCFS_BH_GET_DATA(bhs[j]);
- memset(mem, 0, 512);
- OCFS_BH_PUT_DATA(bhs[j]);
+
+ for(i = 0; i < numbhs; i++) {
+ data = OCFS_BH_GET_DATA_WRITE(bhs[i]);
+ memset(data, 0, 512);
+ OCFS_BH_PUT_DATA(bhs[i]);
}
- LOG_TRACE_STR("writing zeroed blocks now");
-
- status = ocfs_write_bhs(osb, bhs, nbhs, 0, NULL);
- for (j=0; j<nbhs; j++)
- brelse(bhs[j]);
- ocfs_safefree(bhs);
+ status = ocfs_write_bhs(osb, bhs, numbhs, 0, NULL);
+ for(i = 0; i < numbhs; i++)
+ brelse(bhs[i]);
+ ocfs_free(bhs);
if (status < 0) {
- LOG_ERROR_STATUS (status);
+ LOG_ERROR_STATUS(status);
goto leave;
}
}
-#endif
-
- OCFS_BH_PUT_DATA(fe_bh);
- fe = NULL;
-
- status = ocfs_allocate_extent (osb, NULL, fe_bh, handle,
- actualDiskOffset, actualLength, NULL);
- if (status < 0) {
- LOG_ERROR_STATUS (status);
- goto leave;
- }
+ } else
+ actualLength = 0;
- fe = (ocfs_file_entry *) OCFS_BH_GET_DATA(fe_bh);
- fe->alloc_size += actualLength;
- fe->file_size = FileSize;
- }
+ fe = (ocfs_file_entry *) OCFS_BH_GET_DATA_WRITE(fe_bh); /* write */
+ fe->alloc_size += actualLength;
+ fe->file_size = FileSize;
if (!bWriteThru) {
DISK_LOCK_CURRENT_MASTER (fe) = osb->node_num;
@@ -508,7 +469,7 @@
if (handle)
status = ocfs_journal_dirty(handle, fe_bh);
else
- status = ocfs_write_bh(osb, fe_bh, flags, NULL);
+ status = ocfs_write_bh(osb, fe_bh, 0, NULL);
if (status < 0)
LOG_ERROR_STATUS (status);
@@ -655,7 +616,7 @@
goto leave;
}
- extent = (ocfs_extent_group *) OCFS_BH_GET_DATA(extent_bh);;
+ extent = (ocfs_extent_group *) OCFS_BH_GET_DATA_READ(extent_bh); /* read */
while (extent->type != OCFS_EXTENT_DATA) {
__u64 diskoffset;
@@ -686,7 +647,7 @@
LOG_ERROR_STATUS (status);
goto leave;
}
- extent = (ocfs_extent_group *) OCFS_BH_GET_DATA(extent_bh);
+ extent = (ocfs_extent_group *) OCFS_BH_GET_DATA_READ(extent_bh); /* read */
}
searchVbo = newOffset;
@@ -818,7 +779,7 @@
HILO (allocSize), HILO (neededSize));
status = ocfs_extend_system_file (osb,
(OCFS_FILE_VOL_META_DATA + osb->node_num),
- neededSize, NULL, NULL);
+ neededSize, NULL, NULL, false);
if (status < 0) {
LOG_ERROR_STATUS (status);
goto leave;
Modified: trunk/src/util.c
===================================================================
--- trunk/src/util.c 2003-12-18 23:28:02 UTC (rev 14)
+++ trunk/src/util.c 2004-01-24 01:22:15 UTC (rev 15)
@@ -264,24 +264,11 @@
if (oin != NULL)
*oin = NULL;
- if (inode_data_is_oin (inode)) {
- ocfs_inode *f = GET_INODE_OIN(inode);
+ if (oin && inode_data_is_oin (inode))
+ *oin = GET_INODE_OIN(inode);
+ *off = GET_INODE_OFFSET (inode);
- if (f == NULL) {
- LOG_ERROR_STR ("bad inode oin");
- *off = -1;
- return false;
- } else {
- if (oin != NULL)
- *oin = f;
- if (S_ISDIR (inode->i_mode))
- *off = f->dir_disk_off;
- else
- *off = f->file_disk_off;
- }
- } else {
- *off = GET_INODE_OFFSET (inode);
- }
+ LOG_TRACE_ARGS("offset=%u.%u, i_ino=%lu\n", HILO((*off)), inode->i_ino); /* i_ino is unsigned long */
return (*off != -1);
} /* ocfs_linux_get_inode_offset */
@@ -303,7 +290,7 @@
status = ocfs_find_files_on_disk (osb, parentOff, fileName, &ent_bh, NULL, parent_inode);
if (status >= 0) {
- ent = (ocfs_file_entry *) OCFS_BH_GET_DATA(ent_bh);
+ ent = (ocfs_file_entry *) OCFS_BH_GET_DATA_READ(ent_bh); /* read */
*off = ent->this_sector;
OCFS_BH_PUT_DATA(ent_bh);
} else
@@ -360,3 +347,37 @@
truncate_inode_pages(&inode->i_data, off);
#endif
} /* ocfs_truncate_inode_pages */
+
+/*
+ * ocfs_end_buffer_io_sync()
+ *
+ * Buffer I/O completion handler: logs -EIO when the I/O failed, records
+ * the result in the buffer's uptodate state and unlocks the buffer.
+ * NOTE(review): the (bh, uptodate) signature suggests it is installed as
+ * a buffer_head end-io callback -- confirm against the callers.
+ */
+void ocfs_end_buffer_io_sync(struct buffer_head *bh, int uptodate)
+{
+// LOG_ENTRY_ARGS("(bh->b_blocknr = %u, uptodate = %d)\n", bh->b_blocknr,
+// uptodate);
+
+ if (!uptodate)
+ LOG_ERROR_STATUS(-EIO);
+
+ /* mark_buffer_uptodate() is only used on the non-LINUX_2_5 side elsewhere
+  * in this tree; LINUX_2_5 builds use set/clear_buffer_uptodate() */
+#ifdef LINUX_2_5
+ if (uptodate)
+ set_buffer_uptodate(bh);
+ else
+ clear_buffer_uptodate(bh);
+#else
+ mark_buffer_uptodate(bh, uptodate);
+#endif
+ unlock_buffer(bh);
+ VERBOSE_UNLOCK_BUFFER_STR(bh);
+
+// LOG_EXIT();
+ return;
+}
+
Modified: trunk/src/volcfg.c
===================================================================
--- trunk/src/volcfg.c 2003-12-18 23:28:02 UTC (rev 14)
+++ trunk/src/volcfg.c 2004-01-24 01:22:15 UTC (rev 15)
@@ -61,13 +61,14 @@
/* Obtain the volume for which we need to reiterate the lock */
osb = cfg_task->osb;
- //buffer = cfg_task->buffer;
bh = cfg_task->bh;
length = osb->sect_size;
offset = cfg_task->lock_off;
/* Write the sector back */
- status = ocfs_write_bh(osb, bh, 0, NULL);
+ /* NOTE: another thread owns this bh! */
+ /* we *must* pass OCFS_BH_CONCURRENT_WRITE here */
+ status = ocfs_write_bh(osb, bh, OCFS_BH_CONCURRENT_WRITE, NULL);
if (status < 0) {
LOG_ERROR_STATUS (status);
/* deliberate no exit jump here */
@@ -150,7 +151,7 @@
/* Check if preferred node num is available */
node_num = OCFS_INVALID_NODE_NUM;
if (pref_node_num >= 0 && pref_node_num < OCFS_MAXIMUM_NODES) {
- p = OCFS_BH_GET_DATA(cfg_bhs[pref_node_num]);
+ p = OCFS_BH_GET_DATA_READ(cfg_bhs[pref_node_num]); /* read */
disk_node = (ocfs_disk_node_config_info *)p;
if (disk_node->node_name[0] == '\0')
node_num = pref_node_num;
@@ -160,7 +161,7 @@
/* if not, find the first available empty slot */
if (node_num == OCFS_INVALID_NODE_NUM) {
for (node_num = 0; node_num < OCFS_MAXIMUM_NODES; node_num++) {
- p = OCFS_BH_GET_DATA(cfg_bhs[node_num]);
+ p = OCFS_BH_GET_DATA_READ(cfg_bhs[node_num]); /* read */
disk_node = (ocfs_disk_node_config_info *) p;
if (disk_node->node_name[0] == '\0')
done = true;
@@ -179,7 +180,7 @@
}
/* Copy the new nodecfg into the memory buffer */
- p = OCFS_BH_GET_DATA(cfg_bhs[node_num]);
+ p = OCFS_BH_GET_DATA_WRITE(cfg_bhs[node_num]); /* write */
memcpy (p, new_disk_node, sect_size);
OCFS_BH_PUT_DATA(cfg_bhs[node_num]);
@@ -197,7 +198,7 @@
goto finally;
}
- finally:
+finally:
for (i = 0; i < OCFS_MAXIMUM_NODES; i++)
if (cfg_bhs[i])
brelse(cfg_bhs[i]);
@@ -236,8 +237,8 @@
goto bail;
}
- hdr = (ocfs_node_config_hdr *) OCFS_BH_GET_DATA(node_cfg_bhs[0]);
- hdr_copy = (ocfs_node_config_hdr *) OCFS_BH_GET_DATA(node_cfg_bhs[1]);
+ hdr = (ocfs_node_config_hdr *) OCFS_BH_GET_DATA_WRITE(node_cfg_bhs[0]); /* write */
+ hdr_copy = (ocfs_node_config_hdr *) OCFS_BH_GET_DATA_WRITE(node_cfg_bhs[1]); /* write */
if (op == OCFS_VOLCFG_ADD)
hdr->num_nodes++;
@@ -337,22 +338,15 @@
}
cfg_task->bh = bh;
- //while (1) {
for (i=0; i<50; i++) {
/* Read the volcfg lock sector */
- lock_buffer(bh);
- if (!buffer_dirty(bh)) {
-#ifdef LINUX_2_5
- clear_buffer_uptodate(bh);
-#else
- mark_buffer_uptodate(bh, false);
-#endif
+ status = ocfs_read_bh(osb, lock_off, &bh, 0, NULL);
+ if (status < 0) {
+ LOG_ERROR_STATUS (status);
+ goto finito;
}
- unlock_buffer(bh);
- ll_rw_block(READ, 1, &bh);
- wait_on_buffer(bh);
- lock_buf = OCFS_BH_GET_DATA(bh);
+ lock_buf = OCFS_BH_GET_DATA_WRITE(bh); /* write */
bh_locked = true;
@@ -390,19 +384,13 @@
bh_locked = false;
/* Read the volcfg lock sector again... */
- lock_buffer(bh);
- if (!buffer_dirty(bh)) {
-#ifdef LINUX_2_5
- clear_buffer_uptodate(bh);
-#else
- mark_buffer_uptodate(bh, false);
-#endif
+ status = ocfs_read_bh(osb, lock_off, &bh, 0, NULL);
+ if (status < 0) {
+ LOG_ERROR_STATUS (status);
+ goto finito;
}
- unlock_buffer(bh);
- ll_rw_block(READ, 1, &bh);
- wait_on_buffer(bh);
- lock_buf = OCFS_BH_GET_DATA(bh);
+ lock_buf = OCFS_BH_GET_DATA_WRITE(bh); /* write */
/* If we tried to acquire and we still own it we take it... */
if ((tried_acq) && (memcmp (lock_buf, cfg_buf, sect_size) == 0)) {
@@ -454,7 +442,7 @@
if (i >= 50)
status = -EFAIL;
- finito:
+finito:
ocfs_release_disk_lock (osb, lock_off);
ocfs_safefree (cfg_task);
@@ -499,18 +487,19 @@
goto finally;
}
- buffer = OCFS_BH_GET_DATA(bh);
+ buffer = OCFS_BH_GET_DATA_WRITE(bh); /* write */
/* reset lock... */
memset (buffer, 0, sect_size);
-#ifdef LINUX_2_5
- set_buffer_uptodate(bh);
-#else
- mark_buffer_uptodate(bh, true);
-#endif
- mark_buffer_dirty(bh);
OCFS_BH_PUT_DATA(bh);
+ /* Release the lock */
+ status = ocfs_write_bh(osb, bh, 0, NULL);
+ if (status < 0) {
+ LOG_ERROR_STATUS (status);
+ /* deliberate no exit jump: the lock timer must still be cancelled below */
+ }
+
/* Cancel the timer so that we don't reiterate the lock anymore */
LOG_TRACE_STR ("Waiting for osb->lock_event");
atomic_set (&osb->lock_stop, 1);
@@ -518,6 +507,11 @@
atomic_set (&osb->lock_event_woken, 0);
del_timer_sync(&osb->lock_timer);
+ buffer = OCFS_BH_GET_DATA_WRITE(bh); /* write */
+ /* reset lock... */
+ memset (buffer, 0, sect_size);
+ OCFS_BH_PUT_DATA(bh);
+
/* Release the lock */
status = ocfs_write_bh(osb, bh, 0, NULL);
if (status < 0) {
@@ -525,7 +519,7 @@
goto finally;
}
- finally:
+finally:
if (bh)
brelse(bh);
LOG_EXIT_STATUS (status);
@@ -574,7 +568,7 @@
goto bail;
}
- bail:
+bail:
ocfs_safefree (buffer);
LOG_EXIT_STATUS (status);
@@ -613,7 +607,7 @@
(*node)->journal_version = disk->journal_version;
- bail:
+bail:
LOG_EXIT_STATUS (status);
return status;
} /* ocfs_disknode_to_node */
@@ -646,14 +640,8 @@
goto finally;
}
- buf = OCFS_BH_GET_DATA(bh);
+ buf = OCFS_BH_GET_DATA_WRITE(bh); /* write */
memcpy(buf, disk, osb->sect_size);
-#ifdef LINUX_2_5
- set_buffer_uptodate(bh);
-#else
- mark_buffer_uptodate(bh, true);
-#endif
- mark_buffer_dirty(bh);
OCFS_BH_PUT_DATA(bh);
status = ocfs_write_bh(osb, bh, 0, NULL);
@@ -668,7 +656,7 @@
goto finally;
}
- finally:
+finally:
if (bh)
brelse(bh);
@@ -774,7 +762,7 @@
}
/* 1st block in buffer is the NodeCfgHdr */
- hdr = (ocfs_node_config_hdr *) OCFS_BH_GET_DATA(cfg_bhs[0]);
+ hdr = (ocfs_node_config_hdr *) OCFS_BH_GET_DATA_READ(cfg_bhs[0]); /* read */
if (strncmp (hdr->signature, NODE_CONFIG_HDR_SIGN,
NODE_CONFIG_SIGN_LEN)) {
@@ -805,7 +793,7 @@
int which;
which = i + OCFS_VOLCFG_HDR_SECTORS;
disk = (ocfs_disk_node_config_info *)
- OCFS_BH_GET_DATA(cfg_bhs[which]);
+ OCFS_BH_GET_DATA_READ(cfg_bhs[which]); /* read */
if (disk->node_name[0] == '\0')
goto loop;
@@ -860,7 +848,7 @@
status = -EFAIL;
goto finally;
}
- loop:
+loop:
OCFS_BH_PUT_DATA(cfg_bhs[which]);
continue;
}
@@ -873,7 +861,7 @@
finally:
if (cfg_bhs) {
- if (cfg_bhs[0] && buffer_locked(cfg_bhs[0]))
+ if (cfg_bhs[0])
OCFS_BH_PUT_DATA(cfg_bhs[0]);
for (i = 0; i < numblocks; i++)
if (cfg_bhs[i])
@@ -925,7 +913,7 @@
LOG_TRACE_ARGS ("Node Num: %d\n", osb->node_num);
- bail:
+bail:
LOG_EXIT_STATUS (status);
return status;
} /* ocfs_get_config */
@@ -1009,7 +997,7 @@
goto bail;
}
- bail:
+bail:
ocfs_safefree(buffer);
LOG_EXIT_STATUS (status);
return status;
More information about the Ocfs2-commits
mailing list