[Ocfs2-commits] jlbec commits r1128 - in trunk: . src
svn-commits at oss.oracle.com
svn-commits at oss.oracle.com
Thu Jun 17 17:06:35 CDT 2004
Author: jlbec
Date: 2004-06-17 16:06:33 -0500 (Thu, 17 Jun 2004)
New Revision: 1128
Added:
trunk/src/alloc.h
trunk/src/bitmap.h
trunk/src/buffer_head_io.c
trunk/src/dcache.h
trunk/src/dir.h
trunk/src/dlm.h
trunk/src/extmap.h
trunk/src/file.h
trunk/src/heartbeat.h
trunk/src/inode.h
trunk/src/ioctl.h
trunk/src/journal.h
trunk/src/lockres.h
trunk/src/namei.h
trunk/src/nm.h
trunk/src/ocfs.h
trunk/src/ocfs1_fs_compat.h
trunk/src/ocfs2_fs.h
trunk/src/ocfs_buffer_head.h
trunk/src/ocfs_compat.h
trunk/src/ocfs_journal.h
trunk/src/ocfs_log.h
trunk/src/proc.h
trunk/src/super.h
trunk/src/symlink.h
trunk/src/sysfile.h
trunk/src/util.h
trunk/src/ver.h
trunk/src/volcfg.h
trunk/src/vote.h
Removed:
trunk/src/inc/
trunk/src/io.c
Modified:
trunk/Config.make.in
trunk/TODO
trunk/configure.in
trunk/on_disk_changes
trunk/src/Makefile
trunk/src/alloc.c
trunk/src/bitmap.c
trunk/src/dcache.c
trunk/src/dir.c
trunk/src/dlm.c
trunk/src/extmap.c
trunk/src/file.c
trunk/src/heartbeat.c
trunk/src/inode.c
trunk/src/ioctl.c
trunk/src/journal.c
trunk/src/lockres.c
trunk/src/namei.c
trunk/src/nm.c
trunk/src/proc.c
trunk/src/super.c
trunk/src/symlink.c
trunk/src/sysfile.c
trunk/src/util.c
trunk/src/ver.c
trunk/src/volcfg.c
trunk/src/vote.c
Log:
o Land format-changes branch to trunk.
- Complete reorganization of the disk format.
- See http://oss.oracle.com/projects/ocfs2/documentation/ocfs2-whats-new.txt for details.
Modified: trunk/Config.make.in
===================================================================
--- trunk/Config.make.in 2004-06-17 16:59:59 UTC (rev 1127)
+++ trunk/Config.make.in 2004-06-17 21:06:33 UTC (rev 1128)
@@ -60,7 +60,6 @@
OCFS_LARGEIO = @OCFS_LARGEIO@
OCFS_AIO = @OCFS_AIO@
OCFS_MEMDEBUG = @OCFS_MEMDEBUG@
-OCFS_DBG_LOCKRES = @OCFS_DBG_LOCKRES@
OCFS_TRACE = @OCFS_TRACE@
OCFS_PROCESSOR = @OCFS_PROCESSOR@
Modified: trunk/TODO
===================================================================
--- trunk/TODO 2004-06-17 16:59:59 UTC (rev 1127)
+++ trunk/TODO 2004-06-17 21:06:33 UTC (rev 1128)
@@ -1,5 +1,6 @@
* figure out what to do about old style cdsl. Either we error when we see it
(in the module), or we provide an fsck to find and fix them.
+ - Probably should be handled by the conversion.
* fsck must be able to replay the journal
@@ -13,13 +14,6 @@
* Make bitmap free functions do their job without relocking the bitmaps for
each record.
-* get rid of osb->curr_trans_id as it was never used (always zero)
-
-* get rid of all the:
- changeSeqNum = osb->curr_trans_id;
- DISK_LOCK_SEQNUM(fe) = changeSeqNum;
- code as it is equally useless.
-
* make slabs for: ocfs_journal_handle, and ocfs_journal_copyout and maybe
ocfs_journal_lock
@@ -36,6 +30,8 @@
* Local alloc structure needs to be turned into a file entry so creating an
inode for it is trivial. This way we can use the inodes locking primitives
instead of special casing stuff and using osb->local_alloc_sem.
+ - It's an inode now, but we need to finish making it use inode
+ locking primitives.
* We need to be able to delete stuff from the inode alloc system file(s).
Modified: trunk/configure.in
===================================================================
--- trunk/configure.in 2004-06-17 16:59:59 UTC (rev 1127)
+++ trunk/configure.in 2004-06-17 21:06:33 UTC (rev 1128)
@@ -60,6 +60,9 @@
i386|i486|i586|i686|i786|k6|k7)
OCFS_PROCESSOR="i686"
;;
+ s390x)
+ OCFS_PROCESSOR="s390x"
+ ;;
esac
AC_SUBST(OCFS_PROCESSOR)
@@ -107,15 +110,6 @@
AC_SUBST(OCFS_MEMDEBUG)
AC_MSG_RESULT($enable_memdebug)
-AC_MSG_CHECKING(for lockres debugging)
-AC_ARG_ENABLE(lockres-debug, [ --enable-lockres-debug=[yes/no] Turn on lock resource debugging [default=no]],,enable_lockres_debug=no)
-OCFS_DBG_LOCKRES=
-if test "x$enable_lockres_debug" = "xyes"; then
- OCFS_DBG_LOCKRES=yes
-fi
-AC_SUBST(OCFS_DBG_LOCKRES)
-AC_MSG_RESULT($enable_lockres_debug)
-
AC_MSG_CHECKING(for tracing)
AC_ARG_ENABLE(trace, [ --enable-trace=[yes/no] Turn on tracing [default=yes]],,enable_trace=yes)
OCFS_TRACE=
Modified: trunk/on_disk_changes
===================================================================
--- trunk/on_disk_changes 2004-06-17 16:59:59 UTC (rev 1127)
+++ trunk/on_disk_changes 2004-06-17 21:06:33 UTC (rev 1128)
@@ -10,3 +10,12 @@
Dir index became a used/unused bytemap for file entries in a dir node
File entry for bitmap (potentially)
added fe_off field to publish/vote stuff (net and disk)
+
+All fe fields renamed and moved around. There is *no* commonality with
+ocfsv1.
+
+extent_group is now extent_header. There is no longer a difference
+between EXTENT_DATA and EXTENT_HEADER. All extent headers have
+EXTENT_HEADER_SIGNATURE. h_tree_depth of -1 means that the extent_recs
+point to actual data. Note that the v1->v2 converter must set
+h_tree_depth to -1 on those blocks, as v1 did not.
Modified: trunk/src/Makefile
===================================================================
--- trunk/src/Makefile 2004-06-17 16:59:59 UTC (rev 1127)
+++ trunk/src/Makefile 2004-06-17 21:06:33 UTC (rev 1128)
@@ -33,10 +33,6 @@
GLOBAL_DEFINES += -DOCFS_LINUX_MEM_DEBUG -DDEBUG_SLAB_ALLOCS
endif
-ifdef OCFS_DBG_LOCKRES
-GLOBAL_DEFINES += -DOCFS_DBG_LOCKRES
-endif
-
ifdef OCFS_AIO
GLOBAL_DEFINES += -DAIO_ENABLED
endif
@@ -99,7 +95,6 @@
endif
ifeq ($(OCFS_PROCESSOR),ppc64)
- GLOBAL_DEFINES += -D__LP64__
MACH_CFLAGS += -m64 -fsigned-char -fno-builtin -msoft-float -mminimal-toc
LDADD += -m elf64ppc
endif
@@ -114,40 +109,67 @@
ifeq ($(OCFS_PROCESSOR),i586)
GLOBAL_DEFINES += -D__ILP32__
endif
+ifeq ($(OCFS_PROCESSOR),s390x)
+ GLOBAL_DEFINES += -D__LP64__
+endif
DEFINES += $(BASE_DEFINES) $(GLOBAL_DEFINES)
-CFILES = \
- alloc.c \
- bitmap.c \
- dcache.c \
- dir.c \
- dlm.c \
- extmap.c \
- file.c \
- heartbeat.c \
- inode.c \
- io.c \
- ioctl.c \
- journal.c \
- lockres.c \
- namei.c \
- nm.c \
- proc.c \
- super.c \
- symlink.c \
- sysfile.c \
- util.c \
- ver.c \
- volcfg.c \
+CFILES = \
+ alloc.c \
+ bitmap.c \
+ buffer_head_io.c \
+ dcache.c \
+ dir.c \
+ dlm.c \
+ extmap.c \
+ file.c \
+ heartbeat.c \
+ inode.c \
+ ioctl.c \
+ journal.c \
+ lockres.c \
+ namei.c \
+ nm.c \
+ proc.c \
+ super.c \
+ symlink.c \
+ sysfile.c \
+ util.c \
+ ver.c \
+ volcfg.c \
vote.c
-HFILES = \
- inc/ocfs.h \
- inc/ocfs_log.h \
- inc/ocfs_journal.h \
- inc/io.h \
- inc/proto.h
+HFILES = \
+ ocfs2_fs.h \
+ ocfs1_fs_compat.h \
+ ocfs.h \
+ ocfs_log.h \
+ ocfs_compat.h \
+ ocfs_journal.h \
+ ocfs_buffer_head.h \
+ alloc.h \
+ bitmap.h \
+ dcache.h \
+ dir.h \
+ dlm.h \
+ extmap.h \
+ file.h \
+ heartbeat.h \
+ inode.h \
+ ioctl.h \
+ journal.h \
+ lockres.h \
+ namei.h \
+ nm.h \
+ proc.h \
+ super.h \
+ symlink.h \
+ sysfile.h \
+ util.h \
+ ver.h \
+ volcfg.h \
+ vote.h
OBJS = $(subst .c,.o,$(CFILES))
@@ -162,11 +184,14 @@
ifndef KERNEL_26
# Building for a 2.4.x kernel
+# Make dependencies work
+$(OBJS): $(HFILES)
+
ifneq ($(OCFS_PROCESSOR),x86_64)
WARNINGS += -Wmissing-prototypes -Wmissing-declarations
endif
-INCLUDES = -Iinc -I$(KERNELINC) -I$(GCCINC)
+INCLUDES = -I. -I$(KERNELINC) -I$(GCCINC)
CFLAGS = $(OPTS) $(MACH_CFLAGS) -pipe -nostdinc -fno-strict-aliasing \
-fno-common -fomit-frame-pointer $(MODVERSIONS) $(WARNINGS)
@@ -194,12 +219,7 @@
clean-ocfs:
rm -f *.o *.p *.s
-DIST_RULES = dist-subdircreate
-dist-subdircreate:
- $(TOPDIR)/mkinstalldirs $(DIST_DIR)/inc
-
-
else # ifndef KERNEL_26
# The 2.6.x kernel makefile
Modified: trunk/src/alloc.c
===================================================================
--- trunk/src/alloc.c 2004-06-17 16:59:59 UTC (rev 1127)
+++ trunk/src/alloc.c 2004-06-17 21:06:33 UTC (rev 1128)
@@ -1,4 +1,6 @@
-/*
+/* -*- mode: c; c-basic-offset: 8; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
* alloc.c
*
* Extent allocs and frees
@@ -24,50 +26,71 @@
* Manish Singh, Neeraj Goyal, Suchit Kaura
*/
-#ifdef __KERNEL__
+#include "ocfs_compat.h"
+
+#include <linux/fs.h>
#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/highmem.h>
-#include "inc/ocfs_log.h"
-#include "inc/ocfs.h"
-#include "inc/ocfs_journal.h"
-#else
-#include <debugocfs.h>
-#endif
+#include "ocfs_log.h"
+#include "ocfs.h"
+#include "alloc.h"
+#include "bitmap.h"
+#include "dlm.h"
+#include "extmap.h"
+#include "inode.h"
+#include "util.h"
+#include "sysfile.h"
+#include "file.h"
+
+#include "ocfs_journal.h"
+#include "ocfs_buffer_head.h"
+
#ifndef USERSPACE_TOOL
/* Tracing */
#define OCFS_DEBUG_CONTEXT OCFS_DEBUG_CONTEXT_ALLOC
-static int ocfs_kill_this_tree(ocfs_super *osb, struct buffer_head *extent_grp_bh,
- ocfs_journal_handle *handle, struct inode *inode);
-static int ocfs_allocate_new_data_node (ocfs_super * osb,
- ocfs_file_entry * FileEntry,
- __u64 actualDiskOffset, __u64 actualLength,
- struct buffer_head * extent_header_bh,
- __u64 * NewExtentOffset,
- ocfs_journal_handle *handle, struct inode *inode);
+static int ocfs_kill_this_tree(ocfs_super *osb,
+ struct buffer_head *extent_grp_bh,
+ ocfs_journal_handle *handle,
+ struct inode *inode);
+static int ocfs_allocate_new_data_node(ocfs_super *osb,
+ ocfs2_dinode *fe,
+ u32 new_clusters,
+ struct buffer_head *eb_bh,
+ u64 *new_eb_blkno,
+ ocfs_journal_handle *handle,
+ struct inode *inode);
-static int ocfs_grow_extent_tree (ocfs_super * osb, struct buffer_head *fe_bh,
- ocfs_journal_handle *handle, __u64 disk_off,
- __u64 length, struct inode *inode);
+static int ocfs_grow_extent_tree(ocfs_super *osb,
+ struct buffer_head *fe_bh,
+ ocfs_journal_handle *handle,
+ u64 blkno,
+ u32 new_clusters, struct inode *inode);
-static int _squish_extent_entries(ocfs_super *osb, ocfs_alloc_ext *extarr,
- __u8 *freeExtent,
+static int _squish_extent_entries(ocfs_super *osb,
+ ocfs2_extent_rec *extarr,
+ __u16 *freeExtent,
ocfs_journal_handle *handle,
- __u64 FileSize, int flag,
+ u32 num_clusters, int flag,
struct inode *inode);
-static int ocfs_fix_extent_group(ocfs_super *osb, struct buffer_head *group_bh, struct inode *inode);
+static int ocfs_fix_extent_block(ocfs_super *osb,
+ struct buffer_head *eb_bh,
+ struct inode *inode);
static int ocfs_split_this_tree(ocfs_super * osb,
- struct buffer_head *extent_grp_bh,
+ struct buffer_head *eb_bh,
ocfs_journal_handle *handle,
- ocfs_file_entry *fe,
+ ocfs2_dinode *fe,
struct inode *inode);
-static int ocfs_update_last_ext_ptr(ocfs_super *osb, ocfs_file_entry *fe, struct inode *inode);
+static int ocfs_update_last_eb_blk(ocfs_super *osb, ocfs2_dinode *fe,
+ struct inode *inode);
static int ocfs_free_vol_block (ocfs_super * osb, ocfs_journal_handle *handle,
ocfs_free_rec * FreeLog, __u32 NodeNum,
@@ -86,13 +109,27 @@
ocfs_bitmap_free_head **f,
struct buffer_head *local_alloc_bh,
int in_recovery);
-static __u32 ocfs_alloc_count_bits(ocfs_local_alloc *alloc);
-static void ocfs_clear_local_alloc(ocfs_local_alloc *alloc);
-static int ocfs_find_space_from_local(ocfs_super *osb, __u32 bitswanted,
+static __u32 ocfs_alloc_count_bits(ocfs2_dinode *alloc);
+static void ocfs_clear_local_alloc(ocfs2_dinode *alloc);
+static int ocfs_find_space_from_local(ocfs_super *osb, __u32 bitswanted,
__u64 * bitoff, __u64 * bitcount,
ocfs_journal_handle *handle);
-static int ocfs_local_find_clear_bits(ocfs_super *osb, ocfs_local_alloc *alloc, __u32 numbits);
+static int ocfs_local_find_clear_bits(ocfs_super *osb,
+ ocfs2_dinode *alloc,
+ __u32 numbits);
+static int ocfs_extent_contig(struct inode *inode, ocfs2_extent_rec *ext,
+ u64 blkno);
+static int ocfs_extent_contig(struct inode *inode, ocfs2_extent_rec *ext,
+ u64 blkno)
+{
+ u64 start_blkno = ext->e_blkno;
+
+ start_blkno += ocfs_clusters_to_blocks(inode->i_sb,
+ ext->e_clusters);
+ return (start_blkno == blkno);
+}
+
/* this is a good candidate for an inline function. */
int ocfs_process_bitmap_free_head(ocfs_super *osb, ocfs_bitmap_free_head *f)
{
@@ -191,7 +228,6 @@
struct inode **extnode_inode = NULL;
struct inode *vol_inode = NULL;
__u32 tmp_indx;
- __u64 lock_id;
struct buffer_head *globalbh = NULL;
ocfs_journal_handle *handle = NULL;
int credits = 33; /* one for each potential sysfile fe. This
@@ -210,12 +246,12 @@
} while (0)
ALLOC_BLOCK(free_ext_node,
- OCFS_MAXIMUM_NODES * sizeof (ocfs_free_rec *), status);
+ osb->max_nodes * sizeof (ocfs_free_rec *), status);
ALLOC_BLOCK(extnode_inode,
- OCFS_MAXIMUM_NODES * sizeof (struct inode *), status);
+ osb->max_nodes * sizeof (struct inode *), status);
/* init */
- for (i = 0; i < OCFS_MAXIMUM_NODES; i++) {
+ for (i = 0; i < osb->max_nodes; i++) {
free_ext_node[i] = NULL;
extnode_inode[i] = NULL;
}
@@ -251,8 +287,8 @@
}
tmp_log = free_vol_bits;
- credits += 1 + free_log->update[i].length /
- OCFS_BITS_IN_CHUNK;
+ credits += ocfs_blocks_for_bits(osb->sb,
+ free_log->update[i].length);
break;
default:
@@ -289,11 +325,10 @@
/* Get all the locks we need. do global bitmap last to
* preserve lock ordering with extend/create */
- lock_id = (OCFS_FILE_FILE_ALLOC_BITMAP * osb->sect_size) +
- osb->vol_layout.root_int_off;
- for (i = 0; i < OCFS_MAXIMUM_NODES; i++, lock_id += osb->sect_size) {
+ for (i = 0; i < osb->max_nodes; i++) {
if (free_ext_node[i] != NULL) {
- extnode_inode[i] = ocfs_iget(osb, lock_id);
+ extnode_inode[i] =
+ ocfs_get_system_file_inode(osb, EXTENT_ALLOC_BITMAP_SYSTEM_INODE, i);
if (!extnode_inode[i]) {
status = -EINVAL;
LOG_ERROR_STATUS (status);
@@ -318,7 +353,7 @@
}
if (free_vol_bits != NULL) {
- vol_inode = igrab(osb->system_inodes[GLOBAL_BITMAP_SYSTEM_INODE]);
+ vol_inode = ocfs_get_system_file_inode(osb, GLOBAL_BITMAP_SYSTEM_INODE, -1);
if (!vol_inode) {
status = -EINVAL;
LOG_ERROR_STATUS (status);
@@ -326,9 +361,9 @@
}
down_write(&OCFS_I(vol_inode)->ip_io_sem);
- status = ocfs_acquire_lock (osb, OCFS_DLM_EXCLUSIVE_LOCK,
- FLAG_FILE_CREATE,
- &globalbh, vol_inode);
+ status = ocfs_acquire_lock(osb, OCFS_DLM_EXCLUSIVE_LOCK,
+ FLAG_FILE_CREATE,
+ &globalbh, vol_inode);
if (status < 0) {
up_write(&OCFS_I(vol_inode)->ip_io_sem);
iput(vol_inode);
@@ -343,20 +378,21 @@
/* free vol block */
if (free_vol_bits != NULL)
- ocfs_free_vol_block (osb, handle, free_vol_bits, -1,
- DISK_ALLOC_VOLUME);
+ ocfs_free_vol_block(osb, handle, free_vol_bits, -1,
+ DISK_ALLOC_VOLUME);
/* We can potentiallly loose some allocation for dirNodes or extent */
/* nodes but they should not be much... */
- for (i = 0; i < OCFS_MAXIMUM_NODES; i++) {
+ for (i = 0; i < osb->max_nodes; i++) {
if (free_ext_node[i] != NULL)
- ocfs_free_vol_block (osb, handle, free_ext_node[i], i,
- DISK_ALLOC_EXTENT_NODE);
+ ocfs_free_vol_block(osb, handle,
+ free_ext_node[i], i,
+ DISK_ALLOC_EXTENT_NODE);
}
/* release all locks */
if (free_vol_bits) {
- ocfs_file_entry *bm_lock;
+ ocfs2_dinode *bm_lock;
status = ocfs_journal_access(handle, globalbh,
OCFS_JOURNAL_ACCESS_WRITE);
@@ -366,7 +402,8 @@
}
bm_lock = OCFS_BH_GET_DATA_WRITE(globalbh);
- bm_lock->u.bitinfo.used_bits = ocfs_count_bits(&osb->cluster_bitmap);
+ bm_lock->id1.bitmap1.i_used =
+ ocfs_count_bits(osb->sb, &osb->cluster_bitmap);
OCFS_BH_PUT_DATA(globalbh);
status = ocfs_journal_dirty(handle, globalbh);
@@ -385,18 +422,16 @@
ocfs_abort_trans(handle);
if (free_vol_bits) {
- status = ocfs_release_lock (osb, OCFS_DLM_EXCLUSIVE_LOCK,
- FLAG_FILE_CREATE, globalbh,
- vol_inode);
+ status = ocfs_release_lock(osb, OCFS_DLM_EXCLUSIVE_LOCK,
+ FLAG_FILE_CREATE, globalbh,
+ vol_inode);
if (status < 0) {
LOG_ERROR_STATUS (status);
goto finally;
}
}
- lock_id = (OCFS_FILE_FILE_ALLOC_BITMAP * osb->sect_size) +
- osb->vol_layout.root_int_off;
- for (i = 0; i < OCFS_MAXIMUM_NODES; i++, lock_id += osb->sect_size) {
+ for (i = 0; i < osb->max_nodes; i++) {
if (free_ext_node[i] != NULL) {
status = ocfs_release_lock (osb,
OCFS_DLM_EXCLUSIVE_LOCK,
@@ -413,11 +448,11 @@
finally:
if (extnode_inode) {
- for (i = 0; i < OCFS_MAXIMUM_NODES; i++) {
+ for (i = 0; i < osb->max_nodes; i++) {
if (extnode_inode[i]) {
up_write(&OCFS_I(extnode_inode[i])->ip_io_sem);
iput(extnode_inode[i]);
- }
+ }
}
kfree(extnode_inode);
}
@@ -431,7 +466,7 @@
brelse(globalbh);
if (free_ext_node) {
- for (i = 0; i < OCFS_MAXIMUM_NODES; i++) {
+ for (i = 0; i < osb->max_nodes; i++) {
if (free_ext_node[i])
kfree(free_ext_node[i]);
}
@@ -458,10 +493,10 @@
bitmap = &osb->cluster_bitmap;
- bitmapblocks = (OCFS_ALIGN(bitmap->validbits, OCFS_BITS_IN_CHUNK) / OCFS_BITS_IN_CHUNK);
+ bitmapblocks = ocfs_blocks_for_bits(osb->sb, bitmap->validbits);
- status = ocfs_read_bhs(osb, osb->vol_layout.bitmap_off,
- bitmapblocks * osb->sect_size,
+ status = ocfs_read_bhs(osb, osb->bitmap_blkno << osb->sb->s_blocksize_bits,
+ bitmapblocks << osb->sb->s_blocksize_bits,
bitmap->chunk, 0, NULL);
if (status < 0) {
LOG_ERROR_STATUS(status);
@@ -469,7 +504,9 @@
}
for (i = 0; i < freelog->num_updates; i++)
- ocfs_clear_bits(handle, bitmap, freelog->update[i].file_off, freelog->update[i].length);
+ ocfs_clear_bits(osb->sb, handle, bitmap,
+ freelog->update[i].file_off,
+ freelog->update[i].length);
status = 0;
bail:
@@ -481,19 +518,22 @@
* ocfs_free_vol_block()
*
*/
-static int ocfs_free_vol_block (ocfs_super * osb, ocfs_journal_handle *handle, ocfs_free_rec * FreeLog, __u32 NodeNum, __u32 Type)
+static int ocfs_free_vol_block(ocfs_super *osb,
+ ocfs_journal_handle *handle,
+ ocfs_free_rec *FreeLog, __u32 NodeNum,
+ __u32 Type)
{
int status = 0;
__u64 fileSize = 0;
- __u64 offset = 0;
__u64 allocSize = 0;
__u32 foundBit = -1;
__u32 blockSize = 0, blockSizeBits = 0;
- __u32 fileId = 0;
+ int file_type;
__u32 bitmapblocks = 0;
ocfs_alloc_bm AllocBitmap;
ocfs_alloc_bm *tmpbitmap = NULL;
__u32 i;
+ struct inode *inode = NULL;
LOG_ENTRY ();
@@ -506,20 +546,15 @@
switch (Type) {
case DISK_ALLOC_EXTENT_NODE:
- fileId = OCFS_FILE_FILE_ALLOC_BITMAP + NodeNum;
- blockSize = (__u32) osb->vol_layout.file_node_size;
- blockSizeBits = osb->file_alloc_bits;
-
- if (!IS_VALID_NODE_NUM (NodeNum)) {
- LOG_ERROR_STATUS(status = -EINVAL);
- goto leave;
- }
+ file_type = EXTENT_ALLOC_BITMAP_SYSTEM_INODE;
+ blockSize = osb->sb->s_blocksize;
+ blockSizeBits = osb->sb->s_blocksize_bits;
break;
case DISK_ALLOC_INODE:
- fileId = OCFS_INODE_BITMAP + NodeNum;
- blockSize = osb->inode_size;
- blockSizeBits = osb->inode_alloc_bits;
+ file_type = INODE_ALLOC_BITMAP_SYSTEM_INODE;
+ blockSize = osb->sb->s_blocksize;
+ blockSizeBits = osb->sb->s_blocksize_bits;
break;
case DISK_ALLOC_VOLUME:
@@ -536,21 +571,23 @@
goto leave;
}
- /* Read in the bitmap file for the dir alloc and look
- for the required space, if found */
- status = ocfs_get_system_file_size (osb, fileId, &fileSize, &allocSize);
- if (status < 0) {
- LOG_ERROR_STATUS (status);
+ inode = ocfs_get_system_file_inode(osb, file_type, NodeNum);
+ if (!inode) {
+ LOG_ERROR_STATUS (status = -EINVAL);
goto leave;
}
+ fileSize = inode->i_size;
+ allocSize = OCFS_I(inode)->alloc_size;
- ocfs_initialize_bitmap(&AllocBitmap, fileSize * 8, allocSize * 8);
+ ocfs_initialize_bitmap(osb->sb, &AllocBitmap, fileSize * 8,
+ allocSize * 8);
tmpbitmap = &AllocBitmap;
- bitmapblocks = (OCFS_ALIGN(tmpbitmap->validbits, OCFS_BITS_IN_CHUNK) / OCFS_BITS_IN_CHUNK);
+ bitmapblocks = ocfs_blocks_for_bits(osb->sb,
+ tmpbitmap->validbits);
- status = ocfs_read_system_file(osb, fileId, AllocBitmap.chunk,
- bitmapblocks * osb->sect_size,
- offset);
+ status = ocfs_read_system_file(osb, file_type, NodeNum,
+ AllocBitmap.chunk,
+ bitmapblocks << osb->sb->s_blocksize_bits);
if (status < 0) {
LOG_ERROR_STATUS (status);
goto leave;
@@ -563,14 +600,15 @@
}
foundBit = (__u32) (FreeLog->update[i].file_off >> blockSizeBits);
- ocfs_clear_bits(handle, tmpbitmap, foundBit,
+ ocfs_clear_bits(osb->sb, handle, tmpbitmap, foundBit,
(__u32) FreeLog->update[i].length);
}
leave:
if (tmpbitmap)
ocfs_uninitialize_bitmap(tmpbitmap);
-
+ if (inode)
+ iput(inode);
LOG_EXIT_STATUS (status);
return status;
} /* ocfs_free_vol_block */
@@ -579,32 +617,35 @@
/* ocfs_allocate_new_data_node()
*
*/
-static int ocfs_allocate_new_data_node (ocfs_super * osb,
- ocfs_file_entry * FileEntry,
- __u64 actualDiskOffset, __u64 actualLength,
- struct buffer_head * extent_header_bh,
- __u64 * NewExtentOffset, ocfs_journal_handle *handle, struct inode *inode)
+static int ocfs_allocate_new_data_node(ocfs_super *osb,
+ ocfs2_dinode *fe,
+ u32 new_clusters,
+ struct buffer_head *eb_bh,
+ u64 *new_eb_blkno,
+ ocfs_journal_handle *handle,
+ struct inode *inode)
{
int status = 0;
__u8 *buff = NULL;
__u32 k, i;
__u32 depth;
- __u32 allocSize;
- __u64 upHeaderPtr;
+ int allocSize;
+ u64 parent_blk;
__u64 physicalOffset;
+ u64 phys_blkno;
__u64 fileOffset = 0;
- __u64 numSectorsAlloc = 0;
- __u64 lastExtPointer;
- ocfs_extent_group* extent_header = NULL;
- struct buffer_head **header_bhs = NULL;
+ int new_blocks = 0;
+ ocfs2_extent_block *eb = NULL;
+ ocfs2_extent_list *el1, *el2 = NULL;
+ struct buffer_head **eb_bhs = NULL;
struct buffer_head *bh = NULL;
int bh_locked = 0;
int size;
LOG_ENTRY ();
- if (extent_header_bh) {
- status = ocfs_journal_access(handle, extent_header_bh,
+ if (eb_bh) {
+ status = ocfs_journal_access(handle, eb_bh,
OCFS_JOURNAL_ACCESS_WRITE);
if (status < 0) {
@@ -612,169 +653,171 @@
goto finally;
}
- extent_header = OCFS_BH_GET_DATA_WRITE(extent_header_bh);
+ eb = OCFS_BH_GET_DATA_WRITE(eb_bh);
+ el1 = &eb->h_list;
bh_locked = 1;
}
- if (extent_header != NULL) {
- depth = extent_header->granularity;
- upHeaderPtr = extent_header->this_ext;
- } else {
- depth = FileEntry->granularity;
- upHeaderPtr = FileEntry->this_sector;
- }
+ else
+ el1 = &fe->id2.i_list;
- numSectorsAlloc = NUM_SECTORS_IN_LEAF_NODE + depth;
- allocSize = (numSectorsAlloc * osb->sect_size);
+ depth = el1->l_tree_depth;
+ if (eb != NULL)
+ parent_blk = eb->h_blkno;
+ else
+ parent_blk = fe->i_blkno;
+
+ new_blocks = depth + 1;
+ allocSize = new_blocks << osb->sb->s_blocksize_bits;
+
/* allocate contiguous blocks on disk */
- status = ocfs_alloc_node_block (osb, allocSize, &physicalOffset,
- &fileOffset, osb->node_num,
- DISK_ALLOC_EXTENT_NODE, handle);
+ status = ocfs_alloc_node_block(osb, allocSize, &physicalOffset,
+ &fileOffset, osb->node_num,
+ DISK_ALLOC_EXTENT_NODE, handle);
if (status < 0) {
LOG_ERROR_STATUS (status);
goto finally;
}
+ phys_blkno = physicalOffset >> osb->sb->s_blocksize_bits;
- size = sizeof(struct buffer_head *) * numSectorsAlloc;
- header_bhs = ocfs_malloc(size);
- if (header_bhs == NULL) {
+ size = sizeof(struct buffer_head *) * new_blocks;
+ eb_bhs = kmalloc(size, GFP_KERNEL);
+ if (eb_bhs == NULL) {
status = -ENOMEM;
LOG_ERROR_STATUS(status);
goto finally;
}
- memset(header_bhs, 0, size);
+ memset(eb_bhs, 0, size);
- status = ocfs_read_bhs(osb, physicalOffset, numSectorsAlloc * osb->sect_size, header_bhs, OCFS_BH_CACHED, inode);
+ status = ocfs_read_bhs(osb, physicalOffset,
+ (u64)new_blocks << osb->sb->s_blocksize_bits,
+ eb_bhs, OCFS_BH_CACHED, inode);
if (status < 0) {
LOG_ERROR_STATUS (status);
goto finally;
}
/* zero them all out */
- for(i = 0; i < numSectorsAlloc; i++) {
- status = ocfs_journal_access(handle, header_bhs[i],
+ for(i = 0; i < new_blocks; i++) {
+ status = ocfs_journal_access(handle, eb_bhs[i],
OCFS_JOURNAL_ACCESS_CREATE);
if (status < 0) {
LOG_ERROR_STATUS(status);
goto finally;
}
- buff = OCFS_BH_GET_DATA_WRITE(header_bhs[i]);
- memset(buff, 0, osb->sect_size);
- set_buffer_uptodate(header_bhs[i]);
- OCFS_BH_PUT_DATA(header_bhs[i]);
+ buff = OCFS_BH_GET_DATA_WRITE(eb_bhs[i]);
+ memset(buff, 0, osb->sb->s_blocksize);
+ set_buffer_uptodate(eb_bhs[i]);
+ OCFS_BH_PUT_DATA(eb_bhs[i]);
}
- if (extent_header != NULL) {
- k = extent_header->next_free_ext;
- extent_header->extents[k].file_off = FileEntry->alloc_size;
- extent_header->extents[k].num_bytes = actualLength;
- extent_header->extents[k].disk_off = physicalOffset;
- extent_header->next_free_ext++;
- } else {
- k = FileEntry->next_free_ext;
- FileEntry->extents[k].file_off = FileEntry->alloc_size;
- FileEntry->extents[k].num_bytes = actualLength;
- FileEntry->extents[k].disk_off = physicalOffset;
- FileEntry->next_free_ext++;
- }
+ k = el1->l_next_free_rec;
+ el1->l_recs[k].e_cpos = fe->i_clusters;
+ el1->l_recs[k].e_clusters = new_clusters;
+ el1->l_recs[k].e_blkno = phys_blkno;
+ el1->l_next_free_rec++;
-
- lastExtPointer = FileEntry->last_ext_ptr;
-
/* Fill in all the headers and the leaf */
for (i = 0; i <= depth; i++) {
- ocfs_extent_group *ext;
- ext = OCFS_BH_GET_DATA_WRITE(header_bhs[i]);
+ ocfs2_extent_block *eb;
- ext->last_ext_ptr = lastExtPointer;
- ext->up_hdr_node_ptr = upHeaderPtr;
- ext->next_free_ext = 1;
- ext->alloc_file_off = fileOffset + (osb->sect_size * i);
- ext->alloc_node = osb->node_num;
- ext->extents[0].file_off = FileEntry->alloc_size;
- ext->extents[0].num_bytes = actualLength;
- ext->this_ext = physicalOffset + (osb->sect_size * i);
+ eb = OCFS_BH_GET_DATA_WRITE(eb_bhs[i]);
+ eb->h_parent_blk = parent_blk;
+ eb->h_suballoc_blkno =
+ (fileOffset >> osb->sb->s_blocksize_bits) + i;
+ eb->h_suballoc_node = osb->node_num;
+ eb->h_blkno =
+ (physicalOffset >> osb->sb->s_blocksize_bits) + i;
+ strcpy(eb->h_signature, OCFS2_EXTENT_BLOCK_SIGNATURE);
+
+ el2 = &eb->h_list;
+ el2->l_count = ocfs2_extent_recs_per_eb(osb->sb);
+ el2->l_next_free_rec = 1;
+ el2->l_recs[0].e_cpos = fe->i_clusters;
+ el2->l_recs[0].e_clusters = new_clusters;
+ el2->l_tree_depth = (depth - 1 - i);
+
if (i != depth) {
/* fill in each header */
- ext->type = OCFS_EXTENT_HEADER;
- ext->extents[0].disk_off = physicalOffset + (osb->sect_size * (i + 1));
- ext->granularity = (depth - 1 - i);
- strcpy (ext->signature, OCFS_EXTENT_HEADER_SIGNATURE);
+ el2->l_recs[0].e_blkno = phys_blkno + (i + 1);
} else {
/* fill in the leaf */
- ext->type = OCFS_EXTENT_DATA;
- ext->extents[0].disk_off = actualDiskOffset;
- ext->curr_sect = 1;
- ext->max_sects = NUM_SECTORS_IN_LEAF_NODE;
- strcpy (ext->signature, OCFS_EXTENT_DATA_SIGNATURE);
- (*NewExtentOffset) = FileEntry->last_ext_ptr = ext->this_ext;
+ el2->l_recs[0].e_blkno = phys_blkno;
+ *new_eb_blkno = fe->i_last_eb_blk =
+ eb->h_blkno;
}
- upHeaderPtr = lastExtPointer = ext->this_ext;
- OCFS_BH_PUT_DATA(header_bhs[i]);
+ parent_blk = eb->h_blkno;
+ OCFS_BH_PUT_DATA(eb_bhs[i]);
}
- for(i = 0; i < numSectorsAlloc; i++) {
- status = ocfs_journal_dirty(handle, header_bhs[i]);
+ for(i = 0; i < new_blocks; i++) {
+ status = ocfs_journal_dirty(handle, eb_bhs[i]);
if (status < 0) {
LOG_ERROR_STATUS (status);
goto finally;
}
}
- if (extent_header != NULL) {
+ if (eb != NULL) {
/* both needed below in for loop */
- __u64 up_hdr_node_ptr = extent_header->up_hdr_node_ptr;
- int granularity = extent_header->granularity;
+ u64 tmp_blk = eb->h_parent_blk;
+ int tree_depth = el1->l_tree_depth;
- OCFS_BH_PUT_DATA(extent_header_bh);
+ OCFS_BH_PUT_DATA(eb_bh);
bh_locked = 0;
- extent_header = NULL;
+ eb = NULL;
- status = ocfs_journal_dirty(handle, extent_header_bh);
+ el1 = &fe->id2.i_list;
+
+ status = ocfs_journal_dirty(handle, eb_bh);
if (status < 0) {
LOG_ERROR_STATUS (status);
goto finally;
}
/* gotta fix up his parent extents now. We totally
- * reuse the extent_header variable now as it's no
- * longer needed for it's original purpose. */
- for (i = granularity + 1; i < FileEntry->granularity; i++) {
+ * reuse the eb and el2 variables now as they're no
+ * longer needed for their original purpose. */
+ for (i = tree_depth + 1; i < el1->l_tree_depth; i++) {
bh = NULL;
- status = ocfs_read_bh (osb, up_hdr_node_ptr, &bh, OCFS_BH_COND_CACHED, inode);
+ status = ocfs_read_bh(osb,
+ tmp_blk << osb->sb->s_blocksize_bits,
+ &bh, OCFS_BH_COND_CACHED,
+ inode);
if (status < 0) {
LOG_ERROR_STATUS (status);
goto finally;
}
status = ocfs_journal_access(handle, bh,
- OCFS_JOURNAL_ACCESS_WRITE);
+ OCFS_JOURNAL_ACCESS_WRITE);
if (status < 0) {
LOG_ERROR_STATUS(status);
goto finally;
}
- extent_header = OCFS_BH_GET_DATA_WRITE(bh);
- if (!IS_VALID_EXTENT_HEADER(extent_header)) {
+ eb = OCFS_BH_GET_DATA_WRITE(bh);
+ if (!IS_VALID_EXTENT_BLOCK(eb)) {
OCFS_BH_PUT_DATA(bh);
brelse(bh);
LOG_ERROR_STATUS (status = -EINVAL);
goto finally;
}
+ el2 = &eb->h_list;
- if (extent_header->next_free_ext == 0) {
+ if (el2->l_next_free_rec == 0) {
OCFS_BH_PUT_DATA(bh);
brelse(bh);
LOG_ERROR_STATUS (status = -EFAIL);
goto finally;
}
- k = extent_header->next_free_ext - 1;
- extent_header->extents[k].num_bytes += actualLength;
+ k = el2->l_next_free_rec - 1;
+ el2->l_recs[k].e_clusters += new_clusters;
- up_hdr_node_ptr = extent_header->up_hdr_node_ptr;
+ tmp_blk = eb->h_parent_blk;
OCFS_BH_PUT_DATA(bh);
status = ocfs_journal_dirty(handle, bh);
@@ -786,17 +829,17 @@
brelse(bh);
}
- k = FileEntry->next_free_ext - 1;
- FileEntry->extents[k].num_bytes += actualLength;
+ k = el1->l_next_free_rec - 1;
+ el1->l_recs[k].e_clusters += new_clusters;
}
finally:
if (bh_locked)
- OCFS_BH_PUT_DATA(extent_header_bh);
- if (header_bhs) {
- for (i = 0; i < numSectorsAlloc; i++)
- if (header_bhs[i])
- brelse(header_bhs[i]);
- ocfs_free(header_bhs);
+ OCFS_BH_PUT_DATA(eb_bh);
+ if (eb_bhs) {
+ for (i = 0; i < new_blocks; i++)
+ if (eb_bhs[i])
+ brelse(eb_bhs[i]);
+ kfree(eb_bhs);
}
LOG_EXIT_STATUS (status);
return status;
@@ -805,41 +848,46 @@
/* ocfs_grow_extent_tree()
*
*/
-static int ocfs_grow_extent_tree (ocfs_super * osb, struct buffer_head *fe_bh, ocfs_journal_handle *handle, __u64 disk_off, __u64 length, struct inode *inode)
+static int ocfs_grow_extent_tree(ocfs_super *osb,
+ struct buffer_head *fe_bh,
+ ocfs_journal_handle *handle,
+ u64 blkno, u32 new_clusters,
+ struct inode *inode)
{
int status = 0;
__s32 k, i;
- ocfs_extent_group *OcfsExtent = NULL;
- ocfs_extent_group *ExtentHeader = NULL;
- ocfs_extent_group *ext = NULL;
+ ocfs2_extent_block *eb1 = NULL;
+ ocfs2_extent_block *eb2 = NULL;
+ ocfs2_extent_list *ebl, *fel;
__u64 physicalOffset;
__u64 fileOffset = 0;
- __u64 upHeaderPtr, lastExtentPtr;
- __u32 AllocSize;
- __u64 new_up_hdr_ptr = 0;
+ u64 phys_blkno, parent_blk, last_eb_blkno;
+ u64 new_parent_blk = 0;
struct buffer_head **bhs = NULL;
int numbhs = 0;
void *buf;
- ocfs_file_entry * fe = NULL, *real_fe = NULL;
+ ocfs2_dinode *fe = NULL, *real_fe = NULL;
- LOG_ENTRY_ARGS("(0x%p, 0x%p, %llu, %llu\n", osb, fe, disk_off, length);
+ LOG_ENTRY_ARGS("(0x%p, 0x%p, %llu, %u\n", osb, fe, blkno,
+ new_clusters);
/* too complicated to deal with both reads and writes to the structure */
/* just save off a copy and replace the fe_bh with the new data at the end */
- fe = ocfs_allocate_file_entry();
+#warning markflar, fix the fe copy
+ fe = kmalloc(osb->sb->s_blocksize, GFP_NOFS);
if (fe == NULL) {
LOG_ERROR_STATUS (status = -ENOMEM);
goto finally;
}
real_fe = OCFS_BH_GET_DATA_READ(fe_bh);
- memcpy(fe, real_fe, osb->sect_size);
+ memcpy(fe, real_fe, osb->sb->s_blocksize);
OCFS_BH_PUT_DATA(fe_bh);
real_fe = NULL;
- AllocSize = ((fe->granularity + 2) * osb->sect_size);
+ fel = &fe->id2.i_list;
+ numbhs = fel->l_tree_depth + 2;
- numbhs = fe->granularity + 2;
- bhs = ocfs_malloc(numbhs * sizeof(*bhs));
+ bhs = kmalloc(numbhs * sizeof(*bhs), GFP_KERNEL);
if (bhs == NULL) {
status = -ENOMEM;
LOG_ERROR_STATUS(status = -ENOMEM);
@@ -849,129 +897,137 @@
/* Allocate the space from the Extent file. This function should */
/* return contigous disk blocks requested. */
- status = ocfs_alloc_node_block (osb, AllocSize, &physicalOffset,
- &fileOffset, osb->node_num,
- DISK_ALLOC_EXTENT_NODE, handle);
+ status = ocfs_alloc_node_block(osb,
+ numbhs << osb->sb->s_blocksize_bits,
+ &physicalOffset, &fileOffset,
+ osb->node_num,
+ DISK_ALLOC_EXTENT_NODE, handle);
if (status < 0) {
LOG_ERROR_STATUS (status);
goto finally;
}
+ phys_blkno = physicalOffset >> osb->sb->s_blocksize_bits;
for (i = 0; i < numbhs; i++) {
- bhs[i] = getblk(OCFS_GET_BLOCKDEV(osb->sb),
- (physicalOffset + i * osb->sect_size) >>
- osb->sect_size_bits,
- osb->sb->s_blocksize);
+ bhs[i] = sb_getblk(osb->sb, phys_blkno + i);
if (bhs[i] == NULL) {
status = -EIO;
LOG_ERROR_STATUS(status);
goto finally;
}
buf = OCFS_BH_GET_DATA_WRITE(bhs[i]);
- memset(buf, 0, osb->sect_size);
+ memset(buf, 0, osb->sb->s_blocksize);
set_buffer_uptodate(bhs[i]);
OCFS_BH_PUT_DATA(bhs[i]);
}
- if (physicalOffset == 0) {
+ if (phys_blkno == 0) {
LOG_ERROR_STATUS(status = -ENOMEM);
goto finally;
}
- OcfsExtent = OCFS_BH_GET_DATA_WRITE(bhs[0]);
+ eb1 = OCFS_BH_GET_DATA_WRITE(bhs[0]);
/* Copy the File Entry information in to the newly allocated sector */
- for (k = 0; k < OCFS_MAX_FILE_ENTRY_EXTENTS; k++) {
- OcfsExtent->extents[k].file_off = fe->extents[k].file_off;
- OcfsExtent->extents[k].num_bytes = fe->extents[k].num_bytes;
- OcfsExtent->extents[k].disk_off = fe->extents[k].disk_off;
+ ebl = &eb1->h_list;
+ for (k = 0; k < fel->l_count; k++) {
+ ebl->l_recs[k].e_cpos = fel->l_recs[k].e_cpos;
+ ebl->l_recs[k].e_clusters = fel->l_recs[k].e_clusters;
+ ebl->l_recs[k].e_blkno = fel->l_recs[k].e_blkno;
}
- OcfsExtent->last_ext_ptr = fe->last_ext_ptr;
- lastExtentPtr = fe->last_ext_ptr;
- OcfsExtent->this_ext = new_up_hdr_ptr = physicalOffset;
- OcfsExtent->alloc_file_off = fileOffset;
- OcfsExtent->alloc_node = osb->node_num;
- OcfsExtent->next_data_ext = 0;
- fe->local_ext = 0;
- fe->granularity++;
+ last_eb_blkno = fe->i_last_eb_blk;
+ eb1->h_blkno =
+ physicalOffset >> osb->sb->s_blocksize_bits;
+ new_parent_blk = eb1->h_blkno;
+ eb1->h_suballoc_blkno =
+ fileOffset >> osb->sb->s_blocksize_bits;
+ eb1->h_suballoc_node = osb->node_num;
+ eb1->h_next_leaf_blk = 0;
+ fel->l_tree_depth++;
- LOG_TRACE_ARGS ("Granularity is: %d\n", fe->granularity);
+ LOG_TRACE_ARGS ("Tree depth is: %d\n", fel->l_tree_depth);
OCFS_BH_PUT_DATA(bhs[0]);
- /* If granularity is zero now, the for loop will not execute. */
- /* First time a file is created ,granularity = -1 and local_ext flag */
- /* is set to true */
+ /* If tree_depth is zero now, the for loop will not execute. */
+ /* First time a file is created, tree_depth = -1 */
- upHeaderPtr = fe->this_sector;
+ parent_blk = fe->i_blkno;
- for (i = 0; i < fe->granularity; i++) {
- ExtentHeader = OCFS_BH_GET_DATA_WRITE(bhs[i]);
+ for (i = 0; i < fel->l_tree_depth; i++) {
+ eb2 = OCFS_BH_GET_DATA_WRITE(bhs[i]);
+ ebl = &eb2->h_list;
- ExtentHeader->type = OCFS_EXTENT_HEADER;
- ExtentHeader->granularity = (fe->granularity - 1) - i;
+ ebl->l_tree_depth = (fel->l_tree_depth - 1) - i;
+ ebl->l_count = ocfs2_extent_recs_per_eb(osb->sb);
- strcpy (ExtentHeader->signature, OCFS_EXTENT_HEADER_SIGNATURE);
+ strcpy(eb2->h_signature, OCFS2_EXTENT_BLOCK_SIGNATURE);
if (i == 0) {
- ExtentHeader->extents[OCFS_MAX_FILE_ENTRY_EXTENTS].disk_off = physicalOffset + osb->sect_size;
- ExtentHeader->extents[OCFS_MAX_FILE_ENTRY_EXTENTS].file_off = fe->alloc_size;
- ExtentHeader->extents[OCFS_MAX_FILE_ENTRY_EXTENTS].num_bytes = length;
+ ebl->l_recs[fel->l_count].e_blkno =
+ phys_blkno + 1;
+ ebl->l_recs[fel->l_count].e_cpos =
+ fe->i_clusters;
+ ebl->l_recs[fel->l_count].e_clusters =
+ new_clusters;
+ ebl->l_next_free_rec = fel->l_count + 1;
- ExtentHeader->next_free_ext = OCFS_MAX_FILE_ENTRY_EXTENTS + 1;
- ExtentHeader->this_ext = physicalOffset;
- ExtentHeader->last_ext_ptr = lastExtentPtr;
- ExtentHeader->up_hdr_node_ptr = upHeaderPtr;
+ eb2->h_blkno =
+ physicalOffset >> osb->sb->s_blocksize_bits;
+ eb2->h_parent_blk = parent_blk;
- upHeaderPtr = ExtentHeader->this_ext;
- lastExtentPtr = ExtentHeader->this_ext;
+ parent_blk = last_eb_blkno = eb2->h_blkno;
} else {
- ExtentHeader->extents[0].disk_off = physicalOffset + (osb->sect_size * (i + 1));
- ExtentHeader->extents[0].file_off = fe->alloc_size;
- ExtentHeader->extents[0].num_bytes = length;
- ExtentHeader->next_free_ext = 1;
- ExtentHeader->alloc_file_off = fileOffset + (osb->sect_size * i);
- ExtentHeader->alloc_node = osb->node_num;
- ExtentHeader->this_ext = physicalOffset + (osb->sect_size * i);
- ExtentHeader->up_hdr_node_ptr = upHeaderPtr;
- ExtentHeader->last_ext_ptr = lastExtentPtr;
+ ebl->l_recs[0].e_blkno = phys_blkno + (i + 1);
+ ebl->l_recs[0].e_cpos = fe->i_clusters;
+ ebl->l_recs[0].e_clusters = new_clusters;
+ ebl->l_next_free_rec = 1;
- upHeaderPtr = ExtentHeader->this_ext;
- lastExtentPtr = ExtentHeader->this_ext;
+ eb2->h_suballoc_blkno =
+ (fileOffset >> osb->sb->s_blocksize_bits) + i;
+ eb2->h_suballoc_node = osb->node_num;
+ eb2->h_blkno =
+ (physicalOffset >> osb->sb->s_blocksize_bits) + i;
+ eb2->h_parent_blk = parent_blk;
+
+ parent_blk = last_eb_blkno = eb2->h_blkno;
}
OCFS_BH_PUT_DATA(bhs[i]);
}
/* Update the Data Segment, which is the last one in our array */
- OcfsExtent = OCFS_BH_GET_DATA_WRITE(bhs[fe->granularity]);
+ eb1 = OCFS_BH_GET_DATA_WRITE(bhs[fel->l_tree_depth]);
+ ebl = &eb1->h_list;
- i = (fe->granularity) ? 0 : OCFS_MAX_FILE_ENTRY_EXTENTS;
+ i = (fel->l_tree_depth) ? 0 : fel->l_count;
- LOG_TRACE_ARGS ("EntryAvailable is: %d\n", OcfsExtent->next_free_ext);
+ LOG_TRACE_ARGS ("EntryAvailable is: %d\n", ebl->l_next_free_rec);
/* For the time being we are assuming that the newly allocated Extent */
/* will have one more entry to accomodate the latest allocation */
- strcpy (OcfsExtent->signature, OCFS_EXTENT_DATA_SIGNATURE);
+ strcpy(eb1->h_signature, OCFS2_EXTENT_BLOCK_SIGNATURE);
+ ebl->l_tree_depth = -1;
+ ebl->l_count = ocfs2_extent_recs_per_eb(osb->sb);
- OcfsExtent->extents[i].file_off = fe->alloc_size;
- OcfsExtent->extents[i].num_bytes = length;
- OcfsExtent->extents[i].disk_off = disk_off;
- OcfsExtent->curr_sect = 1;
- OcfsExtent->max_sects = NUM_SECTORS_IN_LEAF_NODE;
- OcfsExtent->type = OCFS_EXTENT_DATA;
- OcfsExtent->next_free_ext = i + 1;
- OcfsExtent->alloc_file_off = fileOffset + (fe->granularity * osb->sect_size);
- OcfsExtent->alloc_node = osb->node_num;
- OcfsExtent->this_ext = physicalOffset + (fe->granularity * osb->sect_size);
- OcfsExtent->up_hdr_node_ptr = upHeaderPtr;
- OcfsExtent->last_ext_ptr = lastExtentPtr;
- OcfsExtent->next_data_ext = 0;
+ ebl->l_recs[i].e_cpos = fe->i_clusters;
+ ebl->l_recs[i].e_clusters = new_clusters;
+ ebl->l_recs[i].e_blkno = blkno;
+ ebl->l_next_free_rec = i + 1;
- upHeaderPtr = OcfsExtent->this_ext;
- lastExtentPtr = OcfsExtent->this_ext;
+ eb1->h_suballoc_blkno =
+ (fileOffset >> osb->sb->s_blocksize_bits) +
+ fel->l_tree_depth;
+ eb1->h_suballoc_node = osb->node_num;
+ eb1->h_blkno =
+ (physicalOffset >> osb->sb->s_blocksize_bits) +
+ fel->l_tree_depth;
+ eb1->h_parent_blk = parent_blk;
+ eb1->h_next_leaf_blk = 0;
+
+ parent_blk = last_eb_blkno = eb1->h_blkno;
- OCFS_BH_PUT_DATA(bhs[fe->granularity]);
+ OCFS_BH_PUT_DATA(bhs[fel->l_tree_depth]);
/* This needs to be a sync write OR journalled to be safe. */
status = ocfs_write_bhs(osb, bhs, numbhs, 0, inode);
@@ -982,10 +1038,12 @@
/* Update the Previous Last Data Extent with this new Data
* Extent Pointer */
- if (fe->last_ext_ptr != 0) {
+ if (fe->i_last_eb_blk != 0) {
struct buffer_head *bh = NULL;
- status = ocfs_read_bh(osb, fe->last_ext_ptr, &bh, OCFS_BH_COND_CACHED, inode);
+ status = ocfs_read_bh(osb,
+ fe->i_last_eb_blk << osb->sb->s_blocksize_bits,
+ &bh, OCFS_BH_COND_CACHED, inode);
if (status < 0) {
LOG_ERROR_STATUS (status);
goto finally;
@@ -998,15 +1056,16 @@
goto finally;
}
- ext = OCFS_BH_GET_DATA_WRITE(bh);
- if (!IS_VALID_EXTENT_DATA(ext)) {
+ eb1 = OCFS_BH_GET_DATA_WRITE(bh);
+ if (!IS_VALID_EXTENT_BLOCK(eb1) ||
+ (eb1->h_list.l_tree_depth != -1)) {
OCFS_BH_PUT_DATA(bh);
brelse(bh);
LOG_ERROR_STATUS (status = -EINVAL);
goto finally;
}
- ext->next_data_ext = lastExtentPtr;
+ eb1->h_next_leaf_blk = last_eb_blkno;
OCFS_BH_PUT_DATA(bh);
status = ocfs_journal_dirty(handle, bh);
@@ -1017,16 +1076,17 @@
}
}
- /* Update the uphdrptr of the three extents pointed to by fe */
- if (fe->granularity > 0) {
+ /* Update the parent pointer (h_parent_blk) of the extents pointed to by fe */
+ if (fel->l_tree_depth > 0) {
int i;
- __u64 offset;
struct buffer_head *bh = NULL;
status = 0;
- for (i = 0; i < OCFS_MAX_FILE_ENTRY_EXTENTS; ++i) {
- offset = fe->extents[i].disk_off;
- status = ocfs_read_bh(osb, offset, &bh, OCFS_BH_COND_CACHED, inode);
+ for (i = 0; i < fel->l_count; ++i) {
+ status = ocfs_read_bh(osb,
+ fel->l_recs[i].e_blkno << osb->sb->s_blocksize_bits,
+ &bh, OCFS_BH_COND_CACHED,
+ inode);
if (status < 0) {
LOG_ERROR_STATUS(status);
brelse(bh);
@@ -1040,8 +1100,8 @@
goto finally;
}
- ext = OCFS_BH_GET_DATA_WRITE(bh);
- ext->up_hdr_node_ptr = new_up_hdr_ptr;
+ eb1 = OCFS_BH_GET_DATA_WRITE(bh);
+ eb1->h_parent_blk = new_parent_blk;
OCFS_BH_PUT_DATA(bh);
status = ocfs_journal_dirty(handle, bh);
@@ -1053,31 +1113,31 @@
}
/* Clear all the extent information from File Entry */
- for (i = 0; i < OCFS_MAX_FILE_ENTRY_EXTENTS; i++) {
- fe->extents[i].file_off = 0;
- fe->extents[i].num_bytes = 0;
- fe->extents[i].disk_off = 0;
+ for (i = 0; i < fel->l_count; i++) {
+ fel->l_recs[i].e_cpos = 0;
+ fel->l_recs[i].e_clusters = 0;
+ fel->l_recs[i].e_blkno = 0;
}
/* Update the File Entry Extent */
- fe->local_ext = 0;
- LOG_TRACE_ARGS("fe->alloc_size = %llu\n", fe->alloc_size);
- fe->extents[0].file_off = 0;
- fe->extents[0].num_bytes = fe->alloc_size + length;
- fe->extents[0].disk_off = physicalOffset;
- fe->last_ext_ptr = lastExtentPtr;
- fe->next_free_ext = 1;
+ LOG_TRACE_ARGS("fe->i_clusters = %u\n", fe->i_clusters);
+ fel->l_recs[0].e_cpos = 0; /* FIXME: not needed */
+ fel->l_recs[0].e_clusters = fe->i_clusters + new_clusters;
+ fel->l_recs[0].e_blkno = phys_blkno;
+ fel->l_next_free_rec = 1;
+ fe->i_last_eb_blk = last_eb_blkno;
finally:
if (fe) {
real_fe = OCFS_BH_GET_DATA_WRITE(fe_bh);
- memcpy(real_fe, fe, osb->sect_size);
+ memcpy(real_fe, fe, osb->sb->s_blocksize);
OCFS_BH_PUT_DATA(fe_bh);
real_fe = NULL;
- ocfs_release_file_entry(fe);
+ kfree(fe);
}
+#warning Leaking bhs here
LOG_EXIT_STATUS (status);
return (status);
} /* ocfs_grow_extent_tree */
@@ -1087,279 +1147,310 @@
*
* You need to be holding node_alloc_sem!
*/
-int ocfs_allocate_extent (ocfs_super * osb, struct buffer_head *fe_bh, ocfs_journal_handle *handle, __u64 actualDiskOffset, __u64 actualLength, struct inode *inode)
+int ocfs_allocate_extent(ocfs_super *osb, struct buffer_head *fe_bh,
+ ocfs_journal_handle *handle,
+ u64 blkno, u32 new_clusters,
+ struct inode *inode)
{
int status = 0;
int IncreaseTreeDepth = 0;
int k = 0, i;
- ocfs_extent_group *extent = NULL, *extent_header = NULL;
- struct buffer_head *extent_bh = NULL, *extent_header_bh = NULL;
+ ocfs2_extent_block *eb1 = NULL, *eb2 = NULL;
+ ocfs2_extent_list *fel, *el1 = NULL, *el2 = NULL;
+ struct buffer_head *eb1_bh = NULL, *eb2_bh = NULL;
int UpdateParent = 0;
- __u64 newExtentOff, up_ptr;
- ocfs_file_entry * FileEntry = NULL;
+ u64 parent_blk, new_eb_blkno;
+ ocfs2_dinode *fe = NULL;
- LOG_ENTRY_ARGS("(actualDiskOffset=%llu, actualLength=%llu)\n", actualDiskOffset, actualLength);
+ LOG_ENTRY_ARGS("(blkno=%llu, new_clusters=%u, inode=%llu)\n", blkno, new_clusters,
+ GET_INODE_FEOFF(inode));
- FileEntry = OCFS_BH_GET_DATA_WRITE(fe_bh);
- OCFS_ASSERT (FileEntry);
+ fe = OCFS_BH_GET_DATA_WRITE(fe_bh);
+ OCFS_ASSERT(fe);
- if (!IS_VALID_FILE_ENTRY (FileEntry)) {
+ if (!IS_VALID_FILE_ENTRY (fe)) {
LOG_ERROR_STATUS(status = -EINVAL);
goto finally;
}
+ fel = &fe->id2.i_list;
- if (FileEntry->local_ext) {
- LOG_TRACE_STR("Using local extents");
+ if (fel->l_tree_depth < 0) {
+ LOG_TRACE_ARGS("Using local extents: depth=%d, next_free=%u, l_count=%u\n",
+ fel->l_tree_depth, fel->l_next_free_rec, fel->l_count);
/* We are still using the local extents of File Entry */
- if (FileEntry->next_free_ext > OCFS_MAX_FILE_ENTRY_EXTENTS) {
+ if (fel->l_next_free_rec > fel->l_count) {
LOG_ERROR_STATUS(status = -EINVAL);
goto finally;
}
- k = FileEntry->next_free_ext - 1;
- if (k >= 0 && OCFS_EXTENT_MERGEABLE (&FileEntry->extents[k], actualDiskOffset)) {
+ k = fel->l_next_free_rec - 1;
+ if (k >= 0 &&
+ ocfs_extent_contig(inode, &fel->l_recs[k], blkno)) {
/* See if we can merge the extents and just increase the length */
- LOG_TRACE_ARGS ("Using local_ext for extent Entry = %u\n", k);
- FileEntry->extents[k].num_bytes += actualLength;
+ LOG_TRACE_ARGS ("Using local extent for extent Entry = %u\n", k);
+ fel->l_recs[k].e_clusters += new_clusters;
goto finally;
}
/* We cannot merge try to give him the next extent */
- k = FileEntry->next_free_ext;
- if (k != OCFS_MAX_FILE_ENTRY_EXTENTS) {
+ k = fel->l_next_free_rec;
+ if (k != fel->l_count) {
/* file_off for the new extent will be equal
* to the previous allocation size of file */
- FileEntry->extents[k].file_off = FileEntry->alloc_size;
- FileEntry->extents[k].num_bytes = actualLength;
- FileEntry->extents[k].disk_off = actualDiskOffset;
- FileEntry->next_free_ext++;
+ fel->l_recs[k].e_cpos = fe->i_clusters;
+ fel->l_recs[k].e_clusters = new_clusters;
+ fel->l_recs[k].e_blkno = blkno;
+ fel->l_next_free_rec++;
goto finally;
}
/* We have no more room in the fe, must increase
- * granularity */
+ * tree_depth */
IncreaseTreeDepth = 1;
goto increase_depth;
}
LOG_TRACE_STR("Using NON-local extents");
/*** Nonlocal Extents ***/
- if (FileEntry->granularity > 3)
- LOG_ERROR_ARGS ("granularity=%d", FileEntry->granularity);
+ /* This is now less likely with OCFSv2 extent lists */
+ if (fel->l_tree_depth > 3)
+ LOG_ERROR_ARGS ("tree_depth=%d", fel->l_tree_depth);
/* This File is no longer using Local Extents */
IncreaseTreeDepth = 0;
- status = ocfs_read_bh(osb, FileEntry->last_ext_ptr, &extent_bh, OCFS_BH_COND_CACHED, inode);
+ status = ocfs_read_bh(osb,
+ fe->i_last_eb_blk << osb->sb->s_blocksize_bits,
+ &eb1_bh, OCFS_BH_COND_CACHED, inode);
if (status < 0) {
LOG_ERROR_STATUS (status = -EINVAL);
goto finally;
}
- extent = OCFS_BH_GET_DATA_WRITE(extent_bh);
- if (!IS_VALID_EXTENT_DATA(extent)) {
+ eb1 = OCFS_BH_GET_DATA_WRITE(eb1_bh);
+ el1 = &eb1->h_list;
+ if (!IS_VALID_EXTENT_BLOCK(eb1) ||
+ (el1->l_tree_depth != -1)) {
LOG_ERROR_STATUS (status = -EINVAL);
goto finally;
}
- k = extent->next_free_ext - 1;
- LOG_TRACE_ARGS ("Using local_ext for extent Entry = %u\n", k);
- if (extent->next_free_ext < 1)
- LOG_ERROR_ARGS ("next_free_ext=%d", extent->next_free_ext);
+ k = el1->l_next_free_rec - 1;
+ LOG_TRACE_ARGS ("Using local extent for extent Entry = %u\n", k);
+ if (el1->l_next_free_rec < 1)
+ LOG_ERROR_ARGS ("l_next_free_rec=%d",
+ el1->l_next_free_rec);
/* See if we can merge the extents and just increase
* the length */
- if (k >= 0 && OCFS_EXTENT_MERGEABLE (&(extent->extents[k]), actualDiskOffset)) {
- OCFS_BH_PUT_DATA(extent_bh);
+ if (k >= 0 &&
+ ocfs_extent_contig(inode, &(el1->l_recs[k]), blkno)) {
+ OCFS_BH_PUT_DATA(eb1_bh);
- status = ocfs_journal_access(handle, extent_bh,
+ status = ocfs_journal_access(handle, eb1_bh,
OCFS_JOURNAL_ACCESS_WRITE);
if (status < 0) {
LOG_ERROR_STATUS (status);
goto finally;
}
- extent = OCFS_BH_GET_DATA_WRITE(extent_bh);
+ eb1 = OCFS_BH_GET_DATA_WRITE(eb1_bh);
+ el1 = &eb1->h_list;
- extent->extents[k].num_bytes += actualLength;
+ el1->l_recs[k].e_clusters += new_clusters;
status = 0;
UpdateParent = 1;
goto do_update_parent;
}
/* We cannot merge, give him the next extent */
- k = extent->next_free_ext;
+ k = el1->l_next_free_rec;
- if (k != OCFS_MAX_DATA_EXTENTS) {
+ if (k != el1->l_count) {
/* we can just add next extent */
- OCFS_BH_PUT_DATA(extent_bh);
+ OCFS_BH_PUT_DATA(eb1_bh);
- status = ocfs_journal_access(handle, extent_bh,
+ status = ocfs_journal_access(handle, eb1_bh,
OCFS_JOURNAL_ACCESS_WRITE);
if (status < 0) {
LOG_ERROR_STATUS (status);
goto finally;
}
- extent = OCFS_BH_GET_DATA_WRITE(extent_bh);
+ eb1 = OCFS_BH_GET_DATA_WRITE(eb1_bh);
+ el1 = &eb1->h_list;
- extent->extents[k].file_off = FileEntry->alloc_size;
- extent->extents[k].num_bytes = actualLength;
- extent->extents[k].disk_off = actualDiskOffset;
- extent->next_free_ext++;
+ el1->l_recs[k].e_cpos = fe->i_clusters;
+ el1->l_recs[k].e_clusters = new_clusters;
+ el1->l_recs[k].e_blkno = blkno;
+ el1->l_next_free_rec++;
UpdateParent = 1;
} else {
- __u64 up_hdr_node_ptr = 0;
-
/* Read the last extent and keep traversing
* upward till we find a free extent or we are
* at the top and need to create another
* level. */
- if (FileEntry->granularity > 0)
- up_hdr_node_ptr = extent->up_hdr_node_ptr;
+ if (fel->l_tree_depth > 0)
+ parent_blk = eb1->h_parent_blk;
+ else
+ parent_blk = 0;
- for (i = 0; i < FileEntry->granularity; i++) {
+ for (i = 0; i < fel->l_tree_depth; i++) {
/* if we loop back around */
- if (extent_header) {
- OCFS_BH_PUT_DATA(extent_header_bh);
- brelse(extent_header_bh);
- extent_header = NULL;
- extent_header_bh =NULL;
+ if (eb2) {
+ OCFS_BH_PUT_DATA(eb2_bh);
+ brelse(eb2_bh);
+ eb2 = NULL;
+ el2 = NULL;
+ eb2_bh =NULL;
}
- status = ocfs_read_bh (osb, up_hdr_node_ptr, &extent_header_bh, OCFS_BH_COND_CACHED, inode);
+ status = ocfs_read_bh(osb,
+ parent_blk << osb->sb->s_blocksize_bits,
+ &eb2_bh,
+ OCFS_BH_COND_CACHED,
+ inode);
if (status < 0) {
LOG_ERROR_STATUS (status);
goto finally;
}
- extent_header = OCFS_BH_GET_DATA_READ(extent_header_bh);
- if (!IS_VALID_EXTENT_HEADER(extent_header)) {
+ eb2 = OCFS_BH_GET_DATA_READ(eb2_bh);
+ el2 = &eb2->h_list;
+ if (!IS_VALID_EXTENT_BLOCK(eb2) ||
+ (el2->l_tree_depth < 0)) {
LOG_ERROR_STATUS (status = -EINVAL);
goto finally;
}
- if (extent_header->granularity != i ||
- extent_header->next_free_ext > OCFS_MAX_DATA_EXTENTS) {
+ if ((el2->l_tree_depth != i) ||
+ (el2->l_next_free_rec > el2->l_count)) {
LOG_ERROR_STATUS(status = -EINVAL);
goto finally;
}
- if (extent_header->next_free_ext != OCFS_MAX_DATA_EXTENTS)
+ if (el2->l_next_free_rec != el2->l_count)
break;
- up_hdr_node_ptr = extent_header->up_hdr_node_ptr;
- } /* for (i = 0; i < FileEntry->granularity; i++) */
+ parent_blk = eb2->h_parent_blk;
+ } /* for (i = 0; i < fel->l_tree_depth; i++) */
- if (extent_header) {
- OCFS_BH_PUT_DATA(extent_header_bh);
- extent_header = NULL;
+ if (eb2) {
+ OCFS_BH_PUT_DATA(eb2_bh);
+ eb2 = NULL;
+ el2 = NULL;
/* we may still need the bh so don't brelse */
}
/* if we got to the top, then we're at the FE. Check
* if the FE is full -- if so, then we need to
- * increase the granularity. */
- if (i == FileEntry->granularity &&
- FileEntry->next_free_ext == OCFS_MAX_FILE_ENTRY_EXTENTS) {
+ * increase the tree_depth. */
+ if ((i == fel->l_tree_depth) &&
+ (fel->l_next_free_rec == fel->l_count)) {
IncreaseTreeDepth = 1;
goto increase_depth;
}
/* ok, we need to add a branch. pass in NULL
* if we need a whole branch, otherwise the
* extent which needs the new leaf */
- status = ocfs_allocate_new_data_node (osb, FileEntry,
- actualDiskOffset,
- actualLength,
- extent_header_bh,
- &newExtentOff, handle, inode);
+ status = ocfs_allocate_new_data_node(osb, fe,
+ new_clusters,
+ eb2_bh,
+ &new_eb_blkno,
+ handle, inode);
if (status < 0) {
LOG_ERROR_STATUS(status);
goto finally;
}
- OCFS_BH_PUT_DATA(extent_bh);
+ OCFS_BH_PUT_DATA(eb1_bh);
- status = ocfs_journal_access(handle, extent_bh,
+ status = ocfs_journal_access(handle, eb1_bh,
OCFS_JOURNAL_ACCESS_WRITE);
if (status < 0) {
LOG_ERROR_STATUS (status);
goto finally;
}
- extent = OCFS_BH_GET_DATA_WRITE(extent_bh);
+ eb1 = OCFS_BH_GET_DATA_WRITE(eb1_bh);
- extent->next_data_ext = newExtentOff;
- FileEntry->last_ext_ptr = newExtentOff;
+ eb1->h_next_leaf_blk = fe->i_last_eb_blk = new_eb_blkno;
}
do_update_parent:
- /* before we put the variable away, save off up_ptr as
+ /* before we put the variable away, save off parent_blk as
* we may need it if we update parent */
- up_ptr = extent->up_hdr_node_ptr;
+ parent_blk = eb1->h_parent_blk;
/* gotta put it away to write it ;) */
- OCFS_BH_PUT_DATA(extent_bh);
- extent = NULL;
- status = ocfs_journal_dirty(handle, extent_bh);
+ OCFS_BH_PUT_DATA(eb1_bh);
+ eb1 = NULL;
+ status = ocfs_journal_dirty(handle, eb1_bh);
if (status < 0) {
LOG_ERROR_STATUS(status);
goto finally;
}
if (!IncreaseTreeDepth && UpdateParent) {
- for (i = 0; i < FileEntry->granularity; i++) {
+ for (i = 0; i < fel->l_tree_depth; i++) {
/* next two if's are for loop around */
- if (extent_header_bh) {
- if (extent_header) {
- OCFS_BH_PUT_DATA(extent_header_bh);
- extent_header = NULL;
+ if (eb2_bh) {
+ if (eb2) {
+ OCFS_BH_PUT_DATA(eb2_bh);
+ eb2 = NULL;
+ el2 = NULL;
}
- brelse(extent_header_bh);
- extent_header_bh = NULL;
+ brelse(eb2_bh);
+ eb2_bh = NULL;
}
/* TODO: Can we do a cached read here? */
- status = ocfs_read_bh(osb, up_ptr, &extent_header_bh,
- OCFS_BH_COND_CACHED, inode);
+ status = ocfs_read_bh(osb,
+ parent_blk << osb->sb->s_blocksize_bits,
+ &eb2_bh,
+ OCFS_BH_COND_CACHED,
+ inode);
if (status < 0) {
LOG_ERROR_STATUS (status);
goto finally;
}
- status = ocfs_journal_access(handle, extent_header_bh,
- OCFS_JOURNAL_ACCESS_WRITE);
+ status = ocfs_journal_access(handle, eb2_bh,
+ OCFS_JOURNAL_ACCESS_WRITE);
if (status < 0) {
LOG_ERROR_STATUS(status);
goto finally;
}
- extent_header = OCFS_BH_GET_DATA_WRITE(extent_header_bh);
- if (!IS_VALID_EXTENT_HEADER(extent_header)) {
+ eb2 = OCFS_BH_GET_DATA_WRITE(eb2_bh);
+ el2 = &eb2->h_list;
+ if (!IS_VALID_EXTENT_BLOCK(eb2) ||
+ (el2->l_tree_depth < 0)) {
LOG_ERROR_STATUS (status = -EINVAL);
goto finally;
}
- if (extent_header->next_free_ext == 0) {
+ if (el2->l_next_free_rec == 0) {
LOG_ERROR_STATUS (status = -EFAIL);
goto finally;
}
- k = extent_header->next_free_ext - 1;
+ k = el2->l_next_free_rec - 1;
- extent_header->extents[k].num_bytes += actualLength;
+ el2->l_recs[k].e_clusters += new_clusters;
/* gonna need it if we loop around */
- up_ptr = extent_header->up_hdr_node_ptr;
+ parent_blk = eb2->h_parent_blk;
- OCFS_BH_PUT_DATA(extent_header_bh);
- extent_header = NULL;
+ OCFS_BH_PUT_DATA(eb2_bh);
+ eb2 = NULL;
+ el2 = NULL;
- status = ocfs_journal_dirty(handle, extent_header_bh);
+ status = ocfs_journal_dirty(handle, eb2_bh);
if (status < 0) {
goto finally;
}
}
- k = FileEntry->next_free_ext - 1;
+ k = fel->l_next_free_rec - 1;
- FileEntry->extents[k].num_bytes += actualLength;
+ fel->l_recs[k].e_clusters += new_clusters;
}
if (status < 0) {
@@ -1370,17 +1461,21 @@
increase_depth:
if (IncreaseTreeDepth) {
OCFS_BH_PUT_DATA(fe_bh);
- FileEntry = NULL;
- if (extent_bh && extent) {
- OCFS_BH_PUT_DATA(extent_bh);
- extent = NULL;
+ fe = NULL;
+ if (eb1_bh && eb1) {
+ OCFS_BH_PUT_DATA(eb1_bh);
+ eb1 = NULL;
+ el1 = NULL;
}
- if (extent_header_bh && extent_header) {
- OCFS_BH_PUT_DATA(extent_header_bh);
- extent_header = NULL;
+ if (eb2_bh && eb2) {
+ OCFS_BH_PUT_DATA(eb2_bh);
+ eb2 = NULL;
+ el2 = NULL;
}
- status = ocfs_grow_extent_tree(osb, fe_bh, handle, actualDiskOffset, actualLength, inode);
+ status = ocfs_grow_extent_tree(osb, fe_bh, handle,
+ blkno, new_clusters,
+ inode);
if (status < 0) {
LOG_ERROR_STATUS(status);
goto finally;
@@ -1397,33 +1492,34 @@
* ocfs_add_extent_map_entry merges them into a single
* mapping run.So just adding this entry will be
* fine. */
- if (FileEntry == NULL)
- FileEntry = OCFS_BH_GET_DATA_READ(fe_bh);
+ if (fe == NULL)
+ fe = OCFS_BH_GET_DATA_READ(fe_bh);
- Vbo = FileEntry->alloc_size;
- Lbo = actualDiskOffset;
+ Vbo = (u64)fe->i_clusters << osb->s_clustersize_bits;
+ Lbo = blkno << osb->sb->s_blocksize_bits;
/* Add the Entry to the extent map list */
- if (!ocfs_add_extent_map_entry (osb, &OCFS_I(inode)->map, Vbo,
- Lbo, actualLength))
+ if (!ocfs_add_extent_map_entry(osb, &OCFS_I(inode)->map,
+ Vbo, Lbo,
+ (u64)new_clusters << osb->s_clustersize_bits))
LOG_ERROR_STATUS (status = -EFAIL);
}
/* Buffers are always null if they haven't been mapped and
* non-null if they have.
* Buffer heads are non-NULL if they need to be brelsed */
- if (extent_bh) {
- if (extent)
- OCFS_BH_PUT_DATA(extent_bh);
- brelse(extent_bh);
+ if (eb1_bh) {
+ if (eb1)
+ OCFS_BH_PUT_DATA(eb1_bh);
+ brelse(eb1_bh);
}
- if (extent_header_bh) {
- if (extent_header)
- OCFS_BH_PUT_DATA(extent_header_bh);
- brelse(extent_header_bh);
+ if (eb2_bh) {
+ if (eb2)
+ OCFS_BH_PUT_DATA(eb2_bh);
+ brelse(eb2_bh);
}
- if (FileEntry) {
+ if (fe) {
OCFS_BH_PUT_DATA(fe_bh);
}
@@ -1442,16 +1538,21 @@
* 'flag' seems to be an indicator that (if true) tells us that we already know
* we're gonna have to clear out all of extarr.
*/
-static int _squish_extent_entries(ocfs_super *osb, ocfs_alloc_ext *extarr, __u8 *freeExtent, ocfs_journal_handle *handle, __u64 FileSize, int flag, struct inode *inode)
+static int _squish_extent_entries(ocfs_super *osb,
+ ocfs2_extent_rec *extarr,
+ __u16 *freeExtent,
+ ocfs_journal_handle *handle,
+ u32 num_clusters, int flag,
+ struct inode *inode)
{
int status = 0;
int FirstTime = 1;
- ocfs_alloc_ext *ext;
- __u32 i, csize = osb->cluster_size_bits,
+ __u64 FileSize = (u64)num_clusters << osb->s_clustersize_bits; /* FIXME get rid of this and use num_clusters!! */
+ ocfs2_extent_rec *ext;
+ __u32 i, csize = osb->s_clustersize_bits,
numBitsAllocated = 0, bitmapOffset = 0,
firstfree = *freeExtent;
__u64 bytes, foff, doff,
- dstart = osb->vol_layout.data_start_off,
diskOffsetTobeFreed, lengthTobeFreed = 0,
actualSize = 0, origLength = 0;
@@ -1460,12 +1561,12 @@
firstfree = *freeExtent;
- /* loop through the used alloc_extents */
+ /* loop through the used ocfs2_extent_recs */
for (i = 0; i < firstfree; i++) {
ext = &(extarr[i]);
- bytes = ext->num_bytes;
- foff = ext->file_off;
- doff = ext->disk_off;
+ bytes = (u64)ext->e_clusters << osb->s_clustersize_bits;
+ foff = (u64)ext->e_cpos << osb->s_clustersize_bits;
+ doff = ext->e_blkno << osb->sb->s_blocksize_bits;
actualSize = (bytes + foff);
if (flag || actualSize > FileSize) {
if (flag || foff >= FileSize) {
@@ -1473,23 +1574,26 @@
*freeExtent = i;
FirstTime = 0;
}
- numBitsAllocated = (__u32) (bytes>>csize);
- bitmapOffset = (__u32) ((doff - dstart) >> csize);
- ext->num_bytes = ext->disk_off = ext->file_off = 0;
+ numBitsAllocated = ext->e_clusters;
+ bitmapOffset = (__u32) (doff >> csize);
+ ext->e_clusters = ext->e_blkno =
+ ext->e_cpos = 0;
} else {
if (FirstTime) {
*freeExtent = i + 1;
FirstTime = 0;
}
origLength = bytes;
- ext->num_bytes = bytes = FileSize - foff;
+ bytes = FileSize - foff;
+ ext->e_clusters =
+ (u32)(bytes >> osb->s_clustersize_bits);
lengthTobeFreed = origLength - bytes;
if (lengthTobeFreed == 0) {
continue;
}
numBitsAllocated = (__u32) (lengthTobeFreed >> csize);
diskOffsetTobeFreed = doff + bytes;
- bitmapOffset = (__u32) ((diskOffsetTobeFreed - dstart) >> csize);
+ bitmapOffset = (__u32) (diskOffsetTobeFreed >> csize);
}
status = ocfs_handle_add_commit_bits(handle,
numBitsAllocated,
@@ -1510,7 +1614,7 @@
/* used by ocfs_kill_this_tree and ocfs_split_this_tree */
/* This value needs to be removed in a future version and set to
- * granularity + 1, dynamically */
+ * tree_depth + 1, dynamically */
#define OCFS_TREE_STACK_SIZE 8
/*
@@ -1518,27 +1622,32 @@
*
* Given an extent_group (can be a DAT or header), delete everything,
* including itself, it's children, and any data blocks they point to.
- * Works fine with any granularity (up to 4, in which case we'd need
+ * Works fine with any tree_depth (up to 4, in which case we'd need
* more stack space)
*
* extent_grp_bh will be unchanged, though it will be marked for
* deletion in free_head.
*/
-/* We can't recurse, so we keep a simple stack of ocfs_extent_groups. */
-static int ocfs_kill_this_tree(ocfs_super *osb, struct buffer_head *extent_grp_bh, ocfs_journal_handle *handle, struct inode *inode)
+/*
+ * We can't recurse, so we keep a simple stack of ocfs2_extent_blocks.
+ */
+static int ocfs_kill_this_tree(ocfs_super *osb,
+ struct buffer_head *extent_grp_bh,
+ ocfs_journal_handle *handle,
+ struct inode *inode)
{
int status = -EFAIL;
int i;
__u32 victim;
- __u32 csize = osb->cluster_size_bits;
- __u64 dstart = osb->vol_layout.data_start_off;
__u64 tmp_off;
- __u32 num_sectors = 0, bitmap_offset = 0;
- ocfs_alloc_ext *ext;
+ u32 num_clusters = 0;
+ __u32 bitmap_offset = 0;
+ ocfs2_extent_rec *ext;
struct buffer_head *tmp_bh = NULL;
char * stack[OCFS_TREE_STACK_SIZE];
- ocfs_extent_group * cur_extent; /* convenience, points to TOS */
+ ocfs2_extent_block *cur_eb; /* convenience, points to TOS */
+ ocfs2_extent_list *cur_el;
int tos = 0;
LOG_ENTRY();
@@ -1546,28 +1655,29 @@
for (i =0; i < OCFS_TREE_STACK_SIZE; i++)
stack[i] = NULL;
- stack[tos] = ocfs_malloc(osb->sect_size);
+ stack[tos] = kmalloc(osb->sb->s_blocksize, GFP_KERNEL);
memcpy(stack[tos], OCFS_BH_GET_DATA_READ(extent_grp_bh),
- osb->sect_size);
+ osb->sb->s_blocksize);
OCFS_BH_PUT_DATA(extent_grp_bh);
do {
- cur_extent = (ocfs_extent_group *) stack[tos];
-
- if (!IS_VALID_EXTENT_DATA(cur_extent) &&
- !IS_VALID_EXTENT_HEADER(cur_extent)) {
- LOG_ERROR_STR("Invalid extent group!");
+ cur_eb = (ocfs2_extent_block *) stack[tos];
+ if (!IS_VALID_EXTENT_BLOCK(cur_eb)) {
+ LOG_ERROR_STR("Invalid extent block!");
goto bail;
}
- if (IS_VALID_EXTENT_DATA(cur_extent)) {
- LOG_TRACE_ARGS("found some data to free (%llu)\n", cur_extent->this_ext);
- for(i = 0; i < cur_extent->next_free_ext; i++) {
+ cur_el = &cur_eb->h_list;
+
+ if (cur_el->l_tree_depth == -1) {
+ LOG_TRACE_ARGS("found some data to free (%llu)\n", cur_eb->h_blkno);
+ for(i = 0; i < cur_el->l_next_free_rec; i++) {
/* Free the data associated with each header */
- ext = &cur_extent->extents[i];
- num_sectors = (__u32) (ext->num_bytes >> csize);
- bitmap_offset = (__u32) ((ext->disk_off - dstart) >> csize);
- status = ocfs_handle_add_commit_bits(handle, num_sectors, bitmap_offset, -1, DISK_ALLOC_VOLUME);
+ ext = &cur_el->l_recs[i];
+ num_clusters = ext->e_clusters;
+ bitmap_offset =
+ (u32)((ext->e_blkno << osb->sb->s_blocksize_bits) >> osb->s_clustersize_bits);
+ status = ocfs_handle_add_commit_bits(handle, num_clusters, bitmap_offset, -1, DISK_ALLOC_VOLUME);
if (status < 0) {
LOG_ERROR_STATUS (status);
goto bail;
@@ -1578,30 +1688,33 @@
/* Did we already kill all his children, or
* are they already dead? */
- if (cur_extent->next_free_ext == 0) {
- LOG_TRACE_ARGS("Popping this header (%llu)\n", cur_extent->this_ext);
+ if (cur_el->l_next_free_rec == 0) {
+ LOG_TRACE_ARGS("Popping this header (%llu)\n", cur_eb->h_blkno);
goto free_meta;
}
/* We're gonna read in our last used extent
* and put him at the top of the stack. We
- * also update our next_free_ext so that next
+ * also update our l_next_free_rec so that next
* time we read in the next to last one and so
* on until we've finished all of them
*/
/* grow the stack, gotta save off a couple
* things 1st. */
- victim = cur_extent->next_free_ext - 1;
- tmp_off = cur_extent->extents[victim].disk_off;
- cur_extent->next_free_ext--;
+ victim = cur_el->l_next_free_rec - 1;
+ tmp_off = cur_el->l_recs[victim].e_blkno;
+ tmp_off <<= osb->sb->s_blocksize_bits;
+ cur_el->l_next_free_rec--;
- cur_extent = NULL;
+ cur_eb = NULL;
+ cur_el = NULL;
tos++;
/* should already be null, but we can do this
* just in case. */
- stack[tos] = ocfs_malloc(osb->sect_size);
+ stack[tos] = kmalloc(osb->sb->s_blocksize,
+ GFP_KERNEL);
status = ocfs_read_bh(osb, tmp_off, &tmp_bh,
OCFS_BH_COND_CACHED, inode);
@@ -1612,7 +1725,7 @@
memcpy(stack[tos],
OCFS_BH_GET_DATA_READ(tmp_bh),
- osb->sect_size);
+ osb->sb->s_blocksize);
OCFS_BH_PUT_DATA(tmp_bh);
brelse(tmp_bh);
tmp_bh = NULL;
@@ -1622,15 +1735,19 @@
free_meta:
/* Free the metadata associated with this extent group */
- status = ocfs_handle_add_commit_bits(handle, 1, cur_extent->alloc_file_off, cur_extent->alloc_node, DISK_ALLOC_EXTENT_NODE);
+ status = ocfs_handle_add_commit_bits(handle, 1,
+ cur_eb->h_suballoc_blkno << osb->sb->s_blocksize_bits,
+ cur_eb->h_suballoc_node,
+ DISK_ALLOC_EXTENT_NODE);
if (status < 0) {
LOG_ERROR_STATUS (status);
goto bail;
}
/* Pop one off the stack */
- ocfs_free(stack[tos]);
+ kfree(stack[tos]);
stack[tos] = NULL;
- cur_extent = NULL;
+ cur_eb = NULL;
+ cur_el = NULL;
tos--;
} while (tos >= 0);
@@ -1638,7 +1755,7 @@
bail:
for(i = 0; i < OCFS_TREE_STACK_SIZE; i++)
if (stack[i])
- ocfs_free(stack[i]);
+ kfree(stack[i]);
LOG_EXIT_STATUS (status);
return(status);
@@ -1646,43 +1763,44 @@
#ifndef USERSPACE_TOOL
-static int ocfs_fix_extent_group(ocfs_super *osb, struct buffer_head *group_bh, struct inode *inode)
+static int ocfs_fix_extent_block(ocfs_super *osb,
+ struct buffer_head *eb_bh,
+ struct inode *inode)
{
- ocfs_alloc_ext *ext;
- ocfs_extent_group *group = NULL;
- int status=-EFAIL;
+ ocfs2_extent_block *eb = NULL;
+ ocfs2_extent_list *el;
+ int status = -EFAIL;
int i;
- LOG_ENTRY ();
+ LOG_ENTRY();
- if (!group_bh) {
- LOG_ERROR_STR("Invalid extent group bh (NULL)!");
+ if (!eb_bh) {
+ LOG_ERROR_STR("Invalid extent block bh (NULL)!");
goto bail;
}
- group = OCFS_BH_GET_DATA_WRITE(group_bh);
+ eb = OCFS_BH_GET_DATA_WRITE(eb_bh);
- if (!IS_VALID_EXTENT_DATA(group) &&
- !IS_VALID_EXTENT_HEADER(group)) {
- LOG_ERROR_STR("Invalid extent group!");
+ if (!IS_VALID_EXTENT_BLOCK(eb)) {
+ LOG_ERROR_STR("Invalid extent block!");
goto bail;
}
- ext = group->extents;
+ el = &eb->h_list;
- for(i=group->next_free_ext; i < OCFS_MAX_DATA_EXTENTS; i++) {
- ext[i].num_bytes = 0;
- ext[i].disk_off = 0;
- ext[i].file_off = 0;
+ for(i = el->l_next_free_rec; i < el->l_count; i++) {
+ el->l_recs[i].e_clusters = 0;
+ el->l_recs[i].e_blkno = 0;
+ el->l_recs[i].e_cpos = 0;
}
- OCFS_BH_PUT_DATA(group_bh);
- group = NULL;
+ OCFS_BH_PUT_DATA(eb_bh);
+ eb = NULL;
status=0;
bail:
- if (group)
- OCFS_BH_PUT_DATA(group_bh);
+ if (eb)
+ OCFS_BH_PUT_DATA(eb_bh);
LOG_EXIT_STATUS (status);
return(status);
@@ -1699,81 +1817,93 @@
* write it so that it can.
* TODO: This function should be split up into a couple smaller ones.
*/
-static int ocfs_split_this_tree(ocfs_super * osb, struct buffer_head *extent_grp_bh, ocfs_journal_handle *handle, ocfs_file_entry *fe, struct inode *inode)
+static int ocfs_split_this_tree(ocfs_super *osb,
+ struct buffer_head *eb_bh,
+ ocfs_journal_handle *handle,
+ ocfs2_dinode *fe,
+ struct inode *inode)
{
int status = -EFAIL;
- __u64 newsize = fe->alloc_size;
- ocfs_alloc_ext *ext;
+ ocfs2_extent_rec *rec;
struct buffer_head * bh_stack[OCFS_TREE_STACK_SIZE];
- ocfs_extent_group * AllocExtent = NULL;/* convenience, points to TOS */
- ocfs_extent_group *extent_grp;
+ ocfs2_extent_block *alloc_eb = NULL; /* convenience, points to TOS */
+ ocfs2_extent_block *eb;
+ ocfs2_extent_list *el;
struct buffer_head *tmp_bh = NULL, *tmp_bh2 = NULL;
int tos = 0;
int i, victim;
- __u64 bytes, foff, doff, orig_bytes, dstart = osb->vol_layout.data_start_off, total_bytes, tmp_off;
- __u32 csize = osb->cluster_size_bits;
- __u32 num_sectors, bitmap_offset;
+ __u64 bytes, doff, orig_bytes, tmp_off;
+ __u64 total_bytes; /* FIXME needs to be clusters!!! */
+ __u32 num_clusters, bitmap_offset;
int done = 0;
- int gran = fe->granularity;
+ int depth = fe->id2.i_list.l_tree_depth;
int needs_brelse = 0;
LOG_ENTRY();
- /* This is a similar hack to the one below, untested for gran = 3 files
+ /* This is a similar hack to the one below, untested for depth = 3 files
because I can't recreate one. */
- if (gran == 3) {
- LOG_ERROR_STR("Truncating file with granularity 3, this is not tested and may be unsafe!");
- LOG_TRACE_STR("Found a granularity 3 tree, trimming it.\n");
+ if (depth == 3) {
+ LOG_ERROR_STR("Truncating file with tree_depth 3, this is not tested and may be unsafe!");
+ LOG_TRACE_STR("Found a tree_depth 3 tree, trimming it.\n");
- status = ocfs_journal_access(handle, extent_grp_bh,
+ status = ocfs_journal_access(handle, eb_bh,
OCFS_JOURNAL_ACCESS_WRITE);
if (status < 0) {
LOG_ERROR_STATUS(status);
goto bail;
}
- extent_grp = OCFS_BH_GET_DATA_WRITE(extent_grp_bh);
- for(i = (extent_grp->next_free_ext - 1); i>=0; i--) {
- ext = &extent_grp->extents[i];
+ eb = OCFS_BH_GET_DATA_WRITE(eb_bh);
+ el = &eb->h_list;
+ for (i = (el->l_next_free_rec - 1); i >= 0; i--) {
+ rec = &el->l_recs[i];
if (tmp_bh2)
brelse(tmp_bh2);
tmp_bh2 = NULL;
- status = ocfs_read_bh(osb, ext->disk_off, &tmp_bh2,
+ status = ocfs_read_bh(osb,
+ rec->e_blkno << osb->sb->s_blocksize_bits,
+ &tmp_bh2,
OCFS_BH_COND_CACHED, inode);
if (status < 0) {
- OCFS_BH_PUT_DATA(extent_grp_bh);
- extent_grp = NULL;
+ OCFS_BH_PUT_DATA(eb_bh);
+ eb = NULL;
brelse(tmp_bh2);
LOG_ERROR_STATUS (status);
goto bail;
}
- if (ext->file_off >= newsize) {
+ if (rec->e_cpos >= fe->i_clusters) {
/* Trim this whole subtree */
- status = ocfs_kill_this_tree(osb, tmp_bh2,
- handle, inode);
+ status = ocfs_kill_this_tree(osb,
+ tmp_bh2,
+ handle,
+ inode);
if (status < 0) {
- OCFS_BH_PUT_DATA(extent_grp_bh);
- extent_grp = NULL;
+ OCFS_BH_PUT_DATA(eb_bh);
+ eb = NULL;
+ el = NULL;
brelse(tmp_bh2);
LOG_ERROR_STATUS (status);
goto bail;
}
- ext->file_off = 0;
- ext->disk_off = 0;
- ext->num_bytes = 0;
- extent_grp->next_free_ext = i;
+ rec->e_cpos = 0;
+ rec->e_blkno = 0;
+ rec->e_clusters = 0;
+ el->l_next_free_rec = i;
} else { /* This is the one we want to split. */
- ext->num_bytes = newsize - ext->file_off;
+ rec->e_clusters =
+ fe->i_clusters - rec->e_cpos;
break;
}
}
/* Write out our new top of the tree duder */
- OCFS_BH_PUT_DATA(extent_grp_bh);
- extent_grp = NULL;
+ OCFS_BH_PUT_DATA(eb_bh);
+ eb = NULL;
+ el = NULL;
- status = ocfs_journal_dirty(handle, extent_grp_bh);
+ status = ocfs_journal_dirty(handle, eb_bh);
if (status < 0) {
LOG_ERROR_STATUS(status);
goto bail;
@@ -1784,69 +1914,79 @@
LOG_ERROR_STATUS(-EFAIL);
goto bail;
}
- extent_grp_bh = tmp_bh2;
+ eb_bh = tmp_bh2;
/* We want to do the next bit of stuff too */
- gran = 2;
+ depth = 2;
needs_brelse = 1;
}
/* This is a hack, but i have little time to make this function right*/
/* get rid of everything from the top level HDR that we can, then
- proceeed as if we're granularity 1 (which we know works) */
- if (gran == 2) {
- LOG_TRACE_STR("Found a granularity 2 tree, trimming it.\n");
+ proceeed as if we're tree_depth 1 (which we know works) */
+ if (depth == 2) {
+ LOG_TRACE_STR("Found a tree_depth 2 tree, trimming it.\n");
- status = ocfs_journal_access(handle, extent_grp_bh,
+ status = ocfs_journal_access(handle, eb_bh,
OCFS_JOURNAL_ACCESS_WRITE);
if (status < 0) {
LOG_ERROR_STATUS(status);
goto bail;
}
- extent_grp = OCFS_BH_GET_DATA_WRITE(extent_grp_bh);
- for(i = (extent_grp->next_free_ext - 1); i>=0; i--) {
- ext = &extent_grp->extents[i];
+ eb = OCFS_BH_GET_DATA_WRITE(eb_bh);
+ el = &eb->h_list;
+ for(i = (el->l_next_free_rec - 1); i >= 0; i--) {
+ rec = &el->l_recs[i];
if (tmp_bh)
brelse(tmp_bh);
tmp_bh = NULL;
- status = ocfs_read_bh(osb, ext->disk_off, &tmp_bh,
- OCFS_BH_COND_CACHED, inode);
+ status = ocfs_read_bh(osb,
+ rec->e_blkno << osb->sb->s_blocksize_bits,
+ &tmp_bh,
+ OCFS_BH_COND_CACHED,
+ inode);
if (status < 0) {
- OCFS_BH_PUT_DATA(extent_grp_bh);
- extent_grp = NULL;
+ OCFS_BH_PUT_DATA(eb_bh);
+ eb = NULL;
+ el = NULL;
brelse(tmp_bh);
LOG_ERROR_STATUS (status);
goto bail;
}
- if (ext->file_off >= newsize) {
+ if (rec->e_cpos >= fe->i_clusters) {
/* Trim this whole subtree */
- status = ocfs_kill_this_tree(osb, tmp_bh,
- handle, inode);
+ status = ocfs_kill_this_tree(osb,
+ tmp_bh,
+ handle,
+ inode);
if (status < 0) {
- OCFS_BH_PUT_DATA(extent_grp_bh);
- extent_grp = NULL;
+ OCFS_BH_PUT_DATA(eb_bh);
+ eb = NULL;
+ el = NULL;
brelse(tmp_bh);
LOG_ERROR_STATUS (status);
goto bail;
}
- ext->file_off = 0;
- ext->disk_off = 0;
- ext->num_bytes = 0;
- extent_grp->next_free_ext = i;
+ rec->e_cpos = 0;
+ rec->e_blkno = 0;
+ rec->e_clusters = 0;
+ el->l_next_free_rec = i;
} else { /* This is the one we want to split. */
- ext->num_bytes = newsize - ext->file_off;
+ rec->e_clusters =
+ fe->i_clusters - rec->e_cpos;
break;
}
}
/* Write out our new top of the tree duder */
- OCFS_BH_PUT_DATA(extent_grp_bh);
- extent_grp = NULL;
+ OCFS_BH_PUT_DATA(eb_bh);
+ eb = NULL;
+ el = NULL;
- status = ocfs_journal_dirty(handle, extent_grp_bh);
+ status = ocfs_journal_dirty(handle, eb_bh);
if (status < 0) {
LOG_ERROR_STATUS(status);
goto bail;
@@ -1858,11 +1998,11 @@
goto bail;
}
- extent_grp_bh = tmp_bh;
+ eb_bh = tmp_bh;
- /* Right now, we don't use 'gran' below here, but just
+ /* Right now, we don't use 'depth' below here, but just
* in case */
- gran = 1;
+ depth = 1;
if (needs_brelse)
brelse(tmp_bh2);
needs_brelse = 1;
@@ -1871,7 +2011,7 @@
for (i =0; i < OCFS_TREE_STACK_SIZE; i++)
bh_stack[i] = NULL;
- bh_stack[tos] = extent_grp_bh;
+ bh_stack[tos] = eb_bh;
/* Ok, find the splitting point (can be a DAT or HDR) */
do {
@@ -1885,36 +2025,40 @@
goto bail;
}
- AllocExtent = OCFS_BH_GET_DATA_WRITE(bh_stack[tos]);
-
- if (!IS_VALID_EXTENT_DATA(AllocExtent) &&
- !IS_VALID_EXTENT_HEADER(AllocExtent)) {
- LOG_ERROR_STR("Invalid extent group!");
+ alloc_eb = OCFS_BH_GET_DATA_WRITE(bh_stack[tos]);
+ if (!IS_VALID_EXTENT_BLOCK(alloc_eb)) {
+ LOG_ERROR_STR("Invalid extent block!");
goto bail;
}
- if (IS_VALID_EXTENT_DATA(AllocExtent)) {
+ el = &alloc_eb->h_list;
+
+ if (el->l_tree_depth == -1) {
/* shall we just do away with him? */
- if (AllocExtent->extents[0].file_off >= newsize) {
- LOG_TRACE_ARGS("Killing this data extent (%llu)\n", AllocExtent->this_ext);
+ if (el->l_recs[0].e_cpos >= fe->i_clusters) {
+ LOG_TRACE_ARGS("Killing this data extent (%llu)\n", alloc_eb->h_blkno);
/* Boundary case - what if this guy is
* the last DAT we should delete
* (i.e., split no more ;) */
OCFS_BH_PUT_DATA(bh_stack[tos]);
- AllocExtent = NULL;
- status = ocfs_kill_this_tree(osb, bh_stack[tos], handle, inode);
+ alloc_eb = NULL;
+ el = NULL;
+ status = ocfs_kill_this_tree(osb,
+ bh_stack[tos],
+ handle,
+ inode);
if (status < 0) {
LOG_ERROR_STATUS (status);
goto bail;
}
/* silly, but what to do? */
- AllocExtent = OCFS_BH_GET_DATA_READ(bh_stack[tos]);
+ alloc_eb = OCFS_BH_GET_DATA_READ(bh_stack[tos]);
} else {
/* Alright, we know for sure that
* we're splitting in this guy. */
- LOG_TRACE_ARGS("Splitting this data extent (%llu)\n", AllocExtent->this_ext);
- fe->last_ext_ptr = AllocExtent->this_ext;
- AllocExtent->next_data_ext = 0;
+ LOG_TRACE_ARGS("Splitting this data extent (%llu)\n", alloc_eb->h_blkno);
+ fe->i_last_eb_blk = alloc_eb->h_blkno;
+ alloc_eb->h_next_leaf_blk = 0;
/* total_bytes is used below to know
* how much total we've whacked off
* this extent*/
@@ -1923,37 +2067,43 @@
/* there is a chance the split is at a
* header boundary. this will catch
* it: */
- ext = &AllocExtent->extents[AllocExtent->next_free_ext - 1];
- if ((ext->file_off + ext->num_bytes)==newsize){
+ rec = &el->l_recs[el->l_next_free_rec - 1];
+ if ((rec->e_cpos + rec->e_clusters) == fe->i_clusters) {
LOG_TRACE_STR("Ok, hit that boundary in the DAT");
goto fix_headers;
}
/* Either kill the data or resize it */
- for(i = (AllocExtent->next_free_ext - 1); i>=0; i--) {
- ext = &AllocExtent->extents[i];
+ for(i = (el->l_next_free_rec - 1); i >= 0; i--) {
+ rec = &el->l_recs[i];
/* changed this from > to >= */
/* Do we delete it completely? */
- if (ext->file_off >= newsize) {
- total_bytes+=ext->num_bytes;
+ if (rec->e_cpos >= fe->i_clusters) {
+ total_bytes += (u64)rec->e_clusters << osb->s_clustersize_bits;
- num_sectors = (__u32) (ext->num_bytes >> csize);
- bitmap_offset = (__u32) ((ext->disk_off - dstart) >> csize);
- ext->file_off = 0;
- ext->num_bytes = 0;
- ext->disk_off = 0;
- } else if ((ext->file_off + ext->num_bytes) > newsize) {
+ num_clusters = rec->e_clusters;
+ bitmap_offset = (u32)(((rec->e_blkno << osb->sb->s_blocksize_bits)) >> osb->s_clustersize_bits);
+ rec->e_cpos = 0;
+ rec->e_clusters = 0;
+ rec->e_blkno = 0;
+ } else if ((rec->e_cpos + rec->e_clusters) > fe->i_clusters) {
/* Do we shrink it? */
- orig_bytes = ext->num_bytes;
- doff = ext->disk_off;
- foff = ext->file_off;
- bytes = ext->num_bytes = newsize - foff;
- num_sectors = (__u32) ((orig_bytes - bytes) >> csize);
- bitmap_offset = (__u32) (((doff + bytes) - dstart) >> csize);
+ /* FIXME
+ * this is soooo a
+ * hodge-podge of bytes
+ * and clusters
+ */
+ orig_bytes = (u64)rec->e_clusters << osb->s_clustersize_bits;
+ num_clusters = rec->e_clusters;
+ doff = rec->e_blkno << osb->sb->s_blocksize_bits;
+ rec->e_clusters = fe->i_clusters - rec->e_cpos;
+ bytes = (u64)rec->e_clusters << osb->s_clustersize_bits;
+ num_clusters -= rec->e_clusters;
+ bitmap_offset = (u32)((doff + bytes) >> osb->s_clustersize_bits);
/* we want to exit the
* for loop now */
- total_bytes+= (orig_bytes - bytes);
+ total_bytes += (orig_bytes - bytes);
done = 1;
} else {
/* if we get here,
@@ -1961,11 +2111,11 @@
* to actually delete
* *anything* from
* this extent. */
- LOG_TRACE_ARGS("Not deleting extent %d, disk_off = %llu, num_bytes = %llu, file_off = %llu\n", i, ext->disk_off, ext->num_bytes, ext->file_off);
+ LOG_TRACE_ARGS("Not deleting extent %d, e_blkno = %llu, e_clusters = %u, e_cpos = %u\n", i, rec->e_blkno, rec->e_clusters, rec->e_cpos);
done = 1;
goto skip_bitmap_add;
}
- status = ocfs_handle_add_commit_bits(handle, num_sectors, bitmap_offset, -1, DISK_ALLOC_VOLUME);
+ status = ocfs_handle_add_commit_bits(handle, num_clusters, bitmap_offset, -1, DISK_ALLOC_VOLUME);
if (status < 0) {
LOG_ERROR_STATUS (status);
goto bail;
@@ -1973,16 +2123,19 @@
skip_bitmap_add:
if (done) {
- AllocExtent->next_free_ext=i+1;
+ el->l_next_free_rec =
+ i + 1;
break;
}
} /* For loop */
/* Either way, we need to write this back out*/
OCFS_BH_PUT_DATA(bh_stack[tos]);
- AllocExtent = NULL;
+ alloc_eb = NULL;
+ el = NULL;
- status = ocfs_journal_dirty(handle, bh_stack[tos]);
+ status = ocfs_journal_dirty(handle,
+ bh_stack[tos]);
if (status < 0) {
LOG_ERROR_STATUS(status);
goto bail;
@@ -2000,23 +2153,26 @@
goto bail;
}
- AllocExtent = OCFS_BH_GET_DATA_WRITE(bh_stack[tos]);
- victim = AllocExtent->next_free_ext;
- AllocExtent->next_free_ext++;
+ alloc_eb = OCFS_BH_GET_DATA_WRITE(bh_stack[tos]);
+ el = &alloc_eb->h_list;
+ victim = el->l_next_free_rec;
+ el->l_next_free_rec++;
/* need to also update
* numbytes on these guys */
- ext = &AllocExtent->extents[victim];
- ext->num_bytes-= total_bytes;
+ rec = &el->l_recs[victim];
+ rec->e_clusters -=
+ (u32)(total_bytes >> osb->s_clustersize_bits);
OCFS_BH_PUT_DATA(bh_stack[tos]);
- AllocExtent = NULL;
- status = ocfs_fix_extent_group(osb,
+ alloc_eb = NULL;
+ el = NULL;
+ status = ocfs_fix_extent_block(osb,
bh_stack[tos], inode);
if (status < 0) {
LOG_ERROR_STATUS(status);
goto bail;
}
status = ocfs_journal_dirty(handle,
- bh_stack[tos]);
+ bh_stack[tos]);
if (status < 0) {
LOG_ERROR_STATUS(status);
goto bail;
@@ -2031,38 +2187,40 @@
/* Did we already kill all his children, or
* are they already dead? */
- if (AllocExtent->next_free_ext == 0) {
+ if (el->l_next_free_rec == 0) {
/*Ok, we're done with this guy, pop the stack*/
LOG_TRACE_ARGS("Popping this header (%llu)\n",
- AllocExtent->this_ext);
+ alloc_eb->h_blkno);
- status = ocfs_handle_add_commit_bits(handle, 1, AllocExtent->alloc_file_off, AllocExtent->alloc_node, DISK_ALLOC_EXTENT_NODE);
+ status = ocfs_handle_add_commit_bits(handle, 1, alloc_eb->h_suballoc_blkno << osb->sb->s_blocksize_bits, alloc_eb->h_suballoc_node, DISK_ALLOC_EXTENT_NODE);
if (status < 0) {
LOG_ERROR_STATUS (status);
goto bail;
}
OCFS_BH_PUT_DATA(bh_stack[tos]);
brelse(bh_stack[tos]);
- AllocExtent = NULL;
+ alloc_eb = NULL;
+ el = NULL;
bh_stack[tos] = NULL;
tos--;
continue;
}
/* changed this from > to >= */
/* Do we just delete this whole part of the tree? */
- if (AllocExtent->extents[0].file_off >= newsize) {
+ if (el->l_recs[0].e_cpos >= fe->i_clusters) {
LOG_TRACE_ARGS("whacking this tree: (%llu)\n",
- AllocExtent->this_ext);
+ alloc_eb->h_blkno);
- if (AllocExtent->extents[0].file_off ==newsize)
+ if (el->l_recs[0].e_cpos == fe->i_clusters)
done = 1;
OCFS_BH_PUT_DATA(bh_stack[tos]);
- AllocExtent = NULL;
+ alloc_eb = NULL;
ocfs_kill_this_tree(osb, bh_stack[tos],
handle, inode);
brelse(bh_stack[tos]);
- AllocExtent = NULL;
+ alloc_eb = NULL;
+ el = NULL;
bh_stack[tos] = NULL;
tos--;
if (tos < 0) {
@@ -2071,7 +2229,7 @@
}
/* I just have to fix my parent,
* right? Yes, but only because our
- * max granularity is 2. if it were
+ * max tree_depth is 2. if it were
* more, we'd have to fix his
* parents parent. */
status = ocfs_journal_access(handle, bh_stack[tos], OCFS_JOURNAL_ACCESS_WRITE);
@@ -2080,15 +2238,17 @@
goto bail;
}
- AllocExtent = OCFS_BH_GET_DATA_WRITE(bh_stack[tos]);
+ alloc_eb = OCFS_BH_GET_DATA_WRITE(bh_stack[tos]);
+ el = &alloc_eb->h_list;
- victim = AllocExtent->next_free_ext;
- AllocExtent->extents[victim].file_off = 0;
- AllocExtent->extents[victim].num_bytes = 0;
- AllocExtent->extents[victim].disk_off = 0;
- AllocExtent->next_free_ext--;
+ victim = el->l_next_free_rec;
+ el->l_recs[victim].e_cpos = 0;
+ el->l_recs[victim].e_clusters = 0;
+ el->l_recs[victim].e_blkno = 0;
+ el->l_next_free_rec--;
OCFS_BH_PUT_DATA(bh_stack[tos]);
- AllocExtent = NULL;
+ alloc_eb = NULL;
+ el = NULL;
/* Here's an interesting boundary
* case. What if we're truncating on a
* boundary between two headers and
@@ -2096,12 +2256,12 @@
* that case we're done, but need to
* write the parent out before we leave
* again, this bit of code depends on
- * granularity of 2. */
+ * tree_depth of 2. */
if (done) {
LOG_TRACE_STR("Found a boundary " \
"header, almost done " \
" (gonna quit)");
- status = ocfs_fix_extent_group(osb,
+ status = ocfs_fix_extent_block(osb,
bh_stack[tos], inode);
if (status < 0) {
LOG_ERROR_STATUS(status);
@@ -2135,14 +2295,16 @@
continue;
}
- AllocExtent->next_free_ext--;
- victim = AllocExtent->next_free_ext;
- ext = &AllocExtent->extents[victim];
- tmp_off = ext->disk_off;
+ el->l_next_free_rec--;
+ victim = el->l_next_free_rec;
+ rec = &el->l_recs[victim];
+ tmp_off = rec->e_blkno << osb->sb->s_blocksize_bits;
OCFS_BH_PUT_DATA(bh_stack[tos]);
- AllocExtent = NULL;
+ alloc_eb = NULL;
+ el = NULL;
- status = ocfs_journal_dirty(handle, bh_stack[tos]);
+ status = ocfs_journal_dirty(handle,
+ bh_stack[tos]);
if (status < 0) {
LOG_ERROR_STATUS (status);
goto bail;
@@ -2155,8 +2317,10 @@
LOG_ERROR_STR("uhoh, not brelsing a buffer " \
"on our stack!\n");
- status = ocfs_read_bh(osb, tmp_off, &bh_stack[tos],
- OCFS_BH_COND_CACHED, inode);
+ status = ocfs_read_bh(osb, tmp_off,
+ &bh_stack[tos],
+ OCFS_BH_COND_CACHED,
+ inode);
if (status < 0) {
LOG_ERROR_STATUS (status);
goto bail;
@@ -2165,11 +2329,12 @@
/* We only want to free on our way up the tree */
continue;
}
- if (AllocExtent)
+ if (alloc_eb)
OCFS_BH_PUT_DATA(bh_stack[tos]);
brelse(bh_stack[tos]);
bh_stack[tos] = NULL;
- AllocExtent = NULL;
+ alloc_eb = NULL;
+ el = NULL;
tos--;
} while (tos >= 0);
@@ -2181,7 +2346,7 @@
bail:
/* brelse the stack. We only brelse the bottom of the stack if
* we know for sure that it wasn't passed from the caller */
- if (AllocExtent)
+ if (alloc_eb)
OCFS_BH_PUT_DATA(bh_stack[tos]);
if (needs_brelse)
brelse(bh_stack[0]);
@@ -2195,82 +2360,98 @@
/*
- * ocfs_update_last_ext_ptr
+ * ocfs_update_last_eb_blk
*
- * Travel all the way to the rightmost DAT and set fe->last_ext_ptr
+ * Travel all the way to the rightmost DAT and set fe->i_last_eb_blk
* to it.
*
* We do cached reads here because we ought to have already read the
* various ext headers and dats off the system previously in the
* truncate path.
*/
-static int ocfs_update_last_ext_ptr(ocfs_super *osb, ocfs_file_entry *fe, struct inode *inode)
+static int ocfs_update_last_eb_blk(ocfs_super *osb,
+ ocfs2_dinode *fe,
+ struct inode *inode)
{
int status = -EFAIL;
- ocfs_extent_group *extent = NULL;
- struct buffer_head *extent_bh = NULL;
- __u64 next_ext;
+ struct buffer_head *eb_bh = NULL;
+ ocfs2_extent_block *eb = NULL;
+ ocfs2_extent_list *el, *fel;
+ u64 next_blk;
int victim;
LOG_ENTRY ();
- if (fe->next_free_ext == 0) {
+ fel = &fe->id2.i_list;
+ if (fel->l_next_free_rec == 0) {
LOG_TRACE_STR("setting to zero as there isn't any used extents");
- fe->last_ext_ptr = 0;
+ fe->i_last_eb_blk = 0;
status = 0;
goto bail;
}
- victim = fe->next_free_ext - 1;
- status = ocfs_read_bh(osb, fe->extents[victim].disk_off, &extent_bh,
+ /* Can't be called with local extents (l_tree_depth < 0): there is no extent block to walk down to */
+ if (fel->l_tree_depth < 0)
+ BUG();
+
+ /* Start from the last used record of the inode's extent list (the rightmost subtree) */
+ victim = fel->l_next_free_rec - 1;
+ status = ocfs_read_bh(osb,
+ fel->l_recs[victim].e_blkno << osb->sb->s_blocksize_bits,
+ &eb_bh,
OCFS_BH_CACHED, inode);
if (status < 0) {
LOG_ERROR_STATUS(status);
goto bail;
}
- extent = OCFS_BH_GET_DATA_READ(extent_bh);
+ eb = OCFS_BH_GET_DATA_READ(eb_bh);
- if (!IS_VALID_EXTENT_DATA(extent) &&
- !IS_VALID_EXTENT_HEADER(extent)) {
- LOG_ERROR_STR("Invalid extent group!");
+ if (!IS_VALID_EXTENT_BLOCK(eb)) {
+ LOG_ERROR_STR("Invalid extent block!");
goto bail;
}
- while (!IS_VALID_EXTENT_DATA(extent)) {
- if (!IS_VALID_EXTENT_HEADER(extent)) {
- LOG_ERROR_STR("Invalid extent group!");
+ el = &eb->h_list;
+
+ while (eb->h_list.l_tree_depth >= 0) { /* test the block just read, not 'el', which goes stale once eb_bh is released below */
+ if (!IS_VALID_EXTENT_BLOCK(eb)) {
+ LOG_ERROR_STR("Invalid extent block!");
goto bail;
}
- next_ext = extent->extents[extent->next_free_ext - 1].disk_off;
+ el = &eb->h_list;
- OCFS_BH_PUT_DATA(extent_bh);
- brelse(extent_bh);
- extent = NULL;
- extent_bh = NULL;
+ next_blk = el->l_recs[el->l_next_free_rec - 1].e_blkno;
- status = ocfs_read_bh(osb, next_ext, &extent_bh,
+ OCFS_BH_PUT_DATA(eb_bh);
+ brelse(eb_bh);
+ eb = NULL;
+ eb_bh = NULL;
+
+ status = ocfs_read_bh(osb,
+ next_blk << osb->sb->s_blocksize_bits,
+ &eb_bh,
OCFS_BH_CACHED, inode);
if (status < 0) {
LOG_ERROR_STATUS(status);
goto bail;
}
- extent = OCFS_BH_GET_DATA_READ(extent_bh);
+ eb = OCFS_BH_GET_DATA_READ(eb_bh);
}
- fe->last_ext_ptr = extent->this_ext;
+ fe->i_last_eb_blk = eb->h_blkno;
status = 0;
bail:
- if (extent_bh) {
- if (extent)
- OCFS_BH_PUT_DATA(extent_bh);
- brelse(extent_bh);
+ if (eb_bh) {
+ if (eb)
+ OCFS_BH_PUT_DATA(eb_bh);
+ brelse(eb_bh);
}
LOG_EXIT_STATUS(status);
return(status);
-} /* ocfs_update_last_ext_ptr */
+} /* ocfs_update_last_eb_blk */
/*
* ocfs_free_extents_for_truncate()
@@ -2279,25 +2460,28 @@
* before leaving this function, but that's pretty much up to the
* caller!
*/
-int ocfs_free_extents_for_truncate (ocfs_super * osb, ocfs_file_entry * FileEntry, ocfs_journal_handle *handle, struct inode *inode)
+int ocfs_free_extents_for_truncate(ocfs_super *osb,
+ ocfs2_dinode *fe,
+ ocfs_journal_handle *handle,
+ struct inode *inode)
{
int status = 0;
struct buffer_head *extent_bh = NULL;
- __u64 alloc_size;
int i, j;
- int updated_lep; /* used to mark whether fe->last_ext_ptr has
+ ocfs2_extent_list *fel;
+ int updated_leb; /* used to mark whether fe->i_last_eb_blk has
* been updated */
LOG_ENTRY ();
- alloc_size = FileEntry->alloc_size;
+ fel = &fe->id2.i_list;
/* local extents */
- if (FileEntry->local_ext) {
- status = _squish_extent_entries(osb, FileEntry->extents,
- &FileEntry->next_free_ext,
- handle, alloc_size, 0,
- inode);
+ if (fel->l_tree_depth < 0) {
+ status = _squish_extent_entries(osb, fel->l_recs,
+ &fel->l_next_free_rec,
+ handle, fe->i_clusters,
+ 0, inode);
if (status < 0) {
LOG_ERROR_STATUS (status);
goto finally;
@@ -2305,39 +2489,42 @@
goto finally;
}
- LOG_TRACE_ARGS("non-local extents. taking that code path, truncating to alloc_size of (%llu)\n", alloc_size);
+ LOG_TRACE_ARGS("non-local extents. taking that code path, truncating to i_clusters of (%u)\n", fe->i_clusters);
/* non-local extents */
- updated_lep = 0;
+ updated_leb = 0;
- /* Loop backwards through only the used free extent headers here */
- for (i = (FileEntry->next_free_ext - 1); i >= 0; i--) {
+ /* Loop backwards through only the used free extent block here */
+ for (i = (fel->l_next_free_rec - 1); i >= 0; i--) {
LOG_TRACE_ARGS("at top of loop, i = %d\n", i);
/* Go ahead and read that bit of the tree - we'll need it. */
- status = ocfs_read_bh(osb, FileEntry->extents[i].disk_off,
- &extent_bh, OCFS_BH_CACHED, inode);
+ status = ocfs_read_bh(osb,
+ fel->l_recs[i].e_blkno << osb->sb->s_blocksize_bits,
+ &extent_bh, OCFS_BH_CACHED,
+ inode);
if (status < 0) {
LOG_ERROR_STATUS(status);
goto finally;
}
/* Figure out, do we want to kill this whole tree? */
- if (FileEntry->extents[i].file_off >= alloc_size) {
+ if (fel->l_recs[i].e_cpos >= fe->i_clusters) {
LOG_TRACE_ARGS("Found an entire tree to delete!\n");
- status = ocfs_kill_this_tree(osb, extent_bh, handle, inode);
+ status = ocfs_kill_this_tree(osb, extent_bh,
+ handle, inode);
if (status < 0) {
LOG_ERROR_STATUS(status);
goto finally;
}
- /* Ok, update the FileEntry */
- FileEntry->extents[i].file_off = 0;
- FileEntry->extents[i].disk_off = 0;
- FileEntry->extents[i].num_bytes = 0;
- FileEntry->next_free_ext = i;
+ /* Ok, update the fe */
+ fel->l_recs[i].e_cpos = 0;
+ fel->l_recs[i].e_blkno = 0;
+ fel->l_recs[i].e_clusters = 0;
+ fel->l_next_free_rec = i;
} else { /* Ok, we only want part of it. */
LOG_TRACE_ARGS("Splitting this tree!\n");
status = ocfs_split_this_tree(osb, extent_bh,
- handle, FileEntry,
+ handle, fe,
inode);
if (status < 0) {
LOG_ERROR_STATUS(status);
@@ -2345,15 +2532,19 @@
}
/* Ok, update the FileEntry */
- LOG_TRACE_ARGS("Alright. num_bytes = (%llu), alloc_size = (%llu) file_off = (%llu)\n", FileEntry->extents[i].num_bytes, alloc_size, FileEntry->extents[i].file_off);
- FileEntry->extents[i].num_bytes = alloc_size;
+ LOG_TRACE_ARGS("Alright. e_clusters = (%u), i_clusters = (%u) e_cpos = (%u)\n",
+ fel->l_recs[i].e_clusters,
+ fe->i_clusters,
+ fel->l_recs[i].e_cpos);
+ fel->l_recs[i].e_clusters = fe->i_clusters;
for (j=0; j < i; j++)
- FileEntry->extents[i].num_bytes += FileEntry->extents[j].num_bytes;
+ fel->l_recs[i].e_clusters +=
+ fel->l_recs[j].e_clusters;
- FileEntry->next_free_ext = i + 1;
+ fel->l_next_free_rec = i + 1;
/* We're done - we can't split more than one
* parts of the tree. */
- updated_lep = 1;
+ updated_leb = 1;
break;
}
brelse(extent_bh);
@@ -2361,15 +2552,14 @@
}
/* Ok, trunc to zero is a special case, doofus */
- if (alloc_size == 0) {
- FileEntry->last_ext_ptr = 0;
- FileEntry->granularity = -1;
- FileEntry->local_ext = 1;
- updated_lep = 1;
+ if (fe->i_clusters == 0) {
+ fe->i_last_eb_blk = 0;
+ fel->l_tree_depth = -1;
+ updated_leb = 1;
}
- if (!updated_lep) {
- status = ocfs_update_last_ext_ptr(osb, FileEntry, inode);
+ if (!updated_leb) {
+ status = ocfs_update_last_eb_blk(osb, fe, inode);
if (status < 0) {
LOG_ERROR_STATUS(status);
goto finally;
@@ -2395,23 +2585,24 @@
* decoded and updated in the extent map.
*
*/
-int ocfs_lookup_file_allocation (ocfs_super * osb, __s64 Vbo, __s64 * Lbo,
- __u32 sectors, u32 *sector_count,
- struct inode *inode, int locked)
+int ocfs_lookup_file_allocation(ocfs_super *osb, __s64 Vbo,
+ __s64 *Lbo, __u32 blocks,
+ __s64 *contig_bytes,
+ struct inode *inode, int locked)
{
int status = -EFAIL;
- ocfs_file_entry *fe = NULL;
+ ocfs2_dinode *fe = NULL;
struct buffer_head *fe_bh = NULL;
- ocfs_extent_group *OcfsExtent = NULL;
- struct buffer_head *ext_bh = NULL;
- __u64 next_data_ext;
+ ocfs2_extent_block *eb = NULL;
+ struct buffer_head *eb_bh = NULL;
+ u64 next_leaf;
__u64 remainingLength = 0;
__s64 localVbo;
__u64 cnt;
__u32 NumIndex;
int have_io_sem = 0;
- LOG_ENTRY_ARGS("(vbo=%llu, sectors=%u, inode=%llu)\n", Vbo, sectors,
+ LOG_ENTRY_ARGS("(vbo=%llu, blocks=%u, inode=%llu)\n", Vbo, blocks,
GET_INODE_FEOFF(inode));
OCFS_ASSERT (osb);
@@ -2443,10 +2634,11 @@
if (!locked)
up(&(OCFS_I(inode)->priv_sem));
- if (status && cnt >= sectors) {
+ if (status &&
+ (cnt >= (u64)blocks << osb->sb->s_blocksize_bits)) {
/* Found a what we were looking for. */
status = 0;
- goto finally;
+ goto success;
}
/* Ok, we didn't find it in the extent map (or we need to
@@ -2470,11 +2662,13 @@
if (!locked)
have_io_sem = 1;
- remainingLength = sectors;
+ remainingLength = blocks << osb->sb->s_blocksize_bits;
localVbo = Vbo;
- /* We are looking for a Vbo, but it is not in the Map or not Valid. */
- /* Thus we have to go to the disk, and update the Map */
+ /*
+ * We are looking for a Vbo, but it is not in the Map or not
+ * Valid. Thus we have to go to the disk, and update the Map
+ */
/* Read the file Entry corresponding to this */
status = ocfs_read_bh(osb, GET_INODE_FEOFF(inode), &fe_bh,
@@ -2491,19 +2685,20 @@
goto finally;
}
- if (Vbo >= (__s64) fe->alloc_size) {
- LOG_ERROR_ARGS ("vbo=%llu, fe->alloc_sz=%llu alloc_size=%llu",
- Vbo, fe->alloc_size,
+ if (Vbo >= (__s64)((u64)fe->i_clusters << osb->s_clustersize_bits)) {
+ LOG_ERROR_ARGS ("vbo=%llu, fe->i_clusters=%u alloc_size=%llu",
+ Vbo, fe->i_clusters,
OCFS_I(inode)->alloc_size);
status = -EFAIL;
goto finally;
}
- if (fe->local_ext) {
+ if (fe->id2.i_list.l_tree_depth < 0) {
if (!locked)
down(&(OCFS_I(inode)->priv_sem));
- status = ocfs_update_extent_map (osb, &OCFS_I(inode)->map, fe,
- NULL, NULL, LOCAL_EXT);
+ status = ocfs_update_extent_map(osb,
+ &OCFS_I(inode)->map, fe,
+ NULL, NULL, LOCAL_EXT);
if (!locked)
up(&(OCFS_I(inode)->priv_sem));
if (status < 0) {
@@ -2514,23 +2709,22 @@
/* Extents are branched and we are no longer using
* Local Extents for this File Entry. */
- status = ocfs_get_leaf_extent(osb, fe, localVbo, &ext_bh,
- inode);
+ status = ocfs_get_leaf_extent(osb, fe, localVbo,
+ &eb_bh, inode);
if (status < 0) {
LOG_ERROR_STATUS (status);
goto finally;
}
- OcfsExtent = OCFS_BH_GET_DATA_READ(ext_bh);
+ eb = OCFS_BH_GET_DATA_READ(eb_bh);
while (1) {
if (!locked)
down(&(OCFS_I(inode)->priv_sem));
- status = ocfs_update_extent_map (osb,
- &OCFS_I(inode)->map,
- OcfsExtent,
- &localVbo,
- &remainingLength,
- NONLOCAL_EXT);
+ status = ocfs_update_extent_map(osb,
+ &OCFS_I(inode)->map,
+ eb, &localVbo,
+ &remainingLength,
+ NONLOCAL_EXT);
if (!locked)
up(&(OCFS_I(inode)->priv_sem));
if (status < 0) {
@@ -2539,34 +2733,36 @@
}
if (remainingLength > 0) {
- if (!OcfsExtent->next_data_ext) {
+ if (!eb->h_next_leaf_blk) {
LOG_ERROR_ARGS ("localVbo=%llu, "
"alloc_size=%llu, "
" thisext=%llu",
localVbo,
OCFS_I(inode)->alloc_size,
- OcfsExtent->this_ext);
+ eb->h_blkno);
status = -EFAIL;
goto finally;
}
- next_data_ext = OcfsExtent->next_data_ext;
+ next_leaf = eb->h_next_leaf_blk;
- OCFS_BH_PUT_DATA(ext_bh);
- brelse(ext_bh);
- ext_bh = NULL;
- OcfsExtent = NULL;
+ OCFS_BH_PUT_DATA(eb_bh);
+ brelse(eb_bh);
+ eb_bh = NULL;
+ eb = NULL;
- status = ocfs_read_bh(osb, next_data_ext,
- &ext_bh,
+ status = ocfs_read_bh(osb,
+ next_leaf << osb->sb->s_blocksize_bits,
+ &eb_bh,
OCFS_BH_COND_CACHED,
inode);
if (status < 0) {
LOG_ERROR_STATUS(status);
goto finally;
}
- OcfsExtent = OCFS_BH_GET_DATA_READ(ext_bh);
- if (!IS_VALID_EXTENT_DATA(OcfsExtent)) {
+ eb = OCFS_BH_GET_DATA_READ(eb_bh);
+ if (!IS_VALID_EXTENT_BLOCK(eb) ||
+ (eb->h_list.l_tree_depth != -1)) {
LOG_ERROR_STATUS (status = -EINVAL);
goto finally;
}
@@ -2578,18 +2774,24 @@
if (!locked)
down(&(OCFS_I(inode)->priv_sem));
- if (ocfs_lookup_extent_map_entry (osb, &(OCFS_I(inode)->map), Vbo, Lbo,
- &cnt, &NumIndex) && cnt >= sectors) {
+ if (ocfs_lookup_extent_map_entry(osb, &(OCFS_I(inode)->map),
+ Vbo, Lbo, &cnt, &NumIndex) &&
+ (cnt >= (u64)blocks << osb->sb->s_blocksize_bits)) {
status = 0;
- } else
+ } else {
status = -EFAIL;
+ printk("failed to lookup extmap... vbo=%llu mapcount=%d\n", Vbo, OCFS_I(inode)->map.count);
+ }
if (!locked)
up(&(OCFS_I(inode)->priv_sem));
+success:
/* want to return cnt only if asked for it */
- if (sector_count)
- *sector_count = (u32) cnt;
+ if (contig_bytes)
+ *contig_bytes = cnt;
+ LOG_TRACE_ARGS("returning contig_bytes=%lld, lbo=%lld\n", cnt, *Lbo);
+
finally:
if (have_io_sem)
up_read(&OCFS_I(inode)->ip_io_sem);
@@ -2600,10 +2802,10 @@
brelse(fe_bh);
}
- if (ext_bh) {
- if (OcfsExtent)
- OCFS_BH_PUT_DATA(ext_bh);
- brelse(ext_bh);
+ if (eb_bh) {
+ if (eb)
+ OCFS_BH_PUT_DATA(eb_bh);
+ brelse(eb_bh);
}
LOG_EXIT_STATUS (status);
return (status);
@@ -2612,21 +2814,25 @@
/* ocfs_get_leaf_extent()
* '*data_exent_bh' should be NULL.
*/
-int ocfs_get_leaf_extent (ocfs_super * osb, ocfs_file_entry * FileEntry, __s64 Vbo, struct buffer_head **data_extent_bh, struct inode *inode)
+int ocfs_get_leaf_extent(ocfs_super *osb, ocfs2_dinode *fe,
+ __s64 Vbo, struct buffer_head **data_extent_bh,
+ struct inode *inode)
{
int status = 0, tempstat;
__u32 i, j;
- ocfs_extent_group *ExtentHeader = NULL;
struct buffer_head *ext_bh = NULL;
- ocfs_extent_group *tmp = NULL;
+ ocfs2_extent_block *eb = NULL;
+ ocfs2_extent_list *el, *fel;
+ ocfs2_extent_block *tmp = NULL;
__u64 childDiskOffset = 0;
LOG_ENTRY ();
- for (i = 0; i < FileEntry->next_free_ext; i++) {
- if ((__s64) (FileEntry->extents[i].file_off +
- FileEntry->extents[i].num_bytes) > Vbo) {
- childDiskOffset = FileEntry->extents[i].disk_off;
+ fel = &fe->id2.i_list;
+
+ for (i = 0; i < fel->l_next_free_rec; i++) {
+ if ((__s64)((u64)(fel->l_recs[i].e_cpos + fel->l_recs[i].e_clusters) << osb->s_clustersize_bits) > Vbo) {
+ childDiskOffset = fel->l_recs[i].e_blkno << osb->sb->s_blocksize_bits;
break;
}
}
@@ -2636,25 +2842,26 @@
goto finally;
}
- for (i = 0; i < FileEntry->granularity; i++) {
- tempstat = ocfs_read_bh(osb, childDiskOffset, &ext_bh, OCFS_BH_COND_CACHED, inode);
+ for (i = 0; i < fel->l_tree_depth; i++) {
+ tempstat = ocfs_read_bh(osb, childDiskOffset, &ext_bh,
+ OCFS_BH_COND_CACHED, inode);
if (tempstat < 0) {
LOG_ERROR_STATUS (status = tempstat);
goto finally;
}
- ExtentHeader = OCFS_BH_GET_DATA_READ(ext_bh);
- if (!IS_VALID_EXTENT_HEADER(ExtentHeader)) {
+ eb = OCFS_BH_GET_DATA_READ(ext_bh);
+ if (!IS_VALID_EXTENT_BLOCK(eb)) {
LOG_ERROR_STATUS (status = -EINVAL);
goto finally;
}
+ el = &eb->h_list;
- for (j = 0; j < ExtentHeader->next_free_ext; j++) {
- if ((__s64) (ExtentHeader->extents[j].file_off +
- ExtentHeader->extents[j].num_bytes) > Vbo)
+ for (j = 0; j < el->l_next_free_rec; j++) {
+ if ((__s64)((u64)(el->l_recs[j].e_cpos + el->l_recs[j].e_clusters) << osb->s_clustersize_bits) > Vbo)
{
childDiskOffset =
- ExtentHeader->extents[j].disk_off;
+ el->l_recs[j].e_blkno << osb->sb->s_blocksize_bits;
break;
}
}
@@ -2662,7 +2869,7 @@
OCFS_BH_PUT_DATA(ext_bh);
brelse(ext_bh);
ext_bh = NULL;
- ExtentHeader = NULL;
+ eb = NULL;
}
if (*data_extent_bh) {
@@ -2678,7 +2885,8 @@
}
tmp = OCFS_BH_GET_DATA_READ(*data_extent_bh);
- if (!IS_VALID_EXTENT_DATA(tmp)) {
+ if (!IS_VALID_EXTENT_BLOCK(tmp) ||
+ (tmp->h_list.l_tree_depth != -1)) {
LOG_ERROR_STATUS (status = -EINVAL);
OCFS_BH_PUT_DATA(*data_extent_bh);
brelse(*data_extent_bh);
@@ -2689,7 +2897,7 @@
OCFS_BH_PUT_DATA(*data_extent_bh);
finally:
if (ext_bh) {
- if (ExtentHeader)
+ if (eb)
OCFS_BH_PUT_DATA(ext_bh);
brelse(ext_bh);
}
@@ -2714,7 +2922,14 @@
* Pass in 'lock_bh' and bitmap_inode only if you've already taken the
* vol_alloc semaphore, and you've done the acquire_lock on the bitmap.
*/
-int ocfs_find_contiguous_space_from_bitmap (ocfs_super * osb, ocfs_journal_handle *handle, __u64 file_size, __u64 * cluster_off, __u64 * cluster_count, int sysfile, struct buffer_head *lock_bh, struct inode *bitmap_inode)
+int ocfs_find_contiguous_space_from_bitmap(ocfs_super *osb,
+ ocfs_journal_handle *handle,
+ __u64 file_size,
+ __u64 *cluster_off,
+ __u64 *cluster_count,
+ int sysfile,
+ struct buffer_head *lock_bh,
+ struct inode *bitmap_inode)
{
int status = 0, startbh, numblocks;
__u32 bitoffset = 0, ClusterCount = 0;
@@ -2723,11 +2938,11 @@
static __u32 LargeAllocOffset = 0;
static __u32 SmallAllocOffset = 0;
struct buffer_head *bh = NULL;
- ocfs_file_entry *bm_lock = NULL;
+ ocfs2_dinode *bm_lock = NULL;
__u32 bitmapblocks; /* we only care about the valid blocks */
int local_lock = 0;
int local_inode = 0;
- __u32 five_percent;
+ __u32 five_percent, free_bits;
LOG_ENTRY ();
@@ -2737,7 +2952,7 @@
BUG();
if (!bitmap_inode) {
- bitmap_inode = igrab(osb->system_inodes[GLOBAL_BITMAP_SYSTEM_INODE]);
+ bitmap_inode = ocfs_get_system_file_inode(osb, GLOBAL_BITMAP_SYSTEM_INODE, -1);
if (!bitmap_inode) {
status = -EINVAL;
LOG_ERROR_STR("Could not get bitmap inode!");
@@ -2754,8 +2969,8 @@
ocfs_handle_add_inode(handle, bitmap_inode);
/* Get the allocation lock here */
- status = ocfs_acquire_lock (osb, OCFS_DLM_EXCLUSIVE_LOCK, 0,
- &bh, bitmap_inode);
+ status = ocfs_acquire_lock(osb, OCFS_DLM_EXCLUSIVE_LOCK,
+ 0, &bh, bitmap_inode);
if (status < 0) {
if (status != -EINTR)
LOG_ERROR_STATUS (status);
@@ -2773,24 +2988,27 @@
bm_lock = OCFS_BH_GET_DATA_WRITE(bh);
- ClusterCount = (__u32) ((__u64) (file_size + (osb->vol_layout.cluster_size-1)) >>
- osb->cluster_size_bits);
+ ClusterCount = (__u32) ((__u64) (file_size + (osb->s_clustersize-1)) >>
+ osb->s_clustersize_bits);
if (ClusterCount == 0) {
LOG_ERROR_STR ("DISK_FULL?: ClusterCount==0");
status = 0;
goto leave;
}
- ByteCount = ClusterCount << osb->cluster_size_bits;
+ ByteCount = (u64)ClusterCount << osb->s_clustersize_bits;
if (ByteCount == 0) {
LOG_ERROR_STR ("DISK_FULL?: Bytecount==0");
status = 0;
goto leave;
}
- if (sysfile ? (ClusterCount > osb->vol_layout.num_clusters) :
- (ClusterCount > (osb->vol_layout.num_clusters -
- ((8 * ONE_MEGA_BYTE) >> osb->cluster_size_bits)))){
- LOG_ERROR_STR ("Disk Full");
+ free_bits = le32_to_cpu(bm_lock->id1.bitmap1.i_total) -
+ le32_to_cpu(bm_lock->id1.bitmap1.i_used);
+ if ( (sysfile && ClusterCount > free_bits) ||
+ (!sysfile && ClusterCount >
+ (free_bits - ((8 * ONE_MEGA_BYTE) >> osb->s_clustersize_bits))) ){
+ LOG_ERROR_ARGS("Disk Full: ClusterCount=%u, free_bits=%u, sysfile=%s\n",
+ ClusterCount, free_bits, sysfile?"yes":"no");
status = -ENOSPC;
goto leave;
}
@@ -2804,19 +3022,19 @@
/* allocation at 5%, so that small files stay in the beginning
* as much as possible */
- // ok because num_clusters should not even be a __u64
- five_percent = ((__u32)osb->vol_layout.num_clusters) / 20;
+ five_percent = le32_to_cpu(bm_lock->id1.bitmap1.i_total) / 20;
if (ClusterCount > five_percent) {
LargeAlloc = 1;
LargeAllocOffset = five_percent;
}
- bitmapblocks = (OCFS_ALIGN(osb->cluster_bitmap.validbits,
- OCFS_BITS_IN_CHUNK) / OCFS_BITS_IN_CHUNK);
+ bitmapblocks =
+ ocfs_blocks_for_bits(osb->sb,
+ osb->cluster_bitmap.validbits);
/* Ok, somewhat lame, but we submit the whole bitmap for reading here*/
- if (ocfs_read_bhs(osb, osb->vol_layout.bitmap_off,
- bitmapblocks * osb->sect_size,
+ if (ocfs_read_bhs(osb, osb->bitmap_blkno << osb->sb->s_blocksize_bits,
+ bitmapblocks << osb->sb->s_blocksize_bits,
osb->cluster_bitmap.chunk, 0, bitmap_inode)) {
LOG_ERROR_STATUS(-EIO);
goto leave;
@@ -2827,7 +3045,7 @@
LargeAlloc ? LargeAllocOffset :
SmallAllocOffset, sysfile ? 0 :
((8 * ONE_MEGA_BYTE) >>
- osb->cluster_size_bits));
+ osb->s_clustersize_bits));
/* if fails we should try again from the beginning of the disk. */
/* in the end we pass # of bits we want to keep for system
@@ -2841,8 +3059,8 @@
bitoffset = ocfs_find_clear_bits (osb, &osb->cluster_bitmap,
ClusterCount, 0,
sysfile ? 0 :
- ((8 * ONE_MEGA_BYTE) /
- osb->vol_layout.cluster_size));
+ ((8 * ONE_MEGA_BYTE) >>
+ osb->s_clustersize_bits));
}
/* It returns -1 on failure, otherwise bitoffset points at the */
@@ -2857,20 +3075,24 @@
goto leave;
}
- LOG_TRACE_ARGS ("setting at bit offset=%u\n", bitoffset);
+ LOG_TRACE_ARGS ("setting %u bits at bit offset=%u\n", ClusterCount, bitoffset);
- ocfs_set_bits (handle, &osb->cluster_bitmap, bitoffset, ClusterCount);
+ ocfs_set_bits(osb->sb, handle, &osb->cluster_bitmap, bitoffset,
+ ClusterCount);
/* Ok, write out the bitmap now. We optimize only by writing
* out the bitmap blocks which have changed, and not all of
* them like before. */
- startbh = OCFS_GLOBAL_OFF_TO_CHUNK(bitoffset);
- numblocks = OCFS_GLOBAL_OFF_TO_CHUNK(bitoffset + ClusterCount - 1) - startbh + 1;
+ numblocks = ocfs_bitmap_blocks_affected(osb->sb,
+ bitoffset,
+ ClusterCount,
+ &startbh);
LOG_TRACE_ARGS("bitoffset = %u, ClusterCount = %u, startbh = %u, numblocks = %u\n", bitoffset, ClusterCount, startbh, numblocks);
/* write the bitmap size info to the lock sector */
- bm_lock->u.bitinfo.used_bits = ocfs_count_bits(&osb->cluster_bitmap);
+ bm_lock->id1.bitmap1.i_used =
+ ocfs_count_bits(osb->sb, &osb->cluster_bitmap);
OCFS_BH_PUT_DATA(bh);
bm_lock = NULL;
@@ -2903,12 +3125,14 @@
*
* You need to be holding node_alloc_sem!
*/
-int ocfs_alloc_node_block (ocfs_super * osb, __u64 FileSize, __u64 * DiskOffset, __u64 * file_off, __u32 NodeNum, __u32 Type, ocfs_journal_handle *handle)
+int ocfs_alloc_node_block(ocfs_super *osb, __u64 bytes_wanted,
+ __u64 *DiskOffset, __u64 *file_off,
+ __u32 NodeNum, __u32 Type,
+ ocfs_journal_handle *handle)
{
int status = 0;
int startbh, numblocks;
__u64 fileSize = 0;
- __u64 offset = 0;
__u64 numBytes = 0;
__u64 allocSize = 0;
__u64 prevFileSize = 0;
@@ -2919,31 +3143,32 @@
__u32 numBits = 0;
__u32 foundBit = -1;
__u32 blockSize = 0, blockSizeBits = 0;
- __u32 bm_file = 0;
- __u32 alloc_file = 0;
+ int bm_file = 0;
+ int alloc_file = 0;
struct buffer_head *bh = NULL;
- ocfs_file_entry *fe = NULL;
+ struct buffer_head *alloc_bh = NULL;
+ ocfs2_dinode *fe = NULL;
+ ocfs2_dinode *alloc_fe = NULL;
int needs_uninit = 0;
int delay_lockrel = 0;
struct inode *inode = NULL; /* alloc bitmap file inode */
+ struct inode *alloc_inode = NULL; /* alloc file inode */
- LOG_ENTRY_ARGS("(FileSize = (%llu), Type=%d)\n", FileSize,Type);
+ LOG_ENTRY_ARGS("(bytes_wanted = (%llu), Type=%d)\n", bytes_wanted,Type);
switch (Type) {
case DISK_ALLOC_EXTENT_NODE:
- bm_file = OCFS_FILE_FILE_ALLOC_BITMAP + NodeNum;
- alloc_file = OCFS_FILE_FILE_ALLOC + NodeNum;
- blockSize = (__u32) osb->vol_layout.file_node_size;
- blockSizeBits = osb->file_alloc_bits;
+ bm_file = EXTENT_ALLOC_BITMAP_SYSTEM_INODE;
+ alloc_file = EXTENT_ALLOC_SYSTEM_INODE;
+ blockSize = osb->sb->s_blocksize;
+ blockSizeBits = osb->sb->s_blocksize_bits;
atomic_inc(&osb->alloc_stats.ext_allocs);
- inode = igrab(osb->system_inodes[FILE_ALLOC_BITMAP_SYSTEM_INODE]);
break;
case DISK_ALLOC_INODE:
- bm_file = OCFS_INODE_BITMAP + NodeNum;
- alloc_file = OCFS_INODE_FILE + NodeNum;
- blockSize = osb->inode_size;
- blockSizeBits = osb->inode_alloc_bits;
- inode = igrab(osb->system_inodes[INODE_ALLOC_BITMAP_SYSTEM_INODE]);
+ bm_file = INODE_ALLOC_BITMAP_SYSTEM_INODE;
+ alloc_file = INODE_ALLOC_SYSTEM_INODE;
+ blockSize = osb->sb->s_blocksize;
+ blockSizeBits = osb->sb->s_blocksize_bits;
break;
default:
status = -EINVAL;
@@ -2951,11 +3176,16 @@
goto leave;
}
+ inode = ocfs_get_system_file_inode(osb, bm_file, NodeNum);
if (!inode) {
- status = -EINVAL;
- LOG_ERROR_STATUS(status);
+ LOG_ERROR_STATUS(status=-EINVAL);
goto leave;
}
+ alloc_inode = ocfs_get_system_file_inode(osb, alloc_file, NodeNum);
+ if (!alloc_inode) {
+ LOG_ERROR_STATUS(status=-EINVAL);
+ goto leave;
+ }
/* Allocate a block of size blocksize from the relevant file/bitmap */
OCFS_ASSERT (blockSize);
@@ -2968,38 +3198,35 @@
}
ocfs_handle_add_lock(handle, OCFS_DLM_EXCLUSIVE_LOCK,
- FLAG_FILE_CREATE,
- bh, inode, 1);
+ FLAG_FILE_CREATE,
+ bh, inode, 1);
- numBits = ((FileSize + (blockSize-1)) >> blockSizeBits);
- numBytes = numBits << blockSizeBits;
+ numBits = ((bytes_wanted + (blockSize-1)) >> blockSizeBits);
+ numBytes = (u64)numBits << blockSizeBits;
- /* Read in the bitmap file for the dir alloc and look for the
+ /* Read in the bitmap file for the alloc and look for the
* required space, if found */
fe = OCFS_BH_GET_DATA_READ(bh);
- fileSize = fe->file_size;
- allocSize = fe->alloc_size;
+ prevFileSize = fileSize = fe->i_size;
+ allocSize = (u64)fe->i_clusters << osb->s_clustersize_bits;
OCFS_BH_PUT_DATA(bh);
-
- prevFileSize = fileSize;
+
+
if ((fileSize != 0) && (allocSize != 0)) {
- /* Round this off to dirnodesize */
- ocfs_initialize_bitmap (&bitmap, (__u32) fileSize * 8, (__u32) allocSize * 8);
+ ocfs_initialize_bitmap(osb->sb, &bitmap,
+ (__u32)fileSize * 8,
+ (__u32)allocSize * 8);
needs_uninit = 1;
- status = ocfs_read_system_file (osb, bm_file, bitmap.chunk,
- allocSize, offset);
+ status = ocfs_read_system_file(osb, bm_file, NodeNum,
+ bitmap.chunk, allocSize);
if (status < 0) {
LOG_ERROR_STATUS (status);
goto leave;
}
- /* Find the requisite number of bits... */
-
- /* This function will check for clear bits in the Bitmap for */
- /* consective clear bits equal to ClusterCount */
- foundBit = ocfs_find_clear_bits (osb, &bitmap, numBits, 0, 0);
+ foundBit = ocfs_find_clear_bits(osb, &bitmap, numBits, 0, 0);
}
/* It returns -1 on failure , otherwise ByteOffset points at the */
@@ -3008,24 +3235,45 @@
if (foundBit == -1) {
/* if not found add more allocation to the file and try again. */
+ //extent = ONE_MEGA_BYTE;
extent = ( ((numBits * blockSize) + (ONE_MEGA_BYTE-1)) >> 20 ) << 20;
-
- status = ocfs_get_system_file_size (osb, alloc_file,
- &newFileSize, &allocSize);
+
+#warning maybe take this out and put a verifyupdateinode in here
+ status = ocfs_read_bh(osb, GET_INODE_FEOFF(alloc_inode),
+ &alloc_bh, OCFS_BH_CACHED, alloc_inode);
if (status < 0) {
LOG_ERROR_STATUS (status);
goto leave;
}
-
+
+ alloc_fe = OCFS_BH_GET_DATA_READ(alloc_bh);
+ newFileSize = alloc_fe->i_size;
+ allocSize = (u64)alloc_fe->i_clusters << osb->s_clustersize_bits;
+ if (newFileSize != alloc_inode->i_size ||
+ allocSize != OCFS_I(alloc_inode)->alloc_size) {
+ LOG_ERROR_ARGS("aha! alloc inode was out of date! "
+ "newFileSize=%llu, i_size=%llu, "
+ "allocSize=%llu, alloc_size=%llu\n",
+ newFileSize, alloc_inode->i_size,
+ allocSize, OCFS_I(alloc_inode)->alloc_size);
+ }
+ OCFS_BH_PUT_DATA(alloc_bh);
+
+ //newFileSize = alloc_inode->i_size;
+ //allocSize = OCFS_I(alloc_inode)->alloc_size;
+
/* This is for OUI optimzation to allocate more disk
* space for directory allocations */
if (allocSize > 0)
extent *= 2;
- status = ocfs_extend_system_file (osb, alloc_file,
- newFileSize + extent, NULL,
- handle, 0);
+ LOG_TRACE_ARGS("extending the alloc file to %llu\n",
+ newFileSize + extent);
+ status = ocfs_extend_file(osb, newFileSize + extent,
+ GET_INODE_FEOFF(alloc_inode),
+ handle, alloc_inode, NULL, 1,
+ NULL);
if (status < 0) {
LOG_ERROR_STATUS (status);
goto leave;
@@ -3037,9 +3285,10 @@
/* Does this need the buffer_head? if so, we need to
* do a put_data first! */
/* Calculate the new bitmap size */
-
- status = ocfs_extend_system_file (osb, bm_file, bitMapSize, bh,
- handle, 1);
+
+ LOG_TRACE_ARGS("extending the bitmap file to %llu\n", bitMapSize);
+ status = ocfs_extend_file (osb, bitMapSize, GET_INODE_FEOFF(inode),
+ handle, inode, NULL, 1, bh);
if (status < 0) {
LOG_ERROR_STATUS (status);
goto leave;
@@ -3047,46 +3296,56 @@
fe = OCFS_BH_GET_DATA_READ(bh);
/* we wrote it back out in ocfs_extend_system_file so
* we can trust the sizes here */
- fileSize = fe->file_size;
- allocSize = fe->alloc_size;
+ fileSize = fe->i_size;
+ allocSize = (u64)fe->i_clusters << osb->s_clustersize_bits;
+ LOG_TRACE_ARGS("fileSize=%llu, allocSize=%llu\n",
+ fileSize, allocSize);
OCFS_BH_PUT_DATA(bh);
if (needs_uninit)
- ocfs_reinitialize_bitmap(&bitmap, fileSize * 8,
+ ocfs_reinitialize_bitmap(osb->sb, &bitmap,
+ fileSize * 8,
allocSize * 8);
else
- ocfs_initialize_bitmap(&bitmap, fileSize * 8,
+ ocfs_initialize_bitmap(osb->sb, &bitmap,
+ fileSize * 8,
allocSize * 8);
needs_uninit = 1;
- status = ocfs_read_system_file (osb, bm_file, bitmap.chunk,
- allocSize, offset);
+ status = ocfs_read_system_file(osb, bm_file, NodeNum,
+ bitmap.chunk, allocSize);
if (status < 0) {
LOG_ERROR_STATUS (status);
goto leave;
}
- foundBit = prevFileSize * 8;
+ foundBit = ocfs_find_clear_bits(osb, &bitmap, numBits, 0, 0);
delay_lockrel = 1;
if (Type == DISK_ALLOC_EXTENT_NODE)
atomic_inc(&osb->alloc_stats.ext_extends);
}
- LOG_TRACE_ARGS ("byte offset=%d\n", foundBit);
+ LOG_TRACE_ARGS ("bit offset=%d, num=%d\n", foundBit, numBits);
- ocfs_set_bits (handle, &bitmap, foundBit, numBits);
+ ocfs_set_bits(osb->sb, handle, &bitmap, foundBit, numBits);
/* only write out what has changed... */
- startbh = OCFS_GLOBAL_OFF_TO_CHUNK(foundBit);
- numblocks = OCFS_GLOBAL_OFF_TO_CHUNK(foundBit + numBits - 1) - startbh + 1;
+ numblocks = ocfs_bitmap_blocks_affected(osb->sb,
+ foundBit,
+ numBits,
+ &startbh);
LOG_TRACE_ARGS ("offset=%u, type=%x, blksz=%u, foundbit=%u, fileid=%u\n",
foundBit * blockSize, Type, blockSize, foundBit, alloc_file);
- *DiskOffset = ocfs_file_to_disk_off (osb, (alloc_file),
- (foundBit * blockSize));
- if (*DiskOffset == 0) {
- LOG_ERROR_STATUS(status = -EFAIL);
+
+ status = ocfs_lookup_file_allocation(osb, foundBit * blockSize,
+ DiskOffset, 1, NULL,
+ alloc_inode, 1);
+ if (status < 0 || *DiskOffset == 0) {
+ if (!status)
+ status = -EINVAL;
+ LOG_ERROR_STATUS(status);
goto leave;
}
@@ -3104,9 +3363,13 @@
if (inode)
iput(inode);
+ if (alloc_inode)
+ iput(alloc_inode);
if (bh != NULL)
brelse(bh);
+ if (alloc_bh != NULL)
+ brelse(alloc_bh);
LOG_EXIT_STATUS (status);
return status;
@@ -3116,22 +3379,24 @@
* ocfs_free_file_extents()
*
*/
-int ocfs_free_file_extents (ocfs_super * osb, struct buffer_head *fe_bh,
- ocfs_journal_handle *handle, struct inode *inode)
+int ocfs_free_file_extents(ocfs_super *osb, struct buffer_head *fe_bh,
+ ocfs_journal_handle *handle,
+ struct inode *inode)
{
int status = 0;
__u32 i;
__u32 numBitsAllocated = 0, bitmapOffset = 0;
- ocfs_extent_group *extent = NULL;
+ ocfs2_extent_block *extent = NULL;
struct buffer_head *extent_bh = NULL;
- ocfs_file_entry *fe = NULL;
+ ocfs2_extent_list *fel;
+ ocfs2_dinode *fe = NULL;
__u64 offset;
LOG_ENTRY ();
offset = GET_INODE_FEOFF(inode);
fe = OCFS_BH_GET_DATA_READ(fe_bh);
- if (offset != fe->this_sector)
+ if (offset != (fe->i_blkno << osb->sb->s_blocksize_bits))
BUG();
OCFS_BH_PUT_DATA(fe_bh);
@@ -3139,31 +3404,34 @@
SET_BH_SEQNUM(inode, fe_bh);
fe = OCFS_BH_GET_DATA_READ(fe_bh);
+ fel = &fe->id2.i_list;
- if (fe->local_ext) {
- for (i = 0; i < fe->next_free_ext; i++) {
- numBitsAllocated = (__u32) (fe->extents[i].num_bytes >>
- osb->cluster_size_bits);
+ if (fel->l_tree_depth < 0) {
+ for (i = 0; i < fel->l_next_free_rec; i++) {
+ numBitsAllocated = fel->l_recs[i].e_clusters;
bitmapOffset =
- (__u32) ((fe->extents[i].disk_off -
- osb->vol_layout.data_start_off) >>
- osb->cluster_size_bits);
+ (__u32)(((fel->l_recs[i].e_blkno << osb->sb->s_blocksize_bits)) >>
+ osb->s_clustersize_bits);
ocfs_handle_add_commit_bits(handle, numBitsAllocated,
bitmapOffset, -1,
DISK_ALLOC_VOLUME);
}
} else {
- for (i = 0; i < fe->next_free_ext; i++) {
- status = ocfs_read_bh(osb, fe->extents[i].disk_off,
- &extent_bh, OCFS_BH_COND_CACHED, inode);
+ for (i = 0; i < fel->l_next_free_rec; i++) {
+ status = ocfs_read_bh(osb,
+ fel->l_recs[i].e_blkno << osb->sb->s_blocksize_bits,
+ &extent_bh,
+ OCFS_BH_COND_CACHED,
+ inode);
if (status < 0) {
LOG_ERROR_STATUS (status);
goto leave;
}
extent = OCFS_BH_GET_DATA_READ(extent_bh);
- if ((fe->granularity && (!IS_VALID_EXTENT_HEADER(extent))) && !IS_VALID_EXTENT_DATA(extent)) {
+ if (fel->l_tree_depth &&
+ !IS_VALID_EXTENT_BLOCK(extent)) {
status = -EINVAL;
LOG_ERROR_STATUS(status);
goto leave;
@@ -3216,7 +3484,7 @@
{
int numbits;
- switch (osb->vol_layout.cluster_size) {
+ switch (osb->s_clustersize) {
case (4*1024):
numbits = 1024;
break;
@@ -3239,7 +3507,7 @@
/*
* ocfs_alloc_count_bits
*/
-static __u32 ocfs_alloc_count_bits(ocfs_local_alloc *alloc)
+static __u32 ocfs_alloc_count_bits(ocfs2_dinode *alloc)
{
int i;
__u8 tmp;
@@ -3248,8 +3516,8 @@
LOG_ENTRY();
- buffer = &(alloc->bitmap[0]);
- for (i = 0; i < 256; i++) {
+ buffer = LOCAL_ALLOC(alloc)->la_bitmap;
+ for (i = 0; i < LOCAL_ALLOC(alloc)->la_size; i++) {
memcpy(&tmp, buffer, 1);
count+= BITCOUNT(tmp);
buffer++;
@@ -3262,16 +3530,16 @@
/*
* ocfs_clear_local_alloc
*/
-static void ocfs_clear_local_alloc(ocfs_local_alloc *alloc)
+static void ocfs_clear_local_alloc(ocfs2_dinode *alloc)
{
int i;
LOG_ENTRY();
- alloc->alloc_size = 0;
- alloc->num_used = 0;
- alloc->bitmap_start = 0;
- for(i = 0; i < 256; i++)
- alloc->bitmap[i] = 0;
+ LOCAL_ALLOC(alloc)->la_bm_bits = 0;
+ LOCAL_ALLOC(alloc)->la_bits_set = 0;
+ LOCAL_ALLOC(alloc)->la_bm_off = 0;
+ for(i = 0; i < LOCAL_ALLOC(alloc)->la_size; i++)
+ LOCAL_ALLOC(alloc)->la_bitmap[i] = 0;
LOG_EXIT();
return;
@@ -3287,7 +3555,7 @@
*/
static int ocfs_sync_local_to_main(ocfs_super *osb,
ocfs_journal_handle *handle,
- ocfs_local_alloc *alloc,
+ ocfs2_dinode *alloc,
struct inode *main_bm_inode)
{
int status = 0;
@@ -3295,41 +3563,45 @@
void *bitmap;
unsigned int start, numblocks, bitmapblocks;
- LOG_ENTRY_ARGS("alloc->alloc_size = %u, COUNT = %u, num_used = %u\n",
- alloc->alloc_size, ocfs_alloc_count_bits(alloc),
- alloc->num_used);
+ LOG_ENTRY_ARGS("alloc->la_bm_bits = %u, COUNT = %u, la_bits_set = %u\n",
+ LOCAL_ALLOC(alloc)->la_bm_bits,
+ ocfs_alloc_count_bits(alloc),
+ LOCAL_ALLOC(alloc)->la_bits_set);
- if (alloc->alloc_size == 0) {
+ if (LOCAL_ALLOC(alloc)->la_bm_bits == 0) {
LOG_TRACE_STR("nothing to sync!");
goto bail;
}
- bitmapblocks = (OCFS_ALIGN(osb->cluster_bitmap.validbits,
- OCFS_BITS_IN_CHUNK) / OCFS_BITS_IN_CHUNK);
+ bitmapblocks =
+ ocfs_blocks_for_bits(osb->sb,
+ osb->cluster_bitmap.validbits);
+
/* figure out which block in the bitmap to start on and the
* maximum number of blocks we can span over -- we don't need
* to read any more as that's the most we'll be touching... */
- start = OCFS_GLOBAL_OFF_TO_CHUNK(alloc->bitmap_start);
- numblocks = 2 + OCFS_GLOBAL_OFF_TO_CHUNK(alloc->bitmap_start + alloc->alloc_size) - start;
+ numblocks = ocfs_bitmap_blocks_affected(osb->sb,
+ LOCAL_ALLOC(alloc)->la_bm_off,
+ LOCAL_ALLOC(alloc)->la_bits_set,
+ &start);
- if ((start + numblocks) > bitmapblocks)
- numblocks--;
if ((start + numblocks) > bitmapblocks) {
printk("uhoh, bitmap calculation is bad!\n");
- printk("alloc->alloc_size = %u, COUNT = %u, num_used = %u"
- "start=%u, bitmap_start = %u, numblocks=%u, "
+ printk("alloc->la_bm_bits = %u, COUNT = %u, alloc->la_bits_set = %u"
+ "start=%u, alloc->la_bm_off = %u, numblocks=%u, "
"bitmapblocks = %u\n",
- alloc->alloc_size, ocfs_alloc_count_bits(alloc),
- alloc->num_used, start, alloc->bitmap_start, numblocks,
+ LOCAL_ALLOC(alloc)->la_bm_bits, ocfs_alloc_count_bits(alloc),
+ LOCAL_ALLOC(alloc)->la_bits_set, start, LOCAL_ALLOC(alloc)->la_bm_off, numblocks,
bitmapblocks);
BUG();
}
- LOG_TRACE_ARGS("start=%u, bitmap_start = %u, numblocks=%u\n", start,
- alloc->bitmap_start, numblocks);
- status = ocfs_read_bhs(osb, osb->vol_layout.bitmap_off,
- numblocks * osb->sect_size,
+ LOG_TRACE_ARGS("start=%u, alloc->la_bm_off = %u, numblocks=%u\n", start,
+ LOCAL_ALLOC(alloc)->la_bm_off, numblocks);
+ status = ocfs_read_bhs(osb,
+ (osb->bitmap_blkno + start) << osb->sb->s_blocksize_bits,
+ numblocks << osb->sb->s_blocksize_bits,
&osb->cluster_bitmap.chunk[start], 0,
main_bm_inode);
if (status < 0) {
@@ -3337,10 +3609,10 @@
goto bail;
}
- bitmap = alloc->bitmap;
+ bitmap = LOCAL_ALLOC(alloc)->la_bitmap;
/* any unset bits in local alloc need to be unset in bitmap. */
bit_off = 0;
- left = alloc->alloc_size;
+ left = LOCAL_ALLOC(alloc)->la_bm_bits;
while ((bit_off = find_next_zero_bit(bitmap, left, bit_off))
!= -1) {
if (bit_off >= left) {
@@ -3349,9 +3621,10 @@
}
LOG_TRACE_ARGS("Clearing bit %u in main bitmap\n",
- bit_off + alloc->bitmap_start);
- ocfs_clear_bits(handle, &osb->cluster_bitmap,
- bit_off + alloc->bitmap_start, 1);
+ bit_off + LOCAL_ALLOC(alloc)->la_bm_off);
+ ocfs_clear_bits(osb->sb, handle, &osb->cluster_bitmap,
+ bit_off + LOCAL_ALLOC(alloc)->la_bm_off,
+ 1);
bit_off++;
}
@@ -3366,13 +3639,13 @@
* without a journal handle -- used during shutdown and recovery.
*/
static int ocfs_sync_local_from_shutdown(ocfs_super *osb,
- ocfs_bitmap_free_head **f,
- struct buffer_head *local_alloc_bh,
- int in_recovery)
+ ocfs_bitmap_free_head **f,
+ struct buffer_head *local_alloc_bh,
+ int in_recovery)
{
int status = 0;
int bit_off, left;
- ocfs_local_alloc *alloc = NULL;
+ ocfs2_dinode *alloc = NULL;
void *bitmap;
LOG_ENTRY();
@@ -3381,7 +3654,7 @@
BUG();
alloc = OCFS_BH_GET_DATA_READ(local_alloc_bh);
- if (alloc->alloc_size == 0) {
+ if (LOCAL_ALLOC(alloc)->la_bm_bits == 0) {
OCFS_BH_PUT_DATA(local_alloc_bh);
LOG_TRACE_STR("nothing to sync!");
goto bail;
@@ -3398,15 +3671,16 @@
alloc = OCFS_BH_GET_DATA_READ(local_alloc_bh);
- LOG_TRACE_ARGS("alloc->alloc_size = %u, COUNT = %u, num_used = %u\n",
- alloc->alloc_size, ocfs_alloc_count_bits(alloc),
- alloc->num_used);
+ LOG_TRACE_ARGS("alloc->la_bm_bits = %u, COUNT = %u, la_bits_set = %u\n",
+ LOCAL_ALLOC(alloc)->la_bm_bits,
+ ocfs_alloc_count_bits(alloc),
+ LOCAL_ALLOC(alloc)->la_bits_set);
- bitmap = alloc->bitmap;
+ bitmap = LOCAL_ALLOC(alloc)->la_bitmap;
/* any unset bits in local alloc need to be unset in bitmap. */
bit_off = 0;
- left = alloc->alloc_size;
+ left = LOCAL_ALLOC(alloc)->la_bm_bits;
while ((bit_off = find_next_zero_bit(bitmap, left, bit_off))
!= -1) {
if (bit_off >= left) {
@@ -3416,7 +3690,7 @@
/* LOG_TRACE_ARGS("Clearing bit %u in main bitmap\n", bit_off);*/
status = ocfs_add_to_bitmap_free_head(osb,
*f, 1,
- bit_off + alloc->bitmap_start,
+ bit_off + LOCAL_ALLOC(alloc)->la_bm_off,
-1, DISK_ALLOC_VOLUME);
if (status < 0) {
ocfs_free_bitmap_free_head(*f);
@@ -3444,18 +3718,18 @@
{
int status = 0;
__u64 alloc_bytes, cluster_off, cluster_count;
- ocfs_local_alloc *alloc = NULL;
+ ocfs2_dinode *alloc = NULL;
LOG_ENTRY();
alloc = OCFS_BH_GET_DATA_READ(osb->local_alloc_bh);
- if (alloc->alloc_size != 0)
+ if (LOCAL_ALLOC(alloc)->la_bm_bits != 0)
LOG_TRACE_STR("asking me to alloc a new window over a"
" non-empty one");
OCFS_BH_PUT_DATA(osb->local_alloc_bh);
/* we try to use find_contig_space_from_bitmap here for now. */
- alloc_bytes = ocfs_local_alloc_window_bits(osb) << osb->cluster_size_bits;
+ alloc_bytes = (u64)ocfs_local_alloc_window_bits(osb) << osb->s_clustersize_bits;
LOG_TRACE_ARGS("Allocating %llu bytes (%u clusters) for a "
"new window.\n", alloc_bytes,
ocfs_local_alloc_window_bits(osb));
@@ -3473,18 +3747,21 @@
alloc = OCFS_BH_GET_DATA_WRITE(osb->local_alloc_bh);
- alloc->bitmap_start = cluster_off;
- alloc->alloc_size = cluster_count;
+ LOCAL_ALLOC(alloc)->la_bm_off = cluster_off;
+ LOCAL_ALLOC(alloc)->la_bm_bits = cluster_count;
/* just in case... In the future when we find space ourselves,
* we don't have to get all contiguous -- but we'll have to
* set all previously used bits in bitmap and update
- * num_used before setting the bits in the main bitmap. */
- alloc->num_used = 0;
- memset(alloc->bitmap, 0, 256);
+ * la_bits_set before setting the bits in the main bitmap. */
+ LOCAL_ALLOC(alloc)->la_bits_set = 0;
+ memset(LOCAL_ALLOC(alloc)->la_bitmap, 0,
+ LOCAL_ALLOC(alloc)->la_size);
LOG_TRACE_STR("New window allocated:");
- LOG_TRACE_ARGS("window bitmap_start = %u\n", alloc->bitmap_start);
- LOG_TRACE_ARGS("window alloc_size = %u\n", alloc->alloc_size);
+ LOG_TRACE_ARGS("window la_bm_off = %u\n",
+ LOCAL_ALLOC(alloc)->la_bm_off);
+ LOG_TRACE_ARGS("window la_bm_bits = %u\n",
+ LOCAL_ALLOC(alloc)->la_bm_bits);
OCFS_BH_PUT_DATA(osb->local_alloc_bh);
@@ -3496,7 +3773,8 @@
/*
* ocfs_local_find_clear_bits
*/
-static int ocfs_local_find_clear_bits(ocfs_super *osb, ocfs_local_alloc *alloc,
+static int ocfs_local_find_clear_bits(ocfs_super *osb,
+ ocfs2_dinode *alloc,
__u32 numbits)
{
int numfound, bitoff, left, startoff, lastzero;
@@ -3504,11 +3782,11 @@
LOG_ENTRY_ARGS("(numbits wanted = %u)\n", numbits);
- bitmap = alloc->bitmap;
+ bitmap = LOCAL_ALLOC(alloc)->la_bitmap;
numfound = bitoff = startoff = 0;
lastzero = -1;
- left = alloc->alloc_size;
+ left = LOCAL_ALLOC(alloc)->la_bm_bits;
while ((bitoff = find_next_zero_bit(bitmap, left, startoff)) != -1) {
if (bitoff == left) {
/* LOG_TRACE_ARGS("bitoff (%d) == left", bitoff); */
@@ -3554,7 +3832,7 @@
__u64 * bitoff, __u64 * bitcount,
ocfs_journal_handle *handle)
{
- ocfs_local_alloc *alloc = NULL;
+ ocfs2_dinode *alloc = NULL;
int status = 0, tmpstat;
int startoff, tmpoff;
__u32 tmpwanted;
@@ -3584,7 +3862,7 @@
alloc = OCFS_BH_GET_DATA_WRITE(osb->local_alloc_bh);
tryagain:
/* If we need to initialize a new window, do so now. */
- if (alloc->alloc_size == 0) {
+ if (LOCAL_ALLOC(alloc)->la_bm_bits == 0) {
OCFS_BH_PUT_DATA(osb->local_alloc_bh);
alloc = NULL;
LOG_TRACE_STR("Allocating a new window...");
@@ -3627,7 +3905,7 @@
alloc = NULL;
if (!main_bm_inode)
- main_bm_inode = igrab(osb->system_inodes[GLOBAL_BITMAP_SYSTEM_INODE]);
+ main_bm_inode = ocfs_get_system_file_inode(osb, GLOBAL_BITMAP_SYSTEM_INODE, -1);
if (!main_bm_inode) {
status = -EINVAL;
@@ -3639,9 +3917,9 @@
ocfs_handle_add_inode(handle, main_bm_inode);
/* Get the allocation lock here */
- status = ocfs_acquire_lock (osb, OCFS_DLM_EXCLUSIVE_LOCK, 0,
- &main_bm_bh,
- main_bm_inode);
+ status = ocfs_acquire_lock(osb, OCFS_DLM_EXCLUSIVE_LOCK,
+ 0, &main_bm_bh,
+ main_bm_inode);
if (status < 0) {
main_bm_bh = NULL;
if (status != -EINTR)
@@ -3676,16 +3954,16 @@
/* Ok, if we've got this far then the search suceeded and we
* can mark the bitmap. */
- bitmap = alloc->bitmap;
+ bitmap = LOCAL_ALLOC(alloc)->la_bitmap;
tmpoff = startoff;
tmpwanted = bitswanted;
while(tmpwanted--) {
/* LOG_TRACE_ARGS("setting bit %d\n", tmpoff); */
set_bit(tmpoff++, bitmap);
}
- alloc->num_used += bitswanted;
+ LOCAL_ALLOC(alloc)->la_bits_set += bitswanted;
- *bitoff = alloc->bitmap_start + startoff;
+ *bitoff = LOCAL_ALLOC(alloc)->la_bm_off + startoff;
*bitcount = bitswanted;
OCFS_BH_PUT_DATA(osb->local_alloc_bh);
@@ -3746,10 +4024,14 @@
* We *always* use global bitmap for clustersize > 128k,
* file_size > 2mb, so force it under these conditions. */
if ( (handle && osb->have_local_alloc)
- && (osb->vol_layout.cluster_size <= OCFS_LOCAL_ALLOC_MAX_CSIZE)
+ && (osb->s_clustersize <= OCFS_LOCAL_ALLOC_MAX_CSIZE)
&& (file_size <= OCFS_LOCAL_ALLOC_MAX_ALLOC) )
use_global = 0;
+ /* TODO: let's find a way to use local alloc for system files someday ;-) */
+ if (sysfile)
+ use_global = 1;
+
if (!use_global) {
if (handle->flags & OCFS_HANDLE_LOCAL_ALLOC) {
printk("whoa, I already have local alloc sem!?!\n");
@@ -3800,17 +4082,20 @@
int ocfs_load_local_alloc(ocfs_super *osb)
{
int status = 0;
- ocfs_local_alloc *alloc = NULL;
+ ocfs2_dinode *alloc = NULL;
struct buffer_head *alloc_bh = NULL;
- __u64 offset;
__u32 num_used;
+ struct inode *inode = NULL;
LOG_ENTRY();
/* read the alloc off disk */
- offset = ((OCFS_LOCAL_ALLOC_FILE + osb->node_num) * osb->sect_size) +
- osb->vol_layout.root_int_off;
- status = ocfs_read_bh(osb, offset, &alloc_bh, 0, NULL);
+ inode = ocfs_get_system_file_inode(osb, LOCAL_ALLOC_SYSTEM_INODE, osb->node_num);
+ if (!inode) {
+ LOG_ERROR_STATUS(status=-EINVAL);
+ goto bail;
+ }
+ status = ocfs_read_bh(osb, GET_INODE_FEOFF(inode), &alloc_bh, 0, inode);
if (status < 0) {
LOG_ERROR_STATUS(status);
goto bail;
@@ -3821,9 +4106,10 @@
/* do a little verification. */
num_used = ocfs_alloc_count_bits(alloc);
- if (num_used != alloc->num_used) {
- LOG_TRACE_ARGS("num_used = %u, alloc->num_used = %u!\n",
- num_used, alloc->num_used);
+ if (num_used != LOCAL_ALLOC(alloc)->la_bits_set) {
+ LOG_TRACE_ARGS("num_used = %u, alloc->la_bits_set = %u!\n",
+ num_used,
+ LOCAL_ALLOC(alloc)->la_bits_set);
OCFS_BH_PUT_DATA(alloc_bh);
status = -EFAIL;
goto bail;
@@ -3837,6 +4123,8 @@
if (status < 0)
if (alloc_bh)
brelse(alloc_bh);
+ if (inode)
+ iput(inode);
LOG_EXIT_STATUS(status);
return(status);
@@ -3857,7 +4145,7 @@
int in_recovery)
{
int status;
- ocfs_local_alloc *alloc = NULL;
+ ocfs2_dinode *alloc = NULL;
ocfs_bitmap_free_head *f = NULL;
struct buffer_head *bh = NULL;
@@ -3914,13 +4202,17 @@
{
int status = 0;
struct buffer_head *alloc_bh = NULL;
- __u64 offset;
+ struct inode *inode = NULL;
LOG_ENTRY_ARGS("(node_num = %d)\n", node_num);
- offset = ((OCFS_LOCAL_ALLOC_FILE + node_num) * osb->sect_size) +
- osb->vol_layout.root_int_off;
- status = ocfs_read_bh(osb, offset, &alloc_bh, 0, NULL);
+ inode = ocfs_get_system_file_inode(osb, LOCAL_ALLOC_SYSTEM_INODE, node_num);
+ if (!inode) {
+ LOG_ERROR_STATUS(status=-EINVAL);
+ goto bail;
+ }
+
+ status = ocfs_read_bh(osb, GET_INODE_FEOFF(inode), &alloc_bh, 0, inode);
if (status < 0) {
LOG_ERROR_STATUS(status);
goto bail;
@@ -3932,6 +4224,9 @@
if (alloc_bh)
brelse(alloc_bh);
+ if (inode)
+ iput(inode);
+
LOG_EXIT_STATUS(status);
return(status);
} /* ocfs_recover_local_alloc */
Added: trunk/src/alloc.h
===================================================================
--- trunk/src/alloc.h 2004-06-17 16:59:59 UTC (rev 1127)
+++ trunk/src/alloc.h 2004-06-17 21:06:33 UTC (rev 1128)
@@ -0,0 +1,79 @@
+/* -*- mode: c; c-basic-offset: 8; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
+ * alloc.h
+ *
+ * Function prototypes
+ *
+ * Copyright (C) 2002, 2004 Oracle. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ *
+ * Authors: Kurt Hackel, Mark Fasheh, Sunil Mushran, Wim Coekaerts,
+ * Manish Singh, Neeraj Goyal, Suchit Kaura
+ */
+
+#ifndef OCFS2_ALLOC_H
+#define OCFS2_ALLOC_H
+
+int ocfs_add_to_bitmap_free_head(ocfs_super *osb,
+ ocfs_bitmap_free_head *f,
+ __u32 len, __u32 fileoff,
+ __u32 nodenum, __u32 type);
+int ocfs_allocate_extent(ocfs_super *osb, struct buffer_head *fe_bh,
+ ocfs_journal_handle *handle,
+ u64 blkno, u32 new_clusters,
+ struct inode *inode);
+int ocfs_alloc_node_block(ocfs_super *osb, __u64 FileSize,
+ __u64 *DiskOffset, __u64 *file_off,
+ __u32 NodeNum, __u32 Type,
+ ocfs_journal_handle *handle);
+int ocfs_find_contiguous_space_from_bitmap(ocfs_super *osb,
+ ocfs_journal_handle *handle,
+ __u64 file_size,
+ __u64 *cluster_off,
+ __u64 *cluster_count,
+ int sysfile,
+ struct buffer_head *lock_bh,
+ struct inode *bitmap_inode);
+int ocfs_find_space(ocfs_super *osb, __u64 file_size,
+ __u64 *cluster_off, __u64 *cluster_count,
+ int sysfile, ocfs_journal_handle *handle);
+int ocfs_free_extents_for_truncate(ocfs_super *osb,
+ ocfs2_dinode *fe,
+ ocfs_journal_handle *handle,
+ struct inode *inode);
+int ocfs_free_file_extents(ocfs_super *osb, struct buffer_head *fe_bh,
+ ocfs_journal_handle *handle,
+ struct inode *inode);
+int ocfs_get_leaf_extent(ocfs_super *osb, ocfs2_dinode *fe,
+ __s64 Vbo, struct buffer_head **data_extent_bh,
+ struct inode *inode);
+int ocfs_load_local_alloc(ocfs_super *osb);
+int ocfs_lookup_file_allocation(ocfs_super *osb, __s64 Vbo,
+ __s64 *Lbo, __u32 blocks,
+ __s64 *contig_bytes,
+ struct inode *inode,
+ int locked);
+int ocfs_process_bitmap_free_head(ocfs_super *osb,
+ ocfs_bitmap_free_head *f);
+int ocfs_recover_local_alloc(ocfs_super *osb, int node_num);
+void ocfs_shutdown_local_alloc(ocfs_super *osb,
+ struct buffer_head **local_alloc_bh,
+ int sync,
+ int in_recovery);
+
+#endif /* OCFS2_ALLOC_H */
Modified: trunk/src/bitmap.c
===================================================================
--- trunk/src/bitmap.c 2004-06-17 16:59:59 UTC (rev 1127)
+++ trunk/src/bitmap.c 2004-06-17 21:06:33 UTC (rev 1128)
@@ -1,4 +1,6 @@
-/*
+/* -*- mode: c; c-basic-offset: 8; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
* bitmap.c
*
* Bitmap infrastructure code
@@ -24,17 +26,23 @@
* Manish Singh, Neeraj Goyal, Suchit Kaura
*/
-#if defined(__KERNEL__)
+#include "ocfs_compat.h"
+
+#include <linux/fs.h>
#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/highmem.h>
-#include "inc/ocfs_log.h"
-#include "inc/ocfs.h"
-#include "inc/ocfs_journal.h"
-#else
-#include <asm/bitops.h>
-#include <dummy.h>
-#endif
+#include "ocfs_log.h"
+#include "ocfs.h"
+#include "alloc.h"
+#include "bitmap.h"
+#include "util.h"
+
+#include "ocfs_journal.h"
+#include "ocfs_buffer_head.h"
+
/* Tracing */
#define OCFS_DEBUG_CONTEXT OCFS_DEBUG_CONTEXT_BITMAP
@@ -42,19 +50,19 @@
* ocfs_initialize_bitmap()
*
*/
-void ocfs_initialize_bitmap (ocfs_alloc_bm * bitmap, __u32 validbits, __u32 allocbits)
+void ocfs_initialize_bitmap(struct super_block *sb,
+ ocfs_alloc_bm *bitmap, __u32 validbits,
+ __u32 allocbits)
{
- __u32 tmp;
+ LOG_ENTRY_ARGS ("(0x%p, 0x%p, %u, %u)\n", sb, bitmap, validbits,
+ allocbits);
- LOG_ENTRY_ARGS ("(0x%p, %u, %u)\n", bitmap, validbits, allocbits);
-
bitmap->validbits = validbits;
bitmap->allocbits = allocbits;
bitmap->failed = 0;
bitmap->ok_retries = 0;
- tmp = OCFS_ALIGN(allocbits, OCFS_BITS_IN_CHUNK);
- bitmap->numbh = (tmp / OCFS_BITS_IN_CHUNK);
+ bitmap->numbh = ocfs_blocks_for_bits(sb, bitmap->allocbits);
bitmap->chunk = ocfs_malloc(bitmap->numbh * sizeof(struct buffer_head *));
if (!bitmap->chunk)
BUG();
@@ -75,7 +83,7 @@
if (bitmap->chunk[i])
brelse(bitmap->chunk[i]);
- ocfs_free(bitmap->chunk);
+ kfree(bitmap->chunk);
bitmap->chunk = NULL;
bitmap->numbh = 0;
bitmap->validbits = 0;
@@ -88,9 +96,10 @@
}
/* we only handle if the bitmap has grown, not shrunk. */
-void ocfs_reinitialize_bitmap(ocfs_alloc_bm *bitmap, __u32 validbits, __u32 allocbits)
+void ocfs_reinitialize_bitmap(struct super_block *sb,
+ ocfs_alloc_bm *bitmap, __u32 validbits,
+ __u32 allocbits)
{
- __u32 tmp;
__u32 newnumbh;
struct buffer_head **newchunk;
@@ -105,8 +114,7 @@
if (allocbits == bitmap->allocbits)
goto skipbhs;
- tmp = OCFS_ALIGN(allocbits, OCFS_BITS_IN_CHUNK);
- newnumbh = (tmp / OCFS_BITS_IN_CHUNK);
+ newnumbh = ocfs_blocks_for_bits(sb, allocbits);
newchunk = ocfs_malloc(bitmap->numbh * sizeof(struct buffer_head *));
if (!newchunk)
@@ -116,7 +124,7 @@
memcpy(newchunk, bitmap->chunk, (bitmap->numbh * sizeof(struct buffer_head *)));
- ocfs_free(bitmap->chunk);
+ kfree(bitmap->chunk);
bitmap->numbh = newnumbh;
bitmap->chunk = newchunk;
@@ -161,9 +169,12 @@
* in case we have a disk full.
*
*/
-int ocfs_find_clear_bits (ocfs_super *osb, ocfs_alloc_bm * bitmap, __u32 numBits, __u32 offset, __u32 sysonly)
+int ocfs_find_clear_bits(ocfs_super *osb, ocfs_alloc_bm * bitmap,
+ __u32 numBits, __u32 offset, __u32 sysonly)
{
- __u32 globalsize, globaloff, localstart, lastbh, size, bitoff, count;
+ __u32 globalsize, globaloff, localstart, lastbh;
+ __u32 size = OCFS_BITS_IN_CHUNK(osb->sb);
+ __u32 bitoff = 0, count = 0;
void *buffer;
int c;
struct buffer_head *currbh = NULL;
@@ -172,19 +183,14 @@
offset, sysonly);
globalsize = bitmap->validbits - sysonly;
- bitoff = count = 0;
- lastbh = (OCFS_ALIGN(globalsize, OCFS_BITS_IN_CHUNK) /
- OCFS_BITS_IN_CHUNK) - 1;
+ lastbh = ocfs_blocks_for_bits(osb->sb, globalsize) - 1;
globaloff = offset;
- localstart = OCFS_GLOBAL_OFF_TO_LOCAL(globaloff);
+ ocfs_bitmap_block_for_off(osb->sb, globaloff, &c, &localstart);
if (lastbh == 0)
size = globalsize;
- else
- size = OCFS_BITS_IN_CHUNK;
- c = OCFS_GLOBAL_OFF_TO_CHUNK(localstart);
currbh = bitmap->chunk[c];
/*LOG_TRACE_ARGS("globalsize=%u, bitmap->validbits=%u, sysonly=%u\n",
@@ -193,7 +199,8 @@
"localstart=%u\n", c, lastbh, size, localstart);*/
buffer = OCFS_BH_GET_DATA_READ(currbh);
- while ((bitoff = find_next_zero_bit(buffer, OCFS_BITS_IN_CHUNK,
+ while ((bitoff = find_next_zero_bit(buffer,
+ OCFS_BITS_IN_CHUNK(osb->sb),
localstart)) != -1) {
/*LOG_TRACE_ARGS("c=%u, globaloff=%u, bitoff=%u, "
"localstart=%u\n", c, globaloff, bitoff,
@@ -223,8 +230,10 @@
currbh = bitmap->chunk[c];
buffer = OCFS_BH_GET_DATA_READ(currbh);
if (c == lastbh)
- size = globalsize-(OCFS_BITS_IN_CHUNK*lastbh);
- globaloff = c * OCFS_BITS_IN_CHUNK;
+ size = globalsize -
+ (OCFS_BITS_IN_CHUNK(osb->sb)
+ * lastbh);
+ globaloff = c * OCFS_BITS_IN_CHUNK(osb->sb);
continue;
}
@@ -237,7 +246,9 @@
if (localstart >= size)
goto nextbh;
- globaloff = OCFS_CHUNK_TO_GLOBAL_OFF(c, bitoff) + 1;
+ globaloff =
+ ocfs_bitmap_off_for_block(osb->sb, c,
+ bitoff) + 1;
} else if (bitoff == localstart) {
/*LOG_TRACE_ARGS("bitoff == localstart (%u)\n",
bitoff);*/
@@ -250,7 +261,9 @@
bitoff, localstart);*/
/* we had to skip over some ones */
count = 1;
- globaloff = OCFS_CHUNK_TO_GLOBAL_OFF(c, bitoff) + 1;
+ globaloff =
+ ocfs_bitmap_off_for_block(osb->sb, c,
+ bitoff) + 1;
localstart = bitoff + 1;
}
@@ -276,7 +289,7 @@
* ocfs_count_bits()
*
*/
-int ocfs_count_bits (ocfs_alloc_bm * bitmap)
+int ocfs_count_bits(struct super_block *sb, ocfs_alloc_bm *bitmap)
{
__u32 count = 0, i, j;
struct buffer_head *currbh;
@@ -287,14 +300,17 @@
LOG_ENTRY ();
size = (bitmap->validbits >> 3);
+ validbytes = size;
+ if (validbytes > sb->s_blocksize)
+ validbytes = sb->s_blocksize;
- for (i = 0, validbytes = (size >= OCFS_BITMAP_CHUNK ? OCFS_BITMAP_CHUNK : size);
- size > 0; size -= validbytes, i++) {
+ for (i = 0; size > 0; size -= validbytes, i++) {
currbh = bitmap->chunk[i];
buffer = OCFS_BH_GET_DATA_READ(currbh);
for (j = 0; j < validbytes; j++) {
- memcpy (&tmp, buffer, 1);
- count += BITCOUNT (tmp);
+ /* U-U-U-GLY */
+ memcpy(&tmp, buffer, 1);
+ count += BITCOUNT(tmp);
buffer++;
}
OCFS_BH_PUT_DATA(currbh);
@@ -312,15 +328,16 @@
* ocfs_set_bits()
*
*/
-void ocfs_set_bits (ocfs_journal_handle *handle, ocfs_alloc_bm * bitmap,
- __u32 start, __u32 num)
+void ocfs_set_bits(struct super_block *sb, ocfs_journal_handle *handle,
+ ocfs_alloc_bm * bitmap, __u32 start, __u32 num)
{
struct buffer_head *currbh = NULL;
void *buff;
int i, local;
int status;
- LOG_ENTRY_ARGS ("(0x%p, %u, %u)\n", bitmap, start, num);
+ LOG_ENTRY_ARGS ("(0x%p, 0x%p, 0x%p, %u, %u)\n", sb, handle,
+ bitmap, start, num);
if ((start + num) > bitmap->validbits) {
LOG_ERROR_ARGS("bitmap->validbits = %u but start = %u and " \
@@ -328,8 +345,7 @@
goto bail;
}
- i = OCFS_GLOBAL_OFF_TO_CHUNK(start);
- local = OCFS_GLOBAL_OFF_TO_LOCAL(start);
+ ocfs_bitmap_block_for_off(sb, start, &i, &local);
currbh = bitmap->chunk[i];
status = ocfs_journal_access(handle, currbh, OCFS_JOURNAL_ACCESS_UNDO);
@@ -342,7 +358,7 @@
while (num--) {
set_bit (local++, buff);
- if (local >= OCFS_BITS_IN_CHUNK && num != 0) {
+ if ((local >= OCFS_BITS_IN_CHUNK(sb)) && (num != 0)) {
local = 0;
OCFS_BH_PUT_DATA(currbh);
status = ocfs_journal_dirty(handle, currbh);
@@ -381,7 +397,8 @@
* ocfs_clear_bits()
*
*/
-void ocfs_clear_bits (ocfs_journal_handle *handle, ocfs_alloc_bm * bitmap,
+void ocfs_clear_bits(struct super_block *sb,
+ ocfs_journal_handle *handle, ocfs_alloc_bm *bitmap,
__u32 start, __u32 num)
{
struct buffer_head *currbh = NULL;
@@ -389,7 +406,8 @@
int i, local;
int status;
- LOG_ENTRY_ARGS ("(0x%p, %u, %u)\n", bitmap, start, num);
+ LOG_ENTRY_ARGS ("(0x%p, 0x%p, 0x%p, %u, %u)\n", sb, handle,
+ bitmap, start, num);
if ((start + num) > bitmap->validbits) {
LOG_ERROR_ARGS("bitmap->validbits = %u but start = %u and " \
@@ -397,8 +415,7 @@
goto bail;
}
- i = OCFS_GLOBAL_OFF_TO_CHUNK(start);
- local = OCFS_GLOBAL_OFF_TO_LOCAL(start);
+ ocfs_bitmap_block_for_off(sb, start, &i, &local);
currbh = bitmap->chunk[i];
status = ocfs_journal_access(handle, currbh, OCFS_JOURNAL_ACCESS_UNDO);
@@ -415,7 +432,7 @@
(unsigned long *)bh2jh(currbh)->b_committed_data);
local++;
- if (local >= OCFS_BITS_IN_CHUNK && num != 0) {
+ if ((local >= OCFS_BITS_IN_CHUNK(sb)) && (num != 0)) {
local = 0;
OCFS_BH_PUT_DATA(currbh);
Added: trunk/src/bitmap.h
===================================================================
--- trunk/src/bitmap.h 2004-06-17 16:59:59 UTC (rev 1127)
+++ trunk/src/bitmap.h 2004-06-17 21:06:33 UTC (rev 1128)
@@ -0,0 +1,48 @@
+/* -*- mode: c; c-basic-offset: 8; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
+ * bitmap.h
+ *
+ * Function prototypes
+ *
+ * Copyright (C) 2002, 2004 Oracle. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ *
+ * Authors: Kurt Hackel, Mark Fasheh, Sunil Mushran, Wim Coekaerts,
+ * Manish Singh, Neeraj Goyal, Suchit Kaura
+ */
+
+#ifndef OCFS2_BITMAP_H
+#define OCFS2_BITMAP_H
+
+int ocfs_count_bits(struct super_block *sb, ocfs_alloc_bm *bitmap);
+void ocfs_clear_bits(struct super_block *sb,
+ ocfs_journal_handle *handle, ocfs_alloc_bm *bitmap,
+ __u32 start, __u32 num);
+int ocfs_find_clear_bits(ocfs_super *osb, ocfs_alloc_bm *bitmap,
+ __u32 numBits, __u32 offset, __u32 sysonly);
+void ocfs_initialize_bitmap(struct super_block *sb,
+ ocfs_alloc_bm *bitmap, __u32 validbits,
+ __u32 allocbits);
+void ocfs_reinitialize_bitmap(struct super_block *sb,
+ ocfs_alloc_bm *bitmap, __u32 validbits,
+ __u32 allocbits);
+void ocfs_set_bits(struct super_block *sb, ocfs_journal_handle *handle,
+ ocfs_alloc_bm *bitmap, __u32 start, __u32 num);
+void ocfs_uninitialize_bitmap(ocfs_alloc_bm *bitmap);
+
+#endif /* OCFS2_BITMAP_H */
Added: trunk/src/buffer_head_io.c
===================================================================
--- trunk/src/buffer_head_io.c 2004-06-17 16:59:59 UTC (rev 1127)
+++ trunk/src/buffer_head_io.c 2004-06-17 21:06:33 UTC (rev 1128)
@@ -0,0 +1,328 @@
+/* -*- mode: c; c-basic-offset: 8; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
+ * buffer_head_io.c
+ *
+ * Buffer cache handling
+ *
+ * Copyright (C) 2002, 2004 Oracle. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ *
+ * Authors: Kurt Hackel, Mark Fasheh, Sunil Mushran, Wim Coekaerts,
+ * Manish Singh, Neeraj Goyal, Suchit Kaura
+ */
+
+#include "ocfs_compat.h"
+
+#include <linux/fs.h>
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/highmem.h>
+
+#include "ocfs_log.h"
+#include "ocfs.h"
+
+#include "alloc.h"
+#include "util.h"
+
+#include "ocfs_journal.h"
+#include "ocfs_buffer_head.h"
+
+#define OCFS_DEBUG_CONTEXT OCFS_DEBUG_CONTEXT_IO
+
+static void ocfs_end_buffer_io_sync(struct buffer_head *bh,
+				    int uptodate)
+{
+	/*
+	 * Completion callback assigned to bh->b_end_io by ocfs_write_bhs()
+	 * and ocfs_read_bhs(): record the outcome of the I/O in the buffer's
+	 * uptodate flag and unlock the buffer.  A failed transfer is also
+	 * logged as -EIO.  No entry/exit tracing is done here.
+	 */
+	if (uptodate) {
+		set_buffer_uptodate(bh);
+	} else {
+		LOG_ERROR_STATUS(-EIO);
+		clear_buffer_uptodate(bh);
+	}
+
+	unlock_buffer(bh);
+}
+
+
+/*
+ * ocfs_write_bhs()
+ *
+ * Synchronously write the nr buffer heads in bhs[] and wait for the I/O.
+ * Cached-write flags are rejected with -EINVAL; a NULL bh or a failed
+ * check_block_zero_write() yields -EIO.  JBD-managed buffers are skipped
+ * unless OCFS_BH_IGNORE_JBD is set.  Returns 0 otherwise.
+ */
+int ocfs_write_bhs (ocfs_super * osb, struct buffer_head *bhs[],
+		    int nr, int flags, struct inode *inode)
+{
+	int status = 0;
+	int i;
+	struct super_block *sb;
+	struct buffer_head *bh;
+#ifdef OCFS_DBG_TIMING
+	my_timing_t begin, end;
+#endif
+	/* bhs has not been validated yet, so do not blindly touch bhs[0]. */
+	LOG_ENTRY_ARGS("(bh[0]->b_blocknr = %llu, nr=%d, flags=%u, inode=%p)\n",
+		       (bhs && nr > 0 && bhs[0]) ?
+		       (unsigned long long)bhs[0]->b_blocknr : 0ULL,
+		       nr, flags, inode);
+#ifdef OCFS_DBG_TIMING
+	rdtsc (begin.lohi[0], begin.lohi[1]);
+#endif
+
+	if (osb == NULL || osb->sb == NULL || bhs == NULL) {
+		LOG_TRACE_STR("osb == NULL || osb->sb == NULL || bhs == "
+			      "NULL");
+		status = -EINVAL;
+		LOG_ERROR_STATUS(status);
+		goto bail;
+	}
+
+	if (nr > 256)
+		LOG_TRACE_ARGS ("Getting write for %d blocks\n", nr);
+
+	sb = osb->sb;
+	/* we don't ever want cached writes -- those should go to the
+	 * journal so we can control when they actually hit disk and
+	 * so we can make sure they never get overwritten by a
+	 * subsequent read. */
+	if ((flags & OCFS_BH_CACHED) || (flags & OCFS_BH_COND_CACHED)) {
+		LOG_TRACE_STR("asking for a cached write!");
+		status = -EINVAL;
+		LOG_ERROR_STATUS(status);
+		goto bail;
+	}
+
+	for (i = 0 ; i < nr ; i++) {
+		bh = bhs[i];
+		if (bh == NULL) {
+			LOG_TRACE_STR("bh == NULL");
+			status = -EIO;
+			LOG_ERROR_STATUS(status);
+			break;
+		}
+		if (check_block_zero_write(bh) < 0) {
+			status = -EIO;
+			LOG_ERROR_STATUS(status);
+			break;
+		}
+
+		/* OCFS_BH_CONCURRENT_WRITE has no effect in this function. */
+		if (!(flags & OCFS_BH_IGNORE_JBD) && buffer_jbd(bh)) {
+#ifdef VERBOSE_BH_JBD_TRACE
+			LOG_TRACE_ARGS("trying to write a jbd managed bh "
+				       "(blocknr = %llu), nr=%d\n",
+				       (unsigned long long)bh->b_blocknr, nr);
+#endif
+			continue;
+		}
+
+		lock_buffer(bh);
+		set_buffer_uptodate(bh);
+		/* remove from dirty list before I/O. */
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
+		/*
+		 * mark_buffer_clean() doesn't exist in 2.6.x kernels.
+		 * Not many places actually used mark_buffer_clean, but
+		 * at least reiserfs uses clear_buffer_dirty() as
+		 * a replacement.
+		 */
+		clear_buffer_dirty(bh);
+#else
+		mark_buffer_clean(bh);
+#endif
+
+		bh->b_end_io = ocfs_end_buffer_io_sync;
+		submit_bh(WRITE, bh);
+	}
+
+	/* Wait for everything handled above, even after an early error. */
+	for (i = i - 1; i >= 0; i--) {
+		bh = bhs[i];
+		wait_on_buffer(bh);
+		if (inode)
+			SET_BH_SEQNUM(inode, bh);
+		else
+			CLEAR_BH_SEQNUM(bh);
+	}
+
+bail:
+#ifdef OCFS_DBG_TIMING
+	IO_FUNC_TIMING_PRINT("ocfs_write_bhs", status);
+#endif
+	LOG_EXIT_STATUS(status);
+	return status;
+}
+
+/*
+ * ocfs_read_bhs()
+ *
+ * Read len bytes starting at byte offset off into bhs[], filling any NULL
+ * slots with sb_getblk().  len must be a multiple of the block size.  With
+ * OCFS_BH_CACHED a buffer is only re-read when it is not uptodate or fails
+ * TEST_BH_SEQNUM() for inode.  Returns 0, -EINVAL or -EIO.
+ */
+int ocfs_read_bhs(ocfs_super *osb, __u64 off, __u64 len,
+		  struct buffer_head *bhs[], int flags,
+		  struct inode *inode)
+{
+	int status = 0;
+	struct super_block *sb;
+	int nr, i, ignore_cache = 0;
+	__u64 blocknum;
+	struct buffer_head *bh;
+#ifdef OCFS_DBG_TIMING
+	my_timing_t begin, end;
+#endif
+	LOG_ENTRY_ARGS("(off=(%llu), len=(%llu), flags=%d, inode=%p)\n", off,
+		       len, flags, inode);
+#ifdef OCFS_DBG_TIMING
+	rdtsc (begin.lohi[0], begin.lohi[1]);
+#endif
+
+	/* Validate the pointers before osb->sb is dereferenced below. */
+	if (osb == NULL || osb->sb == NULL || bhs == NULL) {
+		LOG_TRACE_STR("osb == NULL || osb->sb == NULL || bhs == NULL");
+		status = -EINVAL;
+		LOG_ERROR_STATUS(status);
+		goto bail;
+	}
+
+	if (len & (u64)(osb->sb->s_blocksize - 1)) {
+		LOG_TRACE_ARGS("len %% blocksize (len=%llu)\n", len);
+		status = -EINVAL;
+		LOG_ERROR_STATUS(status);
+		goto bail;
+	}
+
+	if ((flags & OCFS_BH_COND_CACHED) && !IS_SYSTEM_FILE_INODE(inode))
+		flags |= OCFS_BH_CACHED;
+
+	if ((flags & OCFS_BH_CACHED) && IS_SYSTEM_FILE_INODE(inode)) {
+		LOG_TRACE_STR("hey bozo you are trying to read "
+			      "a system thingy cached!");
+		flags &= ~OCFS_BH_CACHED;
+	}
+
+	sb = osb->sb;
+	blocknum = off >> sb->s_blocksize_bits;
+
+	nr = (len + sb->s_blocksize - 1) >> sb->s_blocksize_bits;
+	if (nr == 0) {
+		LOG_TRACE_STR("No buffers will be read!!!");
+		LOG_TRACE_ARGS("Len=%llu Off=%llu numbuffers=%u "
+			       "blocknum=%llu\n", len, off,
+			       nr, blocknum);
+		status = 0;
+		goto bail;
+	}
+
+	for (i = 0 ; i < nr ; i++) {
+		if (bhs[i] == NULL) {
+			/* NOTE(review): blocknum only advances for slots filled
+			 * here -- confirm bhs[] is never partially populated. */
+			bhs[i] = sb_getblk(sb, blocknum++);
+			if (bhs[i] == NULL) {
+				LOG_TRACE_STR("bh == NULL");
+				status = -EIO;
+				LOG_ERROR_STATUS(status);
+				break;
+			}
+		}
+		bh = bhs[i];
+		ignore_cache = 0;
+
+		/* Lock everyone else out of this bh */
+		OCFS_BH_GET_DATA_READ(bh);
+
+		if (flags & OCFS_BH_CACHED && inode &&
+		    !TEST_BH_SEQNUM(inode, bh)) {
+#ifdef VERBOSE_BH_SEQNUM_TRACE
+			LOG_TRACE_ARGS("(read) bh (%llu) seqnum (%lu) does not "
+				       "match inode (%u)\n",
+				       (unsigned long long)bh->b_blocknr,
+				       (bh->b_state & STATE_BIT_MASK) >> 19,
+				       atomic_read(GET_INODE_CLEAN_SEQ(inode)));
+#endif
+			ignore_cache = 1;
+		}
+
+		if ((flags & OCFS_BH_CACHED) && (!buffer_uptodate(bh)))
+			ignore_cache = 1;
+
+		if (buffer_jbd(bh)) {
+#ifdef VERBOSE_BH_JBD_TRACE
+			if (!(flags & OCFS_BH_CACHED) || ignore_cache)
+				LOG_TRACE_ARGS("trying to sync read a jbd "
+					       "managed bh (blocknr = %llu)\n",
+					       (unsigned long long)bh->b_blocknr);
+#endif
+			continue;
+		}
+
+		if (!(flags & OCFS_BH_CACHED) || ignore_cache) {
+			if (buffer_dirty(bh)) {
+				/* This should probably be a BUG, or
+				 * at least return an error. */
+				LOG_TRACE_ARGS("asking me to sync read a "
+					       "dirty buffer! (blocknr = %llu)\n",
+					       (unsigned long long)bh->b_blocknr);
+				continue;
+			}
+			lock_buffer(bh);
+			clear_buffer_uptodate(bh);
+			bh->b_end_io = ocfs_end_buffer_io_sync;
+			if (flags & OCFS_BH_READAHEAD)
+				submit_bh(READA, bh);
+			else
+				submit_bh(READ, bh);
+			continue;
+		}
+	}
+
+	/* Wait for and release every bh grabbed above, even on error. */
+	for (i = i - 1; i >= 0; i--) {
+		bh = bhs[i];
+		wait_on_buffer(bh);
+		if (inode)
+			SET_BH_SEQNUM(inode, bh);
+		else
+			CLEAR_BH_SEQNUM(bh);
+		OCFS_BH_PUT_DATA(bh);
+	}
+	if (status == 0)
+		LOG_TRACE_ARGS("off=(%llu), len=(%llu), cached=%s\n", off, len,
+			       (!(flags & OCFS_BH_CACHED) || ignore_cache) ? "no" : "yes");
+
+bail:
+#ifdef OCFS_DBG_TIMING
+	IO_FUNC_TIMING_PRINT("ocfs_read_bhs", status);
+#endif
+
+	LOG_EXIT_STATUS(status);
+	return status;
+}
+
+
+
Modified: trunk/src/dcache.c
===================================================================
--- trunk/src/dcache.c 2004-06-17 16:59:59 UTC (rev 1127)
+++ trunk/src/dcache.c 2004-06-17 21:06:33 UTC (rev 1128)
@@ -1,4 +1,6 @@
-/*
+/* -*- mode: c; c-basic-offset: 8; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
* dcache.c
*
* dentry cache handling code
@@ -24,32 +26,29 @@
* Manish Singh, Neeraj Goyal, Suchit Kaura
*/
+#include "ocfs_compat.h"
+
+#include <linux/fs.h>
#include <linux/types.h>
+#include <linux/slab.h>
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
+#include <linux/namei.h>
+#endif
-#include "inc/ocfs_log.h"
-#include "inc/ocfs.h"
+#include "ocfs_log.h"
+#include "ocfs.h"
+#include "dcache.h"
+#include "vote.h"
+
#define OCFS_DEBUG_CONTEXT OCFS_DEBUG_CONTEXT_DCACHE
-static int ocfs_empty_func(struct dentry *dentry, void *ignore);
-
-/*
- * ocfs_dentry_revalidate()
- *
- */
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
-int ocfs_dentry_revalidate (struct dentry *dentry, struct nameidata *nd)
-#else
-int ocfs_dentry_revalidate (struct dentry *dentry, int flags)
-#endif
+static int ocfs_dentry_revalidate24(struct dentry *dentry, int flags)
{
- int ret = 0; /* if all else fails, just return false */
struct inode *inode = dentry->d_inode;
+ int ret = 0; /* if all else fails, just return false */
ocfs_super *osb;
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
- int flags = nd ? nd->flags : 0;
-#endif
LOG_ENTRY_ARGS ("(0x%p, %d, '%*s')\n", dentry, flags,
dentry->d_name.len, dentry->d_name.name);
@@ -73,7 +72,7 @@
goto bail;
}
- if (osb->publ_map == (1 << osb->node_num)) {
+ if (ocfs_node_map_is_only(osb, &osb->publ_map, osb->node_num)) {
LOG_TRACE_STR ("Only node alive. revalidate=true.");
ret = 1;
goto bail;
@@ -94,24 +93,21 @@
LOG_EXIT_INT (ret);
return ret;
-} /* ocfs_dentry_revalidate */
-
-
-/* exits if it finds any hashed dentries */
-static int ocfs_empty_func(struct dentry *dentry, void *ignore)
-{
- return 1;
}
-
-/*
- * ocfs_empty()
- *
- */
-int ocfs_empty (struct dentry *dentry)
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
+static int ocfs_dentry_revalidate(struct dentry *dentry, struct nameidata *nd)
{
- return ocfs_foreach_child (dentry, ocfs_empty_func, NULL);
+ return ocfs_dentry_revalidate24(dentry, nd ? nd->flags : 0);
}
+struct dentry_operations ocfs_dentry_ops = {
+ .d_revalidate = ocfs_dentry_revalidate,
+};
+#else
+struct dentry_operations ocfs_dentry_ops = {
+ .d_revalidate = ocfs_dentry_revalidate24,
+};
+#endif
/*
* ocfs_foreach_child()
@@ -145,6 +141,4 @@
LOG_EXIT_INT (ret);
return ret;
-} /* ocfs_foreach_child */
-
-
+}
Added: trunk/src/dcache.h
===================================================================
--- trunk/src/dcache.h 2004-06-17 16:59:59 UTC (rev 1127)
+++ trunk/src/dcache.h 2004-06-17 21:06:33 UTC (rev 1128)
@@ -0,0 +1,38 @@
+/* -*- mode: c; c-basic-offset: 8; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
+ * dcache.h
+ *
+ * Function prototypes
+ *
+ * Copyright (C) 2002, 2004 Oracle. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ *
+ * Authors: Kurt Hackel, Mark Fasheh, Sunil Mushran, Wim Coekaerts,
+ * Manish Singh, Neeraj Goyal, Suchit Kaura
+ */
+
+#ifndef OCFS2_DCACHE_H
+#define OCFS2_DCACHE_H
+
+extern struct dentry_operations ocfs_dentry_ops;
+
+int ocfs_foreach_child(struct dentry *dentry,
+ int (*func)(struct dentry *, void *),
+ void *data);
+
+#endif /* OCFS2_DCACHE_H */
Modified: trunk/src/dir.c
===================================================================
--- trunk/src/dir.c 2004-06-17 16:59:59 UTC (rev 1127)
+++ trunk/src/dir.c 2004-06-17 21:06:33 UTC (rev 1128)
@@ -1,10 +1,25 @@
-/*
+/* -*- mode: c; c-basic-offset: 8; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
* dir.c
*
* Creates, reads, walks and deletes directory-nodes
*
* Copyright (C) 2002, 2004 Oracle. All rights reserved.
*
+ * Portions of this code from linux/fs/ext3/dir.c
+ *
+ * Copyright (C) 1992, 1993, 1994, 1995
+ * Remy Card (card at masi.ibp.fr)
+ * Laboratoire MASI - Institut Blaise pascal
+ * Universite Pierre et Marie Curie (Paris VI)
+ *
+ * from
+ *
+ * linux/fs/minix/dir.c
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ *
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License as published by the Free Software Foundation; either
@@ -24,11 +39,24 @@
* Manish Singh, Neeraj Goyal, Suchit Kaura
*/
+#include "ocfs_compat.h"
+
+#include <linux/fs.h>
#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/highmem.h>
-#include "inc/ocfs_log.h"
-#include "inc/ocfs.h"
+#include "ocfs_log.h"
+#include "ocfs.h"
+#include "dir.h"
+#include "dlm.h"
+#include "inode.h"
+#include "namei.h"
+#include "util.h"
+
+#include "ocfs_buffer_head.h"
+
#define OCFS_DEBUG_CONTEXT OCFS_DEBUG_CONTEXT_DIR
static unsigned char ocfs_filetype_table[] = {
@@ -58,7 +86,7 @@
offset = filp->f_pos & (sb->s_blocksize - 1);
while (!error && !stored && filp->f_pos < inode->i_size) {
- blk = (filp->f_pos) >> OCFS_SB(sb)->sect_size_bits;
+ blk = (filp->f_pos) >> sb->s_blocksize_bits;
bh = ocfs_bread (0, inode, blk, 0, &err, 0);
if (!bh) {
LOG_ERROR_ARGS ("directory #%llu contains a hole at offset %lu\n",
@@ -71,7 +99,7 @@
* Do the readahead (8k)
*/
if (!offset) {
- for (i = 16 >> (OCFS_SB(sb)->sect_size_bits - 9), num = 0;
+ for (i = 16 >> (sb->s_blocksize_bits - 9), num = 0;
i > 0; i--) {
tmp = ocfs_bread (NULL, inode, ++blk, 0, &err, 1);
brelse (tmp);
@@ -100,7 +128,7 @@
* failure will be detected in the
* dirent test below. */
if (le16_to_cpu(de->rec_len) <
- OCFS_DIR_REC_LEN(1))
+ OCFS2_DIR_REC_LEN(1))
break;
i += le16_to_cpu(de->rec_len);
}
@@ -137,7 +165,7 @@
unsigned long version = filp->f_version;
unsigned char d_type = DT_UNKNOWN;
- if (de->file_type < OCFS_FT_MAX)
+ if (de->file_type < OCFS2_FT_MAX)
d_type = ocfs_filetype_table[de->file_type];
error = filldir(dirent, de->name,
de->name_len,
@@ -170,7 +198,12 @@
/* ocfs_find_files_on_disk()
* NOTE: this should always be called with parent dir ip_io_sem taken!
*/
-int ocfs_find_files_on_disk (ocfs_super * osb, const char *name, int namelen, __u64 *fe_off, struct inode *inode, int take_lock, struct buffer_head **dirent_bh, struct ocfs2_dir_entry **dirent)
+/* parent off changed to file entry offset of parent! */
+int ocfs_find_files_on_disk(ocfs_super *osb, const char *name,
+ int namelen, __u64 *fe_off,
+ struct inode *inode, int take_lock,
+ struct buffer_head **dirent_bh,
+ struct ocfs2_dir_entry **dirent)
{
int status = -ENOENT;
int tmpstat;
@@ -241,14 +274,15 @@
int err;
sb = inode->i_sb;
- if (inode->i_size < OCFS_DIR_REC_LEN(1) + OCFS_DIR_REC_LEN(2) ||
+ if ((inode->i_size <
+ (OCFS2_DIR_REC_LEN(1) + OCFS2_DIR_REC_LEN(2))) ||
!(bh = ocfs_bread (NULL, inode, 0, 0, &err, 0))) {
LOG_ERROR_ARGS ("bad directory (dir #%llu) - no data block\n",
GET_INODE_FEOFF(inode));
return 1;
}
- de = OCFS_BH_GET_DATA_READ(bh);
+ de = (struct ocfs2_dir_entry *) OCFS_BH_GET_DATA_READ(bh);
de1 = (struct ocfs2_dir_entry *)
((char *) de + le16_to_cpu(de->rec_len));
if (le64_to_cpu(de->inode) != GET_INODE_FEOFF(inode) ||
@@ -264,10 +298,11 @@
offset = le16_to_cpu(de->rec_len) + le16_to_cpu(de1->rec_len);
de = (struct ocfs2_dir_entry *) ((char *) de1 + le16_to_cpu(de1->rec_len));
while (offset < inode->i_size ) {
- if (!bh || (void *) de >= (void *) (bh->b_data+sb->s_blocksize)) {
+ if (!bh || (void *) de >= (void *) (bh->b_data + sb->s_blocksize)) {
OCFS_BH_PUT_DATA(bh);
brelse (bh);
- bh = ocfs_bread (NULL, inode, offset >> sb->s_blocksize_bits, 0, &err, 0);
+ bh = ocfs_bread(NULL, inode,
+ offset >> sb->s_blocksize_bits, 0, &err, 0);
if (!bh) {
LOG_ERROR_ARGS ("directory #%llu contains a hole at offset %lu\n",
GET_INODE_FEOFF(inode), offset);
Added: trunk/src/dir.h
===================================================================
--- trunk/src/dir.h 2004-06-17 16:59:59 UTC (rev 1127)
+++ trunk/src/dir.h 2004-06-17 21:06:33 UTC (rev 1128)
@@ -0,0 +1,40 @@
+/* -*- mode: c; c-basic-offset: 8; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
+ * dir.h
+ *
+ * Function prototypes
+ *
+ * Copyright (C) 2002, 2004 Oracle. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ *
+ * Authors: Kurt Hackel, Mark Fasheh, Sunil Mushran, Wim Coekaerts,
+ * Manish Singh, Neeraj Goyal, Suchit Kaura
+ */
+
+#ifndef OCFS2_DIR_H
+#define OCFS2_DIR_H
+
+int empty_dir(struct inode *inode); /* FIXME: to namei.c */
+int ocfs_find_files_on_disk(ocfs_super *osb, const char *name,
+ int namelen, __u64 *fe_off,
+ struct inode *inode, int take_lock,
+ struct buffer_head **dirent_bh,
+ struct ocfs2_dir_entry **dirent);
+int ocfs_readdir(struct file *filp, void *dirent, filldir_t filldir);
+
+#endif /* OCFS2_DIR_H */
Modified: trunk/src/dlm.c
===================================================================
--- trunk/src/dlm.c 2004-06-17 16:59:59 UTC (rev 1127)
+++ trunk/src/dlm.c 2004-06-17 21:06:33 UTC (rev 1128)
@@ -1,4 +1,6 @@
-/*
+/* -*- mode: c; c-basic-offset: 8; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
* dlm.c
*
* Distributed lock manager. Requests and processes lock votes.
@@ -24,12 +26,28 @@
* Manish Singh, Neeraj Goyal, Suchit Kaura
*/
+#include "ocfs_compat.h"
+
+#include <linux/fs.h>
#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/highmem.h>
+#include <linux/random.h>
-#include "inc/ocfs_log.h"
-#include "inc/ocfs.h"
-#include "inc/ocfs_journal.h"
+#include "ocfs_log.h"
+#include "ocfs.h"
+#include "alloc.h"
+#include "dcache.h"
+#include "dlm.h"
+#include "lockres.h"
+#include "nm.h"
+#include "util.h"
+#include "vote.h"
+
+#include "ocfs_journal.h"
+#include "ocfs_buffer_head.h"
+
extern __u32 comm_voting;
#define WAIT_FOR_VOTE_INCREMENT 200
@@ -41,16 +59,46 @@
static inline int ocfs_wait_for_readonly_drop(ocfs_super *osb, struct inode *inode);
-static int ocfs_disk_request_vote (ocfs_super * osb, __u64 lock_id, __u32 lock_type, __u32 flags, __u64 * vote_map, __u64 * lock_seq_num, __u32 time_to_wait, struct inode *inode, __u64 *open_map);
-static int ocfs_wait_for_vote (ocfs_super * osb, __u64 lock_id, __u32 lock_type, __u32 flags, __u64 *vote_map, __u32 time_to_wait, __u64 lock_seq_num, __u64 *open_map);
+static int ocfs_disk_request_vote (ocfs_super * osb, __u64 lock_id, __u32 lock_type, __u32 flags, ocfs_node_map * vote_map, __u64 * lock_seq_num, __u32 time_to_wait, struct inode *inode, ocfs_node_map *open_map);
+static int ocfs_wait_for_vote (ocfs_super * osb, __u64 lock_id, __u32 lock_type, __u32 flags, ocfs_node_map *vote_map, __u32 time_to_wait, __u64 lock_seq_num, ocfs_node_map *open_map);
static int ocfs_reset_voting (ocfs_super * osb);
-static int ocfs_get_vote_on_disk (ocfs_super * osb, __u64 lock_id, __u32 lock_type, __u32 flags, __u64 * got_vote_map, __u64 vote_map, __u64 lock_seq_num, __u64 * oin_open_map);
+static int ocfs_get_vote_on_disk (ocfs_super * osb, __u64 lock_id, __u32 lock_type, __u32 flags, ocfs_node_map * got_vote_map, ocfs_node_map * vote_map, __u64 lock_seq_num, ocfs_node_map * oin_open_map);
int ocfs_disk_release_lock (ocfs_super * osb, __u64 lock_id, __u32 lock_type, __u32 flags, struct buffer_head *bh, struct inode *inode);
static int ocfs_zap_child_buffers_func(struct dentry *dentry, void *data);
+#warning change these later
+void ocfs_set_publish_vote_map(ocfs_super *osb, ocfs_publish *publish, ocfs_node_map *vote_map)
+{
+ // Store vote_map in the publish record. publish->vote_map can only hold 64 bits,
+ // so publish->reserved1 is used instead: __u8[32] = 256 bits, our current max. osb is unused.
+ ocfs_node_map_set_to_disk(publish->reserved1, vote_map);
+}
+
+void ocfs_get_publish_vote_map(ocfs_super *osb, ocfs_publish *publish, ocfs_node_map *vote_map)
+{
+ ocfs_node_map_init(osb, vote_map); /* (re)initialize the caller's map before loading it */
+ ocfs_node_map_set_from_disk(vote_map, publish->reserved1); /* reserved1 is the field ocfs_set_publish_vote_map() writes */
+}
+
+void ocfs_set_disk_lock_open_map(ocfs_super *osb,
+ ocfs2_disk_lock *lock,
+ ocfs_node_map *open_map)
+{
+ ocfs_node_map_set_to_disk(lock->dl_node_map, open_map); /* store open_map in the disk lock's dl_node_map; osb is unused */
+}
+
+void ocfs_get_disk_lock_open_map(ocfs_super *osb,
+ ocfs2_disk_lock *lock,
+ ocfs_node_map *open_map)
+{
+ ocfs_node_map_init(osb, open_map); /* (re)initialize the caller's map before loading it */
+ ocfs_node_map_set_from_disk(open_map, lock->dl_node_map); /* dl_node_map is the field ocfs_set_disk_lock_open_map() writes */
+}
+
+
/* inode is definitely non NULL */
-static int ocfs_disk_request_vote (ocfs_super * osb, __u64 lock_id, __u32 lock_type, __u32 flags, __u64 * vote_map, __u64 * lock_seq_num, __u32 time_to_wait, struct inode *inode, __u64 *open_map)
+static int ocfs_disk_request_vote (ocfs_super * osb, __u64 lock_id, __u32 lock_type, __u32 flags, ocfs_node_map * vote_map, __u64 * lock_seq_num, __u32 time_to_wait, struct inode *inode, ocfs_node_map *open_map)
{
int status = 0;
__u64 offset = 0;
@@ -60,21 +108,18 @@
__u32 i;
ocfs_publish *pubsect = NULL;
__u64 largestseqno = 0;
- __u64 pubmap = 0;
__u32 wait;
int publish_flag = 0;
struct buffer_head **bhs = NULL;
- LOG_ENTRY_ARGS ("(osb=0x%p, id=%llu, ty=%u, fl=%u, vm=0x%08x)\n",
- osb, lock_id, lock_type, flags,
- (__u32)(*vote_map & 0xFFFFFFFFULL));
+ LOG_ENTRY_ARGS ("(osb=0x%p, id=%llu, ty=%u, fl=%u)\n",
+ osb, lock_id, lock_type, flags);
- *vote_map &= ~(1 << osb->node_num);
+ ocfs_node_map_clear_bit(vote_map, osb->node_num);
- pubmap = osb->publ_map;
- offset = osb->vol_layout.publ_sect_off;
- numnodes = OCFS_MAXIMUM_NODES;
- size = (numnodes * osb->sect_size);
+ offset = osb->publish_blkno << osb->sb->s_blocksize_bits;
+ numnodes = osb->max_nodes;
+ size = (numnodes << osb->sb->s_blocksize_bits);
/* take lock to prevent overwrites by vote_reset and nm thread */
down_with_flag (&(osb->publish_lock), publish_flag);
@@ -101,8 +146,8 @@
largestseqno = pubsect->publ_seq_num;
if (pubsect->dirty) {
OCFS_BH_PUT_DATA(bhs[i]);
- if (!IS_NODE_ALIVE (pubmap, i, numnodes) ||
- TEST_NODE_IN_RECOVERY(osb, i)) {
+ if (!ocfs_node_is_alive(&osb->publ_map, i) ||
+ ocfs_node_is_recovering(osb, i)) {
LOG_TRACE_STR("Node is in recovery, trying"
" again.");
} else {
@@ -130,16 +175,16 @@
pubsect->publ_seq_num = largestseqno;
pubsect->dirty = 1;
pubsect->vote = FLAG_VOTE_NODE;
- pubsect->vote_map = *vote_map;
+ ocfs_set_publish_vote_map(osb, pubsect, vote_map);
pubsect->vote_type = flags;
- pubsect->dir_ent = lock_id;
+ pubsect->lock_id = lock_id;
OCFS_BH_PUT_DATA(bhs[osb->node_num]);
#ifdef VERBOSE_LOCKING_TRACE
printk("ocfs_disk_request_vote: lockid = %llu\n",lock_id);
#endif
- pub_off = osb->vol_layout.publ_sect_off + (osb->node_num * osb->sect_size);
+ pub_off = (osb->publish_blkno + osb->node_num) << osb->sb->s_blocksize_bits;
status = ocfs_write_bh (osb, bhs[osb->node_num], 0, NULL);
if (status < 0) {
@@ -183,11 +228,11 @@
int status = -ETIMEDOUT;
int tmpstat = -ETIMEDOUT;
__u32 timewaited = 0;
- ocfs_file_entry *fe = NULL;
+ ocfs2_dinode *fe = NULL;
struct buffer_head *bh = NULL;
__u32 curr_master;
__u8 lock_level;
- int is_dir = 0, disk_vote = 0;
+ int disk_vote = 0;
ocfs_lock_res *lockres = GET_INODE_LOCKRES(inode);
LOG_ENTRY_ARGS ("(0x%p, %llu, %u, 0x%p, %u)\n",
@@ -204,9 +249,8 @@
goto finally;
}
fe = OCFS_BH_GET_DATA_READ(bh);
- curr_master = DISK_LOCK_CURRENT_MASTER (fe);
- lock_level = DISK_LOCK_FILE_LOCK (fe);
- is_dir = (fe->attribs & OCFS_ATTRIB_DIRECTORY);
+ curr_master = DISK_LOCK(fe)->dl_master;
+ lock_level = DISK_LOCK(fe)->dl_level;
OCFS_BH_PUT_DATA(bh);
if ((curr_master == OCFS_INVALID_NODE_NUM) ||
@@ -214,13 +258,13 @@
goto got_it;
}
- if ((!IS_NODE_ALIVE (osb->publ_map, curr_master, OCFS_MAXIMUM_NODES)) &&
- (!TEST_NODE_IN_RECOVERY(osb, curr_master))) {
+ if ((!ocfs_node_is_alive(&osb->publ_map, curr_master)) &&
+ (!ocfs_node_is_recovering(osb, curr_master))) {
/* Reset the lock as not owned and return success?? */
/* This needs to be under some sort of cluster wide lock, */
fe = OCFS_BH_GET_DATA_WRITE(bh);
- DISK_LOCK_CURRENT_MASTER (fe) = OCFS_INVALID_NODE_NUM;
- DISK_LOCK_FILE_LOCK (fe) = OCFS_DLM_NO_LOCK;
+ DISK_LOCK(fe)->dl_master = OCFS_INVALID_NODE_NUM;
+ DISK_LOCK(fe)->dl_level = OCFS_DLM_NO_LOCK;
OCFS_BH_PUT_DATA(bh);
tmpstat = ocfs_write_bh (osb, bh, 0, NULL);
if (tmpstat < 0) {
@@ -235,7 +279,7 @@
lockres->lock_type = lock_level;
lockres->master_node_num = curr_master;
- if (is_dir) {
+ if (S_ISDIR(fe->i_mode)) {
if (lockres->readonly_node != OCFS_INVALID_NODE_NUM) {
if (lockres->readonly_node == curr_master) {
// readonly cachelock already on this dir
@@ -298,7 +342,7 @@
}
LOG_TRACE_ARGS("broke cache lock, setting to NO_LOCK\n");
fe = OCFS_BH_GET_DATA_WRITE(bh);
- DISK_LOCK_FILE_LOCK (fe) = OCFS_DLM_NO_LOCK;
+ DISK_LOCK(fe)->dl_level = OCFS_DLM_NO_LOCK;
lock_level = OCFS_DLM_NO_LOCK;
OCFS_BH_PUT_DATA(bh);
tmpstat = ocfs_write_bh (osb, bh, 0, inode);
@@ -341,7 +385,7 @@
* ocfs_get_vote_on_disk()
*
*/
-static int ocfs_get_vote_on_disk (ocfs_super * osb, __u64 lock_id, __u32 lock_type, __u32 flags, __u64 * got_vote_map, __u64 vote_map, __u64 lock_seq_num, __u64 * oin_open_map)
+static int ocfs_get_vote_on_disk (ocfs_super * osb, __u64 lock_id, __u32 lock_type, __u32 flags, ocfs_node_map * got_vote_map, ocfs_node_map * vote_map, __u64 lock_seq_num, ocfs_node_map * oin_open_map)
{
int status = 0;
__u32 length = 0;
@@ -350,29 +394,28 @@
ocfs_vote *vote;
struct buffer_head **bhs = NULL;
- LOG_ENTRY_ARGS ("(lockid=%llu, locktype=%u, votemap=0x%08x)\n",
- lock_id, lock_type,
- (__u32)(vote_map & 0xFFFFFFFFULL));
+ LOG_ENTRY_ARGS ("(lockid=%llu, locktype=%u)\n", lock_id, lock_type);
- numnodes = OCFS_MAXIMUM_NODES;
+ numnodes = osb->max_nodes;
/* Read the vote sectors of all the nodes */
- length = numnodes * osb->sect_size;
+ length = numnodes << osb->sb->s_blocksize_bits;
bhs = ocfs_malloc(numnodes * sizeof(struct buffer_head *));
if (bhs == NULL) {
LOG_ERROR_STATUS (status = -ENOMEM);
goto finally;
}
memset(bhs, 0, numnodes * sizeof(struct buffer_head *));
- status = ocfs_read_bhs (osb, osb->vol_layout.vote_sect_off, length, bhs, 0, NULL);
+ status = ocfs_read_bhs (osb, osb->vote_blkno << osb->sb->s_blocksize_bits,
+ length, bhs, 0, NULL);
if (status < 0) {
LOG_ERROR_STATUS (status);
goto finally;
}
for (i = 0; i < numnodes; i++) {
- int node_alive = (IS_NODE_ALIVE (osb->publ_map, i, numnodes));
- int node_in_map = (IS_NODE_ALIVE (vote_map, i, numnodes));
+ int node_alive = ocfs_node_is_alive(&osb->publ_map, i);
+ int node_in_map = ocfs_node_is_alive(vote_map, i);
status = 0;
vote = OCFS_BH_GET_DATA_READ(bhs[i]);
@@ -381,12 +424,12 @@
if (!node_alive) {
if (node_in_map) {
if (flags & FLAG_FILE_UPDATE_OIN)
- (*got_vote_map) |= 1 << i;
+ ocfs_node_map_set_bit(got_vote_map, i);
else
status = -EAGAIN;
}
} else if (node_in_map && vote->vote_seq_num == lock_seq_num &&
- vote->dir_ent == lock_id) {
+ vote->lock_id == lock_id) {
ocfs_vote_reply_ctxt ctxt;
ctxt.reply_method = DISK_VOTE;
ctxt.got_vote_map = got_vote_map;
@@ -394,6 +437,9 @@
ctxt.status = &status;
ctxt.flags = flags;
ctxt.u.vote = vote;
+ if (vote->node != osb->node_num)
+ LOG_ERROR_ARGS("vote->node(%u) != this(%u)\n",
+ vote->node, osb->node_num);
ocfs_process_one_vote_reply(osb, &ctxt, i);
}
@@ -417,18 +463,19 @@
* ocfs_wait_for_vote()
*
*/
-static int ocfs_wait_for_vote (ocfs_super * osb, __u64 lock_id, __u32 lock_type, __u32 flags, __u64 *vote_map, __u32 time_to_wait, __u64 lock_seq_num, __u64 *open_map)
+static int ocfs_wait_for_vote (ocfs_super * osb, __u64 lock_id, __u32 lock_type, __u32 flags, ocfs_node_map *vote_map, __u32 time_to_wait, __u64 lock_seq_num, ocfs_node_map *open_map)
{
int status = -EAGAIN;
__u32 timewaited = 0;
- __u64 gotvotemap = 0;
+ ocfs_node_map gotvotemap;
- LOG_ENTRY_ARGS ("(osb=0x%p, id=%llu, ty=%u, fl=%u, vm=0x%08x, "
- "sq:%llu)\n", osb, lock_id, lock_type, flags,
- (__u32)(*vote_map & 0xFFFFFFFFULL), lock_seq_num);
+ LOG_ENTRY_ARGS ("(osb=0x%p, id=%llu, ty=%u, fl=%u, sq:%llu)\n",
+ osb, lock_id, lock_type, flags, lock_seq_num);
- *vote_map &= ~(1 << osb->node_num);
+ ocfs_node_map_init(osb, &gotvotemap);
+ ocfs_node_map_clear_bit(vote_map, osb->node_num);
+
while (time_to_wait > timewaited) {
ocfs_sleep (WAIT_FOR_VOTE_INCREMENT);
@@ -439,15 +486,14 @@
}
status = ocfs_get_vote_on_disk (osb, lock_id, lock_type, flags,
- &gotvotemap, *vote_map, lock_seq_num, open_map);
+ &gotvotemap, vote_map, lock_seq_num, open_map);
if (status < 0) {
if (status != -EAGAIN)
LOG_ERROR_STATUS (status);
goto bail;
}
-
- *vote_map &= ~gotvotemap;
- if (*vote_map == 0ULL) {
+ ocfs_node_map_clear_bits(vote_map, &gotvotemap);
+ if (ocfs_node_map_is_empty(vote_map)) {
status = 0;
goto bail;
}
@@ -478,6 +524,7 @@
ocfs_publish *pubsect = NULL;
__u64 offset = 0;
struct buffer_head *bh = NULL;
+ ocfs_node_map vote_map;
LOG_ENTRY ();
@@ -485,7 +532,7 @@
down (&(osb->publish_lock));
/* Read node's publish sector */
- offset = osb->vol_layout.publ_sect_off + (osb->node_num * osb->sect_size);
+ offset = (osb->publish_blkno + osb->node_num) << osb->sb->s_blocksize_bits;
status = ocfs_read_bh (osb, offset, &bh, 0, NULL);
if (status < 0) {
@@ -497,8 +544,10 @@
pubsect->dirty = 0;
pubsect->vote = 0;
pubsect->vote_type = 0;
- pubsect->vote_map = 0;
- pubsect->dir_ent = 0;
+ pubsect->lock_id = 0;
+ /* clear vote map */
+ ocfs_node_map_init(osb, &vote_map);
+ ocfs_set_publish_vote_map(osb, pubsect, &vote_map);
/* Write it back */
OCFS_BH_PUT_DATA(bh);
@@ -531,7 +580,7 @@
{
int status = 0;
__u64 offset = 0;
- ocfs_file_entry *fe = NULL;
+ ocfs2_dinode *fe = NULL;
int changed = 0;
ocfs_lock_res *lockres = GET_INODE_LOCKRES(inode);
@@ -556,46 +605,52 @@
fe = OCFS_BH_GET_DATA_READ(*bh);
if (flags & DLOCK_FLAG_MASTER &&
- DISK_LOCK_CURRENT_MASTER (fe) != lockres->master_node_num) {
+ DISK_LOCK(fe)->dl_master != lockres->master_node_num) {
if (!changed) {
OCFS_BH_PUT_DATA(*bh);
fe = OCFS_BH_GET_DATA_WRITE(*bh);
}
changed = 1;
- DISK_LOCK_CURRENT_MASTER (fe) = lockres->master_node_num;
+ DISK_LOCK(fe)->dl_master = lockres->master_node_num;
}
if (flags & DLOCK_FLAG_LOCK &&
- DISK_LOCK_FILE_LOCK (fe) != lockres->lock_type) {
+ DISK_LOCK(fe)->dl_level != lockres->lock_type) {
if (!changed) {
OCFS_BH_PUT_DATA(*bh);
fe = OCFS_BH_GET_DATA_WRITE(*bh);
}
changed = 1;
- DISK_LOCK_FILE_LOCK (fe) = lockres->lock_type;
+ DISK_LOCK(fe)->dl_level = lockres->lock_type;
}
if (flags & DLOCK_FLAG_OPEN_MAP) {
- if (flags & DLOCK_FLAG_ADD_SELF)
- lockres->oin_openmap |= (1ULL << osb->node_num);
- if (DISK_LOCK_OIN_MAP (fe) != lockres->oin_openmap) {
+ ocfs_node_map disk_map;
+
+ ocfs_node_map_init(osb, &disk_map);
+ ocfs_get_disk_lock_open_map(osb, DISK_LOCK(fe), &disk_map);
+
+ if (flags & DLOCK_FLAG_ADD_SELF)
+ ocfs_node_map_set_bit(&lockres->oin_openmap, osb->node_num);
+
+ if (!ocfs_node_map_is_equal(&disk_map, &lockres->oin_openmap)) {
if (!changed) {
OCFS_BH_PUT_DATA(*bh);
fe = OCFS_BH_GET_DATA_WRITE(*bh);
}
changed = 1;
- DISK_LOCK_OIN_MAP (fe) = lockres->oin_openmap;
+ ocfs_set_disk_lock_open_map(osb, DISK_LOCK(fe), &lockres->oin_openmap);
}
}
if (flags & DLOCK_FLAG_SEQ_NUM &&
- DISK_LOCK_SEQNUM (fe) != lockres->last_upd_seq_num) {
+ DISK_LOCK(fe)->dl_seq_num != lockres->last_upd_seq_num) {
if (!changed) {
OCFS_BH_PUT_DATA(*bh);
fe = OCFS_BH_GET_DATA_WRITE(*bh);
}
changed = 1;
- DISK_LOCK_SEQNUM (fe) = lockres->last_upd_seq_num;
+ DISK_LOCK(fe)->dl_seq_num = lockres->last_upd_seq_num;
}
OCFS_BH_PUT_DATA(*bh);
@@ -634,14 +689,15 @@
/* no need to alert master for these cases */
if (S_ISDIR (inode->i_mode) ||
lockres->master_node_num == OCFS_INVALID_NODE_NUM ||
- IS_NODE_ALIVE (lockres->oin_openmap, osb->node_num, OCFS_MAXIMUM_NODES)) {
+ ocfs_node_is_alive(&lockres->oin_openmap,
+ osb->node_num)) {
status = 0;
goto bail;
}
while (status == -EAGAIN) {
- if (!IS_NODE_ALIVE (osb->publ_map, lockres->master_node_num,
- OCFS_MAXIMUM_NODES)) {
+ if (!ocfs_node_is_alive(&osb->publ_map,
+ lockres->master_node_num)) {
LOG_TRACE_ARGS ("Master (%u) dead, lockid %llu\n",
lockres->master_node_num,
GET_INODE_FEOFF(inode));
@@ -652,11 +708,7 @@
ocfs_acquire_lockres(lockres, 0); // ocfs_file_open ocfs_symlink
if (lockres->master_node_num == osb->node_num) {
- LOG_TRACE_ARGS ("Added node to map 0x%08x, lockid %llu\n",
- (__u32)(lockres->oin_openmap & 0xFFFFFFFFULL),
- GET_INODE_FEOFF(inode));
-
- lockres->oin_openmap |= (1 << osb->node_num);
+ ocfs_node_map_set_bit(&lockres->oin_openmap, osb->node_num);
status = ocfs_update_disk_lock (osb, DLOCK_FLAG_OPEN_MAP,
&bh, inode, handle);
if (bh)
@@ -725,7 +777,7 @@
__u32 flags, struct buffer_head **bh, struct inode *inode)
{
int status = -EFAIL;
- ocfs_file_entry *disklock = NULL;
+ ocfs2_dinode *disklock = NULL;
__u32 updated = 0;
ocfs_lock_res *lockres = GET_INODE_LOCKRES(inode);
int k = 0;
@@ -781,10 +833,11 @@
BUG();
if (lockres->master_node_num != osb->node_num ||
- lockres->master_node_num != DISK_LOCK_CURRENT_MASTER (disklock)) {
- lockres->master_node_num = DISK_LOCK_CURRENT_MASTER (disklock);
- lockres->lock_type = DISK_LOCK_FILE_LOCK (disklock);
- lockres->oin_openmap = DISK_LOCK_OIN_MAP (disklock);
+ lockres->master_node_num != DISK_LOCK(disklock)->dl_master) {
+ lockres->master_node_num = DISK_LOCK(disklock)->dl_master;
+ lockres->lock_type = DISK_LOCK(disklock)->dl_level;
+ ocfs_get_disk_lock_open_map(osb, DISK_LOCK(disklock),
+ &lockres->oin_openmap);
}
updated = 1;
OCFS_BH_PUT_DATA(*b);
@@ -794,7 +847,7 @@
no_owner = (lockres->master_node_num == OCFS_INVALID_NODE_NUM);
/* master node is an invalid node */
- if (unlikely(lockres->master_node_num >= OCFS_MAXIMUM_NODES && !no_owner)) {
+ if (unlikely(lockres->master_node_num >= osb->max_nodes && !no_owner)) {
LOG_ERROR_ARGS("lockres: master_node=%d, owner=%s, lockid=%llu\n",
lockres->master_node_num, no_owner?"no":"yes",
GET_INODE_FEOFF(inode));
@@ -803,9 +856,11 @@
goto finally;
}
- wait_on_recovery = TEST_NODE_IN_RECOVERY(osb, lockres->master_node_num);
- owner_dead = !(no_owner || IS_NODE_ALIVE(osb->publ_map,
- lockres->master_node_num, OCFS_MAXIMUM_NODES));
+ wait_on_recovery =
+ ocfs_node_is_recovering(osb, lockres->master_node_num);
+ owner_dead = !(no_owner ||
+ ocfs_node_is_alive(&osb->publ_map,
+ lockres->master_node_num));
if ((owner_dead || wait_on_recovery) &&
lockres->readonly_node == lockres->master_node_num) {
// if owner is dead or in recovery and the lockres
@@ -890,9 +945,9 @@
lockres->master_node_num, lockres->lock_type, flags|extra_lock_flags);
#ifdef VERBOSE_LOCKING_TRACE
- printk("lockres: lockid=%llu, this=%d, master=%d, locktype=%d, flags=%08x, ronode=%d, romap=%08x\n",
+ printk("lockres: lockid=%llu, this=%d, master=%d, locktype=%d, flags=%08x, ronode=%d\n",
GET_INODE_FEOFF(inode), osb->node_num, lockres->master_node_num, lockres->lock_type,
- flags|extra_lock_flags, lockres->readonly_node, (__u32)(lockres->readonly_map & 0xFFFFFFFFULL));
+ flags|extra_lock_flags, lockres->readonly_node);
#endif
if (wait_on_recovery && !((flags|extra_lock_flags) & FLAG_FILE_RECOVERY)) {
int waitcnt = 0;
@@ -900,7 +955,7 @@
lockres->master_node_num);
while (1) {
LOG_TRACE_ARGS("waitcnt = %d\n", waitcnt);
- if (!TEST_NODE_IN_RECOVERY(osb, lockres->master_node_num))
+ if (!ocfs_node_is_recovering(osb, lockres->master_node_num))
break;
ocfs_sleep(500);
}
@@ -947,23 +1002,33 @@
*/
int ocfs_disk_release_lock (ocfs_super * osb, __u64 lock_id, __u32 lock_type, __u32 flags, struct buffer_head *bh, struct inode *inode)
{
- __u64 votemap = 0ULL;
- __u32 i;
+ ocfs_node_map votemap;
int status = 0, vote_status = 0;
int tmpstat;
__u64 lockseqno;
int cachelock = 0;
int disk_vote = 0;
- ocfs_file_entry *fe = NULL;
+ ocfs2_dinode *fe = NULL;
struct buffer_head *tmpbh = NULL, **b = NULL;
- __u64 oin_node_map;
__u32 curr_master;
- int lockflags = (lock_id >= osb->vol_layout.bitmap_off ? OCFS_BH_CACHED : 0);
+ int lockflags = (IS_SYSTEM_FILE_INODE(inode) ? 0 : OCFS_BH_CACHED);
int clear_tmp = 0;
ocfs_lock_res *lockres = GET_INODE_LOCKRES(inode);
+ /* TODO: the ifdefed out code here used to be used
+ * right now, we *broadcast* on lock releases
+ * because of the issues surrounding lookup_file_allocation
+ * need to restrict this to only the openmap
+ * at some later date */
+#if 0
+ ocfs_node_map openmap;
+#endif
+
LOG_ENTRY_ARGS ("(0x%p, %llu, %u, %u, 0x%p)\n",
- osb, lock_id, lock_type, flags, lockres);
+ osb, lock_id, lock_type, flags, lockres);
+
+ /* TODO: figure out how to properly handle inode updates w/no oin */
+ ocfs_node_map_dup(osb, &votemap, &osb->publ_map); // temporary hack, forces broadcast
if (bh != NULL)
b = &bh;
@@ -980,8 +1045,11 @@
}
}
fe = OCFS_BH_GET_DATA_READ(*b);
- oin_node_map = DISK_LOCK_OIN_MAP (fe);
- curr_master = DISK_LOCK_CURRENT_MASTER (fe);
+#if 0
+ ocfs_node_map_init(osb, &openmap);
+ ocfs_get_disk_lock_open_map(osb, DISK_LOCK(fe), &openmap);
+#endif
+ curr_master = DISK_LOCK(fe)->dl_master;
OCFS_BH_PUT_DATA(*b);
if (!IS_VALID_NODE_NUM (curr_master)) {
@@ -995,26 +1063,16 @@
goto finito;
}
- for (i = 0; i < OCFS_MAXIMUM_NODES; i++) {
- if (((1 << i) & oin_node_map) &&
- IS_NODE_ALIVE (osb->publ_map, i, OCFS_MAXIMUM_NODES)) {
- votemap |= (1ULL << i);
- }
- }
+#if 0
+ /* publish_map & openmap for operations that dont need broadcast */
+ if (!(flags & (FLAG_FILE_RENAME | FLAG_FILE_DELETE |
+ FLAG_FILE_RELEASE_DENTRY | FLAG_FILE_TRUNCATE |
+ FLAG_FILE_EXTEND)))
+ ocfs_node_map_and(&votemap, &openmap);
+#endif
- /* Send an update to all nodes alive, can be optimized later TODO */
- if ((flags & FLAG_FILE_RENAME) || (flags & FLAG_FILE_DELETE)
- || (flags & FLAG_RELEASE_DENTRY) || (flags & FLAG_FILE_TRUNCATE)
- || (flags & FLAG_FILE_EXTEND))
- votemap = osb->publ_map;
-
- /* TODO: figure out how to properly handle inode updates w/no oin */
- votemap = osb->publ_map; // temporary hack, forces broadcast
-
- /* remove current node from the votemap */
- votemap &= ~(1ULL << osb->node_num);
-
- if (votemap == 0ULL)
+ ocfs_node_map_clear_bit(&votemap, osb->node_num);
+ if (ocfs_node_map_is_empty(&votemap))
goto finally;
if (!(flags & FLAG_FILE_UPDATE_OIN))
@@ -1045,9 +1103,9 @@
LOG_ERROR_ARGS("DISKVOTE!!: lock_type=%u, flags=%08x, offset=%llu, inode=%llu\n",
lock_type, flags, lock_id, inode?GET_INODE_FEOFF(inode):0ULL);
- LOG_ERROR_ARGS("DISKVOTE!!: this=%d, master=%d, locktype=%d, ronode=%d, romap=%08x\n",
+ LOG_ERROR_ARGS("DISKVOTE!!: this=%d, master=%d, locktype=%d, ronode=%d\n",
osb->node_num, lockres->master_node_num, lockres->lock_type,
- lockres->readonly_node, (__u32)(lockres->readonly_map & 0xFFFFFFFFULL));
+ lockres->readonly_node);
ocfs_show_trace(NULL);
@@ -1078,20 +1136,20 @@
LOG_TRACE_ARGS("writing lock now... releasemaster: %s, level: %d, master: %d\n",
flags & FLAG_FILE_RELEASE_MASTER ? "yes" : "no",
- DISK_LOCK_FILE_LOCK (fe),
- DISK_LOCK_CURRENT_MASTER (fe));
+ DISK_LOCK(fe)->dl_level,
+ DISK_LOCK(fe)->dl_master);
if (flags & FLAG_FILE_RELEASE_MASTER)
- DISK_LOCK_CURRENT_MASTER (fe) = OCFS_INVALID_NODE_NUM;
+ DISK_LOCK(fe)->dl_master = OCFS_INVALID_NODE_NUM;
- if ((DISK_LOCK_FILE_LOCK (fe) == OCFS_DLM_ENABLE_CACHE_LOCK) &&
- (DISK_LOCK_CURRENT_MASTER (fe) == osb->node_num)) {
+ if ((DISK_LOCK(fe)->dl_level == OCFS_DLM_ENABLE_CACHE_LOCK) &&
+ (DISK_LOCK(fe)->dl_master == osb->node_num)) {
lockres->lock_type = OCFS_DLM_ENABLE_CACHE_LOCK;
cachelock = 1;
LOG_TRACE_STR("keeping at CACHE_LOCK");
}
else {
LOG_TRACE_STR("setting to NO_LOCK");
- DISK_LOCK_FILE_LOCK (fe) = OCFS_DLM_NO_LOCK;
+ DISK_LOCK(fe)->dl_level = OCFS_DLM_NO_LOCK;
}
OCFS_BH_PUT_DATA(*b);
@@ -1201,10 +1259,11 @@
/* inode is definitely non NULL */
int new_lock_function(ocfs_super * osb, __u32 requested_lock, __u32 flags, struct buffer_head *bh, int *disk_vote, struct inode *inode)
{
- __u64 vote_map = 0ULL, open_map = 0ULL;
+ ocfs_node_map vote_map;
+ ocfs_node_map open_map;
__u64 lockseqnum = 0;
int tmpstat;
- ocfs_file_entry *fe = NULL;
+ ocfs2_dinode *fe = NULL;
__u64 lock_id;
__u32 lock_write_flags = DLOCK_FLAG_MASTER | DLOCK_FLAG_LOCK | DLOCK_FLAG_OPEN_MAP;
__u32 lock_type = requested_lock;
@@ -1244,33 +1303,37 @@
if (bh) {
fe = OCFS_BH_GET_DATA_READ(bh);
/* may not need to rewrite the lock later if we already have a cachelock */
- need_lock_write = (DISK_LOCK_CURRENT_MASTER (fe) != osb->node_num ||
- DISK_LOCK_FILE_LOCK (fe) != OCFS_DLM_ENABLE_CACHE_LOCK);
+ need_lock_write = (DISK_LOCK(fe)->dl_master != osb->node_num ||
+ DISK_LOCK(fe)->dl_level != OCFS_DLM_ENABLE_CACHE_LOCK);
OCFS_BH_PUT_DATA(bh);
} else
need_lock_write = 0;
need_to_zap_buffers = 0;
}
+ ocfs_node_map_init(osb, &open_map);
+
/* that's why it's called fast path */
if (flags & FLAG_FAST_PATH_LOCK)
goto vote_success;
-
+
/* figure out who to vote with */
- if (flags & (FLAG_REMASTER | FLAG_FILE_DELETE | FLAG_FILE_RENAME
- | FLAG_RELEASE_DENTRY))
- vote_map = osb->publ_map; /* broadcast */
- else if (flags & FLAG_DROP_READONLY) {
- /* all nodes that see this is readonly */
- vote_map = osb->publ_map & lockres->readonly_map;
+ if (flags & (FLAG_REMASTER | FLAG_FILE_DELETE |
+ FLAG_FILE_RENAME | FLAG_DROP_READONLY |
+ FLAG_RELEASE_DENTRY)) {
+ ocfs_node_map_dup(osb, &vote_map, &osb->publ_map); /* broadcast */
+ /* only nodes that see this is readonly */
+ if (flags & FLAG_DROP_READONLY)
+ ocfs_node_map_and(&vote_map, &lockres->readonly_map);
} else {
- vote_map = (1 << lockres->master_node_num); /* just owner */
+ ocfs_node_map_init(osb, &vote_map);
+ ocfs_node_map_set_bit(&vote_map, lockres->master_node_num); /* just owner */
lock_type = lockres->lock_type;
}
- vote_map &= ~(1 << osb->node_num); // remove this node
- if (vote_map == 0) {
+ ocfs_node_map_clear_bit(&vote_map, osb->node_num);
+ if (ocfs_node_map_is_empty(&vote_map)) {
/* As this is the only node alive, make it master of the lock */
/* no need to update open map */
lock_write_flags &= ~(DLOCK_FLAG_OPEN_MAP);
@@ -1300,9 +1363,9 @@
LOG_ERROR_ARGS("DISKVOTE!!: req_lock=%u, flags=%08x, offset=%llu, inode=%llu\n",
requested_lock, flags, lock_id, inode?GET_INODE_FEOFF(inode):0ULL);
- LOG_ERROR_ARGS("DISKVOTE!!: this=%d, master=%d, locktype=%d, ronode=%d, romap=%08x\n",
+ LOG_ERROR_ARGS("DISKVOTE!!: this=%d, master=%d, locktype=%d, ronode=%d\n",
osb->node_num, lockres->master_node_num, lockres->lock_type,
- lockres->readonly_node, (__u32)(lockres->readonly_map & 0xFFFFFFFFULL));
+ lockres->readonly_node);
ocfs_show_trace(NULL);
@@ -1318,7 +1381,7 @@
vote_success:
if (status >= 0 && flags & (FLAG_FILE_EXTEND | FLAG_FILE_UPDATE))
- lockres->oin_openmap = open_map;
+ ocfs_node_map_set(&lockres->oin_openmap, &open_map);
if (need_to_zap_buffers)
ocfs_break_cache_lock_zap_buffers(osb, inode);
@@ -1329,7 +1392,7 @@
/* drop readonly should remove anyone who has responded */
if (flags & FLAG_DROP_READONLY) {
- lockres->readonly_map = vote_map;
+ ocfs_node_map_set(&lockres->readonly_map, &vote_map);
goto bail;
}
@@ -1354,7 +1417,8 @@
/* pick up any change to the open map */
if (bh) {
fe = OCFS_BH_GET_DATA_READ(bh);
- lockres->oin_openmap = DISK_LOCK_OIN_MAP (fe);
+ ocfs_get_disk_lock_open_map(osb, DISK_LOCK(fe),
+ &lockres->oin_openmap);
OCFS_BH_PUT_DATA(bh);
}
bail:
@@ -1387,7 +1451,7 @@
int status = 0;
ocfs_lock_res *lockres = GET_INODE_LOCKRES(inode);
- if (lockres->readonly_map == 0ULL)
+ if (ocfs_node_map_is_empty(&lockres->readonly_map))
return status;
status = ocfs_drop_readonly_cache_lock(osb, inode, 0);
return status;
Added: trunk/src/dlm.h
===================================================================
--- trunk/src/dlm.h 2004-06-17 16:59:59 UTC (rev 1127)
+++ trunk/src/dlm.h 2004-06-17 21:06:33 UTC (rev 1128)
@@ -0,0 +1,62 @@
+/* -*- mode: c; c-basic-offset: 8; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
+ * dlm.h
+ *
+ * Function prototypes
+ *
+ * Copyright (C) 2002, 2004 Oracle. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ *
+ * Authors: Kurt Hackel, Mark Fasheh, Sunil Mushran, Wim Coekaerts,
+ * Manish Singh, Neeraj Goyal, Suchit Kaura
+ */
+
+#ifndef OCFS2_DLM_H
+#define OCFS2_DLM_H
+
+int new_lock_function(ocfs_super *osb, __u32 requested_lock,
+ __u32 flags, struct buffer_head *bh,
+ int *disk_vote, struct inode *inode);
+int ocfs_acquire_lock(ocfs_super *osb, __u32 lock_type,
+ __u32 flags, struct buffer_head **bh,
+ struct inode *inode);
+int ocfs_break_cache_lock_zap_buffers(ocfs_super *osb,
+ struct inode *inode);
+void ocfs_compute_dlm_stats(int status, int vote_status,
+ ocfs_dlm_stats *stats);
+int ocfs_release_lock(ocfs_super *osb, __u32 lock_type,
+ __u32 flags, struct buffer_head *bh,
+ struct inode *inode);
+int ocfs_update_disk_lock(ocfs_super *osb, __u32 flags,
+ struct buffer_head **bh, struct inode *inode,
+ ocfs_journal_handle *handle);
+int ocfs_update_master_on_open(ocfs_super *osb, struct inode *inode,
+ ocfs_journal_handle *handle);
+int ocfs_wait_for_lock_release(ocfs_super *osb, __u64 offset,
+ __u32 time_to_wait, __u32 lock_type,
+ struct inode *inode);
+void ocfs_set_publish_vote_map(ocfs_super *osb, ocfs_publish *publish,
+ ocfs_node_map *vote_map);
+void ocfs_get_publish_vote_map(ocfs_super *osb, ocfs_publish *publish,
+ ocfs_node_map *vote_map);
+void ocfs_set_disk_lock_open_map(ocfs_super *osb, ocfs2_disk_lock *lock,
+ ocfs_node_map *open_map);
+void ocfs_get_disk_lock_open_map(ocfs_super *osb, ocfs2_disk_lock *lock,
+ ocfs_node_map *open_map);
+
+#endif /* OCFS2_DLM_H */
Modified: trunk/src/extmap.c
===================================================================
--- trunk/src/extmap.c 2004-06-17 16:59:59 UTC (rev 1127)
+++ trunk/src/extmap.c 2004-06-17 21:06:33 UTC (rev 1128)
@@ -1,4 +1,6 @@
-/*
+/* -*- mode: c; c-basic-offset: 8; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
* extmap.c
*
* Creates, adds, coalesces, deletes extent maps
@@ -24,11 +26,17 @@
* Manish Singh, Neeraj Goyal, Suchit Kaura
*/
+#include "ocfs_compat.h"
+
+#include <linux/fs.h>
#include <linux/types.h>
+#include <linux/slab.h>
-#include "inc/ocfs_log.h"
-#include "inc/ocfs.h"
+#include "ocfs_log.h"
+#include "ocfs.h"
+#include "extmap.h"
+
#define OCFS_DEBUG_CONTEXT OCFS_DEBUG_CONTEXT_EXTMAP
/* Crazy wacky extent map stuff */
@@ -449,11 +457,11 @@
if ((ByteCount) && (ByteCount != 0xFFFFFFFF)) {
ByteCount--;
- ByteCount >>= osb->sect_size_bits;
+ ByteCount >>= osb->sb->s_blocksize_bits;
ByteCount++;
}
- Vbo >>= osb->sect_size_bits;
+ Vbo >>= osb->sb->s_blocksize_bits;
ocfs_extent_map_remove (Map, Vbo, (__s64) ByteCount);
@@ -510,15 +518,15 @@
Results = ocfs_extent_map_next_entry (Map, RunIndex, Vbo, Lbo,
&LiSectorCount);
- (*Vbo) <<= osb->sect_size_bits;
- (*Lbo) <<= osb->sect_size_bits;
+ (*Vbo) <<= osb->sb->s_blocksize_bits;
+ (*Lbo) <<= osb->sb->s_blocksize_bits;
if (LiSectorCount > 0x00000000ffffffffLL) {
/* overflow */
Results = 0;
*SectorCount = 0xffffffffUL;
} else {
- *SectorCount = ((__u32)(LiSectorCount & 0xFFFFFFFFULL)) << osb->sect_size_bits;
+ *SectorCount = ((__u32)(LiSectorCount & 0xFFFFFFFFULL)) << osb->sb->s_blocksize_bits;
}
LOG_EXIT_INT (Results);
@@ -542,12 +550,12 @@
LOG_ENTRY ();
/* Sector Align the vbo */
- remainder = Vbo & (osb->sect_size - 1);
+ remainder = Vbo & (osb->sb->s_blocksize - 1);
- ret = ocfs_extent_map_lookup (Map, (Vbo >> osb->sect_size_bits), &LiLbo,
+ ret = ocfs_extent_map_lookup (Map, (Vbo >> osb->sb->s_blocksize_bits), &LiLbo,
&LiSectorCount, Index);
if ((__u32) LiLbo != -1) {
- *Lbo = (((__s64) LiLbo) << (__s64) osb->sect_size_bits);
+ *Lbo = (((__s64) LiLbo) << (__s64) osb->sb->s_blocksize_bits);
if (ret) {
*Lbo += remainder;
}
@@ -558,7 +566,7 @@
*SectorCount = LiSectorCount;
if (*SectorCount) {
- *SectorCount <<= (__s64) osb->sect_size_bits;
+ *SectorCount <<= (__s64) osb->sb->s_blocksize_bits;
if (*SectorCount == 0) {
*SectorCount = (__u32) - 1;
}
@@ -567,6 +575,7 @@
*SectorCount -= remainder;
}
}
+
LOG_EXIT_INT (ret);
return ret;
@@ -576,32 +585,31 @@
/* ocfs_update_extent_map()
*
*/
-int ocfs_update_extent_map (ocfs_super * osb, ocfs_extent_map * Map, void *Buffer, __s64 * localVbo, __u64 * remainingLength, ocfs_ext_flag Flag)
+int ocfs_update_extent_map(ocfs_super *osb, ocfs_extent_map *Map,
+ void *Buffer, __s64 *localVbo,
+ __u64 *remainingLength, ocfs_ext_flag Flag)
{
int ret = -EFAIL;
- ocfs_file_entry *FileEntry;
- ocfs_extent_group *OcfsExtent;
+ ocfs2_dinode *fe;
+ ocfs2_extent_block *eb;
+ ocfs2_extent_list *el;
__s64 tempVbo;
- __s64 tempLbo;
__u64 tempSize;
int j;
LOG_ENTRY ();
if (Flag == LOCAL_EXT) {
- FileEntry = Buffer;
+ fe = Buffer;
+ el = &fe->id2.i_list;
- OCFS_ASSERT (FileEntry->local_ext);
+ OCFS_ASSERT(el->l_tree_depth < 0);
- for (j = 0; j < FileEntry->next_free_ext; j++) {
- tempVbo = FileEntry->extents[j].file_off;
- tempLbo = FileEntry->extents[j].disk_off;
- tempSize = FileEntry->extents[j].num_bytes;
-
+ for (j = 0; j < el->l_next_free_rec; j++) {
/* Add the Extent to extent map list */
- ret =
- ocfs_add_extent_map_entry (osb, Map, tempVbo, tempLbo,
- tempSize);
+ ret = ocfs_add_extent_map_entry_from_rec(osb->sb,
+ Map,
+ &el->l_recs[j]);
if (!ret) {
LOG_ERROR_STATUS (ret = -ENOMEM);
goto bail;
@@ -611,21 +619,18 @@
} else {
__u64 localLength = 0;
- OcfsExtent = Buffer;
+ eb = Buffer;
+ el = &eb->h_list;
- for (j = 0; j < OcfsExtent->next_free_ext; j++) {
- if ((__s64) (OcfsExtent->extents[j].file_off +
- OcfsExtent->extents[j].num_bytes) >
- (*localVbo)) {
- tempVbo = OcfsExtent->extents[j].file_off;
- tempLbo = OcfsExtent->extents[j].disk_off;
- tempSize = OcfsExtent->extents[j].num_bytes;
+ for (j = 0; j < el->l_next_free_rec; j++) {
+ if ((__s64)((u64)(el->l_recs[j].e_cpos + el->l_recs[j].e_clusters) << osb->s_clustersize_bits) > (*localVbo)) {
+ tempVbo = (u64)el->l_recs[j].e_cpos << osb->s_clustersize_bits;
+ tempSize = (u64)el->l_recs[j].e_clusters << osb->s_clustersize_bits;
/* Add the Extent to extent map list */
- ret =
- ocfs_add_extent_map_entry (osb, Map, tempVbo,
- tempLbo,
- (__u32) tempSize);
+ ret = ocfs_add_extent_map_entry_from_rec(osb->sb,
+ Map,
+ &el->l_recs[j]);
if (!ret) {
LOG_ERROR_STATUS (ret = -ENOMEM);
goto bail;
@@ -647,10 +652,10 @@
}
}
- if ((OcfsExtent->next_free_ext != OCFS_MAX_DATA_EXTENTS) &&
+ if ((el->l_next_free_rec != el->l_count) &&
(*remainingLength)) {
- LOG_ERROR_ARGS ("next_free_extent=%d, rem_len=%llu",
- OcfsExtent->next_free_ext, *remainingLength);
+ LOG_ERROR_ARGS ("l_next_free_rec=%d, rem_len=%llu",
+ el->l_next_free_rec, *remainingLength);
} else
ret = 0;
}
@@ -660,7 +665,29 @@
return ret;
} /* ocfs_update_extent_map */
+
/*
+ * ocfs_add_extent_map_entry_from_rec()
+ *
+ * Add an entry using the fields of an ocfs2_extent_rec.
+ *
+ * FIXME This should be coalesced with ocfs_add_extent_map_entry
+ * once all callers use this function.
+ */
+int ocfs_add_extent_map_entry_from_rec(struct super_block *sb,
+ ocfs_extent_map *extmap,
+ ocfs2_extent_rec *rec)
+{
+ u64 vbo = (u64)rec->e_cpos << OCFS_SB(sb)->s_clustersize_bits;
+ u64 lbo = rec->e_blkno << sb->s_blocksize_bits;
+ u64 bytes =
+ (u64)rec->e_clusters << OCFS_SB(sb)->s_clustersize_bits;
+ return ocfs_add_extent_map_entry(OCFS_SB(sb), extmap,
+ vbo, lbo, bytes);
+}
+
+
+/*
* ocfs_add_extent_map_entry()
*
* adds an entry to an extent map
@@ -669,6 +696,7 @@
*/
int ocfs_add_extent_map_entry (ocfs_super * osb, ocfs_extent_map * Map, __s64 Vbo, __s64 Lbo, __u64 ByteCount)
{
+ /* FIXME take ocfs2_extent_rec */
int ret;
LOG_ENTRY ();
@@ -677,13 +705,13 @@
if (ByteCount) {
/* Round up sectors */
ByteCount--;
- ByteCount >>= osb->sect_size_bits;
+ ByteCount >>= osb->sb->s_blocksize_bits;
ByteCount++;
}
/* Make the ByteOffsets in to Sector numbers. */
- Vbo >>= osb->sect_size_bits;
- Lbo >>= osb->sect_size_bits;
+ Vbo >>= osb->sb->s_blocksize_bits;
+ Lbo >>= osb->sb->s_blocksize_bits;
ret = ocfs_extent_map_add (Map, ((__s64) Vbo), ((__s64) Lbo), ((__s64) ByteCount));
if (!ret)
Added: trunk/src/extmap.h
===================================================================
--- trunk/src/extmap.h 2004-06-17 16:59:59 UTC (rev 1127)
+++ trunk/src/extmap.h 2004-06-17 21:06:33 UTC (rev 1128)
@@ -0,0 +1,51 @@
+/* -*- mode: c; c-basic-offset: 8; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
+ * extmap.h
+ *
+ * Function prototypes
+ *
+ * Copyright (C) 2002, 2004 Oracle. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ *
+ * Authors: Kurt Hackel, Mark Fasheh, Sunil Mushran, Wim Coekaerts,
+ * Manish Singh, Neeraj Goyal, Suchit Kaura
+ */
+
+#ifndef OCFS2_EXTMAP_H
+#define OCFS2_EXTMAP_H
+
+int ocfs_add_extent_map_entry_from_rec(struct super_block *sb,
+ ocfs_extent_map *extmap,
+ ocfs2_extent_rec *rec);
+int ocfs_add_extent_map_entry(ocfs_super *osb, ocfs_extent_map *Map,
+ __s64 Vbo, __s64 Lbo, __u64 ByteCount);
+void ocfs_delete_all_extent_maps(ocfs_super *osb, struct inode *oin);
+void ocfs_extent_map_destroy(ocfs_extent_map *map);
+void ocfs_extent_map_init(ocfs_extent_map *map);
+int ocfs_get_next_extent_map_entry(ocfs_super *osb,
+ ocfs_extent_map *Map, __u32 RunIndex,
+ __s64 *Vbo, __s64 *Lbo,
+ __u32 *SectorCount);
+int ocfs_lookup_extent_map_entry(ocfs_super *osb, ocfs_extent_map *Map,
+ __s64 Vbo, __s64 *Lbo,
+ __u64 *SectorCount, __u32 *Index);
+int ocfs_update_extent_map(ocfs_super *osb, ocfs_extent_map *Map,
+ void *Buffer, __s64 *localVbo,
+ __u64 *remainingLength, ocfs_ext_flag Flag);
+
+#endif /* OCFS2_EXTMAP_H */
Modified: trunk/src/file.c
===================================================================
--- trunk/src/file.c 2004-06-17 16:59:59 UTC (rev 1127)
+++ trunk/src/file.c 2004-06-17 21:06:33 UTC (rev 1128)
@@ -1,4 +1,6 @@
-/*
+/* -*- mode: c; c-basic-offset: 8; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
* file.c
*
* File open, close, extend, truncate
@@ -24,12 +26,27 @@
* Manish Singh, Neeraj Goyal, Suchit Kaura
*/
+#include "ocfs_compat.h"
+
+#include <linux/fs.h>
#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/highmem.h>
-#include "inc/ocfs_log.h"
-#include "inc/ocfs.h"
-#include "inc/ocfs_journal.h"
+#include "ocfs_log.h"
+#include "ocfs.h"
+#include "alloc.h"
+#include "dlm.h"
+#include "extmap.h"
+#include "file.h"
+#include "sysfile.h"
+#include "inode.h"
+#include "util.h"
+
+#include "ocfs_journal.h"
+#include "ocfs_buffer_head.h"
+
#define OCFS_DEBUG_CONTEXT OCFS_DEBUG_CONTEXT_FILE
extern struct semaphore recovery_list_sem;
@@ -38,20 +55,23 @@
static int ocfs_change_file_attrib (ocfs_super * osb, __u64 file_off, struct iattr *attr, struct inode *inode);
static int ocfs_truncate_file (ocfs_super * osb, __u64 file_off, __u64 file_size, struct inode *inode);
-static void ocfs_fe_set_attributes(ocfs_file_entry *fe, struct iattr *attr)
+
+static void ocfs_fe_set_attributes(ocfs2_dinode *fe, struct iattr *attr)
{
if (attr->ia_valid & ATTR_SIZE)
- fe->file_size = attr->ia_size;
+ fe->i_size = attr->ia_size;
if (attr->ia_valid & ATTR_UID)
- fe->uid = attr->ia_uid;
+ fe->i_uid = attr->ia_uid;
if (attr->ia_valid & ATTR_GID)
- fe->gid = attr->ia_gid;
+ fe->i_gid = attr->ia_gid;
if (attr->ia_valid & ATTR_MODE)
- fe->prot_bits = attr->ia_mode & 0007777;
+ fe->i_mode = attr->ia_mode;
if (attr->ia_valid & ATTR_CTIME)
- fe->create_time = ocfs_get_seconds(attr->ia_ctime);
+ fe->i_ctime = ocfs_get_seconds(attr->ia_ctime);
+ if (attr->ia_valid & ATTR_ATIME)
+ fe->i_atime = ocfs_get_seconds(attr->ia_atime);
if (attr->ia_valid & ATTR_MTIME)
- fe->modify_time = ocfs_get_seconds(attr->ia_mtime);
+ fe->i_mtime = ocfs_get_seconds(attr->ia_mtime);
}
static inline int ocfs_sync_inode(struct inode *inode)
@@ -82,7 +102,7 @@
ocfs_journal_handle *handle, struct inode *inode)
{
int status = 0;
- ocfs_file_entry *fe = NULL;
+ ocfs2_dinode *fe = NULL;
int local_handle = 1;
LOG_ENTRY ();
@@ -99,9 +119,10 @@
}
/* why do we update these here? */
- OCFS_I(inode)->alloc_size = fe->alloc_size;
- OCFS_I(inode)->chng_seq_num = DISK_LOCK_SEQNUM (fe);
- if (fe->this_sector == 0)
+ OCFS_I(inode)->alloc_size =
+ (u64)fe->i_clusters << osb->s_clustersize_bits;
+ OCFS_I(inode)->chng_seq_num = DISK_LOCK(fe)->dl_seq_num;
+ if (fe->i_blkno == 0)
LOG_ERROR_STR ("this_sector=0");
OCFS_BH_PUT_DATA(fe_bh);
@@ -383,7 +404,6 @@
}
bail:
-
if (dec)
iput(inode);
@@ -395,17 +415,16 @@
* ocfs_inode_fill_ext_map()
*
*/
-int ocfs_inode_fill_ext_map(ocfs_super * osb, struct buffer_head * fe_bh, struct inode *inode)
+int ocfs_inode_fill_ext_map(ocfs_super *osb, struct buffer_head *fe_bh,
+ struct inode *inode)
{
int ret = 0;
int j;
- __s64 tempVbo;
- __s64 tempLbo;
- __u64 tempSize;
__u64 tempoff;
- ocfs_extent_group *extent = NULL;
- struct buffer_head *extent_bh = NULL;
- ocfs_file_entry *fe = NULL;
+ ocfs2_extent_block *eb = NULL;
+ ocfs2_extent_list *el;
+ struct buffer_head *eb_bh = NULL;
+ ocfs2_dinode *fe = NULL;
void *tmp;
LOG_ENTRY ();
@@ -413,68 +432,63 @@
/* get a copy of fe, used readonly in this path and */
/* ocfs_create_new_oin will deadlock if fe_bh is locked */
- fe = ocfs_allocate_file_entry();
+#warning markflar, fix the fe copy
+ fe = kmalloc(osb->sb->s_blocksize, GFP_NOFS);
if (fe == NULL) {
LOG_ERROR_STATUS(ret = -ENOMEM);
goto leave;
}
tmp = OCFS_BH_GET_DATA_READ(fe_bh);
- memcpy(fe, tmp, sizeof(ocfs_file_entry));
+ memcpy(fe, tmp, osb->sb->s_blocksize);
OCFS_BH_PUT_DATA(fe_bh);
- tempoff = fe->this_sector;
+ tempoff = fe->i_blkno << osb->sb->s_blocksize_bits;
+#warning I think we DO want extent map stuff for dirs now
+#if 0
/* we don't want to do the extent map stuff for a directory. */
- if (fe->attribs & OCFS_ATTRIB_DIRECTORY)
+ if (S_ISDIR(fe->i_mode))
goto leave;
+#endif
- if (fe->local_ext) {
- for (j = 0; j < fe->next_free_ext; j++) {
- tempVbo = fe->extents[j].file_off;
- tempLbo = fe->extents[j].disk_off;
- tempSize = fe->extents[j].num_bytes;
-
+ el = &fe->id2.i_list;
+ if (el->l_tree_depth < 0) {
+ for (j = 0; j < el->l_next_free_rec; j++) {
/* Add the Extent to extent map */
- ret = ocfs_add_extent_map_entry (osb,
- &OCFS_I(inode)->map,
- tempVbo,
- tempLbo,
- tempSize);
+ ret = ocfs_add_extent_map_entry_from_rec(osb->sb,
+ &OCFS_I(inode)->map,
+ &el->l_recs[j]);
if (!ret) {
LOG_ERROR_STATUS (ret = -ENOMEM);
goto leave;
}
}
} else {
- __u64 next_data_ext;
+ u64 next_leaf;
/* Extents are branched and we are no longer using */
/* Local Extents for this File Entry. */
- ret = ocfs_get_leaf_extent (osb, fe, 0, &extent_bh, inode);
+ ret = ocfs_get_leaf_extent (osb, fe, 0, &eb_bh, inode);
if (ret < 0) {
LOG_ERROR_STATUS (ret);
goto leave;
}
while (1) {
- extent = OCFS_BH_GET_DATA_READ(extent_bh);
+ eb = OCFS_BH_GET_DATA_READ(eb_bh);
+ el = &eb->h_list;
- if (!IS_VALID_EXTENT_DATA (extent)) {
+ if (!IS_VALID_EXTENT_BLOCK(eb) ||
+ (el->l_tree_depth != -1)) {
LOG_ERROR_STATUS(ret = -EFAIL);
goto leave;
}
- for (j = 0; j < extent->next_free_ext; j++) {
- tempVbo = extent->extents[j].file_off;
- tempLbo = extent->extents[j].disk_off;
- tempSize = extent->extents[j].num_bytes;
-
+ for (j = 0; j < el->l_next_free_rec; j++) {
/* Add the Extent to extent map */
- ret = ocfs_add_extent_map_entry (osb,
- &OCFS_I(inode)->map,
- tempVbo,
- tempLbo,
- tempSize);
+ ret = ocfs_add_extent_map_entry_from_rec(osb->sb,
+ &OCFS_I(inode)->map,
+ &el->l_recs[j]);
if (!ret) {
LOG_ERROR_STATUS (ret =
-ENOMEM);
@@ -482,19 +496,19 @@
}
}
- if (extent->next_data_ext > 0) {
- if (!extent->next_data_ext) {
+ if (eb->h_next_leaf_blk > 0) {
+ if (!eb->h_next_leaf_blk) {
LOG_ERROR_STATUS (ret = -EFAIL);
goto leave;
}
- next_data_ext = extent->next_data_ext;
- OCFS_BH_PUT_DATA(extent_bh);
- extent = NULL;
- extent_bh = NULL;
+ next_leaf = eb->h_next_leaf_blk;
+ OCFS_BH_PUT_DATA(eb_bh);
+ eb = NULL;
+ eb_bh = NULL;
ret = ocfs_read_bh(osb,
- next_data_ext,
- &extent_bh,
+ next_leaf << osb->sb->s_blocksize_bits,
+ &eb_bh,
OCFS_BH_COND_CACHED, inode);
if (ret < 0) {
LOG_ERROR_STATUS(ret);
@@ -508,12 +522,12 @@
leave:
/* this fe was a copy */
if (fe)
- ocfs_release_file_entry (fe);
+ kfree(fe);
- if (extent_bh) {
- if (extent)
- OCFS_BH_PUT_DATA(extent_bh);
- brelse(extent_bh);
+ if (eb_bh) {
+ if (eb)
+ OCFS_BH_PUT_DATA(eb_bh);
+ brelse(eb_bh);
}
LOG_EXIT_STATUS (ret);
@@ -569,12 +583,11 @@
{
int status = 0;
int tmpstat;
- ocfs_file_entry *fileEntry = NULL;
+ ocfs2_dinode *fileEntry = NULL;
int have_disk_lock = 0;
__u32 lockFlags = 0, locktype = 0;
struct buffer_head *bh = NULL;
ocfs_journal_handle *handle = NULL;
- int flags = 0;
LOG_ENTRY ();
@@ -593,8 +606,8 @@
goto leave;
}
- if ((DISK_LOCK_FILE_LOCK (fileEntry) == OCFS_DLM_ENABLE_CACHE_LOCK) &&
- (DISK_LOCK_CURRENT_MASTER (fileEntry) == osb->node_num))
+ if ((DISK_LOCK(fileEntry)->dl_level == OCFS_DLM_ENABLE_CACHE_LOCK) &&
+ (DISK_LOCK(fileEntry)->dl_master == osb->node_num))
locktype = OCFS_DLM_ENABLE_CACHE_LOCK;
else
locktype = OCFS_DLM_EXCLUSIVE_LOCK;
@@ -630,15 +643,14 @@
fileEntry = OCFS_BH_GET_DATA_WRITE(bh);
- fileEntry->modify_time = OCFS_CURRENT_TIME;
+ fileEntry->i_mtime = OCFS_CURRENT_TIME;
- DISK_LOCK_SEQNUM (fileEntry) = 0;
+ DISK_LOCK(fileEntry)->dl_seq_num = 0;
ocfs_fe_set_attributes(fileEntry, attr);
/* Set the valid bit here */
- SET_VALID_BIT (fileEntry->sync_flags);
+ fileEntry->i_flags |= OCFS2_VALID_FL;
- flags = OCFS_FE_CACHE_FLAGS(osb, fileEntry);
OCFS_BH_PUT_DATA(bh);
status = ocfs_journal_dirty(handle, bh);
@@ -795,7 +807,8 @@
("Will need more allocation: have=%llu, need=%llu\n",
OCFS_I(inode)->alloc_size, newsize);
down_write(&OCFS_I(inode)->ip_io_sem);
- status = ocfs_extend_file (osb, newsize, GET_INODE_FEOFF(inode), NULL, inode, NULL);
+ status = ocfs_extend_file (osb, newsize, GET_INODE_FEOFF(inode), NULL, inode,
+ NULL, 0, NULL);
up_write(&OCFS_I(inode)->ip_io_sem);
if (status < 0) {
if (status != -EINTR && status != -ENOSPC) {
@@ -827,7 +840,7 @@
LOG_TRACE_STR
("Generic_file_write ok, asking for OIN update now");
inode->i_size = newsize;
- inode->i_blocks = (newsize + sb->s_blocksize) >> sb->s_blocksize_bits;
+ inode->i_blocks = (newsize + sb->s_blocksize - 1) >> sb->s_blocksize_bits;
up(&osb->extend_sem);
}
@@ -918,12 +931,11 @@
static int ocfs_truncate_file (ocfs_super * osb, __u64 file_off, __u64 file_size, struct inode *inode)
{
int status = 0, tmpstat;
- ocfs_file_entry *fe = NULL;
+ ocfs2_dinode *fe = NULL;
__u32 lockFlags = 0, locktype = 0;
int have_disk_lock = 0;
__u64 new_alloc_size;
struct buffer_head *bh = NULL;
- int flags = 0;
ocfs_journal_handle *handle = NULL;
LOG_ENTRY_ARGS ("(file_off = %llu, file_size = %llu\n",
@@ -957,8 +969,8 @@
lockFlags = FLAG_FILE_TRUNCATE;
- if ((DISK_LOCK_FILE_LOCK (fe) == OCFS_DLM_ENABLE_CACHE_LOCK)
- && (DISK_LOCK_CURRENT_MASTER (fe) == osb->node_num))
+ if ((DISK_LOCK(fe)->dl_level == OCFS_DLM_ENABLE_CACHE_LOCK)
+ && (DISK_LOCK(fe)->dl_master == osb->node_num))
locktype = OCFS_DLM_ENABLE_CACHE_LOCK;
else
locktype = OCFS_DLM_EXCLUSIVE_LOCK;
@@ -990,9 +1002,9 @@
* waiting on the lock). make sure the size is still a valid
* one. This really ought to check for other things too, like
* a valid bit, etc. */
- if (file_size > fe->file_size) {
+ if (file_size > fe->i_size) {
LOG_TRACE_ARGS("asked to truncate file with size (%llu) "
- "to size (%llu)!\n", fe->file_size,
+ "to size (%llu)!\n", fe->i_size,
file_size);
OCFS_BH_PUT_DATA(bh);
up(&OCFS_I(inode)->priv_sem);
@@ -1001,8 +1013,8 @@
goto leave;
}
- fe->file_size = file_size;
- fe->alloc_size = new_alloc_size;
+ fe->i_size = file_size;
+ fe->i_clusters = (u32)(new_alloc_size >> osb->s_clustersize_bits);
status = ocfs_free_extents_for_truncate (osb, fe, handle, inode);
if (status < 0) {
@@ -1012,11 +1024,10 @@
goto leave;
}
- DISK_LOCK_SEQNUM (fe) = 0;
- SET_VALID_BIT (fe->sync_flags);
- fe->modify_time = OCFS_CURRENT_TIME;
+ DISK_LOCK(fe)->dl_seq_num = 0;
+ fe->i_flags |= OCFS2_VALID_FL;
+ fe->i_mtime = OCFS_CURRENT_TIME;
- flags = OCFS_FE_CACHE_FLAGS(osb, fe);
OCFS_BH_PUT_DATA(bh);
status = ocfs_journal_dirty(handle, bh);
@@ -1060,14 +1071,17 @@
/* ocfs_extend_file()
*
*/
-int ocfs_extend_file (ocfs_super * osb, __u64 file_size, __u64 file_off, ocfs_journal_handle *passed_handle, struct inode *inode, struct iattr *attr)
+int ocfs_extend_file(ocfs_super *osb, __u64 file_size, __u64 file_off,
+ ocfs_journal_handle *passed_handle,
+ struct inode *inode, struct iattr *attr,
+ int system_file, struct buffer_head *fe_bh)
{
int status = 0;
int tmpstat;
- ocfs_file_entry *fileEntry = NULL;
+ ocfs2_dinode *fe = NULL;
__u64 tempOffset = 0;
- __u64 allocSize = 0;
- __u32 size;
+ __u64 current_alloc;
+ __u64 alloc_size = 0;
__u64 bitmapOffset = 0;
__u64 numClustersAlloc = 0;
__u32 lockFlags = 0;
@@ -1076,12 +1090,12 @@
__u64 actualDiskOffset = 0;
__u64 actualLength = 0;
struct buffer_head *bh = NULL;
- int flags = 0;
ocfs_journal_handle *handle = NULL;
int credits;
struct inode *ext_alloc_inode = NULL;
- LOG_ENTRY ();
+ LOG_ENTRY_ARGS("(off=%llu, file_size=%llu, system=%s)\n",
+ file_off, file_size, system_file?"yes":"no");
if (!inode)
BUG();
@@ -1089,29 +1103,36 @@
if (file_size == 0)
goto leave;
- status = ocfs_read_bh (osb, file_off, &bh, OCFS_BH_CACHED, inode);
- if (status < 0) {
- LOG_ERROR_STATUS (status);
- goto leave;
+ if (fe_bh)
+ bh = fe_bh;
+ else {
+ status = ocfs_read_bh (osb, file_off, &bh, OCFS_BH_CACHED, inode);
+ if (status < 0) {
+ LOG_ERROR_STATUS (status);
+ goto leave;
+ }
}
- fileEntry = OCFS_BH_GET_DATA_READ(bh);
+ fe = OCFS_BH_GET_DATA_READ(bh);
- if (!IS_VALID_FILE_ENTRY(fileEntry)) {
- printk("fe->signature=%8s\n", fileEntry->signature);
+ if (!IS_VALID_FILE_ENTRY(fe)) {
+ printk("fe->signature=%8s\n", fe->i_signature);
LOG_ERROR_ARGS ("Invalid fe at offset %llu", file_off);
status = -EFAIL;
OCFS_BH_PUT_DATA(bh);
goto leave;
}
- allocSize = file_size - fileEntry->alloc_size;
+ current_alloc = (u64)fe->i_clusters << osb->s_clustersize_bits;
+ alloc_size = file_size - current_alloc;
OCFS_BH_PUT_DATA(bh);
- fileEntry = NULL;
+ fe = NULL;
+ LOG_TRACE_ARGS("current_alloc=%llu, alloc_size=%llu\n",
+ current_alloc, alloc_size);
if (passed_handle == NULL) {
- credits = ocfs_calc_extend_credits(((__u32) allocSize),
- osb->vol_layout.cluster_size);
+ credits = ocfs_calc_extend_credits(osb->sb,
+ (__u32) alloc_size);
/* cannot call start_trans with a locked buffer head. */
handle = ocfs_start_trans(osb, credits);
@@ -1126,7 +1147,7 @@
/* Grab a lock on the entry found if we have more than
* 1 extents and also make this node the master*/
- fileEntry = OCFS_BH_GET_DATA_READ(bh);
+ fe = OCFS_BH_GET_DATA_READ(bh);
/* now we always take an EXTEND lock */
lockFlags = FLAG_FILE_EXTEND;
@@ -1152,37 +1173,40 @@
goto leave;
}
- fileEntry = OCFS_BH_GET_DATA_WRITE(bh);
+ fe = OCFS_BH_GET_DATA_WRITE(bh);
- if (file_size > (__s64) fileEntry->alloc_size) {
- allocSize = file_size - fileEntry->alloc_size;
+ if (file_size > (__s64)current_alloc) {
+ alloc_size = file_size - current_alloc;
/* TODO: We can add something here so that after 2-3 allocations, */
- /* we give a lot more disk space to the file than the allocSize so */
+ /* we give a lot more disk space to the file than the alloc_size so */
/* in order to try to use the Extents of File Entry only and ofcourse */
/* the file will have more contigous disk space. */
- {
+ if (!system_file) {
int one_percentish_bits = 7;
- __u64 tempSize = fileEntry->alloc_size;
+ __u64 tempSize = current_alloc;
if (tempSize > ONE_MEGA_BYTE)
tempSize = ONE_MEGA_BYTE;
- allocSize += (tempSize * 2);
+ alloc_size += (tempSize * 2);
- if (allocSize < fileEntry->alloc_size >> one_percentish_bits) {
- allocSize = fileEntry->alloc_size >> one_percentish_bits;
- tempSize = allocSize;
+ if (alloc_size <
+ (current_alloc >> one_percentish_bits)) {
+ alloc_size = current_alloc >> one_percentish_bits;
+ tempSize = alloc_size;
// avoid using 64 bit mod
while (tempSize > (10*ONE_MEGA_BYTE))
tempSize -= (10*ONE_MEGA_BYTE);
tempSize = (10*ONE_MEGA_BYTE) - tempSize;
- allocSize += tempSize;
+ alloc_size += tempSize;
}
}
- status = ocfs_find_space(osb, allocSize, &bitmapOffset,
- &numClustersAlloc, 0, handle);
+ status = ocfs_find_space(osb, alloc_size, &bitmapOffset,
+ &numClustersAlloc, system_file, handle);
+ LOG_TRACE_ARGS("find_space: alloc_size=%llu, returned off=%llu, num=%llu\n",
+ alloc_size, bitmapOffset, numClustersAlloc);
if (status < 0) {
OCFS_BH_PUT_DATA(bh);
if (status != -ENOSPC && status != -EINTR)
@@ -1190,30 +1214,67 @@
goto leave;
}
- actualDiskOffset =
- (bitmapOffset << osb->cluster_size_bits) +
- osb->vol_layout.data_start_off;
- actualLength = numClustersAlloc << osb->cluster_size_bits;
+ actualDiskOffset = bitmapOffset << osb->s_clustersize_bits;
+ actualLength = numClustersAlloc << osb->s_clustersize_bits;
OCFS_BH_PUT_DATA(bh);
- fileEntry = NULL;
+ fe = NULL;
- {
+ if (system_file) {
+ struct buffer_head **bhs = NULL;
+ int numbhs = actualLength >> osb->sb->s_blocksize_bits;
+ int i;
+ char *data;
+
+ bhs = kmalloc(numbhs*sizeof(struct buffer_head *), GFP_KERNEL);
+ if (!bhs) {
+ status = -ENOMEM;
+ LOG_ERROR_STATUS(status);
+ goto leave;
+ }
+ memset(bhs, 0, numbhs * sizeof(struct buffer_head *));
+
+ status = ocfs_read_bhs(osb, actualDiskOffset,
+ actualLength, bhs, 0, NULL);
+ if (status < 0) {
+ kfree(bhs);
+ LOG_ERROR_STATUS(status);
+ goto leave;
+ }
+
+ for(i = 0; i < numbhs; i++) {
+ data = OCFS_BH_GET_DATA_WRITE(bhs[i]);
+ memset(data, 0, osb->sb->s_blocksize);
+ OCFS_BH_PUT_DATA(bhs[i]);
+ }
+
+ status = ocfs_write_bhs(osb, bhs, numbhs, 0, NULL);
+ for(i = 0; i < numbhs; i++)
+ brelse(bhs[i]);
+ kfree(bhs);
+ if (status < 0) {
+ LOG_ERROR_STATUS(status);
+ goto leave;
+ }
+ } else {
struct buffer_head *alloc_bh;
unsigned long block;
struct super_block *sb = osb->sb;
- for (block = actualDiskOffset >> osb->sect_size_bits;
- block < (actualDiskOffset+actualLength) >> osb->sect_size_bits;
+ for (block = (unsigned long)(actualDiskOffset >> sb->s_blocksize_bits);
+ block < (unsigned long)((actualDiskOffset+actualLength) >> sb->s_blocksize_bits);
block++) {
+ alloc_bh = sb_getblk(sb, block);
+ if (!alloc_bh) {
+ LOG_ERROR_STATUS(status=-EIO);
+ goto leave;
+ }
LOG_TRACE_ARGS("setting block %lu as new!\n", block);
- alloc_bh = getblk(OCFS_GET_BLOCKDEV(sb), block, sb->s_blocksize);
alloc_bh->b_state |= (1UL << BH_New);
brelse(alloc_bh);
}
}
-
- ext_alloc_inode = igrab(osb->system_inodes[FILE_ALLOC_BITMAP_SYSTEM_INODE]);
+ ext_alloc_inode = ocfs_get_system_file_inode(osb, EXTENT_ALLOC_BITMAP_SYSTEM_INODE, osb->node_num);
if (!ext_alloc_inode) {
status = -EFAIL;
LOG_ERROR_STATUS(status);
@@ -1221,40 +1282,53 @@
}
ocfs_handle_add_inode(handle, ext_alloc_inode);
- status = ocfs_allocate_extent (osb, bh, handle,
- actualDiskOffset, actualLength, inode);
+ status = ocfs_allocate_extent(osb, bh, handle,
+ actualDiskOffset >> osb->sb->s_blocksize_bits,
+ actualLength >> osb->s_clustersize_bits,
+ inode);
if (status < 0) {
LOG_ERROR_STATUS (status);
goto leave;
}
- fileEntry = OCFS_BH_GET_DATA_WRITE(bh);
+ fe = OCFS_BH_GET_DATA_WRITE(bh);
/* update the total allocation size here */
- fileEntry->alloc_size += actualLength;
+ fe->i_clusters +=
+ (u32)(actualLength >> osb->s_clustersize_bits);
down (&(OCFS_I(inode)->priv_sem));
- OCFS_I(inode)->alloc_size = fileEntry->alloc_size;
+ OCFS_I(inode)->alloc_size =
+ (u64)fe->i_clusters << osb->s_clustersize_bits;
up (&(OCFS_I(inode)->priv_sem));
/* no need to do OCFS_SECTOR_ALIGN once the allocation size is correct. */
- DISK_LOCK_SEQNUM (fileEntry) = 0;
+ DISK_LOCK(fe)->dl_seq_num = 0;
}
/* Update tha file size and add the new one to old one. */
- fileEntry->file_size = file_size;
- LOG_TRACE_ARGS("fileEntry->alloc_size = %llu\n", fileEntry->alloc_size);
+ fe->i_size = file_size;
+ LOG_TRACE_ARGS("fe: i_clusters = %u, i_size=%llu\n",
+ fe->i_clusters, fe->i_size);
+ /* NOTE: this is a bit of a hack; unlike regular files,
+ * system files do not have another opportunity to update
+ * the inode/i_private fields */
+ if (system_file) {
+ OCFS_I(inode)->alloc_size = (u64)fe->i_clusters << osb->s_clustersize_bits;
+ inode->i_size = fe->i_size;
+ inode->i_blocks = (inode->i_size + osb->sb->s_blocksize - 1) >> osb->sb->s_blocksize_bits;
+ }
+ LOG_TRACE_ARGS("inode: alloc_size=%llu, i_size=%llu\n",
+ OCFS_I(inode)->alloc_size, inode->i_size);
+
if (attr)
- ocfs_fe_set_attributes(fileEntry, attr);
+ ocfs_fe_set_attributes(fe, attr);
/* Set the Valid bit and reset the change bit here... TODO */
- SET_VALID_BIT (fileEntry->sync_flags);
- fileEntry->modify_time = OCFS_CURRENT_TIME;
+ fe->i_flags |= OCFS2_VALID_FL;
+ fe->i_mtime = OCFS_CURRENT_TIME;
- tempOffset = fileEntry->this_sector;
- size = osb->sect_size;
+ tempOffset = fe->i_blkno << osb->sb->s_blocksize_bits;
- flags = OCFS_FE_CACHE_FLAGS(osb, fileEntry);
-
OCFS_BH_PUT_DATA(bh);
status = ocfs_journal_dirty(handle, bh);
@@ -1292,7 +1366,7 @@
if (bh != NULL)
LOG_TRACE_ARGS("bh->b_count = %d\n",
atomic_read(&(bh->b_count)));
- if (bh != NULL)
+ if (bh != NULL && fe_bh == NULL)
brelse(bh);
if (ext_alloc_inode)
@@ -1389,7 +1463,7 @@
inode);
else {
status = ocfs_extend_file(osb, newsize, fileOff, NULL,
- inode, attr);
+ inode, attr, 0, NULL);
extended = 1;
}
if (status < 0) {
@@ -1404,7 +1478,7 @@
ocfs_delete_all_extent_maps(osb, inode);
}
inode->i_size = newsize;
- inode->i_blocks = (newsize + sb->s_blocksize) >> sb->s_blocksize_bits;
+ inode->i_blocks = (newsize + sb->s_blocksize - 1) >> sb->s_blocksize_bits;
up (&(OCFS_I(inode)->priv_sem));
}
@@ -1451,7 +1525,7 @@
generic_fillattr(inode, stat);
/* We set the blksize from the cluster size for performance */
- stat->blksize = osb->vol_layout.cluster_size;
+ stat->blksize = osb->s_clustersize;
bail:
LOG_EXIT_INT (err);
Added: trunk/src/file.h
===================================================================
--- trunk/src/file.h 2004-06-17 16:59:59 UTC (rev 1127)
+++ trunk/src/file.h 2004-06-17 21:06:33 UTC (rev 1128)
@@ -0,0 +1,57 @@
+/* -*- mode: c; c-basic-offset: 8; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
+ * file.h
+ *
+ * Function prototypes
+ *
+ * Copyright (C) 2002, 2004 Oracle. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ *
+ * Authors: Kurt Hackel, Mark Fasheh, Sunil Mushran, Wim Coekaerts,
+ * Manish Singh, Neeraj Goyal, Suchit Kaura
+ */
+
+#ifndef OCFS2_FILE_H
+#define OCFS2_FILE_H
+
+int ocfs_extend_file (ocfs_super * osb, __u64 file_size, __u64 file_off,
+ ocfs_journal_handle *passed_handle,
+ struct inode *inode, struct iattr *attr,
+ int system_file, struct buffer_head *fe_bh);
+int ocfs_file_open(struct inode *inode, struct file *file);
+ssize_t ocfs_file_read(struct file *filp, char *buf, size_t count,
+ loff_t * ppos);
+int ocfs_file_release(struct inode *inode, struct file *file);
+ssize_t ocfs_file_write(struct file *filp, const char *buf,
+ size_t count, loff_t *ppos);
+int ocfs_flush(struct file *file);
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
+int ocfs_getattr(struct vfsmount *mnt, struct dentry *dentry,
+ struct kstat *stat);
+#else
+int ocfs_getattr(struct dentry *dentry, struct iattr *attr);
+#endif
+int ocfs_inode_fill_ext_map(ocfs_super *osb, struct buffer_head *fe_bh,
+ struct inode *inode);
+int ocfs_inode_notify_open(ocfs_super *osb, struct buffer_head *fe_bh,
+ ocfs_journal_handle *handle,
+ struct inode *inode);
+int ocfs_setattr(struct dentry *dentry, struct iattr *attr);
+int ocfs_sync_file(struct file *file, struct dentry *dentry, int datasync);
+
+#endif /* OCFS2_FILE_H */
Modified: trunk/src/heartbeat.c
===================================================================
--- trunk/src/heartbeat.c 2004-06-17 16:59:59 UTC (rev 1127)
+++ trunk/src/heartbeat.c 2004-06-17 21:06:33 UTC (rev 1128)
@@ -1,4 +1,6 @@
-/*
+/* -*- mode: c; c-basic-offset: 8; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
* heartbeat.c
*
* Keeps track of alive nodes in the cluster.
@@ -24,18 +26,24 @@
* Manish Singh, Neeraj Goyal, Suchit Kaura
*/
-#if !defined(USERSPACE_TOOL)
+#include "ocfs_compat.h"
+
+#include <linux/fs.h>
#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/highmem.h>
-#include "inc/ocfs_log.h"
-#include "inc/ocfs.h"
-#include "inc/ocfs_journal.h"
-#endif
+#include "ocfs_log.h"
+#include "ocfs.h"
-#if defined(USERSPACE_TOOL)
-#include <format.h>
-#endif
+#include "alloc.h"
+#include "heartbeat.h"
+#include "util.h"
+#include "vote.h"
+#include "ocfs_journal.h"
+#include "ocfs_buffer_head.h"
+
/* Tracing */
#define OCFS_DEBUG_CONTEXT OCFS_DEBUG_CONTEXT_HEARTBEAT
@@ -57,8 +65,8 @@
int status = 0;
ocfs_publish *publish = NULL;
int publish_idx = OCFS_VOLCFG_NEWCFG_SECTORS + osb->node_num;
- struct buffer_head **pub_bh = &osb->cfg_bhs[publish_idx];
- __u64 node_publ_off = osb->vol_layout.publ_sect_off + (osb->node_num * osb->sect_size);
+ struct buffer_head **pub_bh = &osb->autoconfig_bhs[publish_idx];
+ __u64 node_publ_off = (osb->publish_blkno + osb->node_num) << osb->sb->s_blocksize_bits;
LOG_ENTRY_ARGS ("(0x%p, %u, %s)\n", osb, flag,
read_publish ? "true" : "false");
@@ -81,8 +89,10 @@
publish->dirty = 0;
}
/* Write the current time in local node's publish sector */
- publish->time = ocfs_get_publish_time();
- publish->hbm[osb->node_num] = osb->hbm;
+ publish->time = jiffies;
+ /* Disallow 0 */
+ if (!publish->time)
+ publish->time = 1;
spin_lock (&OcfsGlobalCtxt.comm_seq_lock);
publish->comm_seq_num = OcfsGlobalCtxt.comm_seq_num;
spin_unlock (&OcfsGlobalCtxt.comm_seq_lock);
@@ -129,8 +139,8 @@
LOG_ENTRY_ARGS ("(0x%p, 0x%p, %u)\n", osb, bhs, first_time);
- num_nodes = OCFS_MAXIMUM_NODES;
- node_map = &(osb->vol_node_map);
+ num_nodes = osb->max_nodes;
+ node_map = osb->vol_node_map;
/* First time thru, update buffer with timestamps for all nodes */
if (first_time) {
@@ -145,17 +155,15 @@
/* Refresh local buffers */
for (i = 0; i < num_nodes; i++) {
publish = OCFS_BH_GET_DATA_READ(bhs[i]);
- node_map->time[i] = publish->time;
+ node_map[i].time = publish->time;
if (publish->mounted && i != osb->node_num) {
printk("ocfs2: Adding %s (node %d) to "
"clustered device (%u,%u)\n",
osb->node_cfg_info[i]->node_name, i,
MAJOR(osb->sb->s_dev),
MINOR(osb->sb->s_dev));
- node_map->miss_cnt[i] = 0;
- UPDATE_PUBLISH_MAP(osb->publ_map, i,
- OCFS_PUBLISH_SET,
- num_nodes);
+ node_map[i].miss_cnt = 0;
+ ocfs_publish_map_set(&osb->publ_map, i);
}
OCFS_BH_PUT_DATA(bhs[i]);
}
@@ -173,18 +181,16 @@
/* Check if the node is hung or not by comparing the disk */
/* and memory timestamp values */
- if (node_map->time[i] == publish->time) {
- if (IS_NODE_ALIVE(osb->publ_map, i, num_nodes)) {
- if (atomic_read (&(node_map->dismount[i]))) {
- node_map->miss_cnt[i] = MISS_COUNT_VALUE;
- atomic_set (&(node_map->dismount[i]), 0);
- UPDATE_PUBLISH_MAP (osb->publ_map, i,
- OCFS_PUBLISH_CLEAR,
- num_nodes);
+ if (node_map[i].time == publish->time) {
+ if (ocfs_node_is_alive(&osb->publ_map, i)) {
+ if (atomic_read (&(node_map[i].dismount))) {
+ node_map[i].miss_cnt = MISS_COUNT_VALUE;
+ atomic_set (&(node_map[i].dismount), 0);
+ ocfs_publish_map_clear(&osb->publ_map, i);
} else
- (node_map->miss_cnt[i])++;
+ (node_map[i].miss_cnt)++;
- if (node_map->miss_cnt[i] > MISS_COUNT_VALUE) {
+ if (node_map[i].miss_cnt > MISS_COUNT_VALUE) {
#if !defined(USERSPACE_TOOL)
printk ("ocfs2: Removing %s (node %d) "
"from clustered device (%u,%u)\n",
@@ -193,9 +199,8 @@
MINOR(osb->sb->s_dev));
#endif
- SET_NODE_IN_RECOVERY(osb, i);
- UPDATE_PUBLISH_MAP (osb->publ_map, i,
- OCFS_PUBLISH_CLEAR, num_nodes);
+ ocfs_recovery_map_set(osb, i);
+ ocfs_publish_map_clear(&osb->publ_map, i);
/* Ok, we'd better recover him now...*/
ocfs_recovery_thread(osb, i);
@@ -203,34 +208,16 @@
}
} else {
#if !defined(USERSPACE_TOOL)
- if (!IS_NODE_ALIVE(osb->publ_map, i, num_nodes) && osb->node_num != i)
+ if (!ocfs_node_is_alive(&osb->publ_map, i) &&
+ (osb->node_num != i))
printk ("ocfs2: Adding %s (node %d) to clustered device (%u,%u)\n",
osb->node_cfg_info[i]->node_name, i,
MAJOR(osb->sb->s_dev), MINOR(osb->sb->s_dev));
#endif
- node_map->miss_cnt[i] = 0;
- node_map->time[i] = publish->time;
- UPDATE_PUBLISH_MAP (osb->publ_map, i, OCFS_PUBLISH_SET,
- num_nodes);
+ node_map[i].miss_cnt = 0;
+ node_map[i].time = publish->time;
+ ocfs_publish_map_set(&osb->publ_map, i);
-#if !defined(USERSPACE_TOOL)
- /* Update the multiple the other node wants us to beat */
- if ((publish->hbm[osb->node_num] != DISK_HBEAT_INVALID)
- && (osb->hbm > publish->hbm[osb->node_num])) {
- /* Go to the lowest multiplier any of the nodes */
- /* alive want us to heartbeat with. */
- osb->hbm = publish->hbm[osb->node_num];
-
- if (osb->hbm == 0)
- osb->hbm = DISK_HBEAT_NO_COMM;
-
- if (OcfsGlobalCtxt.hbm > osb->hbm)
- OcfsGlobalCtxt.hbm = osb->hbm;
-
- if (OcfsGlobalCtxt.hbm == 0)
- OcfsGlobalCtxt.hbm = DISK_HBEAT_NO_COMM;
- }
-#endif
}
OCFS_BH_PUT_DATA(bhs[i]);
}
Added: trunk/src/heartbeat.h
===================================================================
--- trunk/src/heartbeat.h 2004-06-17 16:59:59 UTC (rev 1127)
+++ trunk/src/heartbeat.h 2004-06-17 21:06:33 UTC (rev 1128)
@@ -0,0 +1,36 @@
+/* -*- mode: c; c-basic-offset: 8; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
+ * heartbeat.h
+ *
+ * Function prototypes
+ *
+ * Copyright (C) 2002, 2004 Oracle. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ *
+ * Authors: Kurt Hackel, Mark Fasheh, Sunil Mushran, Wim Coekaerts,
+ * Manish Singh, Neeraj Goyal, Suchit Kaura
+ */
+
+#ifndef OCFS2_HEARTBEAT_H
+#define OCFS2_HEARTBEAT_H
+
+int ocfs_nm_heart_beat(ocfs_super *osb, __u32 flag, int read_publish);
+void ocfs_update_publish_map(ocfs_super *osb, struct buffer_head *bhs[],
+ int first_time);
+
+#endif /* OCFS2_HEARTBEAT_H */
Modified: trunk/src/inode.c
===================================================================
--- trunk/src/inode.c 2004-06-17 16:59:59 UTC (rev 1127)
+++ trunk/src/inode.c 2004-06-17 21:06:33 UTC (rev 1128)
@@ -1,4 +1,6 @@
-/*
+/* -*- mode: c; c-basic-offset: 8; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
* inode.c
*
* vfs' aops, fops, dops and iops
@@ -24,14 +26,49 @@
* Manish Singh, Neeraj Goyal, Suchit Kaura
*/
+#include "ocfs_compat.h"
+
+#include <linux/fs.h>
#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/highmem.h>
+#include <linux/pagemap.h>
+#include <linux/smp_lock.h>
-#include "inc/ocfs_log.h"
-#include "inc/ocfs.h"
-#include "inc/ocfs_journal.h"
+#include <asm/byteorder.h>
+#include "ocfs_log.h"
+#include "ocfs.h"
+
+#include "alloc.h"
+#include "dir.h"
+#include "dlm.h"
+#include "extmap.h"
+#include "file.h"
+#include "inode.h"
+#include "ioctl.h"
+#include "lockres.h"
+#include "namei.h"
+#include "super.h"
+#include "symlink.h"
+#include "util.h"
+#include "vote.h"
+
+#include "ocfs_journal.h"
+#include "ocfs_buffer_head.h"
+
#define OCFS_DEBUG_CONTEXT OCFS_DEBUG_CONTEXT_INODE
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)
+# include <linux/iobuf.h>
+# if LINUX_VERSION_CODE >= KERNEL_VERSION(2,4,18)
+# define free_kiovec_sz(nr, buf, bh) free_kiovec(nr, buf)
+# define alloc_kiovec_sz(nr, buf, bh) alloc_kiovec(nr, buf)
+# endif
+#endif /* for 2.6 - no more kiovec, kiobuf structures - vfs handles
+ * this for us (direct i/o) */
+
+
extern struct semaphore recovery_list_sem;
typedef struct _ocfs_find_inode_args
@@ -167,34 +204,6 @@
LOG_ENTRY_ARGS("(feoff = %llu)\n", feoff);
- /* Shortcut: if they ask for the root dirnode, just return
- * it. */
- if (feoff == OCFS_ROOT_INODE_FE_OFF(osb)) {
- LOG_TRACE_ARGS("Asked for root dirnode (%llu)\n",
- feoff);
-
- if (osb->root_inode) {
- inode = osb->root_inode;
-
- /* should we iget it or not? i suppose if you're in
- * here and you've asked for the root inode you don't
- * know what it is and will prolly iput it later... */
- if (inode) {
- if (ocfs_inc_icount(inode) < 0)
- BUG();
- }
- goto bail;
- }
- /* If we fall through here, then it's mount time! */
- }
-
- /* This shouldn't happen anymore. */
- if (osb->vol_layout.root_start_off == 0) {
- LOG_ERROR_ARGS("root_start_off = 0! Skipping inode create for "
- "%llu\n", feoff);
- goto bail;
- }
-
/* Ok. By now we've either got the offsets passed to us by the
* caller, or we just pulled them off the bh. Lets do some
* sanity checks to make sure they're OK. */
@@ -203,10 +212,6 @@
goto bail;
}
- /* try to detect whether this is a system file. */
- if (feoff < osb->vol_layout.root_start_off)
- flags |= OCFS_FIND_INODE_FLAG_SYSFILE;
-
args.feoff = feoff;
args.flags = flags;
args.ino = ino_from_off(sb, feoff);
@@ -297,7 +302,7 @@
*/
int ocfs_inode_init_private(struct inode *inode)
{
- ocfs_inode_private *i = NULL;
+ ocfs_inode_private *i;
if (!inode)
BUG();
@@ -337,18 +342,19 @@
* ocfs_populate_inode()
*
*/
-void ocfs_populate_inode (struct inode *inode, ocfs_file_entry *fe, umode_t mode, int create_ino)
+void ocfs_populate_inode(struct inode *inode, ocfs2_dinode *fe,
+ int create_ino)
{
struct super_block *sb;
ocfs_super *osb;
- __u64 offset;
- LOG_ENTRY_ARGS ("(0x%p, %u, size:%llu)\n", inode, mode, fe->file_size);
+ LOG_ENTRY_ARGS ("(0x%p, size:%llu)\n", inode, fe->i_size);
sb = inode->i_sb;
osb = OCFS_SB(sb);
- offset = fe->this_sector;
+ // this means that read_inode cannot create a superblock
+ // inode today. change if needed.
if (!IS_VALID_FILE_ENTRY(fe)) {
printk("ocfs2: invalid file entry!\n");
BUG();
@@ -362,65 +368,89 @@
}
inode->i_version = 1;
- inode->i_rdev = MKDEV (fe->dev_major, fe->dev_minor);
- inode->i_mode = mode;
- inode->i_uid = fe->uid;
- inode->i_gid = fe->gid;
- inode->i_blksize = (__u32) osb->vol_layout.cluster_size; // sb->s_blocksize;
- inode->i_blocks = (fe->file_size + sb->s_blocksize) >> sb->s_blocksize_bits;
+ inode->i_generation = le32_to_cpu(fe->i_generation);
+ inode->i_rdev = huge_decode_dev(le64_to_cpu(fe->id1.dev1.i_rdev));
+ inode->i_mode = fe->i_mode;
+ inode->i_uid = fe->i_uid;
+ inode->i_gid = fe->i_gid;
+ inode->i_blksize = (u32)osb->s_clustersize; // sb->s_blocksize;
+ inode->i_blocks = (fe->i_size + sb->s_blocksize - 1) >> sb->s_blocksize_bits;
inode->i_mapping->a_ops = &ocfs_aops;
#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)
inode->i_attr_flags |= ATTR_FLAG_NOATIME;
#endif
inode->i_flags |= S_NOATIME;
- OCFS_SET_INODE_TIME(inode, i_atime, fe->modify_time);
- OCFS_SET_INODE_TIME(inode, i_mtime, fe->modify_time);
- OCFS_SET_INODE_TIME(inode, i_ctime, fe->create_time);
+ OCFS_SET_INODE_TIME(inode, i_atime, fe->i_atime);
+ OCFS_SET_INODE_TIME(inode, i_mtime, fe->i_mtime);
+ OCFS_SET_INODE_TIME(inode, i_ctime, fe->i_ctime);
- SET_INODE_FEOFF(inode, offset);
+ SET_INODE_FEOFF(inode,
+ fe->i_blkno << osb->sb->s_blocksize_bits);
- OCFS_I(inode)->alloc_size = fe->alloc_size;
+ OCFS_I(inode)->alloc_size =
+ (u64)fe->i_clusters << osb->s_clustersize_bits;
OCFS_I(inode)->inode = inode;
- OCFS_I(inode)->chng_seq_num = DISK_LOCK_SEQNUM (fe);
- OCFS_I(inode)->u.fe_private = fe->u.fe_private;
+ OCFS_I(inode)->chng_seq_num = DISK_LOCK(fe)->dl_seq_num;
+ OCFS_I(inode)->u.fe_private = fe->id1.i_pad1;
- if (fe->attribs & OCFS_ATTRIB_DIRECTORY)
+ if (S_ISDIR(fe->i_mode))
OCFS_I(inode)->oin_flags |= OCFS_OIN_DIRECTORY;
if (create_ino)
- inode->i_ino = ino_from_off(inode->i_sb, offset);
+ inode->i_ino =
+ ino_from_off(inode->i_sb,
+ fe->i_blkno << osb->sb->s_blocksize_bits);
- LOG_TRACE_ARGS("offset = %llu, ino = %lu, create_ino = %s\n",
- offset, inode->i_ino,
+ LOG_TRACE_ARGS("blkno = %llu, ino = %lu, create_ino = %s\n",
+ fe->i_blkno, inode->i_ino,
create_ino ? "true" : "false");
- inode->i_nlink = fe->link_cnt;
+ inode->i_nlink = fe->i_links_count;
+
+ if (le32_to_cpu(fe->i_flags) & OCFS2_LOCAL_ALLOC_FL) {
+ LOG_TRACE_ARGS("local alloc inode: i_ino=%lu\n", inode->i_ino);
+ } else if (le32_to_cpu(fe->i_flags) & OCFS2_BITMAP_FL) {
+ OCFS_I(inode)->u.ip_bitinfo.used_bits =
+ le32_to_cpu(fe->id1.bitmap1.i_used);
+ OCFS_I(inode)->u.ip_bitinfo.total_bits =
+ le32_to_cpu(fe->id1.bitmap1.i_total);
+ LOG_TRACE_ARGS("bitmap inode: i_ino=%lu, used=%u, total=%u\n",
+ inode->i_ino, OCFS_I(inode)->u.ip_bitinfo.used_bits,
+ OCFS_I(inode)->u.ip_bitinfo.total_bits);
+ } else if (le32_to_cpu(fe->i_flags) & OCFS2_SUPER_BLOCK_FL) {
+ LOG_TRACE_ARGS("superblock inode: i_ino=%lu\n", inode->i_ino);
+ // we can't actually hit this as read_inode can't handle
+ // superblocks today ;-)
+ BUG();
+ }
+
switch (inode->i_mode & S_IFMT) {
case S_IFREG:
atomic_set(GET_INODE_CLEAN_SEQ(inode), atomic_read(&osb->clean_buffer_seq));
inode->i_fop = &ocfs_fops;
inode->i_op = &ocfs_file_iops;
- inode->i_size = fe->file_size;
+ inode->i_size = fe->i_size;
break;
case S_IFDIR:
if (inode->i_nlink < 2) {
LOG_ERROR_ARGS("inlink=%d for %llu\n", inode->i_nlink,
- offset);
+ fe->i_blkno);
inode->i_nlink = 2;
}
atomic_set(GET_INODE_CLEAN_SEQ(inode), atomic_read(&osb->clean_buffer_seq));
inode->i_op = &ocfs_dir_iops;
inode->i_fop = &ocfs_dops;
- inode->i_size = fe->file_size;
+ inode->i_size = fe->i_size;
break;
case S_IFLNK:
atomic_set(GET_INODE_CLEAN_SEQ(inode), atomic_read(&osb->clean_buffer_seq));
inode->i_op = &ocfs_symlink_inode_operations;
//inode->i_fop = &ocfs_fops;
- inode->i_size = fe->file_size;
+ inode->i_size = fe->i_size;
break;
default:
- init_special_inode (inode, mode, inode->i_rdev);
+ init_special_inode(inode, inode->i_mode,
+ inode->i_rdev);
break;
}
@@ -470,9 +500,8 @@
{
struct super_block *sb;
ocfs_super *osb;
- umode_t mode;
__u64 feoff = 0ULL;
- ocfs_file_entry *fe = NULL;
+ ocfs2_dinode *fe = NULL;
struct buffer_head *bh = NULL;
int status;
int sysfile = 0;
@@ -493,7 +522,6 @@
}
feoff = args->feoff;
- sysfile = (args->flags & OCFS_FIND_INODE_FLAG_SYSFILE);
/* Read the FE off disk. This is safe because the kernel only
* does one read_inode2 for a new inode, and if it doesn't
@@ -506,39 +534,14 @@
}
fe = OCFS_BH_GET_DATA_READ(bh);
+ sysfile = le32_to_cpu(fe->i_flags) & OCFS2_SYSTEM_FL;
+ if (S_ISCHR(fe->i_mode) || S_ISBLK(fe->i_mode))
+ inode->i_rdev = huge_decode_dev(le64_to_cpu(fe->id1.dev1.i_rdev));
- mode = fe->prot_bits;
+ ocfs_populate_inode (inode, fe, 0);
- switch (fe->attribs) {
- case OCFS_ATTRIB_DIRECTORY:
- mode |= S_IFDIR;
- break;
- case OCFS_ATTRIB_CHAR:
- inode->i_rdev = MKDEV (fe->dev_major, fe->dev_minor);
- mode |= S_IFCHR;
- break;
- case OCFS_ATTRIB_BLOCK:
- inode->i_rdev = MKDEV (fe->dev_major, fe->dev_minor);
- mode |= S_IFBLK;
- break;
- case OCFS_ATTRIB_FIFO:
- mode |= S_IFIFO;
- break;
- case OCFS_ATTRIB_SYMLINK:
- mode |= S_IFLNK;
- break;
- case OCFS_ATTRIB_SOCKET:
- mode |= S_IFSOCK;
- break;
- case OCFS_ATTRIB_REG:
- default:
- mode |= S_IFREG;
- break;
- }
- ocfs_populate_inode (inode, fe, mode, 0);
-
/* eventually this case has to GO! */
- if (feoff != fe->this_sector)
+ if (feoff != (fe->i_blkno << osb->sb->s_blocksize_bits))
BUG();
if (sysfile)
@@ -581,10 +584,6 @@
goto bail;
args = opaque;
- if (args->ino == OCFS_ROOT_INODE_NUMBER) {
- printk("find_actor: asked about root inode\n");
- goto bail;
- }
if (GET_INODE_FEOFF(inode) != args->feoff) {
LOG_ERROR_STATUS(-EINVAL);
@@ -656,7 +655,7 @@
struct buffer_head *orphan_dir_bh = NULL;
struct buffer_head *fe_bh = NULL;
__u32 lock_flags = FLAG_FILE_DELETE;
- ocfs_file_entry *fe;
+ ocfs2_dinode *fe;
LOG_ENTRY_ARGS("(inode->i_ino = %lu)\n", inode->i_ino);
@@ -665,7 +664,7 @@
goto clear_inode;
}
- if (inode->i_ino == OCFS_ROOT_INODE_NUMBER) {
+ if (inode == osb->root_inode) {
LOG_TRACE_STR("Skipping root inode delete.");
goto clear_inode;
}
@@ -733,7 +732,7 @@
/* check OCFS_SYNC_FLAG_ORPHANED */
fe = OCFS_BH_GET_DATA_READ(fe_bh);
- if (!(fe->sync_flags & OCFS_SYNC_FLAG_ORPHANED)) {
+ if (!(fe->i_flags & OCFS2_ORPHANED_FL)) {
OCFS_BH_PUT_DATA(fe_bh);
/* for lack of a better error? */
status = -EEXIST;
@@ -742,7 +741,7 @@
}
/* has someone already deleted us?! baaad... */
- if (fe->u.i_dtime) {
+ if (fe->i_dtime) {
OCFS_BH_PUT_DATA(fe_bh);
status = -EEXIST;
@@ -762,9 +761,8 @@
}
fe = OCFS_BH_GET_DATA_WRITE(fe_bh);
- fe->u.i_dtime = OCFS_CURRENT_TIME;
- fe->sync_flags &= (~OCFS_SYNC_FLAG_VALID);
- fe->sync_flags &= (~OCFS_SYNC_FLAG_ORPHANED);
+ fe->i_dtime = OCFS_CURRENT_TIME;
+ fe->i_flags &= (~(OCFS2_VALID_FL | OCFS2_ORPHANED_FL));
OCFS_BH_PUT_DATA(fe_bh);
status = ocfs_journal_dirty(handle, fe_bh);
@@ -928,11 +926,7 @@
memcpy (kaddr, symname, len - 1);
mapping->a_ops->commit_write (NULL, page, 0, len - 1);
err = mapping->a_ops->readpage (NULL, page);
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
wait_on_page_locked (page);
-#else
- wait_on_page (page);
-#endif
page_cache_release (page);
if (err < 0)
goto fail;
@@ -940,11 +934,7 @@
return 0;
fail_map:
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
unlock_page (page);
-#else
- UnlockPage (page);
-#endif
page_cache_release (page);
fail:
return err;
@@ -957,9 +947,8 @@
static int ocfs_symlink_get_block (struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create)
{
int err = -EIO;
- ocfs_super *osb;
int status;
- ocfs_file_entry *fe = NULL;
+ ocfs2_dinode *fe = NULL;
__u64 entryOffset;
struct buffer_head *bh = NULL;
@@ -971,37 +960,38 @@
goto bail;
}
- osb = OCFS_SB(inode->i_sb);
-
- if ((iblock << 9) > PATH_MAX + 1) {
- LOG_ERROR_ARGS ("file offset > PATH_MAX: %llu",
- (unsigned long long)iblock << 9);
+ if ((iblock << inode->i_sb->s_blocksize_bits) > PATH_MAX + 1) {
+ LOG_ERROR_ARGS ("block offset > PATH_MAX: %llu",
+ (unsigned long long)iblock);
goto bail;
}
entryOffset = GET_INODE_FEOFF(inode);
- status = ocfs_read_bh (osb, entryOffset, &bh, OCFS_BH_CACHED, inode);
+ status = ocfs_read_bh(OCFS_SB(inode->i_sb), entryOffset, &bh,
+ OCFS_BH_CACHED, inode);
if (status < 0) {
LOG_ERROR_STATUS (status);
goto bail;
}
fe = OCFS_BH_GET_DATA_READ(bh);
- if (!IS_VALID_FILE_ENTRY (fe)) {
+ if (!IS_VALID_FILE_ENTRY(fe)) {
OCFS_BH_PUT_DATA(bh);
LOG_ERROR_ARGS ("Invalid fe at offset %llu", entryOffset);
goto bail;
}
- if ((iblock << 9) >= (__s64)fe->alloc_size) {
+ if ((u64)iblock >= ocfs_clusters_to_blocks(inode->i_sb,
+ fe->i_clusters)) {
OCFS_BH_PUT_DATA(bh);
- LOG_ERROR_ARGS ("file offset is outside the allocated size: %llu",
- (unsigned long long)iblock << 9);
+ LOG_ERROR_ARGS ("block offset is outside the allocated size: %llu",
+ (unsigned long long)iblock);
goto bail;
}
- map_bh(bh_result, inode->i_sb, (fe->extents[0].disk_off >> osb->sect_size_bits) + iblock);
+ map_bh(bh_result, inode->i_sb,
+ fe->id2.i_list.l_recs[0].e_blkno + iblock);
OCFS_BH_PUT_DATA(bh);
err = 0;
@@ -1020,10 +1010,11 @@
int block, int create, int *err, int reada)
{
struct buffer_head * bh = NULL;
- int fatal = 0, tmperr = 0, new = 0;
- ocfs_super *osb = NULL;
- __s64 vbo = 0LL, lbo = 0LL;
+ int fatal = 0, tmperr, new = 0;
+ ocfs_super *osb;
+ __s64 vbo, lbo;
int readflags = OCFS_BH_CACHED;
+ char *buf;
osb = OCFS_SB(inode->i_sb);
vbo = (__s64) block << inode->i_sb->s_blocksize_bits;
@@ -1044,27 +1035,42 @@
new = 1;
}
- // ???: do we need priv_sem? should have i_sem i think
+ /* ???: do we need priv_sem? should have i_sem i think */
if (vbo >= OCFS_I(inode)->alloc_size) {
- *err = ocfs_extend_file(osb, vbo + 512LL, GET_INODE_FEOFF(inode),
- handle, inode, NULL);
+ int vbo_pad;
+
+ vbo_pad = inode->i_sb->s_blocksize;
+ vbo_pad -= vbo & (s64)(inode->i_sb->s_blocksize - 1);
+
+ *err = ocfs_extend_file(osb,
+ vbo + vbo_pad,
+ GET_INODE_FEOFF(inode),
+ handle, inode, NULL, 0, NULL);
if (*err < 0) {
*err = -ENOSPC;
return NULL;
}
- // fe->file_size will be vbo+512 here, and alloc_size will be whatever
- // i_size will be changed by caller (ocfs_add_entry) if we return !NULL
+ /*
+ * fe->i_size will be vbo + padding to blocksize here,
+ * and i_blocks will be whatever is actually allocated.
+ * i_size will be changed by caller (ocfs_add_entry) if
+ * we return !NULL.
+ */
}
- // do we need extend sem? no extend dlm message for dirs
- tmperr = ocfs_lookup_file_allocation(osb, vbo, &lbo, 1, NULL, inode,1);
+ /* do we need extend sem? no extend dlm message for dirs */
+ /*
+ * UGLY: last argument to lookup_file_allocation() (locked) is
+ * forced to '1' here, even though we don't have the lock. This
+ * is to force fast, unlocked operation. Get A Real DLM.
+ */
+ tmperr = ocfs_lookup_file_allocation(osb, vbo, &lbo, 1, NULL,
+ inode, 1);
if (tmperr < 0)
goto fail;
if (new) {
- bh = getblk(OCFS_GET_BLOCKDEV(osb->sb),
- lbo >> osb->sb->s_blocksize_bits,
- osb->sb->s_blocksize);
+ bh = sb_getblk(osb->sb, lbo >> osb->sb->s_blocksize_bits);
if (!bh) {
tmperr = -EIO;
goto fail;
@@ -1077,8 +1083,8 @@
if (fatal)
goto fail;
- char *buf = OCFS_BH_GET_DATA_WRITE(bh);
- memset(buf, 0, osb->sect_size);
+ buf = OCFS_BH_GET_DATA_WRITE(bh);
+ memset(buf, 0, osb->sb->s_blocksize);
OCFS_BH_PUT_DATA(bh);
fatal = ocfs_journal_dirty(handle, bh);
if (fatal)
@@ -1139,13 +1145,18 @@
vbo = (__s64) iblock << inode->i_sb->s_blocksize_bits;
if (!INODE_JOURNAL(inode) && vbo >= OCFS_I(inode)->alloc_size) {
+ int vbo_pad;
+
+ vbo_pad = inode->i_sb->s_blocksize;
+ vbo_pad -= vbo & (s64)(inode->i_sb->s_blocksize - 1);
+
LOG_TRACE_STR("Extending allocation");
LOG_ERROR_ARGS("extending inode %lu in get_block!!\n",
inode->i_ino);
down_write(&OCFS_I(inode)->ip_io_sem);
- err = ocfs_extend_file(osb, vbo + osb->sect_size,
- GET_INODE_FEOFF(inode), NULL, inode,
- NULL);
+ err = ocfs_extend_file(osb, vbo + vbo_pad,
+ GET_INODE_FEOFF(inode),
+ NULL, inode, NULL, 0, NULL);
up_write(&OCFS_I(inode)->ip_io_sem);
if (err < 0) {
err = -ENOSPC;
@@ -1357,7 +1368,7 @@
__s64 lbo; /* logical (disk) offset */
__s64 vbo_max; /* file offset, max_blocks from iblock */
int set_new = 0; /* flag */
- u32 new_size; /* In sectors, the size of the contiguous block */
+ __u64 new_size; /* In bytes, the size of the contiguous block */
unsigned char blocksize_bits;
int needs_trunc = 0;
@@ -1394,7 +1405,7 @@
if (create && vbo_max > OCFS_I(inode)->alloc_size) {
/* WARNING: How much do we really want to extend the file? */
status = ocfs_extend_file(osb, vbo_max, GET_INODE_FEOFF(inode),
- NULL, inode, NULL);
+ NULL, inode, NULL, 0, NULL);
if (status < 0) {
status = -ENOSPC;
LOG_ERROR_STR("ocfs_direct_IO_get_blocks: failed to extend the file!");
@@ -1422,9 +1433,9 @@
/* make sure we don't map more than max_blocks blocks here as
that's all the kernel will handle at this point. */
- if (new_size > max_blocks)
- new_size = max_blocks;
- bh_result->b_size = new_size << blocksize_bits;
+ if (new_size > (__u64)max_blocks << blocksize_bits)
+ new_size = (__u64)max_blocks << blocksize_bits;
+ bh_result->b_size = new_size;
ret = 0;
bail:
@@ -1516,7 +1527,7 @@
struct inode *inode = filp->f_dentry->d_inode;
int max_sectors;
int nbhs;
- int sector_size, sector_bits, sector_mask;
+ int sector_size, sector_bits, sector_mask, sectors_per_page;
int ret = 0;
int large_io = 0;
int inuse = 0;
@@ -1533,9 +1544,10 @@
saved_size = size;
/* FIXME: Need to differentiate between sectors and blocksize */
- sector_size = 512;
- sector_bits = 9;
- sector_mask = 511;
+ sector_bits = OCFS_SB(inode->i_sb)->s_sectsize_bits;
+ sector_size = 1 << OCFS_SB(inode->i_sb)->s_sectsize_bits;
+ sector_mask = sector_size - 1;
+ sectors_per_page = PAGE_SIZE / sector_size;
/* max sectors is 1024 in 2.4.9
* max data is 512kb
*/
@@ -1553,7 +1565,7 @@
size = inode->i_size - *offp;
}
- /* make sure we are aligned to either 4kb or 512 byte IO */
+ /* make sure aligned to either PAGE_SIZE or sect_size IO */
#ifndef LARGEIOS
if ((*offp & sector_mask) || (size & sector_mask))
/* if not, then fail, we need either to do dio */
@@ -1563,19 +1575,19 @@
large_io = 0;
#endif
#ifdef LARGEIOS
- if ((*offp & 4095) || (size & 4095)) {
- /* if it's not 4kb, then 512 */
+ if ((*offp & ~PAGE_MASK) || (size & ~PAGE_MASK)) {
+ /* if it's not PAGE_SIZE, then sect_size */
if ((*offp & sector_mask) || (size & sector_mask))
/* if not, then fail, we need either to do dio */
return err;
max_sectors = KIO_MAX_SECTORS; /* for 2.4.9 - 1024 */
- } /* ok we 're 4kb aligned, lets see if the buffer is */
+ } /* ok we 're PAGE_SIZE aligned, lets see if the buffer is */
else {
- if (!((unsigned long) buf & 4095)) {
- /* yippie we are .. we can do 4kb size io's */
+ if (!((unsigned long) buf & ~PAGE_MASK)) {
+ /* yippie we are .. we can do PAGE_SIZE size io's */
large_io = 1;
/* for 2.4.9 */
- max_sectors = KIO_MAX_SECTORS / 8;
+ max_sectors = KIO_MAX_SECTORS / sectors_per_page;
} else {
max_sectors = KIO_MAX_SECTORS;
large_io = 0;
@@ -1637,7 +1649,7 @@
doio:
size = totalioblocks << sector_bits;
if (large_io)
- nbhs = (size >> 12);
+ nbhs = (size >> PAGE_SHIFT);
else
nbhs = (size >> sector_bits);
if (nbhs > max_sectors)
@@ -1666,10 +1678,10 @@
totalioblocks = 0;
while (size > 0) {
if (large_io) {
- blocks = size >> 12;
+ blocks = size >> PAGE_SHIFT;
if (blocks > max_sectors)
blocks = max_sectors;
- iosize = blocks << 12;
+ iosize = blocks << PAGE_SHIFT;
} else {
blocks = size >> sector_bits;
if (blocks > max_sectors)
@@ -1686,22 +1698,22 @@
OCFS_KIO_BLOCKS(iobuf)[0] = firstphys + totalioblocks;
if (large_io) {
- blocknr+=8;
- OCFS_KIO_BLOCKS(iobuf)[0] = OCFS_KIO_BLOCKS(iobuf)[0] / 8;
+ blocknr += sectors_per_page;
+ OCFS_KIO_BLOCKS(iobuf)[0] = OCFS_KIO_BLOCKS(iobuf)[0] / sectors_per_page;
} else {
blocknr++;
}
for (i = 1; i < blocks; i++) {
if (large_io) {
- blocknr+=8;
+ blocknr += sectors_per_page;
} else {
blocknr++;
}
OCFS_KIO_BLOCKS(iobuf)[i] = OCFS_KIO_BLOCKS(iobuf)[0] + i;
}
err = brw_kiovec (rw, 1, &iobuf, inode->i_dev, OCFS_KIO_BLOCKS(iobuf),
- large_io ? 4096 : sector_size);
+ large_io ? PAGE_SIZE : sector_size);
#ifdef SUSE
if (rw == READ && err > 0)
mark_dirty_kiobuf(iobuf, err);
@@ -1711,7 +1723,8 @@
size -= err;
buf += err;
if (large_io) {
- totalioblocks += (blocks * 8);
+ totalioblocks +=
+ (blocks * sectors_per_page);
} else {
totalioblocks += blocks;
}
@@ -1770,7 +1783,7 @@
int ocfs_kvec_rw(struct file *filp, int rw, kvec_cb_t cb, size_t size, loff_t pos)
{
- int err = 0;
+ int err = 0;
int max_sectors = 25000;
struct inode *inode = filp->f_dentry->d_inode;
unsigned long blocknr, blocks, iosize,myiosize;
@@ -1779,9 +1792,9 @@
unsigned long blocks_end_cluster = 0;
/* FIXME: Need to differentiate betwen sectors and blocksize */
- int sector_bits = 9;
- int sector_size = 512;
- int sector_mask = 511;
+ int sector_bits = OCFS_SB(inode->i_sb)->s_sectsize_bits;
+ int sector_size = 1 << OCFS_SB(inode->i_sb)->s_sectsize_bits;
+ int sector_mask = sector_size - 1;
int ret;
unsigned long firstlogic;
@@ -1817,7 +1830,7 @@
iosize = blocks << sector_bits;
clustersize = inode->i_blksize >> sector_bits;
blocks_end_cluster = clustersize - (blocknr % clustersize);
- myiosize = size >> 9;
+ myiosize = size >> sector_bits;
firstlogic = blocknr;
totalioblocks = 0;
@@ -1897,7 +1910,7 @@
}
up (&(OCFS_I(inode)->priv_sem));
- if (osb->publ_map == (1 << osb->node_num)) {
+ if (ocfs_node_map_is_only(osb, &osb->publ_map, osb->node_num)) {
LOG_TRACE_STR ("Only node alive.");
goto bail;
}
@@ -1937,7 +1950,8 @@
{
int status = 0;
struct buffer_head *fe_bh = NULL;
- ocfs_file_entry *fe = NULL;
+ ocfs2_dinode *fe = NULL;
+ ocfs2_extent_list *fel;
__u64 offset;
int drop_priv_sem = 0;
@@ -1984,10 +1998,10 @@
goto leave;
/* Add checks as needed */
- if ((fe->u.i_dtime) || (!(fe->sync_flags & OCFS_SYNC_FLAG_VALID))) {
- if (fe->u.i_dtime)
+ if ((fe->i_dtime) || (!(fe->i_flags & OCFS2_VALID_FL))) {
+ if (fe->i_dtime)
LOG_ERROR_ARGS("Inode %lu has dtime = %llu\n",
- inode->i_ino, fe->u.i_dtime);
+ inode->i_ino, fe->i_dtime);
else
LOG_TRACE_STR ("File Entry is invalid");
@@ -1995,90 +2009,73 @@
goto leave;
}
- if ((OCFS_I(inode)->alloc_size != (__s64) fe->alloc_size) ||
- (inode->i_size != (__s64) fe->file_size) ||
- (OCFS_I(inode)->chng_seq_num != DISK_LOCK_SEQNUM (fe)) ||
- inode->i_uid != fe->uid ||
- inode->i_gid != fe->gid ||
- inode->i_mode != fe->prot_bits ||
- inode->i_nlink != fe->link_cnt){
+ if ((OCFS_I(inode)->alloc_size !=
+ (__s64)((u64)fe->i_clusters << osb->s_clustersize_bits)) ||
+ (inode->i_size != (__s64) fe->i_size) ||
+ (OCFS_I(inode)->chng_seq_num != DISK_LOCK(fe)->dl_seq_num) ||
+ inode->i_uid != fe->i_uid ||
+ inode->i_gid != fe->i_gid ||
+ inode->i_mode != fe->i_mode ||
+ inode->i_nlink != fe->i_links_count){
- if (OCFS_I(inode)->alloc_size > (__s64)fe->alloc_size){
+ if (OCFS_I(inode)->alloc_size >
+ (__s64)((u64)fe->i_clusters << osb->s_clustersize_bits)) {
ocfs_extent_map_destroy (&OCFS_I(inode)->map);
ocfs_extent_map_init (&OCFS_I(inode)->map);
}
+ if (le32_to_cpu(fe->i_flags) & OCFS2_BITMAP_FL) {
+ OCFS_I(inode)->u.ip_bitinfo.used_bits =
+ le32_to_cpu(fe->id1.bitmap1.i_used);
+ OCFS_I(inode)->u.ip_bitinfo.total_bits =
+ le32_to_cpu(fe->id1.bitmap1.i_total);
+ LOG_TRACE_ARGS("updated bitmap inode: i_ino=%lu, used=%u, total=%u\n",
+ inode->i_ino, OCFS_I(inode)->u.ip_bitinfo.used_bits,
+ OCFS_I(inode)->u.ip_bitinfo.total_bits);
+ }
+
LOG_TRACE_STR("Allocsize, filesize or seq no did not match");
- OCFS_I(inode)->alloc_size = fe->alloc_size;
- inode->i_size = fe->file_size;
-printk("verifyupdate: setting nlink from %d to %d for %llu\n", inode->i_nlink, fe->link_cnt, GET_INODE_FEOFF(inode));
- inode->i_nlink = fe->link_cnt;
- OCFS_I(inode)->chng_seq_num = DISK_LOCK_SEQNUM (fe);
- inode->i_blocks = (inode->i_size + osb->sect_size) >> osb->sect_size_bits;
- inode->i_uid = fe->uid;
- inode->i_gid = fe->gid;
- inode->i_mode = fe->prot_bits;
- inode->i_blksize = (__u32) osb->vol_layout.cluster_size;
- OCFS_SET_INODE_TIME(inode, i_ctime, fe->create_time);
- OCFS_SET_INODE_TIME(inode, i_atime, fe->modify_time);
- OCFS_SET_INODE_TIME(inode, i_mtime, fe->modify_time);
+ OCFS_I(inode)->alloc_size =
+ (u64)fe->i_clusters << osb->s_clustersize_bits;
+ inode->i_size = fe->i_size;
+ LOG_TRACE_ARGS("verifyupdate: setting nlink from %d to %d for %llu\n",
+ inode->i_nlink, fe->i_links_count, GET_INODE_FEOFF(inode));
+ inode->i_nlink = fe->i_links_count;
+ OCFS_I(inode)->chng_seq_num = DISK_LOCK(fe)->dl_seq_num;
+ inode->i_blocks = (inode->i_size + osb->sb->s_blocksize - 1) >> osb->sb->s_blocksize_bits;
+ inode->i_uid = fe->i_uid;
+ inode->i_gid = fe->i_gid;
+ inode->i_mode = fe->i_mode;
+ inode->i_blksize = (__u32) osb->s_clustersize;
+ OCFS_SET_INODE_TIME(inode, i_ctime, fe->i_ctime);
+ OCFS_SET_INODE_TIME(inode, i_atime, fe->i_atime);
+ OCFS_SET_INODE_TIME(inode, i_mtime, fe->i_mtime);
if (!S_ISDIR (inode->i_mode) &&
- (OCFS_I(inode)->alloc_size != (__s64) fe->alloc_size ||
- inode->i_size != (__s64) fe->file_size ||
- OCFS_I(inode)->chng_seq_num != DISK_LOCK_SEQNUM (fe))) {
+ (OCFS_I(inode)->alloc_size !=
+ (__s64)((u64)fe->i_clusters << osb->s_clustersize_bits) ||
+ inode->i_size != (__s64) fe->i_size ||
+ OCFS_I(inode)->chng_seq_num != DISK_LOCK(fe)->dl_seq_num)) {
*needs_trunc = 1;
}
- switch (fe->attribs) {
- case OCFS_ATTRIB_DIRECTORY:
- inode->i_mode |= S_IFDIR;
- break;
- case OCFS_ATTRIB_SYMLINK:
- inode->i_mode |= S_IFLNK;
- break;
- case OCFS_ATTRIB_REG:
- inode->i_mode |= S_IFREG;
- break;
- case OCFS_ATTRIB_CHAR:
- case OCFS_ATTRIB_BLOCK:
- case OCFS_ATTRIB_FIFO:
- case OCFS_ATTRIB_SOCKET:
- if (fe->attribs == OCFS_ATTRIB_CHAR)
- inode->i_mode |= S_IFCHR;
- else if (fe->attribs == OCFS_ATTRIB_BLOCK)
- inode->i_mode |= S_IFBLK;
- else if (fe->attribs == OCFS_ATTRIB_FIFO)
- inode->i_mode |= S_IFIFO;
- else if (fe->attribs == OCFS_ATTRIB_SOCKET)
- inode->i_mode |= S_IFSOCK;
-
+ if (S_ISCHR(fe->i_mode) ||
+ S_ISBLK(fe->i_mode) ||
+ S_ISFIFO(fe->i_mode) ||
+ S_ISSOCK(fe->i_mode)) {
inode->i_rdev = OCFS_NODEV;
- init_special_inode (inode, inode->i_mode,
- MKDEV(fe->dev_major,
- fe->dev_minor));
- break;
- default:
- LOG_ERROR_ARGS ("attribs=%d", fe->attribs);
- inode->i_mode |= S_IFREG;
- break;
+ init_special_inode(inode, inode->i_mode,
+ huge_decode_dev(le64_to_cpu(fe->id1.dev1.i_rdev)));
}
- if (fe->local_ext) {
- __s64 tempVbo;
- __s64 tempLbo;
- __u64 tempSize;
+ fel = &fe->id2.i_list;
+ if (fel->l_tree_depth < 0) {
__u32 j;
/* Add the Extents to extent map */
- for (j = 0; j < fe->next_free_ext; j++) {
- tempVbo = fe->extents[j].file_off;
- tempLbo = fe->extents[j].disk_off;
- tempSize = fe->extents[j].num_bytes;
-
- if (!ocfs_add_extent_map_entry (osb,
- &OCFS_I(inode)->map,
- tempVbo, tempLbo,
- tempSize))
+ for (j = 0; j < fel->l_next_free_rec; j++) {
+ if (!ocfs_add_extent_map_entry_from_rec(osb->sb,
+ &OCFS_I(inode)->map,
+ &fel->l_recs[j]))
goto leave;
}
}
Added: trunk/src/inode.h
===================================================================
--- trunk/src/inode.h 2004-06-17 16:59:59 UTC (rev 1127)
+++ trunk/src/inode.h 2004-06-17 21:06:33 UTC (rev 1128)
@@ -0,0 +1,53 @@
+/* -*- mode: c; c-basic-offset: 8; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
+ * inode.h
+ *
+ * Function prototypes
+ *
+ * Copyright (C) 2002, 2004 Oracle. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ *
+ * Authors: Kurt Hackel, Mark Fasheh, Sunil Mushran, Wim Coekaerts,
+ * Manish Singh, Neeraj Goyal, Suchit Kaura
+ */
+
+#ifndef OCFS2_INODE_H
+#define OCFS2_INODE_H
+
+int ocfs_block_symlink(struct inode *inode, const char *symname,
+ int len);
+struct buffer_head *ocfs_bread(ocfs_journal_handle *handle,
+ struct inode * inode, int block,
+ int create, int *err, int reada);
+void ocfs_clear_inode(struct inode *inode);
+void ocfs_delete_inode(struct inode *inode);
+struct inode *ocfs_iget(ocfs_super *osb, __u64 feoff);
+int ocfs_inode_init_private(struct inode *inode);
+int ocfs_inode_revalidate(struct dentry *dentry);
+void ocfs_populate_inode(struct inode *inode, ocfs2_dinode *fe,
+ int create_ino);
+void ocfs_put_inode(struct inode *inode);
+void ocfs_read_inode(struct inode *inode);
+void ocfs_read_inode2(struct inode *inode, void *opaque);
+ssize_t ocfs_rw_direct(int rw, struct file *filp, char *buf,
+ size_t size, loff_t *offp);
+void ocfs_sync_blockdev(struct super_block *sb);
+int ocfs_verify_update_inode(ocfs_super *osb, struct inode *inode,
+ int *needs_trunc, int lockres_locked);
+
+#endif /* OCFS2_INODE_H */
Deleted: trunk/src/io.c
===================================================================
--- trunk/src/io.c 2004-06-17 16:59:59 UTC (rev 1127)
+++ trunk/src/io.c 2004-06-17 21:06:33 UTC (rev 1128)
@@ -1,322 +0,0 @@
-/*
- * io.c
- *
- * Buffer cache handling
- *
- * Copyright (C) 2002, 2004 Oracle. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public
- * License along with this program; if not, write to the
- * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- * Boston, MA 021110-1307, USA.
- *
- * Authors: Kurt Hackel, Mark Fasheh, Sunil Mushran, Wim Coekaerts,
- * Manish Singh, Neeraj Goyal, Suchit Kaura
- */
-
-#include <linux/types.h>
-
-#include "inc/ocfs_log.h"
-#include "inc/ocfs.h"
-#include "inc/ocfs_journal.h"
-
-#define OCFS_DEBUG_CONTEXT OCFS_DEBUG_CONTEXT_IO
-
-static void ocfs_end_buffer_io_sync(struct buffer_head *bh,
- int uptodate)
-{
-// LOG_ENTRY_ARGS("(bh->b_blocknr = %u, uptodate = %d)\n", bh->b_blocknr,
-// uptodate);
-
- if (!uptodate)
- LOG_ERROR_STATUS(-EIO);
-
- if (uptodate)
- set_buffer_uptodate(bh);
- else
- clear_buffer_uptodate(bh);
- unlock_buffer(bh);
-
-// LOG_EXIT();
- return;
-}
-
-
-int ocfs_write_bhs (ocfs_super * osb, struct buffer_head *bhs[],
- int nr, int flags, struct inode *inode)
-{
- int status = 0;
- int i;
- struct super_block *sb;
- ocfs_blockdev dev;
- struct buffer_head *bh;
-
-#ifdef OCFS_DBG_TIMING
- my_timing_t begin, end;
-#endif
-
- LOG_ENTRY_ARGS("(bh[0]->b_blocknr = %llu, nr=%d, flags=%u, inode=%p)\n",
- (unsigned long long)bhs[0]->b_blocknr, nr, flags, inode);
-#ifdef OCFS_DBG_TIMING
- rdtsc (begin.lohi[0], begin.lohi[1]);
-#endif
-
- if (osb == NULL || osb->sb == NULL || bhs == NULL) {
- LOG_TRACE_STR("osb == NULL || osb->sb == NULL || bhs == "
- "NULL");
- status = -EINVAL;
- LOG_ERROR_STATUS(status);
- goto bail;
- }
-
- if (nr > 256)
- LOG_TRACE_ARGS ("Getting write for %d blocks\n", nr);
-
- sb = osb->sb;
- dev = OCFS_GET_BLOCKDEV(sb);
-
- /* we don't ever want cached writes -- those should go to the
- * journal so we can control when they actually hit disk and
- * so we can make sure they never get overwritten by a
- * subsequent read. */
- if ((flags & OCFS_BH_CACHED) || (flags & OCFS_BH_COND_CACHED)) {
- LOG_TRACE_STR("asking for a cached write!");
- status = -EINVAL;
- LOG_ERROR_STATUS(status);
- goto bail;
- }
-
- for (i = 0 ; i < nr ; i++) {
- bh = bhs[i];
- if (bh == NULL) {
- LOG_TRACE_STR("bh == NULL");
- status = -EIO;
- LOG_ERROR_STATUS(status);
- goto bail;
- }
-
- if (check_block_zero_write(bh) < 0) {
- status = -EIO;
- LOG_ERROR_STATUS(status);
- goto bail;
- }
-
- if (flags & OCFS_BH_CONCURRENT_WRITE)
- goto skip_modified_check;
-
-skip_modified_check:
- if (!(flags & OCFS_BH_IGNORE_JBD) && buffer_jbd(bh)) {
-#ifdef VERBOSE_BH_JBD_TRACE
- LOG_TRACE_ARGS("trying to write a jbd managed bh "
- "(blocknr = %llu), nr=%d\n",
- (unsigned long long)bh->b_blocknr, nr);
-#endif
- continue;
- }
-
- lock_buffer(bh);
-
- set_buffer_uptodate(bh);
- /* remove from dirty list before I/O. */
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
- /*
- * mark_buffer_clean() doesn't exist in 2.6.x kernels.
- * Not many places actually used mark_buffer_clean, but
- * at least reiserfs uses clear_buffer_dirty() as
- * a replacment.
- */
- clear_buffer_dirty(bh);
-#else
- mark_buffer_clean(bh);
-#endif
-
- bh->b_end_io = ocfs_end_buffer_io_sync;
- submit_bh(WRITE, bh);
- }
-
- for (i = (nr-1) ; i >= 0; i--) {
- bh = bhs[i];
-
- wait_on_buffer(bh);
-
- if (inode)
- SET_BH_SEQNUM(inode, bh);
- else
- CLEAR_BH_SEQNUM(bh);
- }
-
-bail:
-
-#ifdef OCFS_DBG_TIMING
- IO_FUNC_TIMING_PRINT("ocfs_write_bhs", status);
-#endif
-
- LOG_EXIT_STATUS(status);
- return status;
-}
-
-/*
- * ocfs_read_bhs()
- *
- */
-int ocfs_read_bhs (ocfs_super * osb, __u64 off, __u64 len,
- struct buffer_head *bhs[], int flags,
- struct inode *inode)
-{
- int status = 0;
- struct super_block *sb;
- int nr, i, ignore_cache = 0;
- __u64 blocknum;
- ocfs_blockdev dev;
- struct buffer_head *bh;
-
-#ifdef OCFS_DBG_TIMING
- my_timing_t begin, end;
-#endif
- LOG_ENTRY_ARGS("(off=(%llu), len=(%llu), flags=%d, inode=%p)\n", off,
- len, flags, inode);
-#ifdef OCFS_DBG_TIMING
- rdtsc (begin.lohi[0], begin.lohi[1]);
-#endif
-
- if (len % 512) {
- LOG_TRACE_ARGS("len %% 512 (len=%llu)\n", len);
- status = -EINVAL;
- LOG_ERROR_STATUS(status);
- goto bail;
- }
-
- if (osb == NULL || osb->sb == NULL || bhs == NULL) {
- LOG_TRACE_STR("osb == NULL || osb->sb == NULL || bhs == NULL "
- "|| num == NULL");
- status = -EINVAL;
- LOG_ERROR_STATUS(status);
- goto bail;
- }
-
- if ((flags & OCFS_BH_COND_CACHED) &&
- (off >= osb->vol_layout.bitmap_off))
- flags |= OCFS_BH_CACHED;
-
- if (OCFS_NONCACHED(osb, off)) {
- if (flags & OCFS_BH_CACHED)
- LOG_TRACE_STR("hey bozo you are trying to write "
- "a system thingy cached!");
- flags &= ~OCFS_BH_CACHED;
- }
-
- sb = osb->sb;
- dev = OCFS_GET_BLOCKDEV(sb);
- blocknum = off >> sb->s_blocksize_bits;
-
- nr = (len + 511) >> 9;
- if (nr == 0) {
- LOG_TRACE_STR("No buffers will be read!!!");
- LOG_TRACE_ARGS("Len=%llu Off=%llu numbuffers=%u "
- "blocknum=%llu\n", len, off,
- nr, blocknum);
- status = 0;
- goto bail;
- }
-
- for (i = 0 ; i < nr ; i++) {
- if (bhs[i] == NULL) {
- bhs[i] = getblk (dev, blocknum++, sb->s_blocksize);
- if (bhs[i] == NULL) {
- LOG_TRACE_STR("bh == NULL");
- status = -EIO;
- LOG_ERROR_STATUS(status);
- goto bail;
- }
- }
- bh = bhs[i];
- ignore_cache = 0;
-
- /* Lock everyone else out of this bh */
- OCFS_BH_GET_DATA_READ(bh);
-
- if (flags & OCFS_BH_CACHED && inode &&
- !TEST_BH_SEQNUM(inode, bh)) {
-#ifdef VERBOSE_BH_SEQNUM_TRACE
- LOG_TRACE_ARGS("(read) bh (%llu) seqnum (%lu) does not "
- "match inode (%u)\n",
- (unsigned long long)bh->b_blocknr,
- (bh->b_state & STATE_BIT_MASK) >> 19,
- atomic_read(GET_INODE_CLEAN_SEQ(inode)));
-#endif
- ignore_cache = 1;
- }
-
- if ((flags & OCFS_BH_CACHED) && (!buffer_uptodate(bh)))
- ignore_cache = 1;
-
- if (buffer_jbd(bh)) {
-#ifdef VERBOSE_BH_JBD_TRACE
- if (!(flags & OCFS_BH_CACHED) || ignore_cache)
- LOG_TRACE_ARGS("trying to sync read a jbd "
- "managed bh (blocknr = %llu)\n",
- (unsigned long long)bh->b_blocknr);
-#endif
- continue;
- }
-
- if (!(flags & OCFS_BH_CACHED) || ignore_cache) {
- if (buffer_dirty(bh)) {
- /* This should probably be a BUG, or
- * at least return an error. */
- LOG_TRACE_ARGS("asking me to sync read a "
- "dirty buffer! (blocknr = %llu)\n",
- (unsigned long long)bh->b_blocknr);
- continue;
- }
-
- lock_buffer(bh);
- clear_buffer_uptodate(bh);
- bh->b_end_io = ocfs_end_buffer_io_sync;
- if (flags & OCFS_BH_READAHEAD)
- submit_bh(READA, bh);
- else
- submit_bh(READ, bh);
- continue;
- }
- }
-
- status = 0;
-
- for (i = (nr-1); i >= 0; i--) {
- bh = bhs[i];
-
- wait_on_buffer(bh);
-
- if (inode)
- SET_BH_SEQNUM(inode, bh);
- else
- CLEAR_BH_SEQNUM(bh);
-
- OCFS_BH_PUT_DATA(bh);
- }
- LOG_TRACE_ARGS("off=(%llu), len=(%llu), cached=%s\n", off, len,
- (!(flags & OCFS_BH_CACHED) || ignore_cache) ? "no" : "yes");
-
-bail:
-
-#ifdef OCFS_DBG_TIMING
- IO_FUNC_TIMING_PRINT("ocfs_read_bhs", status);
-#endif
-
- LOG_EXIT_STATUS(status);
- return status;
-}
-
-
-
Modified: trunk/src/ioctl.c
===================================================================
--- trunk/src/ioctl.c 2004-06-17 16:59:59 UTC (rev 1127)
+++ trunk/src/ioctl.c 2004-06-17 21:06:33 UTC (rev 1128)
@@ -1,4 +1,6 @@
-/*
+/* -*- mode: c; c-basic-offset: 8; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
* ioctl.c
*
* ioctl interface
@@ -24,11 +26,19 @@
* Manish Singh, Neeraj Goyal, Suchit Kaura
*/
+#include "ocfs_compat.h"
+
+#include <linux/fs.h>
#include <linux/types.h>
+#include <linux/slab.h>
-#include "inc/ocfs_log.h"
-#include "inc/ocfs.h"
+#include <asm/uaccess.h>
+#include "ocfs_log.h"
+#include "ocfs.h"
+
+#include "ioctl.h"
+
/* Tracing */
#define OCFS_DEBUG_CONTEXT OCFS_DEBUG_CONTEXT_IOCTL
Added: trunk/src/ioctl.h
===================================================================
--- trunk/src/ioctl.h 2004-06-17 16:59:59 UTC (rev 1127)
+++ trunk/src/ioctl.h 2004-06-17 21:06:33 UTC (rev 1128)
@@ -0,0 +1,35 @@
+/* -*- mode: c; c-basic-offset: 8; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
+ * ioctl.h
+ *
+ * Function prototypes
+ *
+ * Copyright (C) 2002, 2004 Oracle. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ *
+ * Authors: Kurt Hackel, Mark Fasheh, Sunil Mushran, Wim Coekaerts,
+ * Manish Singh, Neeraj Goyal, Suchit Kaura
+ */
+
+#ifndef OCFS2_IOCTL_H
+#define OCFS2_IOCTL_H
+
+int ocfs_ioctl(struct inode *inode, struct file *filp, unsigned int cmd,
+ unsigned long arg);
+
+#endif /* OCFS2_IOCTL_H */
Modified: trunk/src/journal.c
===================================================================
--- trunk/src/journal.c 2004-06-17 16:59:59 UTC (rev 1127)
+++ trunk/src/journal.c 2004-06-17 21:06:33 UTC (rev 1128)
@@ -1,4 +1,6 @@
-/*
+/* -*- mode: c; c-basic-offset: 8; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
* journal.c
*
* Defines functions of journalling api
@@ -24,12 +26,30 @@
* Manish Singh, Joel Becker
*/
+#include "ocfs_compat.h"
+
+#include <linux/fs.h>
#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/highmem.h>
-#include "inc/ocfs_log.h"
-#include "inc/ocfs.h"
-#include "inc/ocfs_journal.h"
+#include "ocfs_log.h"
+#include "ocfs.h"
+#include "alloc.h"
+#include "dlm.h"
+#include "extmap.h"
+#include "inode.h"
+#include "journal.h"
+#include "nm.h"
+#include "super.h"
+#include "util.h"
+#include "vote.h"
+#include "sysfile.h"
+
+#include "ocfs_journal.h"
+#include "ocfs_buffer_head.h"
+
#define OCFS_DEBUG_CONTEXT OCFS_DEBUG_CONTEXT_JOURNAL
typedef enum _release_locks_action {
@@ -62,6 +82,8 @@
* #included.
*/
#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)
+#include <linux/smp_lock.h>
+
static inline handle_t *ocfs_journal_start(journal_t *journal, int nblocks)
{
handle_t *h;
@@ -285,7 +307,7 @@
iput(lock->inode);
list_del(&(lock->lock_list));
handle->num_locks--;
- ocfs_free(lock);
+ kfree(lock);
}
LOG_EXIT_STATUS(status);
@@ -301,21 +323,16 @@
LOG_ENTRY_ARGS("(%llu)\n", lockid);
- /* make sure that we have a root_start off */
- /* this can be called early in the first mount */
- if (osb->vol_layout.root_start_off != 0 &&
- lockid >= osb->vol_layout.root_start_off) {
- journal = osb->journal;
- down(&journal->commit_sem);
- list_for_each(p1, &(journal->commited)) {
- handle = list_entry(p1, ocfs_journal_handle, h_list);
- if (handle->new_file_lockid == lockid) {
- status = 0;
- break;
- }
+ journal = osb->journal;
+ down(&journal->commit_sem);
+ list_for_each(p1, &(journal->commited)) {
+ handle = list_entry(p1, ocfs_journal_handle, h_list);
+ if (handle->new_file_lockid == lockid) {
+ status = 0;
+ break;
}
- up(&journal->commit_sem);
}
+ up(&journal->commit_sem);
LOG_EXIT_STATUS(status);
return status;
@@ -430,7 +447,7 @@
}
handle->num_buffs = 0;
if (handle->buffs) {
- ocfs_free(handle->buffs);
+ kfree(handle->buffs);
handle->buffs = NULL;
}
@@ -464,7 +481,7 @@
ocfs_free_bitmap_free_head(commit_head);
if (checkpoint)
- ocfs_free(handle);
+ kfree(handle);
LOG_EXIT();
@@ -519,9 +536,9 @@
* turned off later. */
LOG_ERROR_ARGS("block %lu was modified but never "
"dirtied!\n", co->blocknr);
- bh = getblk(OCFS_GET_BLOCKDEV(osb->sb), co->blocknr,
- osb->sect_size);
+ bh = sb_getblk(osb->sb, co->blocknr);
if (bh == NULL)
+#warning bingo, oops on oom
BUG();
dirtied = 0;
@@ -608,8 +625,8 @@
ocfs_free_bitmap_free_head(handle->commit_bits);
if (handle->buffs)
- ocfs_free(handle->buffs);
- ocfs_free(handle);
+ kfree(handle->buffs);
+ kfree(handle);
LOG_EXIT();
return;
@@ -744,7 +761,10 @@
(unsigned long long)bh->b_blocknr);
if (handle->num_buffs >= handle->max_buffs) {
- LOG_ERROR_STR("Cannot add buffer to full transaction!");
+ LOG_ERROR_ARGS("Cannot add buffer to full transaction! "
+ "num_buffs=%d, max_buffs=%d, block=%llu\n",
+ handle->num_buffs, handle->max_buffs,
+ (unsigned long long)bh->b_blocknr);
goto done;
}
@@ -843,7 +863,7 @@
int status = -1;
struct inode *inode = NULL; /* the journal inode */
journal_t * k_journal = NULL;
- ocfs_file_entry *fe = NULL;
+ ocfs2_dinode *fe = NULL;
__u64 lock_id = 0;
struct buffer_head *bh = NULL;
__u64 alloc_size;
@@ -856,11 +876,8 @@
INIT_LIST_HEAD(&(osb->journal->commited));
init_MUTEX(&(osb->journal->commit_sem));
- lock_id = ((OCFS_JOURNAL_FILE + osb->node_num) * osb->sect_size) +
- osb->vol_layout.root_int_off;
-
- /* Ok, look up the inode for our journal */
- inode = ocfs_iget(osb, lock_id);
+ /* already have the inode for our journal */
+ inode = ocfs_get_system_file_inode(osb, JOURNAL_SYSTEM_INODE, osb->node_num);
if (inode == NULL) {
LOG_ERROR_STR("access error");
status = -EACCES;
@@ -873,6 +890,7 @@
status = -EACCES;
goto done;
}
+ lock_id = GET_INODE_FEOFF(inode);
down_write(&OCFS_I(inode)->ip_io_sem);
@@ -892,10 +910,10 @@
fe = OCFS_BH_GET_DATA_READ(bh);
/* should be exactly eight megs. need to run tuneocfs otherwise. */
- if (fe->file_size != OCFS_JOURNAL_DEFAULT_SIZE) {
+ if (fe->i_size != OCFS_JOURNAL_DEFAULT_SIZE) {
LOG_ERROR_ARGS("Journal file size (%llu) is not the default (%u). "
"You must run tuneocfs to add a journal for this node.\n",
- fe->file_size, OCFS_JOURNAL_DEFAULT_SIZE);
+ fe->i_size, OCFS_JOURNAL_DEFAULT_SIZE);
status = -EINVAL;
}
@@ -907,13 +925,13 @@
goto done;
}
- LOG_TRACE_ARGS("fe->file_size = %llu\n", fe->file_size);
- LOG_TRACE_ARGS("fe->alloc_size = %llu\n", fe->alloc_size);
+ LOG_TRACE_ARGS("fe->i_size = %llu\n", fe->i_size);
+ LOG_TRACE_ARGS("fe->i_clusters = %u\n", fe->i_clusters);
/* gonna need this later */
- alloc_size = fe->alloc_size;
- LOG_TRACE_ARGS("fe->this_sector = %llu\n", fe->this_sector);
- inode->i_size = fe->file_size;
+ alloc_size = (u64)fe->i_clusters << osb->s_clustersize_bits;
+ LOG_TRACE_ARGS("fe->i_blkno = %llu\n", fe->i_blkno);
+ inode->i_size = fe->i_size;
OCFS_BH_PUT_DATA(bh);
fe = NULL;
@@ -1031,7 +1049,7 @@
send_sig (SIGINT, osb->commit->c_task, 0);
wait_for_completion(&osb->commit->c_complete);
osb->commit->c_task = NULL;
- ocfs_free(osb->commit);
+ kfree(osb->commit);
}
/* Shutdown the kernel journal system */
@@ -1134,7 +1152,7 @@
struct buffer_head **bhs = NULL;
LOG_ENTRY();
- totalblks = size >> osb->sect_size_bits;
+ totalblks = size >> osb->sb->s_blocksize_bits;
/* Ok, allocate the maximum number of blocks we'll possibly
* need. This is heavy, but it doesn't happen often. If need
@@ -1152,7 +1170,7 @@
LOG_TRACE_ARGS("Force reading %u blocks\n", totalblks);
- status = ocfs_lookup_file_allocation(osb, vbo, &lbo, size, NULL,
+ status = ocfs_lookup_file_allocation(osb, vbo, &lbo, totalblks, NULL,
inode, 1);
if (status < 0) {
LOG_ERROR_STATUS(status);
@@ -1179,7 +1197,7 @@
"bytes\n", run, lbo, numbytes);
/* there are actually returned in bytes. need blocks. */
- numblks = numbytes / osb->sect_size;
+ numblks = numbytes >> osb->sb->s_blocksize_bits;
status = ocfs_read_bhs(osb, lbo, numbytes, bhs, 0, NULL);
if (status < 0) {
@@ -1198,7 +1216,7 @@
for(i = 0; i < totalblks; i++)
if (bhs[i])
brelse(bhs[i]);
- ocfs_free(bhs);
+ kfree(bhs);
}
LOG_EXIT_STATUS(status);
return(status);
@@ -1241,7 +1259,7 @@
LOG_EXIT_STATUS(status);
- ocfs_free(arg);
+ kfree(arg);
return status;
}
@@ -1281,7 +1299,7 @@
{
int status = -1;
__u64 lock_id = 0;
- ocfs_file_entry *fe;
+ ocfs2_dinode *fe;
struct inode *inode = NULL;
journal_t *k_journal = NULL;
struct buffer_head *bh = NULL;
@@ -1308,12 +1326,8 @@
goto done;
}
- /* get the journal file fe and lock */
- lock_id = ((OCFS_JOURNAL_FILE + node_num) * osb->sect_size)
- + osb->vol_layout.root_int_off;
-
/* Ok, look up the inode for our journal */
- inode = ocfs_iget(osb, lock_id);
+ inode = ocfs_get_system_file_inode(osb, JOURNAL_SYSTEM_INODE, node_num);
if (inode == NULL) {
LOG_ERROR_STR("access error");
status = -EACCES;
@@ -1326,6 +1340,7 @@
status = -EACCES;
goto done;
}
+ lock_id = GET_INODE_FEOFF(inode);
down_write(&OCFS_I(inode)->ip_io_sem);
@@ -1363,7 +1378,7 @@
fe = OCFS_BH_GET_DATA_READ(bh);
/* gonna need this later */
- alloc_size = fe->alloc_size;
+ alloc_size = (u64)fe->i_clusters << osb->s_clustersize_bits;
OCFS_BH_PUT_DATA(bh);
OCFS_I(inode)->alloc_size = alloc_size;
@@ -1433,7 +1448,7 @@
status = 0;
clear_node:
- CLEAR_NODE_IN_RECOVERY(osb, node_num);
+ ocfs_recovery_map_clear(osb, node_num);
ocfs_recover_oin_locks(osb, node_num);
done:
if (recovery_lock)
@@ -1482,8 +1497,7 @@
down (&(osb->publish_lock));
/* Read the publish sector */
- node_publ_off = osb->vol_layout.publ_sect_off +
- (node_num * osb->sect_size);
+ node_publ_off = (osb->publish_blkno + node_num) << osb->sb->s_blocksize_bits;
status = ocfs_read_bh(osb, node_publ_off, &publish_bh,
OCFS_BH_COND_CACHED, NULL);
if (status < 0) {
@@ -1554,26 +1568,14 @@
* one last commit cache and then exit */
if (journal->state == OCFS_JOURNAL_IN_SHUTDOWN)
finish = 1;
+
+ /* ignore the actual signal */
if (signal_pending(current)) {
- /* ignore the actual signal */
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
dequeue_signal_lock(current,
&current->blocked,
&info);
-#else
-#ifdef HAVE_NPTL
- spin_lock_irq (&current->sighand->siglock);
- dequeue_signal(&current->blocked,
- &info);
- spin_unlock_irq(&current->sighand->siglock);
-#else
- spin_lock_irq(&current->sigmask_lock);
- dequeue_signal(&current->blocked,
- &info);
- spin_unlock_irq(&current->sigmask_lock);
-#endif /* !HAVE_NPTL */
-#endif /* 2.4.x kernel */
}
+
LOG_TRACE_STR("FLUSH_EVENT: interrupted");
break;
case 0:
@@ -1685,7 +1687,7 @@
if (tmpstat < 0)
LOG_ERROR_STATUS((status = tmpstat));
list_del(&(handle->h_list));
- ocfs_free(handle);
+ kfree(handle);
atomic_dec(&journal->num_trans);
}
Added: trunk/src/journal.h
===================================================================
--- trunk/src/journal.h 2004-06-17 16:59:59 UTC (rev 1127)
+++ trunk/src/journal.h 2004-06-17 21:06:33 UTC (rev 1128)
@@ -0,0 +1,34 @@
+/* -*- mode: c; c-basic-offset: 8; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
+ * journal.h
+ *
+ * Function prototypes
+ *
+ * Copyright (C) 2002, 2004 Oracle. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ *
+ * Authors: Kurt Hackel, Mark Fasheh, Sunil Mushran, Wim Coekaerts,
+ * Manish Singh, Neeraj Goyal, Suchit Kaura
+ */
+
+#ifndef OCFS2_JOURNAL_H
+#define OCFS2_JOURNAL_H
+
+int ocfs_commit_thread(void *arg);
+
+#endif /* OCFS2_JOURNAL_H */
Modified: trunk/src/lockres.c
===================================================================
--- trunk/src/lockres.c 2004-06-17 16:59:59 UTC (rev 1127)
+++ trunk/src/lockres.c 2004-06-17 21:06:33 UTC (rev 1128)
@@ -1,4 +1,6 @@
-/*
+/* -*- mode: c; c-basic-offset: 8; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
* lockres.c
*
* lock resource handling
@@ -24,11 +26,23 @@
* Manish Singh, Neeraj Goyal, Suchit Kaura
*/
+#include "ocfs_compat.h"
+
+#include <linux/fs.h>
#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/highmem.h>
-#include "inc/ocfs_log.h"
-#include "inc/ocfs.h"
+#include "ocfs_log.h"
+#include "ocfs.h"
+#include "dlm.h"
+#include "lockres.h"
+#include "util.h"
+#include "vote.h"
+
+#include "ocfs_buffer_head.h"
+
/* Tracing */
#define OCFS_DEBUG_CONTEXT OCFS_DEBUG_CONTEXT_LOCKRES
@@ -50,7 +64,7 @@
{
int status = 0;
struct buffer_head *tmpbh = NULL, **b = NULL;
- ocfs_file_entry *fe;
+ ocfs2_dinode *fe;
int flags;
ocfs_lock_res *lockres = GET_INODE_LOCKRES(inode);;
@@ -60,8 +74,8 @@
b = (bh == NULL) ? &tmpbh : bh;
if (reread) {
- flags = OCFS_NONCACHED(osb, lock_id) ? 0 :
- lockres->master_node_num == osb->node_num ?
+ flags = (inode && OCFS_I(inode)->flags & OCFS_INODE_SYSTEM_FILE) ?
+ 0 : lockres->master_node_num == osb->node_num ?
OCFS_BH_CACHED : 0;
status = ocfs_read_bh(osb, lock_id, b, flags, inode);
if (status < 0) {
@@ -83,9 +97,9 @@
if (!IS_VALID_FILE_ENTRY(fe))
BUG();
- lockres->lock_type = DISK_LOCK_FILE_LOCK (fe);
- lockres->master_node_num = DISK_LOCK_CURRENT_MASTER (fe);
- lockres->oin_openmap = DISK_LOCK_OIN_MAP (fe);
+ lockres->lock_type = DISK_LOCK(fe)->dl_level;
+ lockres->master_node_num = DISK_LOCK(fe)->dl_master;
+ ocfs_get_disk_lock_open_map(osb, DISK_LOCK(fe), &lockres->oin_openmap);
if (lockres->readonly_node != OCFS_INVALID_NODE_NUM &&
lockres->readonly_node != lockres->master_node_num) {
@@ -191,13 +205,13 @@
lockres->lock_type = OCFS_DLM_NO_LOCK;
lockres->master_node_num = OCFS_INVALID_NODE_NUM;
lockres->last_upd_seq_num = 0;
- lockres->oin_openmap = 0;
+ ocfs_node_map_init(osb, &lockres->oin_openmap);
lockres->lock_state = 0;
init_MUTEX(&lockres->lock_mutex);
atomic_set (&lockres->lr_ref_cnt, 0);
- lockres->readonly_map = 0ULL;
+ ocfs_node_map_init(osb, &lockres->readonly_map);
lockres->readonly_node = OCFS_INVALID_NODE_NUM;
lockres->lock_holders = 0;
Added: trunk/src/lockres.h
===================================================================
--- trunk/src/lockres.h 2004-06-17 16:59:59 UTC (rev 1127)
+++ trunk/src/lockres.h 2004-06-17 21:06:33 UTC (rev 1128)
@@ -0,0 +1,41 @@
+/* -*- mode: c; c-basic-offset: 8; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
+ * lockres.h
+ *
+ * Function prototypes
+ *
+ * Copyright (C) 2002, 2004 Oracle. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ *
+ * Authors: Kurt Hackel, Mark Fasheh, Sunil Mushran, Wim Coekaerts,
+ * Manish Singh, Neeraj Goyal, Suchit Kaura
+ */
+
+#ifndef OCFS2_LOCKRES_H
+#define OCFS2_LOCKRES_H
+
+/* lockres.c */
+int ocfs_acquire_lockres(ocfs_lock_res *lockres, __u32 timeout);
+void ocfs_init_lockres(ocfs_super *osb, struct inode *inode);
+void ocfs_release_lockres(ocfs_lock_res *lockres);
+int ocfs_update_lockres(ocfs_super *osb, __u64 lock_id,
+ struct buffer_head **bh, __u32 *updated,
+ __u32 timeout, struct inode *inode, int reread,
+ int locked);
+
+#endif /* OCFS2_LOCKRES_H */
Modified: trunk/src/namei.c
===================================================================
--- trunk/src/namei.c 2004-06-17 16:59:59 UTC (rev 1127)
+++ trunk/src/namei.c 2004-06-17 21:06:33 UTC (rev 1128)
@@ -1,10 +1,25 @@
-/*
+/* -*- mode: c; c-basic-offset: 8; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
* namei.c
*
* Create and rename file, directory, symlinks
*
* Copyright (C) 2002, 2004 Oracle. All rights reserved.
*
+ * Portions of this code from linux/fs/ext3/dir.c
+ *
+ * Copyright (C) 1992, 1993, 1994, 1995
+ * Remy Card (card at masi.ibp.fr)
+ * Laboratoire MASI - Institut Blaise pascal
+ * Universite Pierre et Marie Curie (Paris VI)
+ *
+ * from
+ *
+ * linux/fs/minix/dir.c
+ *
+ * Copyright (C) 1991, 1992 Linux Torvalds
+ *
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License as published by the Free Software Foundation; either
@@ -24,34 +39,59 @@
* Manish Singh, Neeraj Goyal, Suchit Kaura
*/
+#include "ocfs_compat.h"
+
+#include <linux/fs.h>
#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/highmem.h>
-#include "inc/ocfs_log.h"
-#include "inc/ocfs.h"
-#include "inc/ocfs_journal.h"
+#include "ocfs_log.h"
+#include "ocfs.h"
+#include "alloc.h"
+#include "dcache.h"
+#include "dir.h"
+#include "dlm.h"
+#include "file.h"
+#include "sysfile.h"
+#include "inode.h"
+#include "lockres.h"
+#include "namei.h"
+#include "util.h"
+#include "vote.h"
+
+#include "ocfs_journal.h"
+#include "ocfs_buffer_head.h"
+
#define OCFS_DEBUG_CONTEXT OCFS_DEBUG_CONTEXT_NAMEI
-static int inline search_dirblock(struct buffer_head * bh, struct inode *dir,
+static int inline search_dirblock(struct buffer_head *bh,
+ struct inode *dir,
const char *name, int namelen,
unsigned long offset,
- struct ocfs2_dir_entry ** res_dir);
-static int ocfs_delete_entry (ocfs_journal_handle *handle, struct inode * dir,
- struct ocfs2_dir_entry * de_del,
- struct buffer_head * bh);
-static int __ocfs_add_entry (ocfs_journal_handle *handle, struct inode *dir,
- const char *name, int namelen,
- struct inode *inode, __u64 inode_off,
- struct buffer_head *parent_fe_bh);
-static inline int ocfs_match (int len, const char * const name, struct ocfs2_dir_entry * de);
+ struct ocfs2_dir_entry **res_dir);
+static int ocfs_delete_entry(ocfs_journal_handle *handle,
+ struct inode *dir,
+ struct ocfs2_dir_entry *de_del,
+ struct buffer_head *bh);
+static int __ocfs_add_entry(ocfs_journal_handle *handle,
+ struct inode *dir,
+ const char *name, int namelen,
+ struct inode *inode, __u64 inode_off,
+ struct buffer_head *parent_fe_bh);
+static inline int ocfs_match(int len, const char *const name,
+ struct ocfs2_dir_entry *de);
static int ocfs_mknod_locked(ocfs_super *osb, struct inode *dir,
struct dentry *dentry, int mode,
- ocfs_dev dev, struct buffer_head **new_fe_bh,
+ ocfs_dev dev,
+ struct buffer_head **new_fe_bh,
struct buffer_head *parent_fe_bh,
ocfs_journal_handle *handle,
struct inode *inode);
-static int ocfs_double_lock(ocfs_super *osb, ocfs_journal_handle *handle,
+static int ocfs_double_lock(ocfs_super *osb,
+ ocfs_journal_handle *handle,
__u32 type1, __u32 flags1,
struct buffer_head **bh1,
struct inode *inode1,
@@ -59,13 +99,9 @@
struct buffer_head **bh2,
struct inode *inode2);
-static int ocfs_orphan_add(ocfs_super *osb, ocfs_journal_handle *handle,
- struct inode *inode, ocfs_file_entry *fe);
+static int ocfs_orphan_add(ocfs_super *osb, ocfs_journal_handle *handle,
+ struct inode *inode, ocfs2_dinode *fe);
-static struct dentry_operations ocfs_dentry_ops = {
- .d_revalidate = ocfs_dentry_revalidate
-};
-
static inline int ocfs_add_entry(ocfs_journal_handle *handle,
struct dentry *dentry,
struct inode *inode, __u64 inode_off,
@@ -99,7 +135,7 @@
LOG_ENTRY_ARGS ("(0x%p, 0x%p, '%*s')\n", dir, dentry,
dentry->d_name.len, dentry->d_name.name);
- if (dentry->d_name.len > OCFS_MAX_FILENAME_LENGTH) {
+ if (dentry->d_name.len > OCFS2_MAX_FILENAME_LENGTH) {
LOG_ERROR_STR ("name too long");
ret = ERR_PTR (-ENAMETOOLONG);
goto bail;
@@ -109,9 +145,9 @@
dir);
down_read(&OCFS_I(dir)->ip_io_sem);
- status = ocfs_find_files_on_disk(osb, dentry->d_name.name,
- dentry->d_name.len, &fe_off, dir, 1,
- &dirent_bh, &dirent);
+ status = ocfs_find_files_on_disk(osb, dentry->d_name.name,
+ dentry->d_name.len, &fe_off,
+ dir, 1, &dirent_bh, &dirent);
up_read(&OCFS_I(dir)->ip_io_sem);
if (status < 0)
goto bail_add;
@@ -144,7 +180,7 @@
__u64 file_off;
ocfs_journal_handle *handle = NULL;
ocfs_super *osb;
- ocfs_file_entry *fe = NULL;
+ ocfs2_dinode *fe = NULL;
struct buffer_head *new_fe_bh = NULL;
struct inode *inode = NULL;
@@ -209,10 +245,11 @@
fe = OCFS_BH_GET_DATA_READ(new_fe_bh);
- ocfs_populate_inode (inode, fe, mode, 1);
+ ocfs_populate_inode (inode, fe, 1);
- file_off = fe->this_sector;
- handle->new_file_lockid = fe->this_sector;
+ file_off = fe->i_blkno << osb->sb->s_blocksize_bits;
+ handle->new_file_lockid =
+ fe->i_blkno << osb->sb->s_blocksize_bits;
OCFS_BH_PUT_DATA(new_fe_bh);
fe = NULL;
@@ -239,12 +276,14 @@
de = OCFS_BH_GET_DATA_WRITE(newdirbh);
de->inode = cpu_to_le64(file_off);
de->name_len = 1;
- de->rec_len = cpu_to_le16(OCFS_DIR_REC_LEN(de->name_len));
+ de->rec_len =
+ cpu_to_le16(OCFS2_DIR_REC_LEN(de->name_len));
strcpy (de->name, ".");
ocfs_set_de_type(dir->i_sb, de, S_IFDIR);
de = (struct ocfs2_dir_entry *) ((char *) de + le16_to_cpu(de->rec_len));
de->inode = cpu_to_le64(GET_INODE_FEOFF(dir));
- de->rec_len = cpu_to_le16(inode->i_sb->s_blocksize-OCFS_DIR_REC_LEN(1));
+ de->rec_len = cpu_to_le16(inode->i_sb->s_blocksize -
+ OCFS2_DIR_REC_LEN(1));
de->name_len = 2;
strcpy (de->name, "..");
ocfs_set_de_type(dir->i_sb, de, S_IFDIR);
@@ -264,7 +303,7 @@
goto leave;
}
fe = OCFS_BH_GET_DATA_WRITE(parent_fe_bh);
- fe->link_cnt++;
+ fe->i_links_count++;
OCFS_BH_PUT_DATA(parent_fe_bh);
fe = NULL;
status = ocfs_journal_dirty(handle, parent_fe_bh);
@@ -313,25 +352,30 @@
*/
static int ocfs_mknod_locked(ocfs_super *osb, struct inode *dir,
struct dentry *dentry, int mode,
- ocfs_dev dev, struct buffer_head **new_fe_bh,
+ ocfs_dev dev,
+ struct buffer_head **new_fe_bh,
struct buffer_head *parent_fe_bh,
ocfs_journal_handle *handle,
struct inode *inode)
{
int status = 0;
- ocfs_file_entry *fe = NULL;
- __u64 bitmapOffset = 0;
+ ocfs2_dinode *fe = NULL;
+ ocfs2_extent_list *fel;
+ __u64 disk_off = 0;
__u64 fileOffset = 0;
struct inode *inode_alloc_inode = NULL;
- struct super_block *sb = osb->sb;
+ ocfs_node_map just_me;
LOG_ENTRY_ARGS ("(0x%p, 0x%p, %d, %d, '%*s')\n", dir, dentry, mode,
dev, dentry->d_name.len, dentry->d_name.name);
OCFS_ASSERT(new_fe_bh);
*new_fe_bh = NULL;
+
+ ocfs_node_map_init(osb, &just_me);
+ ocfs_node_map_set_bit(&just_me, osb->node_num);
- inode_alloc_inode = igrab(osb->system_inodes[INODE_ALLOC_BITMAP_SYSTEM_INODE]);
+ inode_alloc_inode = ocfs_get_system_file_inode(osb, INODE_ALLOC_BITMAP_SYSTEM_INODE, osb->node_num);
if (!inode_alloc_inode) {
status = -EFAIL;
LOG_ERROR_STATUS(status);
@@ -339,18 +383,16 @@
}
ocfs_handle_add_inode(handle, inode_alloc_inode);
- status = ocfs_alloc_node_block (osb, osb->inode_size,
- &bitmapOffset, &fileOffset,
- osb->node_num, DISK_ALLOC_INODE,
- handle);
+ status = ocfs_alloc_node_block(osb, osb->sb->s_blocksize,
+ &disk_off, &fileOffset,
+ osb->node_num, DISK_ALLOC_INODE,
+ handle);
if (status < 0) {
LOG_ERROR_STATUS (status);
goto leave;
}
- *new_fe_bh = getblk (OCFS_GET_BLOCKDEV(sb),
- bitmapOffset >> sb->s_blocksize_bits,
- sb->s_blocksize);
+ *new_fe_bh = sb_getblk(osb->sb, disk_off >> osb->sb->s_blocksize_bits);
if (!*new_fe_bh) {
status = -EIO;
LOG_ERROR_STATUS(status);
@@ -366,59 +408,46 @@
}
fe = OCFS_BH_GET_DATA_WRITE(*new_fe_bh);
- memset (fe, 0, osb->sect_size);
- inode->i_ino = ino_from_off(osb->sb, bitmapOffset);
- fe->this_sector = bitmapOffset;
- fe->alloc_file_off = fileOffset;
- fe->alloc_node = osb->node_num;
- fe->uid = current->fsuid;
- fe->gid = current->fsgid;
- fe->prot_bits = mode & 0007777;
- if (S_ISCHR (mode) || S_ISBLK (mode)) {
- fe->dev_major = MAJOR (dev);
- fe->dev_minor = MINOR (dev);
- } else {
- fe->dev_major = MAJOR (dir->i_sb->s_dev);
- fe->dev_minor = MINOR (dir->i_sb->s_dev);
- }
+ memset(fe, 0, osb->sb->s_blocksize);
+ inode->i_ino = ino_from_off(osb->sb, disk_off);
+ /*
+ * FIXME This needs to be
+ * (node_num << 24) | (nodeconf->gen_counter++ & 0x00FFFFFF)
+ */
+ inode->i_generation = 0;
+ fe->i_generation = cpu_to_le32(inode->i_generation);
+ fe->i_blkno = disk_off >> osb->sb->s_blocksize_bits;
+ fe->i_suballoc_blkno = fileOffset >> osb->sb->s_blocksize_bits;
+ fe->i_suballoc_node = osb->node_num;
+ fe->i_uid = current->fsuid;
+ fe->i_gid = current->fsgid;
+ fe->i_mode = mode;
+ if (S_ISCHR (mode) || S_ISBLK (mode))
+ fe->id1.dev1.i_rdev = huge_encode_dev(dev);
if (S_ISLNK (mode) || S_ISDIR (mode) || S_ISREG (mode))
atomic_set(GET_INODE_CLEAN_SEQ(inode), atomic_read(&osb->clean_buffer_seq));
- if (S_ISLNK (mode))
- fe->attribs |= OCFS_ATTRIB_SYMLINK;
- else if (S_ISCHR (mode))
- fe->attribs |= OCFS_ATTRIB_CHAR;
- else if (S_ISBLK (mode))
- fe->attribs |= OCFS_ATTRIB_BLOCK;
- else if (S_ISFIFO (mode))
- fe->attribs |= OCFS_ATTRIB_FIFO;
- else if (S_ISSOCK (mode))
- fe->attribs |= OCFS_ATTRIB_SOCKET;
- else if (S_ISDIR (mode))
- fe->attribs |= OCFS_ATTRIB_DIRECTORY;
- else
- fe->attribs |= OCFS_ATTRIB_REG;
-
if (S_ISDIR (mode))
- fe->link_cnt = 2;
+ fe->i_links_count = 2;
else
- fe->link_cnt = 1;
+ fe->i_links_count = 1;
- fe->local_ext = 1;
- fe->granularity = -1;
- fe->next_free_ext = 0;
- fe->last_ext_ptr = 0;
- strcpy (fe->signature, OCFS_FILE_ENTRY_SIGNATURE);
- SET_VALID_BIT (fe->sync_flags);
- DISK_LOCK_SEQNUM (fe) = 0;
- DISK_LOCK_CURRENT_MASTER (fe) = osb->node_num;
- DISK_LOCK_FILE_LOCK (fe) = OCFS_DLM_ENABLE_CACHE_LOCK;
- DISK_LOCK_READER_NODE (fe) = osb->node_num;
- DISK_LOCK_WRITER_NODE (fe) = osb->node_num;
- DISK_LOCK_OIN_MAP(fe) = (1 << osb->node_num);
- fe->create_time = fe->modify_time = OCFS_CURRENT_TIME;
- fe->dir_node_ptr = GET_INODE_FEOFF(dir);
+ fe->i_last_eb_blk = 0;
+ strcpy (fe->i_signature, OCFS2_FILE_ENTRY_SIGNATURE);
+ fe->i_flags |= OCFS2_VALID_FL;
+ DISK_LOCK(fe)->dl_seq_num = 0;
+ DISK_LOCK(fe)->dl_master = osb->node_num;
+ DISK_LOCK(fe)->dl_level = OCFS_DLM_ENABLE_CACHE_LOCK;
+ ocfs_set_disk_lock_open_map(osb, DISK_LOCK(fe), &just_me);
+ fe->i_atime = fe->i_ctime = fe->i_mtime = OCFS_CURRENT_TIME;
+ fe->i_dtime = 0;
+
+ fel = &fe->id2.i_list;
+ fel->l_tree_depth = -1;
+ fel->l_next_free_rec = 0;
+ fel->l_count = ocfs2_extent_recs_per_inode(osb->sb);
+
OCFS_BH_PUT_DATA(*new_fe_bh);
fe = NULL;
@@ -432,7 +461,8 @@
* for add_entry. */
inode->i_mode = mode;
- status = ocfs_add_entry (handle, dentry, inode, bitmapOffset, parent_fe_bh);
+ status = ocfs_add_entry(handle, dentry, inode, disk_off,
+ parent_fe_bh);
if (status < 0) {
LOG_ERROR_STATUS (status);
goto leave;
@@ -519,7 +549,7 @@
ocfs_super *osb = OCFS_SB(dir->i_sb);
__u64 fe_off = GET_INODE_FEOFF(inode);
struct inode *parentInode = dentry->d_parent->d_inode;
- ocfs_file_entry *fe = NULL;
+ ocfs2_dinode *fe = NULL;
__u32 lockFlags = (S_ISDIR (inode->i_mode) ? (FLAG_RELEASE_DENTRY | FLAG_DIR) : FLAG_RELEASE_DENTRY);
struct buffer_head *fe_bh = NULL;
struct buffer_head *parent_node_bh = NULL; /* parent locknode */
@@ -568,10 +598,11 @@
got_parent = 1;
/* this will re-read the directory now with the EXCLUSIVE */
- /* lock already held */
- status = ocfs_find_files_on_disk (osb, dentry->d_name.name,
- dentry->d_name.len, &fe_off,
- parentInode, 0, &dirent_bh, &dirent);
+ /* lock already held; it will also return the fe_off to us */
+ status = ocfs_find_files_on_disk(osb, dentry->d_name.name,
+ dentry->d_name.len, &fe_off,
+ parentInode, 0, &dirent_bh,
+ &dirent);
if (status < 0) {
LOG_ERROR_STATUS(status);
goto leave;
@@ -619,17 +650,17 @@
}
fe = OCFS_BH_GET_DATA_WRITE(fe_bh);
- if (fe->link_cnt != inode->i_nlink) {
- printk("ocfs_unlink: hmm, inode has nlink = %u, fe has link_cnt = %u. Setting inode from fe.\n", inode->i_nlink, fe->link_cnt);
- inode->i_nlink = fe->link_cnt;
+ if (fe->i_links_count != inode->i_nlink) {
+ printk("ocfs_unlink: hmm, inode has nlink = %u, fe has link_cnt = %u. Setting inode from fe.\n",
+ inode->i_nlink, fe->i_links_count);
+ inode->i_nlink = fe->i_links_count;
}
if (S_ISDIR (inode->i_mode))
- fe->link_cnt = 0;
+ fe->i_links_count = 0;
else
- fe->link_cnt--;
-
- if (!fe->link_cnt) {
+ fe->i_links_count--;
+ if (!fe->i_links_count) {
status = ocfs_orphan_add(osb, handle, inode, fe);
if (status < 0) {
OCFS_BH_PUT_DATA(fe_bh);
@@ -653,7 +684,7 @@
}
if (S_ISDIR (inode->i_mode)) {
- ocfs_file_entry *dirfe;
+ ocfs2_dinode *dirfe;
status = ocfs_journal_access(handle, parent_node_bh,
OCFS_JOURNAL_ACCESS_WRITE);
if (status < 0) {
@@ -661,7 +692,7 @@
goto leave;
}
dirfe = OCFS_BH_GET_DATA_WRITE(parent_node_bh);
- dirfe->link_cnt--;
+ dirfe->i_links_count--;
OCFS_BH_PUT_DATA(parent_node_bh);
status = ocfs_journal_dirty(handle, fe_bh);
if (status < 0) {
@@ -855,10 +886,8 @@
int status = 0;
struct inode *old_inode = old_dentry->d_inode;
struct inode *new_inode = new_dentry->d_inode;
- ocfs_file_entry *newfe = NULL, *oldfe = NULL;
- struct buffer_head *oldfe_bh = NULL;
+ ocfs2_dinode *newfe = NULL;
struct buffer_head *newfe_bh = NULL;
- struct buffer_head *insert_bh = NULL;
ocfs_super *osb = NULL;
__u64 oldfe_lockid;
__u64 newfe_lockid = 0;
@@ -970,14 +999,14 @@
goto finally;
status = -EMLINK;
if (!new_inode && new_dir!=old_dir &&
- new_dir->i_nlink >= OCFS_LINK_MAX)
+ new_dir->i_nlink >= OCFS2_LINK_MAX)
goto finally;
}
status = -ENOENT;
- old_de_bh = ocfs_find_entry (old_dentry->d_name.name,
- old_dentry->d_name.len,
- old_dir, &old_de);
+ old_de_bh = ocfs_find_entry(old_dentry->d_name.name,
+ old_dentry->d_name.len,
+ old_dir, &old_de);
if (!old_de_bh)
goto finally;
@@ -992,9 +1021,10 @@
/* check if the target already exists (in which case we need
* to delete it */
- status = ocfs_find_files_on_disk(osb, new_dentry->d_name.name,
- new_dentry->d_name.len, &newfe_lockid,
- new_dir, 0, &new_de_bh, &new_de);
+ status = ocfs_find_files_on_disk(osb, new_dentry->d_name.name,
+ new_dentry->d_name.len,
+ &newfe_lockid, new_dir, 0,
+ &new_de_bh, &new_de);
/* The only error we allow here is -ENOENT because the new
* file not existing is perfectly valid. */
if ((status < 0) && (status != -ENOENT)) {
@@ -1019,7 +1049,7 @@
if (S_ISDIR(new_inode->i_mode))
newfe_flags = FLAG_DIR;
- status = ocfs_acquire_lock(osb, OCFS_DLM_EXCLUSIVE_LOCK,
+ status = ocfs_acquire_lock(osb, OCFS_DLM_EXCLUSIVE_LOCK,
newfe_flags, &newfe_bh,
new_inode);
if (status < 0) {
@@ -1057,12 +1087,12 @@
newfe = OCFS_BH_GET_DATA_WRITE(newfe_bh);
if (S_ISDIR (new_inode->i_mode))
- newfe->link_cnt = 0;
+ newfe->i_links_count = 0;
else
- newfe->link_cnt--;
+ newfe->i_links_count--;
- if (!newfe->link_cnt) {
- status = ocfs_orphan_add(osb, handle, new_inode,
+ if (!newfe->i_links_count) {
+ status = ocfs_orphan_add(osb, handle, new_inode,
newfe);
if (status < 0) {
OCFS_BH_PUT_DATA(newfe_bh);
@@ -1192,15 +1222,8 @@
iput(new_inode);
}
- if (oldfe_bh) {
- if (oldfe)
- OCFS_BH_PUT_DATA(oldfe_bh);
- brelse(oldfe_bh);
- }
if (newfe_bh)
brelse(newfe_bh);
- if (insert_bh)
- brelse(insert_bh);
if (old_dir_bh)
brelse(old_dir_bh);
if (new_dir_bh)
@@ -1231,7 +1254,7 @@
int l;
struct buffer_head *new_fe_bh = NULL;
struct buffer_head *parent_fe_bh = NULL;
- ocfs_file_entry *fe = NULL;
+ ocfs2_dinode *fe = NULL;
ocfs_journal_handle *handle = NULL;
int got_lock = 0;
@@ -1289,11 +1312,11 @@
}
fe = OCFS_BH_GET_DATA_READ(new_fe_bh);
- file_off = fe->this_sector;
+ file_off = fe->i_blkno << osb->sb->s_blocksize_bits;
OCFS_BH_PUT_DATA(new_fe_bh);
fe = NULL;
- status = ocfs_extend_file (osb, newsize, file_off, handle, inode, NULL);
+ status = ocfs_extend_file (osb, newsize, file_off, handle, inode, NULL, 0, new_fe_bh);
if (status < 0) {
if (status != -ENOSPC && status != -EINTR) {
LOG_ERROR_ARGS ("Failed to extend file to %llu", newsize);
@@ -1305,12 +1328,12 @@
inode->i_rdev = OCFS_NODEV;
fe = OCFS_BH_GET_DATA_READ(new_fe_bh);
- ocfs_populate_inode (inode, fe, S_IFLNK | S_IRWXUGO, 1);
+ ocfs_populate_inode (inode, fe, 1);
OCFS_BH_PUT_DATA(new_fe_bh);
fe = NULL;
inode->i_size = newsize;
- inode->i_blocks = (newsize + sb->s_blocksize) >> sb->s_blocksize_bits;
+ inode->i_blocks = (newsize + sb->s_blocksize - 1) >> sb->s_blocksize_bits;
ocfs_init_lockres (osb, inode);
status = ocfs_inode_notify_open(osb, new_fe_bh, handle, inode);
@@ -1325,6 +1348,7 @@
goto abort_trans;
}
+
abort_trans:
if (status < 0) {
ocfs_abort_trans(handle);
@@ -1349,7 +1373,6 @@
bail:
up_write(&OCFS_I(dir)->ip_io_sem);
-
if (new_fe_bh) {
if (fe)
OCFS_BH_PUT_DATA(new_fe_bh);
@@ -1369,11 +1392,11 @@
const char * error_msg = NULL;
const int rlen = le16_to_cpu(de->rec_len);
- if (rlen < OCFS_DIR_REC_LEN(1))
+ if (rlen < OCFS2_DIR_REC_LEN(1))
error_msg = "rec_len is smaller than minimal";
else if (rlen % 4 != 0)
error_msg = "rec_len % 4 != 0";
- else if (rlen < OCFS_DIR_REC_LEN(de->name_len))
+ else if (rlen < OCFS2_DIR_REC_LEN(de->name_len))
error_msg = "rec_len is too small for name_len";
else if (((char *) de - bh->b_data) + rlen > dir->i_sb->s_blocksize)
error_msg = "directory entry across blocks";
@@ -1410,7 +1433,7 @@
struct super_block * sb;
int retval, status;
char *buf = NULL;
- ocfs_file_entry *fe = NULL;
+ ocfs2_dinode *fe = NULL;
sb = dir->i_sb;
@@ -1419,7 +1442,7 @@
bh = ocfs_bread (handle, dir, 0, 0, &retval, 0);
if (!bh)
return retval;
- rec_len = OCFS_DIR_REC_LEN(namelen);
+ rec_len = OCFS2_DIR_REC_LEN(namelen);
offset = 0;
de = (struct ocfs2_dir_entry *) bh->b_data;
while (1) {
@@ -1449,8 +1472,8 @@
/* update the parent file entry file size */
status = ocfs_journal_access(handle, parent_fe_bh, OCFS_JOURNAL_ACCESS_WRITE);
buf = OCFS_BH_GET_DATA_WRITE(parent_fe_bh);
- fe = (ocfs_file_entry *)buf;
- fe->file_size = dir->i_size;
+ fe = (ocfs2_dinode *)buf;
+ fe->i_size = dir->i_size;
OCFS_BH_PUT_DATA(parent_fe_bh);
buf = NULL;
status = ocfs_journal_dirty(handle, parent_fe_bh);
@@ -1472,8 +1495,10 @@
retval = -EEXIST;
goto bail;
}
- if ((le64_to_cpu(de->inode) == 0 && le16_to_cpu(de->rec_len) >= rec_len) ||
- (le16_to_cpu(de->rec_len) >= OCFS_DIR_REC_LEN(de->name_len) + rec_len)) {
+ if (((le64_to_cpu(de->inode) == 0) &&
+ (le16_to_cpu(de->rec_len) >= rec_len)) ||
+ (le16_to_cpu(de->rec_len) >=
+ (OCFS2_DIR_REC_LEN(de->name_len) + rec_len))) {
OCFS_BH_PUT_DATA(bh);
buf = NULL;
status = ocfs_journal_access(handle, bh, OCFS_JOURNAL_ACCESS_WRITE);
@@ -1482,14 +1507,14 @@
offset += le16_to_cpu(de->rec_len);
if (le64_to_cpu(de->inode)) {
de1 = (struct ocfs2_dir_entry *) ((char *) de +
- OCFS_DIR_REC_LEN(de->name_len));
+ OCFS2_DIR_REC_LEN(de->name_len));
de1->rec_len =
cpu_to_le16(le16_to_cpu(de->rec_len) -
- OCFS_DIR_REC_LEN(de->name_len));
- de->rec_len = cpu_to_le16(OCFS_DIR_REC_LEN(de->name_len));
+ OCFS2_DIR_REC_LEN(de->name_len));
+ de->rec_len = cpu_to_le16(OCFS2_DIR_REC_LEN(de->name_len));
de = de1;
}
- de->file_type = OCFS_FT_UNKNOWN;
+ de->file_type = OCFS2_FT_UNKNOWN;
if (inode_off) {
de->inode = cpu_to_le64(inode_off);
ocfs_set_de_type(dir->i_sb, de, inode->i_mode);
@@ -1532,7 +1557,7 @@
i = 0;
pde = NULL;
- de = (struct ocfs2_dir_entry *) OCFS_BH_GET_DATA_READ(bh);
+ de = OCFS_BH_GET_DATA_READ(bh);
while (i < bh->b_size) {
if (!ocfs_check_dir_entry(dir, de, bh, i)) {
OCFS_BH_PUT_DATA(bh);
@@ -1613,9 +1638,9 @@
-struct buffer_head * ocfs_find_entry (const char *name, int namelen,
- struct inode *dir,
- struct ocfs2_dir_entry ** res_dir)
+struct buffer_head *ocfs_find_entry(const char *name, int namelen,
+ struct inode *dir,
+ struct ocfs2_dir_entry **res_dir)
{
struct super_block * sb;
struct buffer_head * bh_use[NAMEI_RA_SIZE];
@@ -1631,7 +1656,7 @@
*res_dir = NULL;
sb = dir->i_sb;
- nblocks = dir->i_size >> OCFS_SB(sb)->sect_size_bits;
+ nblocks = dir->i_size >> sb->s_blocksize_bits;
start = OCFS_I(dir)->i_dir_start_lookup;
if (start >= nblocks)
start = 0;
@@ -1675,7 +1700,7 @@
goto next;
}
i = search_dirblock(bh, dir, name, namelen,
- block << OCFS_SB(sb)->sect_size_bits, res_dir);
+ block << sb->s_blocksize_bits, res_dir);
if (i == 1) {
OCFS_I(dir)->i_dir_start_lookup = block;
ret = bh;
@@ -1695,7 +1720,7 @@
* search the last part of the directory before giving up.
*/
block = nblocks;
- nblocks = dir->i_size >> OCFS_SB(sb)->sect_size_bits;
+ nblocks = dir->i_size >> sb->s_blocksize_bits;
if (block < nblocks) {
start = 0;
goto restart;
@@ -1712,22 +1737,27 @@
* ocfs_orphan_add()
*
*/
-static int ocfs_orphan_add(ocfs_super *osb, ocfs_journal_handle *handle,
- struct inode *inode, ocfs_file_entry *fe)
+static int ocfs_orphan_add(ocfs_super *osb, ocfs_journal_handle *handle,
+ struct inode *inode, ocfs2_dinode *fe)
{
struct inode *orphan_dir_inode = NULL;
struct buffer_head *orphan_dir_bh = NULL;
int status = 0;
char *name = NULL;
int namelen;
- ocfs_file_entry *orphan_fe;
+ ocfs2_dinode *orphan_fe;
LOG_ENTRY_ARGS("(inode->i_ino = %lu)\n", inode->i_ino);
/* create a unique name here. */
- name = ocfs_malloc(OCFS_MAX_FILENAME_LENGTH+1);
+ name = kmalloc(OCFS2_MAX_FILENAME_LENGTH+1, GFP_KERNEL);
+ if (!name) {
+ status = -EFAIL;
+ LOG_ERROR_STATUS(status);
+ goto leave;
+ }
- namelen = snprintf(name, OCFS_MAX_FILENAME_LENGTH+1, "%llu",
+ namelen = snprintf(name, OCFS2_MAX_FILENAME_LENGTH+1, "%llu",
GET_INODE_FEOFF(inode));
if (namelen <= 0) {
if (namelen)
@@ -1780,8 +1810,8 @@
* underneath us... */
orphan_fe = OCFS_BH_GET_DATA_WRITE(orphan_dir_bh);
if (S_ISDIR(inode->i_mode))
- orphan_fe->link_cnt++;
- orphan_dir_inode->i_nlink = orphan_fe->link_cnt;
+ orphan_fe->i_links_count++;
+ orphan_dir_inode->i_nlink = orphan_fe->i_links_count;
OCFS_BH_PUT_DATA(orphan_dir_bh);
status = ocfs_journal_dirty(handle, orphan_dir_bh);
@@ -1791,14 +1821,14 @@
}
- OCFS_SET_FLAG (fe->sync_flags, OCFS_SYNC_FLAG_ORPHANED);
+ fe->i_flags |= OCFS2_ORPHANED_FL;
leave:
if (orphan_dir_inode)
iput(orphan_dir_inode);
if (name)
- ocfs_free(name);
+ kfree(name);
if (orphan_dir_bh)
brelse(orphan_dir_bh);
@@ -1814,14 +1844,19 @@
{
char *name = NULL;
int namelen;
- ocfs_file_entry *orphan_fe;
+ ocfs2_dinode *orphan_fe;
int status = 0;
struct buffer_head *target_de_bh;
struct ocfs2_dir_entry *target_de = NULL;
- name = ocfs_malloc(OCFS_MAX_FILENAME_LENGTH+1);
+ name = kmalloc(OCFS2_MAX_FILENAME_LENGTH+1, GFP_KERNEL);
+ if (!name) {
+ status = -EFAIL;
+ LOG_ERROR_STATUS(status);
+ goto leave;
+ }
- namelen = snprintf(name, OCFS_MAX_FILENAME_LENGTH+1, "%llu",
+ namelen = snprintf(name, OCFS2_MAX_FILENAME_LENGTH+1, "%llu",
GET_INODE_FEOFF(inode));
if (namelen <= 0) {
if (namelen)
@@ -1862,8 +1897,8 @@
/* do the i_nlink dance! :) */
orphan_fe = OCFS_BH_GET_DATA_WRITE(orphan_dir_bh);
if (S_ISDIR(inode->i_mode))
- orphan_fe->link_cnt--;
- orphan_dir_inode->i_nlink = orphan_fe->link_cnt;
+ orphan_fe->i_links_count--;
+ orphan_dir_inode->i_nlink = orphan_fe->i_links_count;
OCFS_BH_PUT_DATA(orphan_dir_bh);
status = ocfs_journal_dirty(handle, orphan_dir_bh);
@@ -1874,7 +1909,7 @@
leave:
if (name)
- ocfs_free(name);
+ kfree(name);
return status;
}
@@ -1890,12 +1925,12 @@
int tmpstat;
struct buffer_head *bh = NULL;
struct buffer_head *parent_fe_bh = NULL;
- ocfs_file_entry *fe = NULL;
+ ocfs2_dinode *fe = NULL;
if (S_ISDIR(inode->i_mode))
return -EPERM;
- if (inode->i_nlink >= OCFS_LINK_MAX)
+ if (inode->i_nlink >= OCFS2_LINK_MAX)
return -EMLINK;
#define OCFS_DATA_TRANS_BLOCKS (3 * 8 - 2)
@@ -1933,8 +1968,8 @@
goto bail;
fe = OCFS_BH_GET_DATA_WRITE(bh);
- fe->link_cnt++;
- inode->i_nlink = fe->link_cnt;
+ fe->i_links_count++;
+ inode->i_nlink = fe->i_links_count;
inode->i_ctime = CURRENT_TIME;
atomic_inc(&inode->i_count);
OCFS_BH_PUT_DATA(bh);
Added: trunk/src/namei.h
===================================================================
--- trunk/src/namei.h 2004-06-17 16:59:59 UTC (rev 1127)
+++ trunk/src/namei.h 2004-06-17 21:06:33 UTC (rev 1128)
@@ -0,0 +1,63 @@
+/* -*- mode: c; c-basic-offset: 8; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
+ * namei.h
+ *
+ * Function prototypes
+ *
+ * Copyright (C) 2002, 2004 Oracle. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ *
+ * Authors: Kurt Hackel, Mark Fasheh, Sunil Mushran, Wim Coekaerts,
+ * Manish Singh, Neeraj Goyal, Suchit Kaura
+ */
+
+#ifndef OCFS2_NAMEI_H
+#define OCFS2_NAMEI_H
+
+int ocfs_check_dir_entry (struct inode *dir,
+ struct ocfs2_dir_entry *de,
+ struct buffer_head *bh, unsigned long offset);
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
+int ocfs_create (struct inode *dir, struct dentry *dentry, int mode, struct nameidata *nd);
+#else
+int ocfs_create (struct inode *dir, struct dentry *dentry, int mode);
+#endif
+struct buffer_head *ocfs_find_entry(const char *name, int namelen,
+ struct inode *dir,
+ struct ocfs2_dir_entry **res_dir);
+int ocfs_link(struct dentry *old_dentry, struct inode *dir,
+ struct dentry *dentry);
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
+struct dentry *ocfs_lookup(struct inode *dir, struct dentry *dentry,
+ struct nameidata *nd);
+#else
+struct dentry *ocfs_lookup(struct inode *dir, struct dentry *dentry);
+#endif
+int ocfs_mkdir(struct inode *dir, struct dentry *dentry, int mode);
+int ocfs_mknod(struct inode *dir, struct dentry *dentry, int mode,
+ ocfs_dev dev);
+int ocfs_rename(struct inode *old_dir, struct dentry *old_dentry,
+ struct inode *new_dir, struct dentry *new_dentry);
+int ocfs_symlink(struct inode *dir, struct dentry *dentry,
+ const char *symname);
+int ocfs_unlink(struct inode *dir, struct dentry *dentry);
+int ocfs_orphan_del(ocfs_super *osb, ocfs_journal_handle *handle,
+ struct inode *orphan_dir_inode, struct inode *inode,
+ struct buffer_head *orphan_dir_bh);
+
+#endif /* OCFS2_NAMEI_H */
Modified: trunk/src/nm.c
===================================================================
--- trunk/src/nm.c 2004-06-17 16:59:59 UTC (rev 1127)
+++ trunk/src/nm.c 2004-06-17 21:06:33 UTC (rev 1128)
@@ -1,4 +1,6 @@
-/*
+/* -*- mode: c; c-basic-offset: 8; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
* nm.c
*
* net and disk process vote, nm thread, etc.
@@ -24,12 +26,31 @@
* Manish Singh, Neeraj Goyal, Suchit Kaura
*/
+#include "ocfs_compat.h"
+
+#include <linux/fs.h>
#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/highmem.h>
+#include <linux/bitops.h>
+#include <linux/net.h>
-#include "inc/ocfs_log.h"
-#include "inc/ocfs.h"
-#include "inc/ocfs_journal.h"
+#include "ocfs_log.h"
+#include "ocfs.h"
+#include "alloc.h"
+#include "dlm.h"
+#include "heartbeat.h"
+#include "inode.h"
+#include "lockres.h"
+#include "nm.h"
+#include "util.h"
+#include "volcfg.h"
+#include "vote.h"
+
+#include "ocfs_journal.h"
+#include "ocfs_buffer_head.h"
+
/* Tracing */
#define OCFS_DEBUG_CONTEXT OCFS_DEBUG_CONTEXT_NM
@@ -169,7 +190,7 @@
ocfs_process_vote(osb, &(sv->ctxt));
/* we should free this when done. */
- ocfs_free(sv);
+ kfree(sv);
LOG_EXIT();
return;
@@ -211,7 +232,7 @@
ctxt = &sv->ctxt;
ctxt->u.publish = (ocfs_publish *) &sv->publish_sect;
- memcpy(ctxt->u.publish, publish, osb->sect_size);
+ memcpy(ctxt->u.publish, publish, osb->sb->s_blocksize);
OCFS_BH_PUT_DATA(bh);
ctxt->request_method = DISK_VOTE;
@@ -231,8 +252,8 @@
/* if no error, then the workqueue should clear it? */
if ((status < 0) && ctxt) {
if (ctxt->u.publish)
- ocfs_free(ctxt->u.publish);
- ocfs_free(ctxt);
+ kfree(ctxt->u.publish);
+ kfree(ctxt);
}
LOG_EXIT_STATUS(status);
@@ -257,11 +278,10 @@
__u32 i;
unsigned long j;
__u32 highest_vote_node;
- __u64 offset = 0;
+ __u64 offset = 0, len = 0;
__u32 num_nodes = 0;
__u32 vote_node;
ocfs_node_config_hdr *node_cfg_hdr = NULL;
- __u64 curr_node_map;
__u64 cfg_seq_num;
int which;
struct buffer_head *bh = NULL;
@@ -286,9 +306,6 @@
vote_node = OCFS_INVALID_NODE_NUM;
highest_vote_node = 0;
- if (OcfsGlobalCtxt.hbm == 0)
- OcfsGlobalCtxt.hbm = DISK_HBEAT_NO_COMM;
-
if (!time_after (jiffies, (unsigned long) (osb->hbt)))
goto finally;
@@ -296,17 +313,18 @@
down (&(osb->publish_lock));
/* Get the Publish Sector start Offset */
- offset = osb->vol_layout.new_cfg_off;
+ offset = osb->new_autoconfig_blkno << osb->sb->s_blocksize_bits;
+ len = osb->total_autoconfig_blocks << osb->sb->s_blocksize_bits;
- /* Read disk for Publish Sectors of all nodes */
- status = ocfs_read_bhs(osb, offset, osb->cfg_len, osb->cfg_bhs, 0, NULL);
+ /* Read disk for 4 autoconfig blocks + all nodes publish blocks */
+ status = ocfs_read_bhs(osb, offset, len, osb->autoconfig_bhs, 0, NULL);
if (status < 0) {
up (&(osb->publish_lock));
LOG_ERROR_STATUS (status);
goto finally;
}
- bh = osb->cfg_bhs[OCFS_VOLCFG_NEWCFG_SECTORS + osb->node_num];
+ bh = osb->autoconfig_bhs[OCFS_VOLCFG_NEWCFG_SECTORS + osb->node_num];
publish = OCFS_BH_GET_DATA_READ(bh);
if ((osb->check_mounted) && (publish->mounted == 0)) {
printk("ocfs2: Heartbeat timed out, volume has been "
@@ -323,10 +341,10 @@
up (&(osb->publish_lock));
/* If another node was added to the config read and update the cfg */
- node_cfg_hdr = OCFS_BH_GET_DATA_READ(osb->cfg_bhs[1]);
+ node_cfg_hdr = OCFS_BH_GET_DATA_READ(osb->autoconfig_bhs[1]);
num_nodes = node_cfg_hdr->num_nodes;
cfg_seq_num = node_cfg_hdr->cfg_seq_num;
- OCFS_BH_PUT_DATA(osb->cfg_bhs[1]);
+ OCFS_BH_PUT_DATA(osb->autoconfig_bhs[1]);
if ((osb->cfg_seq_num != cfg_seq_num) ||
(osb->num_cfg_nodes != num_nodes)) {
@@ -339,10 +357,10 @@
}
}
- num_nodes = OCFS_MAXIMUM_NODES;
+ num_nodes = osb->max_nodes;
/* Refresh the publish map */
- ocfs_update_publish_map (osb, &(osb->cfg_bhs[OCFS_VOLCFG_NEWCFG_SECTORS]), 0);
+ ocfs_update_publish_map (osb, &(osb->autoconfig_bhs[OCFS_VOLCFG_NEWCFG_SECTORS]), 0);
/* send signal to mount thread to continue */
if (atomic_read (&osb->nm_init) < OCFS_HEARTBEAT_INIT) {
@@ -352,21 +370,19 @@
atomic_inc (&osb->nm_init);
}
- LOG_TRACE_ARGS ("Publish map: 0x%08X\n", (__u32)(osb->publ_map & 0xFFFFFFFFULL));
-
- /* map of local node */
- curr_node_map = (__u64) ((__u64)1 << osb->node_num);
-
/* Check for the highest node looking for a vote, if anybody is looking */
for (i = 0, which = OCFS_VOLCFG_NEWCFG_SECTORS; i < num_nodes; i++, which++) {
- publish = OCFS_BH_GET_DATA_READ(osb->cfg_bhs[which]);
+ ocfs_node_map vote_map;
+ publish = OCFS_BH_GET_DATA_READ(osb->autoconfig_bhs[which]);
if (publish->time == (__u64) 0)
goto loop;
- if (publish->vote != FLAG_VOTE_NODE ||
- !(publish->vote_map & curr_node_map))
+ if (publish->vote != FLAG_VOTE_NODE)
goto loop;
+ ocfs_get_publish_vote_map(osb, publish, &vote_map);
+ if (!ocfs_node_map_test_bit(&vote_map, osb->node_num))
+ goto loop;
LOG_TRACE_ARGS ("node(%u): vote=%d dirty=%d type=%u\n",
i, publish->vote, publish->dirty,
@@ -375,11 +391,11 @@
highest_vote_node = i;
/* Check if the node is alive or not */
- if (IS_NODE_ALIVE (osb->publ_map, highest_vote_node,
- num_nodes)) {
+ if (ocfs_node_is_alive(&osb->publ_map,
+ highest_vote_node)) {
vote_node = highest_vote_node;
} else {
- OCFS_BH_PUT_DATA(osb->cfg_bhs[which]);
+ OCFS_BH_PUT_DATA(osb->autoconfig_bhs[which]);
publish = NULL;
/* I guess we don't even have to do
* anything here... */
@@ -390,7 +406,7 @@
}
loop:
publish = NULL;
- OCFS_BH_PUT_DATA(osb->cfg_bhs[which]);
+ OCFS_BH_PUT_DATA(osb->autoconfig_bhs[which]);
}
@@ -399,7 +415,7 @@
__s32 voted;
LOG_TRACE_ARGS("vote_node = %d\n", vote_node);
- bh = osb->cfg_bhs[OCFS_VOLCFG_NEWCFG_SECTORS
+ bh = osb->autoconfig_bhs[OCFS_VOLCFG_NEWCFG_SECTORS
+ osb->node_num];
down(&(osb->publish_lock));
@@ -422,7 +438,7 @@
up(&(osb->publish_lock));
which = vote_node + OCFS_VOLCFG_NEWCFG_SECTORS;
- bh = osb->cfg_bhs[which];
+ bh = osb->autoconfig_bhs[which];
status= ocfs_schedule_process_vote(osb, bh, vote_node);
if (status < 0) {
@@ -444,23 +460,10 @@
}
set_current_state (TASK_INTERRUPTIBLE);
schedule_timeout (osb->hbt - j);
- /* this should all just be in some common function as
- * it's totally duplicated. */
+
+ /* ignore the actual signal */
if (signal_pending(current)) {
- /* ignore the actual signal */
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
dequeue_signal_lock(current, &current->blocked, &info);
-#else
-#ifdef HAVE_NPTL
- spin_lock_irq (&current->sighand->siglock);
- dequeue_signal(&current->blocked, &info);
- spin_unlock_irq(&current->sighand->siglock);
-#else
- spin_lock_irq(&current->sigmask_lock);
- dequeue_signal(&current->blocked, &info);
- spin_unlock_irq(&current->sigmask_lock);
-#endif /* !HAVE_NPTL */
-#endif /* 2.4.x kernel */
}
}
@@ -500,9 +503,11 @@
return INVALID_REQUEST;
}
} else if (lockres) {
- *master_alive = lockres->master_node_num != OCFS_INVALID_NODE_NUM &&
- IS_NODE_ALIVE(osb->publ_map,
- lockres->master_node_num, OCFS_MAXIMUM_NODES);
+ *master_alive =
+ (lockres->master_node_num !=
+ OCFS_INVALID_NODE_NUM) &&
+ ocfs_node_is_alive(&osb->publ_map,
+ lockres->master_node_num);
// if an outstanding vote request is found on this lockid
// and this node number is higher, this node wins
@@ -719,9 +724,9 @@
int status = 0;
int tmpstat = 0;
ocfs_lock_res *lockres = NULL;
- __u32 flags, num_nodes, i;
+ __u32 flags, num_nodes;
__u64 offset;
- ocfs_file_entry *fe = NULL;
+ ocfs2_dinode *fe = NULL;
ocfs_vote *vote = NULL;
struct buffer_head *fe_bh = NULL, *vote_bh = NULL;
int vote_type = INVALID_REQUEST, vote_response = 0;
@@ -738,6 +743,7 @@
__u32 node_num = ctxt->node_num;
__u64 lock_id, seq_num;
int needs_trunc = 0;
+ ocfs_node_map disk_map;
LOG_ENTRY_ARGS ("(0x%p, 0x%p)\n", osb, ctxt);
@@ -751,10 +757,10 @@
return status;
}
- num_nodes = OCFS_MAXIMUM_NODES;
+ num_nodes = osb->max_nodes;
if (disk_vote) {
flags = publish->vote_type;
- lock_id = publish->dir_ent;
+ lock_id = publish->lock_id;
seq_num = publish->publ_seq_num;
} else {
ocfs_dlm_req_master *req_master = (ocfs_dlm_req_master *)dlm_msg->msg_buf;
@@ -762,13 +768,12 @@
lock_id = req_master->lock_id;
seq_num = req_master->lock_seq_num;
}
- lockflags = (lock_id >= osb->vol_layout.bitmap_off ? OCFS_BH_CACHED : 0);
LOG_TRACE_ARGS ("node=%u, id=%llu, seq=%llu\n", node_num,
lock_id, seq_num);
if (disk_vote) {
- offset = osb->vol_layout.vote_sect_off + (osb->node_num * osb->sect_size);
+ offset = (osb->vote_blkno + osb->node_num) << osb->sb->s_blocksize_bits;
status = ocfs_read_bh(osb, offset, &vote_bh, 0, NULL);
if (status < 0) {
LOG_ERROR_STATUS (status);
@@ -819,12 +824,14 @@
}
}
+ lockflags = (IS_SYSTEM_FILE_INODE(inode) ? 0 : OCFS_BH_CACHED);
+
/* fail here if no inode, unless this is a delete/rename release */
vote_type = get_process_vote_action(osb, lockres, node_num, flags,
status, &master_alive, inode);
#ifdef VERBOSE_PROCESS_VOTE
- printk("(%u) ocfs_process_vote: %s request for lockid: %llu, action: (%u) %s, type: %s\n", ocfs_getpid(),
+ printk("(%u) ocfs_process_vote: %s request for lockid: %llu, action: (%u) %s, type: %s\n", current->pid,
flags & FLAG_RELEASE_LOCK ? "RELEASE" :
(flags & FLAG_ACQUIRE_LOCK ? "ACQUIRE" : "MODIFY"), lock_id,
vote_type, process_vote_strings[vote_type], disk_vote ? "disk vote" : "net vote" );
@@ -844,8 +851,8 @@
/* Zero out the vote for everybody, if any already set
* and hung */
vote = OCFS_BH_GET_DATA_WRITE(vote_bh);
- for (i = 0; i < num_nodes; i++)
- vote->vote[i] = 0;
+ vote->node = (__u8)-1;
+ vote->type = 0;
OCFS_BH_PUT_DATA(vote_bh);
status = ocfs_write_bh(osb, vote_bh, 0, NULL);
if (status < 0) {
@@ -980,8 +987,8 @@
LOG_ERROR_STR("Wowzers, how'd I hit this code:"
" master node is invalid!?!");
}
- if (lockres->master_node_num == osb->node_num &&
- lockres->lock_type < OCFS_DLM_EXCLUSIVE_LOCK) {
+ if ((lockres->master_node_num == osb->node_num) &&
+ (lockres->lock_type < OCFS_DLM_EXCLUSIVE_LOCK)) {
LOG_ERROR_STR("Wowzers, how'd I hit this "
"code!?!");
}
@@ -1004,7 +1011,7 @@
#endif
#endif
}
- lockres->readonly_map |= (1 << node_num);
+ ocfs_node_map_set_bit(&lockres->readonly_map, node_num);
lockres->readonly_node = osb->node_num;
vote_response = FLAG_VOTE_NODE;
status = 0;
@@ -1041,12 +1048,12 @@
* map that lock will be changing to RW before we
* continue. RETRY this request while we spawn
* off a thread to collect up the communication */
- if (lockres->readonly_map != 0ULL) {
+ if (!ocfs_node_map_is_empty(&lockres->readonly_map)) {
// assumption: node asking for vote has already dropped readonly_node
- lockres->readonly_map &= ~(1 << node_num);
+ ocfs_node_map_clear_bit(&lockres->readonly_map, node_num);
// should not be in there, but...
- lockres->readonly_map &= ~(1 << osb->node_num);
- if (lockres->readonly_map != 0ULL) {
+ ocfs_node_map_clear_bit(&lockres->readonly_map, osb->node_num);
+ if (!ocfs_node_map_is_empty(&lockres->readonly_map)) {
OCFS_ASSERT(lockres->readonly_node == osb->node_num);
OCFS_ASSERT(inode);
status = ocfs_drop_readonly_cache_lock(osb, inode, 1);
@@ -1085,14 +1092,18 @@
}
fe = OCFS_BH_GET_DATA_READ(fe_bh);
- is_dir = fe->attribs & OCFS_ATTRIB_DIRECTORY;
- is_locked = DISK_LOCK_FILE_LOCK (fe) > OCFS_DLM_NO_LOCK;
+ is_dir = S_ISDIR(fe->i_mode);
+ is_locked = DISK_LOCK(fe)->dl_level > OCFS_DLM_NO_LOCK;
if (vote_type == CHANGE_MASTER) {
OCFS_BH_PUT_DATA(fe_bh);
fe = OCFS_BH_GET_DATA_WRITE(fe_bh);
- if (inode && OCFS_I(inode)->open_hndl_cnt)
- DISK_LOCK_OIN_MAP (fe) |= (1 << osb->node_num);
- DISK_LOCK_CURRENT_MASTER (fe) = node_num;
+ if (inode && OCFS_I(inode)->open_hndl_cnt) {
+ ocfs_node_map_init(osb, &disk_map);
+ ocfs_get_disk_lock_open_map(osb, DISK_LOCK(fe), &disk_map);
+ ocfs_node_map_set_bit(&disk_map, osb->node_num);
+ ocfs_set_disk_lock_open_map(osb, DISK_LOCK(fe), &disk_map);
+ }
+ DISK_LOCK(fe)->dl_master = node_num;
}
OCFS_BH_PUT_DATA(fe_bh);
@@ -1104,7 +1115,7 @@
if (vote_type == RELEASE_CACHE) {
LOG_TRACE_STR("release cache vote, setting to NO_LOCK");
fe = OCFS_BH_GET_DATA_WRITE(fe_bh);
- DISK_LOCK_FILE_LOCK (fe) = OCFS_DLM_NO_LOCK;
+ DISK_LOCK(fe)->dl_level = OCFS_DLM_NO_LOCK;
OCFS_BH_PUT_DATA(fe_bh);
}
status = ocfs_write_bh(osb, fe_bh, 0, inode);
@@ -1134,16 +1145,16 @@
}
fe = OCFS_BH_GET_DATA_READ(fe_bh);
- if (!(fe->sync_flags & OCFS_SYNC_FLAG_VALID)) {
+ if (!(fe->i_flags & OCFS2_VALID_FL)) {
vote_response = FLAG_VOTE_FILE_DEL;
OCFS_BH_PUT_DATA(fe_bh);
} else {
- __u64 tmpmap;
-
+ ocfs_node_map_init(osb, &disk_map);
OCFS_BH_PUT_DATA(fe_bh);
fe = OCFS_BH_GET_DATA_WRITE(fe_bh);
- DISK_LOCK_OIN_MAP (fe) |= (1 << node_num);
- tmpmap = DISK_LOCK_OIN_MAP (fe);
+ ocfs_get_disk_lock_open_map(osb, DISK_LOCK(fe), &disk_map);
+ ocfs_node_map_set_bit(&disk_map, node_num);
+ ocfs_set_disk_lock_open_map(osb, DISK_LOCK(fe), &disk_map);
OCFS_BH_PUT_DATA(fe_bh);
/* Write new map on the disk */
@@ -1155,7 +1166,7 @@
}
/* Add this node to the oin map on the file entry */
- lockres->oin_openmap = tmpmap;
+ ocfs_node_map_set(&lockres->oin_openmap, &disk_map);
vote_response = FLAG_VOTE_NODE;
}
brelse(fe_bh);
@@ -1179,15 +1190,16 @@
* even in the error case. do nothing for error. */
if (lockres->master_node_num != node_num ||
lockres->lock_type != OCFS_DLM_ENABLE_CACHE_LOCK ||
- lockres->readonly_map != 0ULL)
- LOG_ERROR_ARGS("(drop-ro) master=%d node_num=%d locktype=%d map=%16llX ronode=%d\n",
+ !ocfs_node_map_is_empty(&lockres->readonly_map))
+ LOG_ERROR_ARGS("(drop-ro) master=%d node_num=%d locktype=%d ronode=%d\n",
lockres->master_node_num, node_num, lockres->lock_type,
- lockres->readonly_map, lockres->readonly_node);
+ lockres->readonly_node);
else
lockres->readonly_node = OCFS_INVALID_NODE_NUM;
status = 0;
vote_response = FLAG_VOTE_NODE;
+ inc_inode_seq = 1;
break;
case NOT_MASTER:
@@ -1244,10 +1256,11 @@
if (disk_vote) {
vote = OCFS_BH_GET_DATA_WRITE(vote_bh);
- vote->dir_ent = lock_id;
+ vote->lock_id = lock_id;
vote->vote_seq_num = seq_num;
vote->open_handle = open_handle;
- vote->vote[node_num] = vote_response;
+ vote->node = node_num;
+ vote->type = vote_response;
OCFS_BH_PUT_DATA(vote_bh);
status = ocfs_write_bh(osb, vote_bh, 0, NULL);
brelse(vote_bh);
@@ -1256,7 +1269,7 @@
}
#ifdef VERBOSE_PROCESS_VOTE
- printk("(%u) vote: lockid=%llu, node=%d, seqnum=%llu, response=%d, open_handle=%s\n",ocfs_getpid(), lock_id, node_num, seq_num, vote_response, open_handle?"yes":"no");
+ printk("(%u) vote: lockid=%llu, node=%d, seqnum=%llu, response=%d, open_handle=%s\n",current->pid, lock_id, node_num, seq_num, vote_response, open_handle?"yes":"no");
#endif
if (status < 0)
@@ -1440,13 +1453,15 @@
}
lockres->lock_state |= FLAG_READONLY_DROPPING;
- lockres->readonly_map &= ~(1 << osb->node_num); /* remove this node */
+ /* remove this node */
+ ocfs_node_map_clear_bit(&lockres->readonly_map, osb->node_num);
status = 0;
- while (lockres->readonly_map != 0ULL) {
+ while (!ocfs_node_map_is_empty(&lockres->readonly_map)) {
int disk_vote = 0;
- lockres->readonly_map &= osb->publ_map; /* remove all dead nodes */
+ /* remove all dead nodes */
+ ocfs_node_map_and(&lockres->readonly_map, &osb->publ_map);
status = new_lock_function(osb, OCFS_DLM_ENABLE_CACHE_LOCK, FLAG_DROP_READONLY,
NULL, &disk_vote, inode);
if (status == -EAGAIN) {
@@ -1465,8 +1480,8 @@
LOG_ERROR_STATUS (status);
break;
}
-
- if (lockres->readonly_map == 0ULL &&
+
+ if (ocfs_node_map_is_empty(&lockres->readonly_map) &&
lockres->readonly_node == osb->node_num)
lockres->readonly_node = OCFS_INVALID_NODE_NUM;
@@ -1495,23 +1510,9 @@
ocfs_daemonize (OCFS_DROP_RO_THREAD_NAME, strlen(OCFS_DROP_RO_THREAD_NAME));
status = _ocfs_drop_readonly_cache_lock(arg);
- /* this should all just be in some common function as
- * it's totally duplicated. */
+ /* ignore the actual signal */
if (signal_pending(current)) {
- /* ignore the actual signal */
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
dequeue_signal_lock(current, &current->blocked, &info);
-#else
-#ifdef HAVE_NPTL
- spin_lock_irq (&current->sighand->siglock);
- dequeue_signal(&current->blocked, &info);
- spin_unlock_irq(&current->sighand->siglock);
-#else
- spin_lock_irq(&current->sigmask_lock);
- dequeue_signal(&current->blocked, &info);
- spin_unlock_irq(&current->sigmask_lock);
-#endif /* !HAVE_NPTL */
-#endif /* 2.4.x kernel */
}
/* Flush all scheduled tasks */
@@ -1522,5 +1523,12 @@
#endif
#warning need a way to wait on all of these threads on dismount
+/*
+ * The way to do this is to create a wait queue on the osb. When one of
+ * these threads starts, it bumps a counter. When it ends, it decrements
+ * the counter and wake_up()s the wait queue. The counter probably can
+ * be protected by a spinlock on the OSB. The dismount handling just
+ * waits on that wait queue until readonly_threads == 0.
+ */
return status;
}
Added: trunk/src/nm.h
===================================================================
--- trunk/src/nm.h 2004-06-17 16:59:59 UTC (rev 1127)
+++ trunk/src/nm.h 2004-06-17 21:06:33 UTC (rev 1128)
@@ -0,0 +1,41 @@
+/* -*- mode: c; c-basic-offset: 8; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
+ * nm.h
+ *
+ * Function prototypes
+ *
+ * Copyright (C) 2002, 2004 Oracle. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ *
+ * Authors: Kurt Hackel, Mark Fasheh, Sunil Mushran, Wim Coekaerts,
+ * Manish Singh, Neeraj Goyal, Suchit Kaura
+ */
+
+#ifndef OCFS2_NM_H
+#define OCFS2_NM_H
+
+int ocfs_drop_readonly_cache_lock(ocfs_super *osb, struct inode *inode,
+ int yield);
+void ocfs_inc_inode_seq(ocfs_super *osb, struct inode *inode,
+ int sync_buffers);
+int ocfs_process_vote(ocfs_super *osb, ocfs_vote_request_ctxt *ctxt);
+int ocfs_recv_thread(void *unused);
+void ocfs_recover_oin_locks(ocfs_super *osb, __u32 node_num);
+int ocfs_volume_thread(void *arg);
+
+#endif /* OCFS2_NM_H */
Added: trunk/src/ocfs.h
===================================================================
--- trunk/src/ocfs.h 2004-06-17 16:59:59 UTC (rev 1127)
+++ trunk/src/ocfs.h 2004-06-17 21:06:33 UTC (rev 1128)
@@ -0,0 +1,1440 @@
+/* -*- mode: c; c-basic-offset: 8; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
+ * ocfs.h
+ *
+ * Defines macros and structures used in ocfs
+ *
+ * Copyright (C) 2002, 2004 Oracle. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ *
+ * Authors: Kurt Hackel, Mark Fasheh, Sunil Mushran, Wim Coekaerts,
+ * Manish Singh, Neeraj Goyal, Suchit Kaura
+ */
+
+#ifndef OCFS_H
+#define OCFS_H
+
+/*
+** System header files
+*/
+#define __KERNEL_SYSCALLS__
+
+#include <linux/spinlock.h>
+#include <linux/sched.h>
+#include <linux/wait.h>
+#include <linux/list.h>
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
+# include <linux/workqueue.h>
+#else
+# include <linux/tqueue.h>
+#endif
+
+
+#define OCFS_BITS_IN_CHUNK(sb) ((sb)->s_blocksize * 8)
+
+typedef struct _ocfs_alloc_bm
+{
+ __u32 validbits; /* number of valid bits */
+ __u32 allocbits; /* number of allocated bits */
+ __u32 failed;
+ __u32 ok_retries;
+ /* 'numbh' is the number of buffer heads in chunk. We keep
+ * around enough buffer heads to cover the entire alloc'd size
+ * of the bitmap, even though we may only ever care about the
+ * valid size */
+ __u32 numbh;
+ struct buffer_head **chunk;
+}
+ocfs_alloc_bm;
+
+
+
+#define OCFS_POINTER_SIZE (sizeof(void *))
+
+enum
+{
+ OCFS_VOTE_REQUEST = 1,
+ OCFS_VOTE_REPLY,
+ OCFS_INFO_DISMOUNT
+};
+
+enum {
+ DISK_VOTE,
+ COMM_VOTE
+};
+
+enum {
+ INVALID_REQUEST, // reply with a NO vote
+ UPDATE_OIN_INODE, // update both oin and inode
+ DELETE_ACQUIRE,// delete or rename acquire request
+ DELETE_RELEASE,// delete or rename release request
+ RELEASE_CACHE, // release a cache lock I hold
+ CHANGE_MASTER, // request to change master to requestor
+ ADD_OIN_MAP, // add requestor into oin map
+ NOT_MASTER, // I am not master, retry
+ REMASTER_THIS, // remaster lock to me
+ REMASTER_REQUESTOR, // remaster lock to requestor
+ DROP_READONLY, // RO cachelock needs to convert to RW
+ READONLY, // a RW or RO cachelock, requesting RO
+ RELEASE_DENTRY
+};
+
+#define OCFS_MAX_DLM_PKT_SIZE 256
+#define OCFS_DLM_MAX_MSG_SIZE 256
+#define OCFS_DLM_MSG_MAGIC 0x79677083
+
+enum {
+ OSB_DATA_LOCK,
+ OSB_MD_LOCK,
+ OSB_CFG_LOCK,
+ OSB_LOG_LOCK
+};
+
+
+/* convenience macro */
+
+#define OCFS_ASSERT(x) do { if (!(x)) BUG(); } while (0)
+
+
+#define BITCOUNT(x) (((BX_(x)+(BX_(x)>>4)) & 0x0F0F0F0F) % 255)
+#define BX_(x) ((x) - (((x)>>1)&0x77777777) \
+ - (((x)>>2)&0x33333333) \
+ - (((x)>>3)&0x11111111))
+
+
+#ifndef list_for_each_prev_safe
+#define list_for_each_prev_safe(pos, n, head) \
+ for (pos = (head)->prev, n = pos->prev; pos != (head); \
+ pos = n, n = pos->prev)
+#endif
+
+
+#define OCFS_CURRENT_TIME ocfs_get_seconds(CURRENT_TIME)
+#define OCFS_SET_INODE_TIME(i, x, y) (ocfs_get_seconds(i->x) = (y))
+
+
+#define ONE_MEGA_BYTE (1 * 1024 * 1024) /* in bytes */
+
+#define MISS_COUNT_VALUE 30
+
+/*
+** The following flag values reflect the operation to be performed
+** by ocfs_create_modify_file
+*/
+// FILEFLAG MASK
+#define FLAG_FILE_CREATE 0x00000001
+#define FLAG_FILE_EXTEND 0x00000002
+#define FLAG_FILE_DELETE 0x00000004
+#define FLAG_FILE_RENAME 0x00000008
+#define FLAG_FILE_UPDATE 0x00000010
+#define FLAG_FILE_RECOVERY 0x00000020
+#define FLAG_FILE_CREATE_DIR 0x00000040
+#define FLAG_FILE_UPDATE_OIN 0x00000080
+#define FLAG_FILE_RELEASE_MASTER 0x00000100
+#define FLAG_RELEASE_DENTRY 0x00000200
+#define FLAG_CHANGE_MASTER 0x00000400
+#define FLAG_ADD_OIN_MAP 0x00000800
+#define FLAG_DIR 0x00001000
+#define FLAG_REMASTER 0x00002000
+#define FLAG_FAST_PATH_LOCK 0x00004000
+#define FLAG_FILE_UNUSED5 0x00008000
+#define FLAG_FILE_UNUSED6 0x00010000
+//#define FLAG_DEL_NAME 0x00020000
+//#define FLAG_DEL_INODE 0x00040000
+#define FLAG_FILE_UNUSED7 0x00080000
+#define FLAG_FILE_UNUSED8 0x00100000
+#define FLAG_FILE_UNUSED9 0x00200000
+#define FLAG_FILE_RELEASE_CACHE 0x00400000
+#define FLAG_FILE_UNUSED10 0x00800000
+#define FLAG_FILE_UNUSED11 0x01000000
+#define FLAG_FILE_UNUSED12 0x02000000
+#define FLAG_FILE_UNUSED13 0x04000000
+#define FLAG_FILE_TRUNCATE 0x08000000
+#define FLAG_DROP_READONLY 0x10000000
+#define FLAG_READDIR 0x20000000
+#define FLAG_ACQUIRE_LOCK 0x40000000
+#define FLAG_RELEASE_LOCK 0x80000000
+
+
+
+#define OCFS_MAXIMUM_NODES 32
+#define OCFS_MAX_OSB_ID 65536
+
+
+#define HEARTBEAT_METHOD_DISK (1)
+#define HEARTBEAT_METHOD_IPC (2)
+
+
+enum
+{
+ LEFT_NO_OVERLAP,
+ LEFT_ADJACENT,
+ LEFT_OVERLAP,
+ FULLY_CONTAINED,
+ FULLY_CONTAINING,
+ RIGHT_OVERLAP,
+ RIGHT_ADJACENT,
+ RIGHT_NO_OVERLAP
+};
+
+
+/*
+** Extents Defines
+*/
+
+typedef enum _ocfs_ext_flag {
+ LOCAL_EXT = 1,
+ NONLOCAL_EXT = 2
+} ocfs_ext_flag;
+
+/*
+** Structure signatures
+*/
+#define OCFS_TYPE_OSB (0x05534643)
+#define OCFS_TYPE_GLOBAL_DATA (0x07534643)
+
+// LOCKTYPE ONE
+#define OCFS_DLM_NO_LOCK (0x0)
+#define OCFS_DLM_EXCLUSIVE_LOCK (0x2)
+#define OCFS_DLM_ENABLE_CACHE_LOCK (0x8)
+
+#define OCFS_INVALID_NODE_NUM UINT_MAX
+
+typedef enum _ocfs_rw_mode
+{
+ OCFS_READ,
+ OCFS_WRITE
+}
+ocfs_rw_mode;
+
+
+/* lockres->lock_state flags */
+#define FLAG_READONLY_DROPPING 0x00000008
+
+/* osb->osb_flags flags */
+#define OCFS_OSB_FLAGS_BEING_DISMOUNTED (0x00000004)
+#define OCFS_OSB_FLAGS_SHUTDOWN (0x00000008)
+#define OCFS_OSB_FLAGS_OSB_INITIALIZED (0x00000020)
+
+/* OcfsGlobalCtxt.flags flags */
+#define OCFS_FLAG_GLBL_CTXT_RESOURCE_INITIALIZED (0x00000001)
+#define OCFS_FLAG_MEM_LISTS_INITIALIZED (0x00000002)
+#define OCFS_FLAG_SHUTDOWN_VOL_THREAD (0x00000004)
+
+/*
+** Information on Publish sector of each node
+*/
+#define DISK_HBEAT_COMM_ON 20 /* in the order of 5 secs */
+#define DISK_HBEAT_NO_COMM 4 /* in the order of 1 sec */
+#define DISK_HBEAT_INVALID 0 /* in the order of 100ms */
+
+
+/*
+** Information on Vote sector of each node
+*/
+// VOTEFLAG MASK
+#define FLAG_VOTE_NODE 0x1
+#define FLAG_VOTE_OIN_UPDATED 0x2
+#define FLAG_VOTE_OIN_ALREADY_INUSE 0x4
+#define FLAG_VOTE_UPDATE_RETRY 0x8
+#define FLAG_VOTE_FILE_DEL 0x10
+
+
+#define INVALID_NODE_POINTER -1
+#define OCFS_LOCAL_ALLOC_SIGNATURE "LCLBMP"
+
+#define MAX_IP_ADDR_LEN 32
+
+#define SHUTDOWN_SIGS (sigmask(SIGKILL) | sigmask(SIGHUP) | \
+ sigmask(SIGINT) | sigmask(SIGQUIT))
+
+#define EFAIL 999
+#define EWARNING 998
+
+#define OCFS_MAGIC 0xa156f7eb
+
+#define OCFS_LINUX_MAX_FILE_SIZE 9223372036854775807LL
+#define INITIAL_EXTENT_MAP_SIZE 10
+
+#define OCFS_UNUSED_INODE_NUMBER 1
+
+#define OCFS_VOLCFG_LOCK_ITERATE (HZ/10) /* in jiffies */
+#define OCFS_VOLCFG_LOCK_TIME 1000 /* in ms */
+#define OCFS_VOLCFG_HDR_SECTORS 2 /* in sectors */
+#define OCFS_VOLCFG_NEWCFG_SECTORS 4 /* in sectors */
+
+#define OCFS_NM_HEARTBEAT_TIME 500 /* in ms */
+#define OCFS_HEARTBEAT_INIT 10 /* number of NM iterations to stabilize the publish map */
+
+#ifndef O_DIRECT
+#warning this depends on the architecture!
+#define O_DIRECT 040000
+#endif
+
+#define NOT_MOUNTED_EXCLUSIVE (-1)
+
+
+#define BLOCKS_PER_CLEAN_LIST ( ((PAGE_SIZE-sizeof(void *))/sizeof(unsigned long)) >> 3 )
+
+#define IORUN_ALLOC_SIZE (OCFS_MAX_DATA_EXTENTS * sizeof (ocfs_io_runs))
+
+#ifndef _OCFSDEF_H_
+#define _OCFSDEF_H_
+
+/* sm - ocfs 1.0 fails to set fe->sig for dirs */
+#define IS_VALID_FILE_ENTRY(ptr) \
+ (!strcmp((ptr)->i_signature, OCFS2_FILE_ENTRY_SIGNATURE))
+#define IS_VALID_EXTENT_BLOCK(ptr) \
+ (!strcmp((ptr)->h_signature, OCFS2_EXTENT_BLOCK_SIGNATURE))
+
+#define IS_VALID_NODE_NUM(node) \
+ (((node) >= 0) && ((node) < OCFS_MAXIMUM_NODES))
+
+
+#define down_with_flag(_sem, _flg) \
+ do { \
+ if (!_flg) { \
+ down (_sem); \
+ _flg = 1; \
+ } \
+ } while (0)
+
+#define up_with_flag(_sem, _flg) \
+ do { \
+ if (_flg) { \
+ up (_sem); \
+ _flg = 0; \
+ } \
+ } while (0)
+
+#define ocfs_task_interruptible(_o) ((_o)->dlm_task != current && signal_pending(current))
+
+/*
+** Macros
+*/
+#define OCFS_SET_FLAG(flag, value) ((flag) |= (value))
+#define OCFS_CLEAR_FLAG(flag, value) ((flag) &= ~(value))
+
+
+/*
+** Structures...
+*/
+
+
+static inline void *ocfs_malloc(size_t size)
+{
+ void *p = kmalloc(size, GFP_KERNEL);
+ if (p)
+ memset(p, 0, size);
+ return p;
+}
+
+
+typedef enum _ocfs_protocol
+{
+ OCFS_TCP = 1,
+ OCFS_UDP
+}
+ocfs_protocol;
+
+
+#define ocfs_allocate_extent_entry() (ocfs_extent *)kmem_cache_alloc ( \
+ OcfsGlobalCtxt.extent_cache, GFP_NOFS)
+#define ocfs_free_extent_entry(ext) kmem_cache_free(OcfsGlobalCtxt.extent_cache, ext)
+
+
+#define OCFS_NAME "OCFS2"
+
+/* ioctl commands */
+#define OCFS_IOC_MAGIC 'O'
+#define OCFS_IOC_GETTYPE _IOR(OCFS_IOC_MAGIC, 1, struct ocfs_ioc)
+
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)
+typedef long sector_t;
+#define map_bh(bh, sb, blk) \
+ ({ \
+ bh->b_dev = sb->s_dev; \
+ bh->b_blocknr = blk; \
+ bh->b_state |= (1UL << BH_Mapped); \
+ })
+#endif
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
+typedef dev_t ocfs_dev;
+#define OCFS_NODEV 0
+#else /* 2.4 kernel */
+typedef int ocfs_dev;
+#define OCFS_NODEV NODEV
+#endif
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
+/* No longer exists in 2.5 */
+#define fsync_inode_buffers(inode) sync_mapping_buffers(inode->i_mapping)
+#endif /* >= 2.6.0 */
+
+#define OCFS_SB(sb) ((ocfs_super *)OCFS_GENERIC_SB_MEMBER(sb))
+
+
+#define OCFS_MINOR_VERSION (0)
+#define OCFS_MAJOR_VERSION (2)
+#define OCFS_MINOR_VER_STRING "0"
+#define OCFS_MAJOR_VER_STRING "2"
+
+#define OCFS_VOLUME_SIGNATURE "OracleCFS"
+#define MAX_VOL_SIGNATURE_LEN 128
+#define MAX_MOUNT_POINT_LEN 128
+
+#define DLOCK_FLAG_OPEN_MAP (0x1)
+#define DLOCK_FLAG_LOCK (0x2)
+#define DLOCK_FLAG_SEQ_NUM (0x4)
+#define DLOCK_FLAG_MASTER (0x8)
+#define DLOCK_FLAG_LAST_UPDATE (0x10)
+#define DLOCK_FLAG_ADD_SELF (0x20)
+#define DLOCK_FLAG_ALL (DLOCK_FLAG_OPEN_MAP | DLOCK_FLAG_LOCK | \
+ DLOCK_FLAG_SEQ_NUM | DLOCK_FLAG_MASTER | \
+ DLOCK_FLAG_LAST_UPDATE)
+
+
+
+#define OCFS_IPC_DEFAULT_PORT 7001
+
+
+#define OCFS_IPC_DLM_VERSION 0x0201
+
+
+/* =========================================================== */
+
+#include "ocfs2_fs.h"
+
+typedef struct _ocfs_vol_disk_hdr // CLASS
+{
+ __u32 minor_version; // NUMBER RANGE(0,UINT_MAX)
+ __u32 major_version; // NUMBER RANGE(0,UINT_MAX)
+ __u8 signature[MAX_VOL_SIGNATURE_LEN]; // CHAR[MAX_VOL_SIGNATURE_LEN]
+ __u8 mount_point[MAX_MOUNT_POINT_LEN]; // CHAR[MAX_MOUNT_POINT_LEN]
+ __u64 serial_num; // NUMBER RANGE(0,ULONG_LONG_MAX)
+ /* Size of the device in bytes */
+ __u64 device_size; // NUMBER RANGE(0,ULONG_LONG_MAX)
+ /* Start of the volume... typically 0 */
+ __u64 start_off; // NUMBER RANGE(0,ULONG_LONG_MAX)
+ /* Offset to Volume Bitmap... */
+ __u64 bitmap_off; // NUMBER RANGE(0,ULONG_LONG_MAX)
+ /* Offset to the Publish Sector */
+ __u64 publ_off; // NUMBER RANGE(0,ULONG_LONG_MAX)
+ /* Offset to the Vote Sector */
+ __u64 vote_off; // NUMBER RANGE(0,ULONG_LONG_MAX)
+ __u64 root_bitmap_off; // NUMBER RANGE(0,ULONG_LONG_MAX)
+ __u64 data_start_off; // NUMBER RANGE(0,ULONG_LONG_MAX)
+ __u64 root_bitmap_size; // NUMBER RANGE(0,ULONG_LONG_MAX)
+ __u64 root_off; // NUMBER RANGE(0,ULONG_LONG_MAX)
+ __u64 root_size; // NUMBER RANGE(0,ULONG_LONG_MAX)
+ /* Cluster size as specified during format */
+ __u64 cluster_size; // CLUSTERSIZE
+ /* Max number of nodes.... OCFS_MAXIMUM_NODES */
+ __u64 num_nodes; // NUMBER RANGE(0,32)
+ /* Number of free clusters at format */
+ __u64 num_clusters; // NUMBER RANGE(0,ULONG_LONG_MAX)
+ /* OCFS_DEFAULT_FILE_NODE_SIZE */
+ __u64 file_node_size; // NUMBER RANGE(0,ULONG_LONG_MAX)
+ __u64 internal_off; // NUMBER RANGE(0,ULONG_LONG_MAX)
+ /* Offset to Node Config */
+ __u64 node_cfg_off; // NUMBER RANGE(0,ULONG_LONG_MAX)
+ /* Size of Node Config */
+ __u64 node_cfg_size; // NUMBER RANGE(0,ULONG_LONG_MAX)
+ /* Offset to Node Config Lock */
+ __u64 new_cfg_off; // NUMBER RANGE(0,ULONG_LONG_MAX)
+ __u32 prot_bits; // PERMS
+ __s32 excl_mount; // NODENUM
+}
+ocfs_vol_disk_hdr; // END CLASS
+
+typedef struct _ocfs_vol_label // CLASS
+{
+ ocfs2_disk_lock disk_lock; // DISKLOCK
+ __u8 label[MAX_VOL_LABEL_LEN]; // CHAR[MAX_VOL_LABEL_LEN]
+ __u16 label_len; // NUMBER RANGE(0,MAX_VOL_LABEL_LEN)
+ __u8 vol_id[MAX_VOL_ID_LENGTH]; // HEX[MAX_VOL_ID_LENGTH]
+ __u16 vol_id_len; // NUMBER RANGE(0,MAX_VOL_ID_LENGTH)
+ __u8 cluster_name[MAX_CLUSTER_NAME_LEN]; // CHAR[MAX_CLUSTER_NAME_LEN]
+ __u16 cluster_name_len; // NUMBER RANGE(0,MAX_CLUSTER_NAME_LEN)
+}
+ocfs_vol_label; // END CLASS
+
+typedef struct _BARF_BARF_BARF
+{
+ char node_name[MAX_NODE_NAME_LENGTH];
+ ocfs_guid guid;
+ ocfs_ipc_config_info ipc_config;
+}
+BARF_BARF_BARF;
+
+typedef struct _ocfs_extent
+{
+ struct list_head list;
+ __s64 virtual;
+ __s64 physical;
+ __s64 sectors;
+}
+ocfs_extent;
+
+typedef struct _ocfs_extent_map
+{
+ spinlock_t lock;
+ __u32 count;
+ int initialized;
+ struct list_head head;
+ __u32 next_idx;
+ struct list_head *next_ptr;
+}
+ocfs_extent_map;
+
+
+typedef struct _ocfs_obj_id
+{
+ __u32 type; /* 4 byte signature to uniquely identify the struct */
+ __u32 size; /* sizeof the struct */
+}
+ocfs_obj_id;
+
+
+typedef struct _ocfs_super ocfs_super;
+typedef struct _ocfs_io_runs ocfs_io_runs;
+typedef struct _ocfs_lock_res ocfs_lock_res;
+
+/* this limits us to 256 nodes
+ * if we need more, we can do a kmalloc for the map */
+#define OCFS_NODE_MAP_MAX_NODES 256
+typedef struct _ocfs_node_map
+{
+ __u32 num_nodes;
+ unsigned long map[BITS_TO_LONGS(OCFS_NODE_MAP_MAX_NODES)];
+} ocfs_node_map;
+
+
+
+/* XXX: fields that can go if we move this to the inode private */
+struct _ocfs_lock_res
+{
+ __u32 master_node_num; /* Master Node */
+ __u32 lock_state;
+ __u32 lock_holders;
+ __u8 lock_type;
+ atomic_t lr_ref_cnt; /* When 0, freed */ // XXX
+ struct semaphore lock_mutex;
+ __u32 readonly_node;
+ ocfs_node_map readonly_map;
+ ocfs_node_map oin_openmap;
+ __u64 last_upd_seq_num;
+};
+
+struct _ocfs_journal_handle;
+
+/* OCFS2 Inode Private Data */
+typedef struct _ocfs_inode_private
+{
+ /* inode and feoff fields never change and are always safe to
+ * read. */
+ struct inode *inode;
+
+ __u64 feoff;
+
+ /* These fields are protected by priv_sem */
+ struct semaphore priv_sem;
+ __u32 open_hndl_cnt;
+ __u64 chng_seq_num;
+ ocfs_extent_map map;
+ __s64 alloc_size;
+ __u32 oin_flags;
+
+ atomic_t ip_needs_verification;
+
+ /* This protects io on the metadata buffers related to this
+ * inode. We also consider an "abort_trans" an I/O as it will
+ * revert the buffer back to a previous state. */
+ struct rw_semaphore ip_io_sem;
+
+ /* Used by the journalling code to attach an inode to a
+ * handle. These are protected by ip_io_sem in order to lock
+ * out other I/O to the inode until we either commit or
+ * abort. */
+ struct list_head ip_handle_list;
+ struct _ocfs_journal_handle *ip_handle;
+
+ /* inode_extend_sem locks out extends on behalf of other nodes. */
+ struct rw_semaphore inode_extend_sem;
+
+ struct list_head recovery_list; /* protected by recovery_list_sem */
+ __u32 num_extends; /* protected by oin_num_ext_lock */
+
+ atomic_t i_clean_buffer_seq;
+ __u32 flags; /* see below */
+
+ /* stolen right off of ocfs2_dinode */
+ union {
+ __u64 fe_private;
+ __u64 child_dirnode;
+ struct _ip_bitinfo {
+ __u32 used_bits;
+ __u32 total_bits;
+ } ip_bitinfo;
+ } u;
+
+ ocfs_lock_res i_lockres;
+ __u32 i_dir_start_lookup;
+} ocfs_inode_private;
+
+/* Eventually, the 'flags' and 'oin_flags' fields need to be
+ * merged. */
+/* oin_flags flags */
+#define OCFS_OIN_DIRECTORY (0x00000002)
+#define OCFS_OIN_OPEN_FOR_DIRECTIO (0x00000008)
+#define OCFS_OIN_OPEN_FOR_WRITE (0x00000010)
+
+/* 'flags' flags. */
+/* has this inode been deleted, either from this node or from another node. */
+#define OCFS_INODE_DELETED 0x00000001
+/* is this the journal inode? */
+#define OCFS_INODE_JOURNAL 0x00000002
+/* set on init_private, cleared on clear_inode */
+#define OCFS_INODE_INITIALIZED 0x00000004
+/* is this a system file? */
+#define OCFS_INODE_SYSTEM_FILE 0x00000008
+/* are we going to let another node deal with deletion of this inode? */
+#define OCFS_INODE_SKIP_DELETE 0x00000010
+
+#define OCFS_I(i) ((ocfs_inode_private *)(i->u.generic_ip))
+
+#define GET_INODE_CLEAN_SEQ(i) (atomic_t *)(&(OCFS_I(i)->i_clean_buffer_seq))
+
+#define IS_SYSTEM_FILE_INODE(i) ((i) && (OCFS_I(i)->flags & OCFS_INODE_SYSTEM_FILE))
+
+
+#define INODE_DELETED(i) (OCFS_I(i)->flags & OCFS_INODE_DELETED)
+#define SET_INODE_DELETED(i) (OCFS_I(i)->flags |= OCFS_INODE_DELETED)
+#define CLEAR_INODE_DELETED(i) (OCFS_I(i)->flags &= (~OCFS_INODE_DELETED))
+
+#define INODE_JOURNAL(i) (OCFS_I(i)->flags & OCFS_INODE_JOURNAL)
+#define SET_INODE_JOURNAL(i) (OCFS_I(i)->flags |= OCFS_INODE_JOURNAL)
+#define CLEAR_INODE_JOURNAL(i) (OCFS_I(i)->flags &= (~OCFS_INODE_JOURNAL))
+
+#define SET_INODE_FEOFF(i,o) \
+do { \
+ OCFS_I(i)->feoff = o; \
+} while (0)
+
+#define GET_INODE_FEOFF(i) OCFS_I(i)->feoff
+
+
+#define GET_INODE_LOCKRES(i) ({ if (i==NULL) BUG(); (&(OCFS_I(i)->i_lockres)); })
+
+typedef enum _ocfs_vol_state
+{
+ VOLUME_DISABLED,
+ VOLUME_INIT,
+ VOLUME_ENABLED,
+ VOLUME_LOCKED,
+ VOLUME_IN_RECOVERY,
+ VOLUME_MOUNTED,
+ VOLUME_BEING_DISMOUNTED,
+ VOLUME_DISMOUNTED
+}
+ocfs_vol_state;
+
+typedef struct _ocfs_vol_node_map
+{
+ __u64 time;
+ __u32 miss_cnt;
+ atomic_t dismount;
+}
+ocfs_vol_node_map;
+
+struct _ocfs_bitmap_free_head;
+
+typedef struct _ocfs_inode_hash {
+ spinlock_t lock; /* protects the whole hash */
+ int size; /* number of lists in the hash */
+ unsigned int num_ents; /* global number of offsets in there */
+ struct list_head *hash;
+} ocfs_inode_hash;
+
+typedef struct _ocfs_inode_num {
+ enum {
+ INUM_UNBOUND = 0, /* unbound to an inode*/
+ INUM_BOUND /* we have an inode attached. */
+ } i_state;
+ struct list_head i_list;
+ unsigned long i_ino;
+ __u64 i_feoff; /* used only for directory inodes,
+ * points to parent fe of
+ * dirnode. for files i_voteoff == i_feoff,
+ * for root directory this is 0 */
+ struct inode *i_inode; /* may be null! */
+} ocfs_inode_num;
+
+typedef struct _ocfs_commit_task
+{
+ struct completion c_complete;
+ struct task_struct *c_task;
+ struct semaphore c_lock;
+ struct list_head c_list;
+} ocfs_commit_task;
+
+typedef struct _ocfs_dlm_stats
+{
+ atomic_t total;
+ atomic_t okay;
+ atomic_t etimedout;
+ atomic_t efail;
+ atomic_t eagain;
+ atomic_t enoent;
+ atomic_t def;
+}
+ocfs_dlm_stats;
+
+typedef struct _ocfs_alloc_stats
+{
+ atomic_t moves;
+ atomic_t local_data;
+ atomic_t bitmap_data;
+ atomic_t bitmap_meta;
+ atomic_t dir_allocs;
+ atomic_t dir_extends;
+ atomic_t ext_allocs;
+ atomic_t ext_extends;
+} ocfs_alloc_stats;
+
+extern char *system_file_names[];
+
+
+struct _ocfs_journal;
+
+/*
+ * ocfs_super
+ *
+ * A mounted volume is represented using the following structure.
+ */
+struct _ocfs_super
+{
+ ocfs_obj_id obj_id;
+ struct semaphore osb_res; /* resource to protect the ocfs_super */
+ struct list_head osb_next; /* list of ocfs_super(s) */
+ __u32 osb_id; /* id used by the proc interface */
+ struct completion dlm_complete;
+ struct task_struct *dlm_task;
+ ocfs_commit_task *commit;
+ __u32 osb_flags;
+ __s64 file_open_cnt; /* num of open files/dirs. vol cannot be dismounted if > 0 */
+ ocfs_node_map publ_map;
+ struct list_head cache_lock_list;
+ struct super_block *sb;
+ struct inode *root_inode;
+ struct inode *sys_root_inode;
+ struct inode *system_inodes[NUM_SYSTEM_INODES];
+
+ /* new */
+ u64 num_clusters;
+ u64 root_blkno;
+ u64 system_dir_blkno;
+ u64 bitmap_blkno;
+ u64 bitmap_blocks;
+ u64 publish_blkno;
+ u64 publish_blocks;
+ u64 vote_blkno;
+ u64 vote_blocks;
+ u64 autoconfig_blkno;
+ u64 autoconfig_blocks;
+ u64 new_autoconfig_blkno;
+ u64 new_autoconfig_blocks;
+ u64 total_autoconfig_blocks;
+ u8 *uuid;
+ u8 *vol_label;
+
+
+ ocfs_vol_node_map *vol_node_map;
+ struct semaphore cfg_lock;
+ BARF_BARF_BARF **node_cfg_info;
+ __u64 cfg_seq_num;
+ int cfg_initialized;
+ u32 max_nodes;
+ u32 num_cfg_nodes;
+ u32 num_nodes;
+ u32 node_num;
+ int reclaim_id; /* reclaim the original node number*/
+ __u32 hbt;
+ int s_sectsize_bits;
+ int s_clustersize;
+ int s_clustersize_bits;
+ int needs_flush;
+
+ ocfs_alloc_bm cluster_bitmap;
+ ocfs_vol_state vol_state;
+ struct semaphore recovery_lock;
+ spinlock_t recovery_map_lock;
+ ocfs_node_map recovery_map;
+ int disable_recovery;
+ atomic_t num_recovery_threads;
+ struct timer_list lock_timer;
+ atomic_t lock_stop;
+ wait_queue_head_t lock_event;
+ atomic_t lock_event_woken;
+ struct semaphore comm_lock; /* protects ocfs_comm_process_vote_reply */
+ atomic_t nm_init;
+ wait_queue_head_t nm_init_event;
+ __u32 prealloc_lock;
+ struct buffer_head **autoconfig_bhs;
+ struct semaphore publish_lock; /* protects r/w to publish sector */
+ atomic_t node_req_vote; /* set when node's vote req pending */
+ int publish_dirty;
+ struct list_head needs_flush_head;
+ wait_queue_head_t flush_event;
+ atomic_t flush_event_woken;
+ struct _ocfs_journal *journal;
+ atomic_t clean_buffer_seq;
+ spinlock_t clean_buffer_lock;
+ struct list_head *lock_recovery_lists;
+ __u64 *last_publ_seq_num;
+ int have_local_alloc;
+ struct buffer_head *local_alloc_bh;
+ /* Protects local alloc */
+ struct semaphore local_alloc_sem;
+ ocfs_inode_hash inode_hash;
+ struct semaphore extend_sem;
+ __u8 check_mounted; /* tell nm to check mounted flag, protected by publish_lock*/
+ ocfs_dlm_stats net_reqst_stats; /* stats of netdlm vote requests */
+ ocfs_dlm_stats net_reply_stats; /* stats of netdlm vote responses */
+ ocfs_dlm_stats dsk_reqst_stats; /* stats of diskdlm vote requests */
+ ocfs_dlm_stats dsk_reply_stats; /* stats of diskdlm vote responses */
+ ocfs_alloc_stats alloc_stats;
+ char dev_str[20]; /* "major,minor" of the device */
+ struct semaphore vote_sem; /* protects calls to ocfs_process_vote */
+ struct list_head vote_obj_queue;
+ spinlock_t vote_obj_queue_lock;
+ unsigned long voting_ino; /* only safe from the process_vote pid */
+};
+
+typedef struct _ocfs_comm_info
+{
+ u16 ip_version; /* IP version in NBO */
+ u16 ip_port; /* IP port in NBO */
+ union {
+ __u32 ip_addr4; /* IPv4 address in NBO */
+ __u32 ip_addr6[4]; /* IPv6 address in NBO */
+ } addr_u;
+} ocfs_comm_info;
+
+typedef struct _ocfs_global_ctxt
+{
+ ocfs_obj_id obj_id;
+ struct semaphore global_res;
+ struct list_head osb_next; /* List of all volumes */
+ kmem_cache_t *inode_cache;
+ kmem_cache_t *extent_cache;
+ __u32 flags;
+ __u32 pref_node_num; /* preferred... osb has the real one */
+ ocfs_guid guid; /* uniquely identifies a node */
+ char *node_name; /* human readable node identification */
+ char *cluster_name; /* unused */
+ ocfs_comm_info comm_info; /* ip address, etc for listener */
+ int comm_info_read; /* ipc info loaded from config file */
+ spinlock_t comm_seq_lock; /* protects comm_seq_num */
+ __u64 comm_seq_num; /* local node seq num used in ipcdlm */
+ ocfs_dlm_stats net_reqst_stats; /* stats of netdlm vote requests */
+ ocfs_dlm_stats net_reply_stats; /* stats of netdlm vote responses */
+ ocfs_dlm_stats dsk_reqst_stats; /* stats of diskdlm vote requests */
+ ocfs_dlm_stats dsk_reply_stats; /* stats of diskdlm vote responses */
+}
+ocfs_global_ctxt;
+
+struct _ocfs_io_runs
+{
+ __u64 disk_off;
+ __u32 offset;
+ __u32 byte_cnt;
+};
+
+typedef struct _ocfs_ipc_ctxt
+{
+ __u32 dlm_msg_size;
+ __u16 version;
+ int init;
+ struct socket *send_sock;
+ struct socket *recv_sock;
+ struct completion complete;
+ struct task_struct *task;
+}
+ocfs_ipc_ctxt;
+
+
+extern ocfs_ipc_ctxt OcfsIpcCtxt;
+
+typedef struct _ocfs_ipc_dlm_config
+{
+ __u16 version;
+ __u32 msg_size;
+ __u32 num_recv_threads;
+}
+ocfs_ipc_dlm_config;
+
+/*
+** Globals ...
+*/
+extern ocfs_global_ctxt OcfsGlobalCtxt;
+
+typedef struct _ocfs_publish // CLASS
+{
+ __u64 time; // NUMBER RANGE(0,ULONG_LONG_MAX)
+ __s32 vote; // BOOL
+ __u32 dirty; // BOOL
+ __u32 vote_type; // FILEFLAG
+ __u32 mounted; /* used for journaling */
+ __u32 reserved1[8]; // this is now used as the vote_map !!!!! was __u8[32]
+ __u64 vote_map; // NODEBITMAP this is now unused!!!!
+ __u64 publ_seq_num; // NUMBER RANGE(0,ULONG_LONG_MAX)
+ __u64 lock_id; // NUMBER RANGE(0,ULONG_LONG_MAX)
+ /* last seq num used in comm voting */
+ __u64 comm_seq_num; // NUMBER RANGE(0,ULONG_LONG_MAX)
+ __u64 fe_off; /* needed to create inodes. */
+} ocfs_publish; // END CLASS
+
+typedef struct _ocfs_vote // CLASS
+{
+ __u8 type;
+ __u8 node;
+ __u8 reserved1[30]; // used to be vote[32]
+ __u64 vote_seq_num; // NUMBER RANGE(0,ULONG_LONG_MAX)
+ __u64 lock_id; // NUMBER RANGE(0,ULONG_LONG_MAX)
+ __u8 open_handle; // BOOL
+ __u8 ov_pad[7]; // UNUSED
+} ocfs_vote; // END CLASS
+
+typedef struct _ocfs_dlm_msg_hdr
+{
+ __u64 lock_id;
+ __u64 lock_seq_num;
+ __u32 flags;
+ __u8 open_handle;
+ __u8 odmh_pad[3];
+} ocfs_dlm_msg_hdr;
+
+typedef ocfs_dlm_msg_hdr ocfs_dlm_req_master;
+typedef ocfs_dlm_msg_hdr ocfs_dlm_disk_vote_req;
+
+typedef struct _ocfs_dlm_reply_master
+{
+ ocfs_dlm_msg_hdr h;
+ __u32 status;
+}
+ocfs_dlm_reply_master;
+
+typedef struct _ocfs_dlm_disk_vote_reply
+{
+ ocfs_dlm_msg_hdr h;
+ __u32 status;
+}
+ocfs_dlm_disk_vote_reply;
+
+typedef struct _ocfs_dlm_msg
+{
+ __u32 magic;
+ __u32 msg_len;
+ __u8 vol_id[MAX_VOL_ID_LENGTH];
+ __u32 src_node;
+ __u32 dst_node;
+ __u32 msg_type;
+ __u32 check_sum;
+ __u8 msg_buf[0];
+} ocfs_dlm_msg;
+
+typedef struct _ocfs_vote_obj
+{
+ struct list_head list;
+ wait_queue_head_t voted_event;
+ atomic_t voted_event_woken;
+ atomic_t refcount;
+ spinlock_t lock;
+ __u32 vote_state;
+ __u32 req_lock_type;
+ __u32 vote_status;
+ ocfs_node_map req_vote_map;
+ ocfs_node_map got_vote_map;
+ ocfs_node_map tmp_openmap;
+ __u64 seq_num;
+ pid_t pid;
+ ocfs_dlm_msg m;
+} ocfs_vote_obj;
+
+enum {
+ VOTE_OBJ_STATE_UNSENT,
+ VOTE_OBJ_STATE_SENT,
+ VOTE_OBJ_STATE_PARTIAL_REPLY,
+ VOTE_OBJ_STATE_FULL_REPLY,
+ VOTE_OBJ_STATE_DESTROYING
+};
+
+
+
+typedef struct _ocfs_vote_obj_lookup_data ocfs_vote_obj_lookup_data;
+
+struct _ocfs_vote_obj_lookup_data
+{
+ union {
+ struct {
+ __u64 seq_num;
+ __u64 lock_id;
+ } s;
+ struct {
+ char *page;
+ int *len;
+ int max;
+ } proc;
+ } u;
+ int (*func) (ocfs_vote_obj *obj, struct _ocfs_vote_obj_lookup_data *data);
+ ocfs_vote_obj **ret;
+};
+
+
+
+
+typedef struct _ocfs_recv_ctxt
+{
+ __s32 msg_len;
+ __u8 msg[OCFS_MAX_DLM_PKT_SIZE];
+ int status;
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
+ struct work_struct ipc_wq;
+#else
+ struct tq_struct ipc_tq;
+#endif
+}
+ocfs_recv_ctxt;
+
+typedef struct _ocfs_cfg_task
+{
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
+ struct work_struct cfg_wq;
+#else
+ struct tq_struct cfg_tq;
+#endif
+ ocfs_super *osb;
+ __u64 lock_off;
+ __u8 *buffer;
+ struct buffer_head *bh;
+}
+ocfs_cfg_task;
+
+typedef enum _ocfs_volcfg_op
+{
+ OCFS_VOLCFG_ADD,
+ OCFS_VOLCFG_UPD
+}
+ocfs_volcfg_op;
+
+typedef struct _ocfs_vote_request_ctxt
+{
+ int request_method;
+ __u32 node_num;
+ int status;
+ union {
+ ocfs_dlm_msg *dlm_msg;
+ ocfs_publish *publish;
+ } u;
+} ocfs_vote_request_ctxt;
+
+typedef struct _ocfs_vote_reply_ctxt
+{
+ int reply_method;
+ int *status;
+ ocfs_node_map *got_vote_map;
+ ocfs_node_map *open_map;
+ __u32 flags;
+ union {
+ ocfs_dlm_reply_master *reply;
+ ocfs_vote *vote;
+ } u;
+} ocfs_vote_reply_ctxt;
+
+
+/* these three used as 'type' in ocfs_bitmap_update */
+#if 0
+#define DISK_ALLOC_DIR_NODE 1
+#endif
+#define DISK_ALLOC_EXTENT_NODE 2
+#define DISK_ALLOC_VOLUME 3
+#define DISK_ALLOC_INODE 4
+
+/* a bitmap update, currently used for freeing bits */
+typedef struct ocfs_bitmap_update
+{
+ __u64 length;
+ __u64 file_off;
+ __u32 type;
+ __u32 node_num;
+}
+ocfs_bitmap_update;
+
+#define FREE_LOG_SIZE 150
+
+typedef struct _ocfs_free_rec
+{
+ __u32 num_updates;
+ struct list_head log_list;
+ ocfs_bitmap_update update[FREE_LOG_SIZE];
+}
+ocfs_free_rec;
+
+typedef struct _ocfs_bitmap_free_head
+{
+ __u32 num_logs;
+ struct _ocfs_free_rec *tail;
+ struct list_head free_logs;
+}
+ocfs_bitmap_free_head;
+
+
+struct ocfs_ioc
+{
+ char name[255]; /* "OCFS" */
+ char version[255]; /* version */
+ __u16 nodenum; /* node number */
+ char nodename[255]; /* node name */
+};
+
+/* timeout structure taken from Ben's aio.c */
+typedef struct _ocfs_timeout {
+ struct timer_list timer;
+ int timed_out;
+ wait_queue_head_t wait;
+} ocfs_timeout;
+
+#define NAMEI_RA_CHUNKS 2
+#define NAMEI_RA_BLOCKS 4
+#define NAMEI_RA_SIZE (NAMEI_RA_CHUNKS * NAMEI_RA_BLOCKS)
+#define NAMEI_RA_INDEX(c,b) (((c) * NAMEI_RA_BLOCKS) + (b))
+
+
+#define __ocfs_wait(wq, condition, timeo, ret) \
+do { \
+ ocfs_timeout __to; \
+ \
+ DECLARE_WAITQUEUE(__wait, current); \
+ DECLARE_WAITQUEUE(__to_wait, current); \
+ \
+ ocfs_init_timeout(&__to); \
+ \
+ if (timeo) { \
+ ocfs_set_timeout(&__to, timeo); \
+ if (__to.timed_out) { \
+ ocfs_clear_timeout(&__to); \
+ } \
+ } \
+ \
+ add_wait_queue(&wq, &__wait); \
+ add_wait_queue(&__to.wait, &__to_wait); \
+ do { \
+ ret = 0; \
+ set_current_state(TASK_INTERRUPTIBLE); \
+ if (condition) \
+ break; \
+ ret = -ETIMEDOUT; \
+ if (__to.timed_out) \
+ break; \
+ schedule(); \
+ if (signal_pending(current)) { \
+ ret = -EINTR; \
+ break; \
+ } \
+ } while (1); \
+ \
+ set_current_state(TASK_RUNNING); \
+ remove_wait_queue(&wq, &__wait); \
+ remove_wait_queue(&__to.wait, &__to_wait); \
+ \
+ if (timeo) \
+ ocfs_clear_timeout(&__to); \
+ \
+} while(0)
+
+#define ocfs_wait(wq, condition, timeout) \
+({ \
+ int __ret = 0; \
+ if (!(condition)) \
+ __ocfs_wait(wq, condition, timeout, __ret); \
+ __ret; \
+})
+#endif /* !USERSPACE_TOOL */
+
+
+
+static inline unsigned long ino_from_fe_off(struct inode *inode)
+{
+ __u64 block_off = GET_INODE_FEOFF(inode) >> inode->i_sb->s_blocksize_bits;
+
+ return (unsigned long)(block_off & (__u64)ULONG_MAX);
+}
+
+static inline unsigned long ino_from_off(struct super_block *sb,
+ __u64 off)
+{
+ __u64 block_off = off >> sb->s_blocksize_bits;
+
+ return (unsigned long)(block_off & (__u64)ULONG_MAX);
+}
+
+static inline void ocfs_set_de_type(struct super_block *sb,
+ struct ocfs2_dir_entry *de,
+ umode_t mode)
+{
+ de->file_type = ocfs_type_by_mode[(mode & S_IFMT)>>S_SHIFT];
+}
+
+/* Allocate a free head with no logs queued; NULL if ocfs_malloc() fails. */
+static inline ocfs_bitmap_free_head * ocfs_alloc_bitmap_free_head(void)
+{
+	ocfs_bitmap_free_head *head = ocfs_malloc(sizeof(ocfs_bitmap_free_head));
+
+	if (!head)
+		return NULL;
+	head->num_logs = 0;
+	head->tail = NULL;
+	INIT_LIST_HEAD(&head->free_logs);
+	return head;
+}
+
+static inline void ocfs_free_bitmap_free_head(ocfs_bitmap_free_head *f)
+{
+	ocfs_free_rec *log;
+	struct list_head *p, *n;
+	/* Frees f and every record on f->free_logs; NULL is a no-op, empty heads are freed too. */
+	if (f) {
+		list_for_each_safe(p, n, &(f->free_logs)) {
+			log = list_entry(p, ocfs_free_rec, log_list);
+			list_del(&(log->log_list));
+			kfree(log);
+		}
+		kfree(f);
+	}
+}
+
+static inline unsigned int ocfs_clusters_for_bytes(struct super_block *sb,
+ __u64 bytes)
+{
+ int cl_bits = OCFS_SB(sb)->s_clustersize_bits;
+ unsigned int clusters;
+
+ bytes += OCFS_SB(sb)->s_clustersize - 1;
+ /* OCFS2 just cannot have enough clusters to overflow this */
+ clusters = (unsigned int)(bytes >> cl_bits);
+
+ return clusters;
+}
+
+static inline __u64 ocfs_align_bytes_to_clusters(struct super_block *sb,
+ __u64 bytes)
+{
+ int cl_bits = OCFS_SB(sb)->s_clustersize_bits;
+ unsigned int clusters;
+
+ clusters = ocfs_clusters_for_bytes(sb, bytes);
+ return (__u64)clusters << cl_bits;
+}
+
+static inline int ocfs_inc_icount(struct inode *inode)
+{
+ if (atomic_read(&inode->i_count) >= 1 &&
+ !(inode->i_state & I_CLEAR)) {
+ atomic_inc(&inode->i_count);
+ return 0;
+ }
+
+ LOG_ERROR_ARGS("eek! i_count=%d, i_state=%lu, i_ino=%lu, no dentry\n",
+ atomic_read(&inode->i_count), inode->i_state,
+ inode->i_ino);
+
+ LOG_ERROR_ARGS("feoff=%llu, deleted=%u\n",
+ GET_INODE_FEOFF(inode),
+ INODE_DELETED(inode));
+
+ return -EINVAL;
+}
+
+#define ocfs_check_inode(inode) __ocfs_check_inode(inode, __LINE__)
+
+/* call this right before an atomic_dec or iput; BUG()s if i_count is too low. */
+static inline void __ocfs_check_inode(struct inode *inode, unsigned int line)
+{
+	struct dentry *dentry = NULL;
+	struct list_head *iter;
+	int max_cnt = 1; /* one for the inode hash */
+	ocfs_super *osb = NULL;
+
+	if (!inode)
+		return;
+
+	osb = OCFS_SB(inode->i_sb);
+
+	if (INODE_DELETED(inode))
+		max_cnt--;
+
+	list_for_each (iter, &(inode->i_dentry)) {
+		dentry = list_entry (iter, struct dentry, d_alias);
+		/* only the first alias is looked at */
+		break;
+	}
+
+	/* add one for the dentrys ref... */
+	if (dentry)
+		max_cnt++;
+
+	/* +1 for the ref you're about to remove. */
+	if (atomic_read(&inode->i_count) >= (max_cnt+1))
+		return;
+
+	/* Too few refs.  "%.*s" below bounds the name by d_name.len. */
+	LOG_ERROR_ARGS("max_cnt = %d, line = %u\n", max_cnt, line);
+	if (dentry)
+		LOG_ERROR_ARGS("eek! i_count=%d, i_state=%lu, i_ino=%lu, name='%.*s'\n",
+			atomic_read(&inode->i_count), inode->i_state, inode->i_ino,
+			(int)dentry->d_name.len, dentry->d_name.name);
+	else
+		LOG_ERROR_ARGS("eek! i_count=%d, i_state=%lu, i_ino=%lu, no dentry\n",
+			atomic_read(&inode->i_count), inode->i_state, inode->i_ino);
+
+	LOG_ERROR_ARGS("feoff=%llu, deleted=%u\n",
+		       GET_INODE_FEOFF(inode),
+		       INODE_DELETED(inode));
+
+	BUG();
+}
+
+/* 1 if the inode's lockres is a cache lock whose master_node_num is osb->node_num. */
+static inline int ocfs_is_local_cache_lock(ocfs_super *osb, struct inode *inode)
+{
+	ocfs_lock_res *res = GET_INODE_LOCKRES(inode);
+
+	if (res->lock_type != OCFS_DLM_ENABLE_CACHE_LOCK)
+		return 0;
+	return res->master_node_num == osb->node_num;
+}
+
+/* Return log2(num) when num is a power of two, 0 otherwise (also for num == 1). */
+static inline int ocfs_get_right_shift_bits(int num)
+{
+	int i;
+
+	/* Unsigned shift: "1 << 31" on a signed int is undefined behaviour. */
+	for (i = 0; i < 32; i++) {
+		if ((1U << i) == (unsigned int)num)
+			return i;
+	}
+	/* num is not a power of two. */
+	return 0;
+}
+
+/* Index of the first record in eb->h_list whose end, in bytes, is >= vbo;
+ * l_count when no record reaches vbo. */
+static inline int ocfs_extent_for_offset(struct super_block *sb,
+					 ocfs2_extent_block *eb,
+					 u64 vbo)
+{
+	int k;
+	ocfs2_extent_list *el = &eb->h_list;
+	ocfs2_extent_rec *ext;
+	u32 cluster;
+	u64 byte_off;
+
+	for (k = 0; k < el->l_count; k++) {
+		ext = &el->l_recs[k];
+		cluster = ext->e_cpos + ext->e_clusters;
+		/* Widen first: a 32-bit shift wraps once the offset reaches 4GB. */
+		byte_off = (u64)cluster << OCFS_SB(sb)->s_clustersize_bits;
+		if (byte_off >= vbo)
+			break;
+	}
+	return k;
+}
+
+static inline u64 ocfs_clusters_to_blocks(struct super_block *sb,
+ u32 clusters)
+{
+ int c_to_b_bits = OCFS_SB(sb)->s_clustersize_bits -
+ sb->s_blocksize_bits;
+
+ return (u64)clusters << c_to_b_bits;
+}
+
+static inline u32 ocfs_blocks_to_clusters(struct super_block *sb,
+ u64 blocks)
+{
+ int b_to_c_bits = OCFS_SB(sb)->s_clustersize_bits -
+ sb->s_blocksize_bits;
+
+ return (u32)(blocks >> b_to_c_bits);
+}
+
+static inline int ocfs_blocks_for_bits(struct super_block *sb,
+ unsigned int num_bits)
+{
+ int bits_per_chunk = OCFS_BITS_IN_CHUNK(sb);
+
+ return (num_bits + (bits_per_chunk - 1)) / bits_per_chunk;
+}
+
+static inline void ocfs_bitmap_block_for_off(struct super_block *sb,
+ unsigned int bm_off,
+ unsigned int *block,
+ unsigned int *block_off)
+{
+ if (block)
+ *block = bm_off / OCFS_BITS_IN_CHUNK(sb);
+
+ if (block_off)
+ *block_off = bm_off % OCFS_BITS_IN_CHUNK(sb);
+}
+
+static inline unsigned int ocfs_bitmap_off_for_block(struct super_block *sb,
+ unsigned int block,
+ unsigned int block_off)
+{
+ return ((block * OCFS_BITS_IN_CHUNK(sb)) + block_off);
+}
+
+static inline int ocfs_bitmap_blocks_affected(struct super_block *sb,
+ unsigned int start_bit,
+ unsigned int num_bits,
+ unsigned int *start_blk)
+{
+ unsigned int start_tmp, end_tmp;
+
+ ocfs_bitmap_block_for_off(sb, start_bit, &start_tmp, NULL);
+ if (start_blk)
+ *start_blk = start_tmp;
+
+ if (!num_bits)
+ return 0;
+
+ ocfs_bitmap_block_for_off(sb, start_bit + num_bits - 1,
+ &end_tmp, NULL);
+
+ return (end_tmp - start_tmp + 1);
+}
+
+
+typedef struct _ocfs_journal_handle ocfs_journal_handle;
+
+#endif /* !OCFS_H */
Added: trunk/src/ocfs1_fs_compat.h
===================================================================
--- trunk/src/ocfs1_fs_compat.h 2004-06-17 16:59:59 UTC (rev 1127)
+++ trunk/src/ocfs1_fs_compat.h 2004-06-17 21:06:33 UTC (rev 1128)
@@ -0,0 +1,112 @@
+/* -*- mode: c; c-basic-offset: 8; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
+ * ocfs1_fs_compat.h
+ *
+ * OCFS1 volume header definitions. OCFS2 creates valid but unmountable
+ * OCFS1 volume headers on the first two sectors of an OCFS2 volume.
+ * This allows an OCFS1 volume to see the partition and cleanly fail to
+ * mount it.
+ *
+ * Copyright (C) 2002, 2004 Oracle. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License, version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ *
+ * Authors: Kurt Hackel, Mark Fasheh, Sunil Mushran, Wim Coekaerts,
+ * Manish Singh, Neeraj Goyal, Suchit Kaura, Joel Becker
+ */
+
+#ifndef _OCFS1_FS_COMPAT_H
+#define _OCFS1_FS_COMPAT_H
+
+#define MAX_VOL_SIGNATURE_LEN_V1 128
+#define MAX_MOUNT_POINT_LEN_V1 128
+#define MAX_VOL_ID_LENGTH_V1 16
+#define MAX_VOL_LABEL_LEN_V1 64
+#define MAX_CLUSTER_NAME_LEN_V1 64
+
+#define OCFS1_MAJOR_VERSION (2)
+#define OCFS1_MINOR_VERSION (0)
+#define OCFS1_VOLUME_SIGNATURE "OracleCFS"
+
+/*
+ * OCFS1 superblock. Lives at sector 0.
+ */
+typedef struct _ocfs1_vol_disk_hdr
+{
+/*00*/ __u32 minor_version;
+ __u32 major_version;
+/*08*/ __u8 signature[MAX_VOL_SIGNATURE_LEN_V1];
+/*88*/ __u8 mount_point[MAX_MOUNT_POINT_LEN_V1];
+/*108*/ __u64 serial_num;
+/*110*/ __u64 device_size;
+ __u64 start_off;
+/*120*/ __u64 bitmap_off;
+ __u64 publ_off;
+/*130*/ __u64 vote_off;
+ __u64 root_bitmap_off;
+/*140*/ __u64 data_start_off;
+ __u64 root_bitmap_size;
+/*150*/ __u64 root_off;
+ __u64 root_size;
+/*160*/ __u64 cluster_size;
+ __u64 num_nodes;
+/*170*/ __u64 num_clusters;
+ __u64 dir_node_size;
+/*180*/ __u64 file_node_size;
+ __u64 internal_off;
+/*190*/ __u64 node_cfg_off;
+ __u64 node_cfg_size;
+/*1A0*/ __u64 new_cfg_off;
+ __u32 prot_bits;
+ __s32 excl_mount;
+/*1B0*/
+} ocfs1_vol_disk_hdr;
+
+/* OCFS1 disk lock; it is the first member of ocfs1_vol_label. */
+typedef struct _ocfs1_disk_lock
+{
+/*00*/ __u32 curr_master;
+ __u8 file_lock;
+ __u8 compat_pad[3]; /* Not in original definition. Used to
+ make the already existing alignment
+ explicit */
+ __u64 last_write_time;
+/*10*/ __u64 last_read_time;
+ __u32 writer_node_num;
+ __u32 reader_node_num;
+/*20*/ __u64 oin_node_map;
+ __u64 dlock_seq_num;
+/*30*/
+} ocfs1_disk_lock;
+
+/*
+ * OCFS1 volume label. Lives at sector 1. Leading comments are hex byte offsets.
+ */
+typedef struct _ocfs1_vol_label
+{
+/*00*/ ocfs1_disk_lock disk_lock;
+/*30*/ __u8 label[MAX_VOL_LABEL_LEN_V1];
+/*70*/ __u16 label_len;
+/*72*/ __u8 vol_id[MAX_VOL_ID_LENGTH_V1];
+/*82*/ __u16 vol_id_len;
+/*84*/ __u8 cluster_name[MAX_CLUSTER_NAME_LEN_V1];
+/*C4*/ __u16 cluster_name_len;
+/*C6*/
+} ocfs1_vol_label;
+
+
+#endif /* _OCFS1_FS_COMPAT_H */
+
Added: trunk/src/ocfs2_fs.h
===================================================================
--- trunk/src/ocfs2_fs.h 2004-06-17 16:59:59 UTC (rev 1127)
+++ trunk/src/ocfs2_fs.h 2004-06-17 21:06:33 UTC (rev 1128)
@@ -0,0 +1,526 @@
+/* -*- mode: c; c-basic-offset: 8; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
+ * ocfs2_fs.h
+ *
+ * On-disk structures for OCFS2.
+ *
+ * Copyright (C) 2002, 2004 Oracle. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License, version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ *
+ * Authors: Kurt Hackel, Mark Fasheh, Sunil Mushran, Wim Coekaerts,
+ * Manish Singh, Neeraj Goyal, Suchit Kaura, Joel Becker
+ */
+
+#ifndef _OCFS2_FS_H
+#define _OCFS2_FS_H
+
+/* Version */
+#define OCFS2_MAJOR_REV_LEVEL 2
+#define OCFS2_MINOR_REV_LEVEL 0
+
+/*
+ * An OCFS2 volume starts this way:
+ * Sector 0: Valid ocfs1_vol_disk_hdr that cleanly fails to mount v1.
+ * Sector 1: Valid ocfs1_vol_label that cleanly fails to mount v1.
+ * Block OCFS2_SUPER_BLOCK_BLKNO: OCFS2 superblock.
+ *
+ * All other structures are found from the superblock information.
+ *
+ * OCFS2_SUPER_BLOCK_BLKNO is in blocks, not sectors. eg, for a
+ * blocksize of 2K, it is 4096 bytes into disk.
+ */
+#define OCFS2_SUPER_BLOCK_BLKNO 2
+
+/*
+ * As OCFS2 has a minimum clustersize of 4K, it has a maximum blocksize
+ * of 4K
+ */
+#define OCFS2_MAX_BLOCKSIZE 4096
+
+/* Object signatures */
+#define OCFS2_SUPER_BLOCK_SIGNATURE "OCFSV2"
+#define OCFS2_FILE_ENTRY_SIGNATURE "INODE01"
+#define OCFS2_EXTENT_BLOCK_SIGNATURE "EXBLK01"
+
+/*
+ * Flags on ocfs2_dinode.i_flags
+ */
+#define OCFS2_VALID_FL (0x00000001) /* Inode is valid */
+#define OCFS2_UNUSED2_FL (0x00000002)
+#define OCFS2_ORPHANED_FL (0x00000004) /* On the orphan list */
+#define OCFS2_UNUSED3_FL (0x00000008)
+/* System inode flags */
+#define OCFS2_SYSTEM_FL (0x00000010) /* System inode */
+#define OCFS2_SUPER_BLOCK_FL (0x00000020) /* Super block */
+#define OCFS2_LOCAL_ALLOC_FL (0x00000040) /* Node local alloc bitmap */
+#define OCFS2_BITMAP_FL (0x00000080) /* Allocation bitmap */
+#define OCFS2_JOURNAL_FL (0x00000100) /* Node journal */
+#define OCFS2_DLM_FL (0x00000200) /* DLM area */
+
+
+/* Limit of space in ocfs2_dir_entry */
+#define OCFS2_MAX_FILENAME_LENGTH 255
+
+/* Limit of node map bits in ocfs2_disk_lock */
+#define OCFS2_MAX_NODES 256
+
+#define MAX_VOL_ID_LENGTH 16
+#define MAX_VOL_LABEL_LEN 64
+#define MAX_CLUSTER_NAME_LEN 64
+
+
+#define ONE_MEGA_BYTE (1 * 1024 * 1024) /* in bytes */
+#define OCFS2_DEFAULT_JOURNAL_SIZE (8 * ONE_MEGA_BYTE)
+
+
+/* System file index */
+enum {
+ GLOBAL_BITMAP_SYSTEM_INODE = 0,
+ GLOBAL_INODE_ALLOC_SYSTEM_INODE,
+ GLOBAL_INODE_ALLOC_BITMAP_SYSTEM_INODE,
+ //AUTOCONFIG_SYSTEM_INODE,
+ //PUBLISH_SYSTEM_INODE,
+ //VOTE_SYSTEM_INODE,
+ DLM_SYSTEM_INODE,
+ ORPHAN_DIR_SYSTEM_INODE,
+#define OCFS2_LAST_GLOBAL_SYSTEM_INODE ORPHAN_DIR_SYSTEM_INODE
+ EXTENT_ALLOC_SYSTEM_INODE,
+ EXTENT_ALLOC_BITMAP_SYSTEM_INODE,
+ INODE_ALLOC_SYSTEM_INODE,
+ INODE_ALLOC_BITMAP_SYSTEM_INODE,
+ JOURNAL_SYSTEM_INODE,
+ LOCAL_ALLOC_SYSTEM_INODE,
+ NUM_SYSTEM_INODES
+};
+
+static char *ocfs2_system_inode_names[NUM_SYSTEM_INODES] = {
+	/* Global system inodes (single copy); names hold no conversions */
+	[GLOBAL_BITMAP_SYSTEM_INODE]		= "global_bitmap",
+	[GLOBAL_INODE_ALLOC_SYSTEM_INODE]	= "global_inode_alloc",
+	[GLOBAL_INODE_ALLOC_BITMAP_SYSTEM_INODE] = "global_inode_alloc_bitmap",
+	//[AUTOCONFIG_SYSTEM_INODE]		= "autoconfig",
+	//[PUBLISH_SYSTEM_INODE]		= "publish",
+	//[VOTE_SYSTEM_INODE]			= "vote",
+	[DLM_SYSTEM_INODE]			= "dlm",
+	[ORPHAN_DIR_SYSTEM_INODE]		= "orphan_dir",
+
+	/* Node-specific system inodes (one copy per node); %04d takes the node number */
+	[EXTENT_ALLOC_SYSTEM_INODE]		= "extent_alloc:%04d",
+	[EXTENT_ALLOC_BITMAP_SYSTEM_INODE]	= "extent_alloc_bitmap:%04d",
+	[INODE_ALLOC_SYSTEM_INODE]		= "inode_alloc:%04d",
+	[INODE_ALLOC_BITMAP_SYSTEM_INODE]	= "inode_alloc_bitmap:%04d",
+	[JOURNAL_SYSTEM_INODE]			= "journal:%04d",
+	[LOCAL_ALLOC_SYSTEM_INODE]		= "local_alloc:%04d"
+};
+
+
+/* Default size for the local alloc bitmap */
+#define OCFS2_LOCAL_BITMAP_DEFAULT_SIZE 256
+
+/*
+ * OCFS2 directory file types. Only the low 3 bits are used. The
+ * other bits are reserved for now.
+ */
+#define OCFS2_FT_UNKNOWN 0
+#define OCFS2_FT_REG_FILE 1
+#define OCFS2_FT_DIR 2
+#define OCFS2_FT_CHRDEV 3
+#define OCFS2_FT_BLKDEV 4
+#define OCFS2_FT_FIFO 5
+#define OCFS2_FT_SOCK 6
+#define OCFS2_FT_SYMLINK 7
+
+#define OCFS2_FT_MAX 8
+
+/*
+ * OCFS2_DIR_PAD defines the directory entries boundaries
+ *
+ * NOTE: It must be a multiple of 4
+ */
+#define OCFS2_DIR_PAD 4
+#define OCFS2_DIR_ROUND (OCFS2_DIR_PAD - 1)
+#define OCFS2_DIR_REC_LEN(name_len) (((name_len) + 12 + \
+ OCFS2_DIR_ROUND) & \
+ ~OCFS2_DIR_ROUND)
+#define OCFS2_LINK_MAX 32000
+
+#define S_SHIFT 12	/* shift applied to (mode & S_IFMT) to index the table below */
+static unsigned char ocfs_type_by_mode[S_IFMT >> S_SHIFT] = {	/* S_IF* type -> OCFS2_FT_* */
+	[S_IFREG >> S_SHIFT]	= OCFS2_FT_REG_FILE,
+	[S_IFDIR >> S_SHIFT]	= OCFS2_FT_DIR,
+	[S_IFCHR >> S_SHIFT]	= OCFS2_FT_CHRDEV,
+	[S_IFBLK >> S_SHIFT]	= OCFS2_FT_BLKDEV,
+	[S_IFIFO >> S_SHIFT]	= OCFS2_FT_FIFO,
+	[S_IFSOCK >> S_SHIFT]	= OCFS2_FT_SOCK,
+	[S_IFLNK >> S_SHIFT]	= OCFS2_FT_SYMLINK,
+};
+
+
+/*
+ * Convenience casts
+ */
+#define OCFS2_RAW_SB(dinode) (&((dinode)->id2.i_super))
+#define DISK_LOCK(dinode) (&((dinode)->i_disk_lock))
+#define LOCAL_ALLOC(dinode) (&((dinode)->id2.i_lab))
+
+/* TODO: change these? */
+#define OCFS2_NODE_CONFIG_HDR_SIGN "NODECFG"
+#define OCFS2_NODE_CONFIG_SIGN_LEN 8
+#define OCFS2_NODE_CONFIG_VER 2
+#define OCFS2_NODE_MIN_SUPPORTED_VER 2
+
+#define MAX_NODE_NAME_LENGTH 32
+
+#define OCFS2_GUID_HOSTID_LEN 20
+#define OCFS2_GUID_MACID_LEN 12
+#define OCFS2_GUID_LEN (OCFS2_GUID_HOSTID_LEN + OCFS2_GUID_MACID_LEN)
+
+
+
+/*
+ * On disk extent record for OCFS2
+ * It describes a range of clusters on disk.
+ */
+typedef struct _ocfs2_extent_rec {
+/*00*/ __u32 e_cpos; /* Offset into the file, in clusters */
+ __u32 e_clusters; /* Clusters covered by this extent */
+ __u64 e_blkno; /* Physical disk offset, in blocks */
+/*10*/
+} ocfs2_extent_rec;
+
+/*
+ * On disk extent list for OCFS2 (node in the tree). Note that this
+ * is contained inside ocfs2_dinode or ocfs2_extent_block, so the
+ * offsets are relative to ocfs2_dinode.id2.i_list or
+ * ocfs2_extent_block.h_list, respectively.
+ */
+typedef struct _ocfs2_extent_list {
+/*00*/ __s16 l_tree_depth; /* Extent tree depth from this
+ point. -1 means data extents
+ hang directly off this
+ header (a leaf) */
+ __u16 l_count; /* Number of extent records */
+ __u16 l_next_free_rec; /* Next unused extent slot */
+ __u16 l_reserved1;
+ __u64 l_reserved2; /* Pad to
+ sizeof(ocfs2_extent_rec) */
+/*10*/ ocfs2_extent_rec l_recs[0]; /* Extent records */
+} ocfs2_extent_list;
+
+/*
+ * On disk extent block (indirect block) for OCFS2
+ */
+typedef struct _ocfs2_extent_block
+{
+/*00*/ __u8 h_signature[8]; /* Signature for verification */
+ __u64 h_suballoc_blkno; /* Node suballocator offset,
+ in blocks */
+/*10*/ __u16 h_suballoc_node; /* Node suballocator this
+ extent_header belongs to */
+ __u16 h_reserved1;
+ __u32 h_reserved2;
+ __u64 h_blkno; /* Offset on disk, in blocks */
+/*20*/ __u64 h_parent_blk; /* Offset on disk, in blocks,
+ of this block's parent in the
+ tree */
+ __u64 h_next_leaf_blk; /* Offset on disk, in blocks,
+ of next leaf header pointing
+ to data */
+/*30*/ ocfs2_extent_list h_list; /* Extent record list */
+/* Actual on-disk size is one block */
+} ocfs2_extent_block;
+
+/*
+ * On disk lock structure for OCFS2
+ */
+typedef struct _ocfs2_disk_lock
+{
+/*00*/ __u32 dl_master; /* Node number of current master */
+ __u8 dl_level; /* Lock level */
+ __u8 dl_reserved1[3]; /* Pad to u64 */
+ __u64 dl_seq_num; /* Lock transaction seqnum */
+/*10*/ __u32 dl_node_map[8]; /* Bitmap of interested nodes,
+ was __u32 */
+/*30*/
+} ocfs2_disk_lock;
+
+/*
+ * On disk superblock for OCFS2
+ * Note that it is contained inside an ocfs2_dinode, so all offsets
+ * are relative to the start of ocfs2_dinode.id2.
+ */
+typedef struct _ocfs2_super_block {
+/*00*/ __u16 s_major_rev_level;
+ __u16 s_minor_rev_level;
+ __u16 s_mnt_count;
+ __s16 s_max_mnt_count;
+ __u16 s_state; /* File system state */
+ __u16 s_errors; /* Behaviour when detecting errors */
+ __u32 s_checkinterval; /* Max time between checks */
+/*10*/ __u64 s_lastcheck; /* Time of last check */
+ __u32 s_creator_os; /* OS */
+ __u32 s_feature_compat; /* Compatible feature set */
+/*20*/ __u32 s_feature_incompat; /* Incompatible feature set */
+ __u32 s_feature_ro_compat; /* Readonly-compatible feature set */
+ __u64 s_root_blkno; /* Offset, in blocks, of root directory
+ dinode */
+/*30*/ __u64 s_system_dir_blkno; /* Offset, in blocks, of system
+ directory dinode */
+ __u32 s_blocksize_bits; /* Blocksize for this fs */
+ __u32 s_clustersize_bits; /* Clustersize for this fs */
+/*40*/ __u32 s_max_nodes; /* Max nodes in this cluster before
+ tunefs required */
+ __u32 s_reserved1;
+ __u64 s_reserved2;
+/*50*/ __u8 s_label[64]; /* Label for mounting, etc. */
+/*90*/ __u8 s_uuid[16]; /* Was vol_id */
+/*A0*/
+} ocfs2_super_block;
+
+/*
+ * Local allocation bitmap for OCFS2 nodes
+ * Note that it exists inside an ocfs2_dinode, so all offsets are
+ * relative to the start of ocfs2_dinode.id2.
+ */
+typedef struct _ocfs2_local_alloc
+{
+/*00*/ __u32 la_bm_off; /* Starting bit offset in main bitmap */
+ /* Do we want to use id1.bitmap1? */
+ __u16 la_bm_bits; /* Number of bits from main bitmap */
+ __u16 la_bits_set; /* Number of set bits */
+ __u16 la_size; /* Size of included bitmap, in bytes */
+ __u16 la_reserved1;
+ __u32 la_reserved2;
+/*10*/ __u8 la_bitmap[0];
+} ocfs2_local_alloc;
+
+/*
+ * On disk inode for OCFS2
+ */
+typedef struct _ocfs2_dinode {
+/*00*/ __u8 i_signature[8]; /* Signature for validation */
+ __u32 i_generation; /* Generation number */
+ __u16 i_reserved1;
+ __u16 i_suballoc_node; /* Node suballocator this inode
+ belongs to */
+/*10*/ __u64 i_suballoc_blkno; /* Node suballocator offset,
+ in blocks */
+/*18*/ ocfs2_disk_lock i_disk_lock; /* Lock structure */
+/*48*/ __u32 i_uid; /* Owner UID */
+ __u32 i_gid; /* Owning GID */
+/*50*/ __u64 i_size; /* Size in bytes */
+ __u16 i_mode; /* File mode */
+ __u16 i_links_count; /* Links count */
+ __u32 i_flags; /* File flags */
+/*60*/ __u64 i_atime; /* Access time */
+ __u64 i_ctime; /* Creation time */
+/*70*/ __u64 i_mtime; /* Modification time */
+ __u64 i_dtime; /* Deletion time */
+/*80*/ __u64 i_blkno; /* Offset on disk, in blocks */
+ __u32 i_clusters; /* Cluster count */
+ __u32 i_reserved2;
+/*90*/ __u64 i_last_eb_blk; /* Pointer to last extent
+ block */
+ __u64 i_reserved3;
+/*A0*/ __u64 i_reserved4;
+ __u64 i_reserved5;
+/*B0*/ __u64 i_reserved6;
+ union {
+ __u64 i_pad1; /* Generic way to refer to this 64bit
+ union */
+ struct {
+ __u64 i_rdev; /* Device number */
+ } dev1;
+ struct { /* Info for bitmap system inodes */
+ __u32 i_used; /* Bits (ie, clusters) used */
+ __u32 i_total; /* Total bits (clusters) available */
+ } bitmap1;
+ } id1; /* Inode type dependent 1 */
+/*C0*/ union {
+ ocfs2_super_block i_super;
+ ocfs2_local_alloc i_lab;
+ ocfs2_extent_list i_list;
+ } id2;
+/* Actual on-disk size is one block */
+} ocfs2_dinode;
+
+/*
+ * On-disk directory entry structure for OCFS2
+ */
+struct ocfs2_dir_entry {
+/*00*/ __u64 inode; /* Inode number */
+ __u16 rec_len; /* Directory entry length */
+ __u8 name_len; /* Name length */
+ __u8 file_type;
+/*0C*/ char name[OCFS2_MAX_FILENAME_LENGTH]; /* File name */
+/* Actual on-disk length specified by rec_len */
+};
+
+/*
+ * On-disk IPC configuration for an OCFS2 node.
+ */
+typedef struct _ocfs_ipc_config_info
+{
+/*00*/ __u16 ip_version; /* IP version in NBO */
+ __u16 ip_port; /* IP port in NBO */
+ __u32 ip_reserved1;
+ __u64 ip_reserved2;
+/*10*/ union {
+ __u32 ip_addr4; /* IPv4 address in NBO */
+ __u32 ip_addr6[4]; /* IPv6 address in NBO */
+ } addr_u;
+/*20*/
+} ocfs_ipc_config_info;
+
+/*
+ * On-disk structure representing a Global Unique ID for an OCFS2 node.
+ *
+ * The GUID has two parts. The host_id is a generally-randomly-unique
+ * hex-as-ascii string of 20 characters (10 bytes). The mac_id field
+ * is, unsurprisingly, the MAC address of the network card that the
+ * IPC mechanism will be using (the address in
+ * ocfs_ipc_config_info.addr_u). This should (ha-ha) provide a unique
+ * identifier for a node in the OCFS2 cluster. It has the added
+ * benefit of detecting when a node has changed network cards
+ * (host_id is the same, mac_id has changed) or when an identical
+ * mac address is on a different node (the converse).
+ */
+typedef union _ocfs_guid
+{
+/*00*/ struct
+ {
+ char host_id[OCFS2_GUID_HOSTID_LEN];
+ char mac_id[OCFS2_GUID_MACID_LEN];
+ } id;
+ __u8 guid[OCFS2_GUID_LEN];
+/*20*/
+} ocfs_guid;
+
+/*
+ * On-disk configuration information for an OCFS2 node. A node
+ * populates its own info for other nodes to read and use.
+ */
+typedef struct _ocfs_node_config_info
+{
+/*00*/ ocfs2_disk_lock disk_lock; /* Lock on the info */
+/*30*/ ocfs_guid guid; /* GUID */
+/*50*/ ocfs_ipc_config_info ipc_config; /* IPC info */
+/*70*/ __u8 node_name[MAX_NODE_NAME_LENGTH+1]; /* Name */
+/*91*/ __u8 name_pad[7]; /* Pad to align (UGH) */
+/*98*/
+} ocfs_node_config_info;
+
+/*
+ * On-disk ... for OCFS2. FIXME this description.
+ */
+typedef struct _ocfs_node_config_hdr
+{
+/*00*/ ocfs2_disk_lock disk_lock;
+/*30*/ __u8 signature[OCFS2_NODE_CONFIG_SIGN_LEN];
+ __u32 version;
+ __u32 num_nodes;
+/*40*/ __u32 last_node;
+ __u32 onch_pad;
+ __u64 cfg_seq_num;
+/*50*/
+} ocfs_node_config_hdr;
+
+
+#ifdef __KERNEL__
+static inline int ocfs2_extent_recs_per_inode(struct super_block *sb)
+{
+ int size;
+
+ size = sb->s_blocksize -
+ offsetof(struct _ocfs2_dinode, id2.i_list.l_recs);
+
+ return size / sizeof(struct _ocfs2_extent_rec);
+}
+
+static inline int ocfs2_extent_recs_per_eb(struct super_block *sb)
+{
+ int size;
+
+ size = sb->s_blocksize -
+ offsetof(struct _ocfs2_extent_block, h_list.l_recs);
+
+ return size / sizeof(struct _ocfs2_extent_rec);
+}
+
+static inline int ocfs2_local_alloc_size(struct super_block *sb)
+{
+ /*
+ * Perhaps change one day when we want to be dynamic
+ * based on sb->s_blocksize.
+ */
+ return OCFS2_LOCAL_BITMAP_DEFAULT_SIZE;
+}
+#else
+static inline int ocfs2_extent_recs_per_inode(int blocksize)
+{
+ int size;
+
+ size = blocksize -
+ offsetof(struct _ocfs2_dinode, id2.i_list.l_recs);
+
+ return size / sizeof(struct _ocfs2_extent_rec);
+}
+
+static inline int ocfs2_extent_recs_per_eb(int blocksize)
+{
+ int size;
+
+ size = blocksize -
+ offsetof(struct _ocfs2_extent_block, h_list.l_recs);
+
+ return size / sizeof(struct _ocfs2_extent_rec);
+}
+
+static inline int ocfs2_local_alloc_size(int blocksize)
+{
+ return OCFS2_LOCAL_BITMAP_DEFAULT_SIZE;
+}
+#endif /* __KERNEL__ */
+
+
+static inline int ocfs2_system_inode_is_global(int type)
+{
+ return ((type >= 0) &&
+ (type <= OCFS2_LAST_GLOBAL_SYSTEM_INODE));
+}
+
+static inline int ocfs2_sprintf_system_inode_name(char *buf, int len,
+						  int type, int node)
+{
+	int chars;
+
+	/*
+	 * Write the name of system inode 'type' into buf (snprintf bounded by
+	 * len) and return snprintf's result.  Types up to
+	 * OCFS2_LAST_GLOBAL_SYSTEM_INODE are data; later ones are formats for 'node'.
+	 */
+	if (type <= OCFS2_LAST_GLOBAL_SYSTEM_INODE)
+		chars = snprintf(buf, len, "%s",
+				 ocfs2_system_inode_names[type]);
+	else
+		chars = snprintf(buf, len,
+				 ocfs2_system_inode_names[type], node);
+
+	return chars;
+}
+
+#endif /* _OCFS2_FS_H */
Added: trunk/src/ocfs_buffer_head.h
===================================================================
--- trunk/src/ocfs_buffer_head.h 2004-06-17 16:59:59 UTC (rev 1127)
+++ trunk/src/ocfs_buffer_head.h 2004-06-17 21:06:33 UTC (rev 1128)
@@ -0,0 +1,237 @@
+/* -*- mode: c; c-basic-offset: 8; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
+ * ocfs_buffer_head.h
+ *
+ * Buffer cache handling functions defined
+ *
+ * Copyright (C) 2002, 2004 Oracle. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ *
+ * Authors: Kurt Hackel, Mark Fasheh, Sunil Mushran, Wim Coekaerts,
+ * Manish Singh
+ */
+
+#ifndef OCFSIO_H
+#define OCFSIO_H
+
+#include <linux/compiler.h>
+
+/* UGLY place to put it, but it is for the best */
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)
+# include <linux/locks.h>
+#else
+# include <linux/buffer_head.h>
+#endif
+
+/* Yosh made me do it. */
+/*
+ * Forward declarations of the single-buffer wrappers; their static
+ * inline definitions appear further down in this header.
+ */
+static inline int ocfs_write_bh (ocfs_super *osb,
+ struct buffer_head *bh,
+ int flags,
+ struct inode *inode);
+static inline int ocfs_read_bh (ocfs_super *osb,
+ __u64 off,
+ struct buffer_head **bh,
+ int flags,
+ struct inode *inode);
+
+/*
+ * Multi-buffer read/write routines. Only declared here; they are
+ * defined elsewhere.
+ */
+int ocfs_write_bhs (ocfs_super *osb,
+ struct buffer_head *bh[],
+ int nr,
+ int flags,
+ struct inode *inode);
+int ocfs_read_bhs (ocfs_super *osb,
+ __u64 off,
+ __u64 len,
+ struct buffer_head *bhs[],
+ int flags,
+ struct inode *inode);
+
+
+#define OCFS_BH_CACHED 1
+#define OCFS_BH_COND_CACHED 2
+#define OCFS_BH_CONCURRENT_WRITE 4 /* This should only be used by ocfs_worker */
+#define OCFS_BH_IGNORE_JBD 8 /* This should only be used by ocfs_checkpoint_handle! */
+#define OCFS_BH_READAHEAD 16 /* use this to pass READA down to submit_bh */
+
+
+#define BH_Modified 18
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)
+/*
+ * Copied right out of the 2.6.2 kernel's buffer_head.h:
+ * macro tricks to expand the set_buffer_foo(), clear_buffer_foo()
+ * and buffer_foo() functions.
+ *
+ * BUFFER_FNS(bit, name) emits three static inlines operating on
+ * bh->b_state bit BH_<bit>: set_buffer_<name>() sets it,
+ * clear_buffer_<name>() clears it and buffer_<name>() tests it.
+ * Only compiled for pre-2.6 kernels.
+ */
+#define BUFFER_FNS(bit, name) \
+static inline void set_buffer_##name(struct buffer_head *bh) \
+{ \
+ set_bit(BH_##bit, &(bh)->b_state); \
+} \
+static inline void clear_buffer_##name(struct buffer_head *bh) \
+{ \
+ clear_bit(BH_##bit, &(bh)->b_state); \
+} \
+static inline int buffer_##name(struct buffer_head *bh) \
+{ \
+ return test_bit(BH_##bit, &(bh)->b_state); \
+}
+
+#undef buffer_uptodate
+#undef buffer_dirty
+BUFFER_FNS(Uptodate, uptodate)
+BUFFER_FNS(Dirty, dirty)
+
+#endif /* 2.4.x kernel */
+
+#define OCFS_BH_GET_DATA(bh) ((void *) ((bh)->b_data))
+#define OCFS_BH_GET_DATA_READ(bh) OCFS_BH_GET_DATA(bh)
+#define OCFS_BH_GET_DATA_WRITE(bh) OCFS_BH_GET_DATA(bh)
+#define OCFS_BH_GET_DATA_WRITE_TRYLOCK(bh) OCFS_BH_GET_DATA(bh)
+#define OCFS_BH_PUT_DATA(bh) do { } while(0)
+
+/*
+ * Layout of the sequence number that SET_BH_SEQNUM(), CLEAR_BH_SEQNUM()
+ * and TEST_BH_SEQNUM() keep in bh->b_state:
+ * STATE_BH_BITS - width of the sequence number that gets stored
+ * USED_BH_BITS - bit position at which the sequence number starts
+ * STATE_BIT_MAX - 1 << STATE_BH_BITS
+ * STATE_BIT_MAX_MASK - mask of the low STATE_BH_BITS bits
+ * STATE_BIT_MASK - every b_state bit at or above USED_BH_BITS
+ */
+#define STATE_BH_BITS 8
+#define USED_BH_BITS 23 /* Number of BH bits used up through JBD */
+
+#define STATE_BIT_MAX (1 << STATE_BH_BITS)
+#define STATE_BIT_MAX_MASK ((1 << STATE_BH_BITS)-1)
+#define STATE_BIT_MASK ((~0UL) << USED_BH_BITS)
+
+
+/*
+ * Zero every bit of bh->b_state covered by STATE_BIT_MASK, i.e. drop
+ * whatever sequence number is currently stored there. This is a plain
+ * read-modify-write, not an atomic bit operation.
+ */
+static inline void CLEAR_BH_SEQNUM(struct buffer_head *bh)
+{
+	bh->b_state &= ~STATE_BIT_MASK;
+}
+
+/*
+ * Stamp bh with the inode's current clean sequence number: the low
+ * STATE_BH_BITS bits of the counter are shifted up to USED_BH_BITS and
+ * OR-ed into bh->b_state after any previous stamp has been cleared.
+ */
+static inline void SET_BH_SEQNUM(struct inode *inode, struct buffer_head *bh)
+{
+	unsigned int seq;
+
+	seq = atomic_read(GET_INODE_CLEAN_SEQ(inode)) & STATE_BIT_MAX_MASK;
+	seq <<= USED_BH_BITS;
+
+	CLEAR_BH_SEQNUM(bh);
+	bh->b_state |= seq;
+}
+
+/*
+ * Return non-zero when the sequence number stored in bh->b_state (the
+ * STATE_BIT_MASK bits shifted down by USED_BH_BITS) equals the inode's
+ * current clean sequence counter.
+ */
+static inline int TEST_BH_SEQNUM(struct inode *inode, struct buffer_head *bh)
+{
+	unsigned int stored = (bh->b_state & STATE_BIT_MASK) >> USED_BH_BITS;
+
+	return stored == atomic_read(GET_INODE_CLEAN_SEQ(inode));
+}
+
+/*
+ * Sanity check for writes to block 0. Buffers for any other block pass
+ * straight through with 0. For block 0 the data must begin with
+ * OCFS_VOLUME_SIGNATURE; if it does not (or the data pointer is NULL)
+ * a message is printed and -EIO is returned.
+ */
+static inline int check_block_zero_write(struct buffer_head *bh)
+{
+	ocfs_vol_disk_hdr *hdr;
+	int status = 0;
+
+	if (!unlikely(bh->b_blocknr == 0))
+		return 0;
+
+	hdr = OCFS_BH_GET_DATA_READ(bh);
+	if (hdr == NULL) {
+		printk ("ocfs2: failed to map bh page!!!\n");
+		return -EIO;
+	}
+
+	if (memcmp(hdr->signature, OCFS_VOLUME_SIGNATURE,
+		   strlen(OCFS_VOLUME_SIGNATURE)) != 0) {
+		printk("ocfs2: WARNING! attempting to write non "
+		       "volume header to block 0\n");
+		status = -EIO;
+	}
+
+	OCFS_BH_PUT_DATA(bh);
+	return status;
+}
+
+/*
+ * Debug helper: when bh is non-NULL and its data passes
+ * IS_VALID_FILE_ENTRY(), print the entry's file_size and this_sector
+ * fields together with the calling function and line number.
+ * Note that bh is expanded without parentheses, so pass a plain
+ * identifier rather than an expression.
+ */
+#define OCFS_PRINT_FE_SIZE(bh) \
+do { \
+ if (bh) { \
+ ocfs2_dinode *__fe = bh->b_data; \
+ if (IS_VALID_FILE_ENTRY(__fe)) { \
+ printk("ocfs2: %s:%d fe_check: file_size = (%llu), " \
+ "this_sector = (%llu)\n", \
+ __FUNCTION__, __LINE__, \
+ __fe->file_size, \
+ __fe->this_sector); \
+ } \
+ } \
+} while(0)
+
+/*
+ * Return 1 when each of the first 512 bytes of bh->b_data is 'P',
+ * and 0 as soon as any other byte value is encountered.
+ */
+static inline int ocfs_poison(struct buffer_head *bh)
+{
+	const char *p = bh->b_data;
+	const char *end = p + 512;
+
+	while (p < end) {
+		if (*p++ != 'P')
+			return 0;
+	}
+
+	return 1;
+}
+
+/*
+ * Print bh->b_blocknr followed by the first 512 bytes of bh->b_data in
+ * hex, 16 values per output line.
+ *
+ * The expansion uses a variable named i as its loop counter without
+ * declaring it: the caller must have an integer i in scope, and its
+ * value is overwritten. bh is expanded without parentheses.
+ */
+#define OCFS_DO_HEX_DUMP(bh) \
+do { \
+ printk("bh->b_blocknr = %lu, bh->b_data:\n", bh->b_blocknr); \
+ for(i = 0; i < 512; i++) { \
+ printk("%03x ", bh->b_data[i]); \
+ if ( ((i+1) % 16) == 0 ) \
+ printk("\n"); \
+ } \
+ printk("\n"); \
+} while (0)
+
+/*
+ * Write a single buffer head by passing it to ocfs_write_bhs() as a
+ * one-element array. Returns whatever ocfs_write_bhs() returns.
+ */
+static inline int ocfs_write_bh (ocfs_super * osb, struct buffer_head *bh,
+ int flags, struct inode *inode)
+{
+ int status;
+
+ /* Timing hooks; both expand to nothing unless OCFS_DBG_TIMING is set. */
+ IO_FUNC_TIMING_DECL
+
+ status = ocfs_write_bhs (osb, &bh, 1, flags, inode);
+
+ IO_FUNC_TIMING_PRINT("ocfs_write_bh", status);
+
+ return status;
+}
+
+/*
+ * Read one block into *bh: a thin wrapper that asks ocfs_read_bhs() for
+ * osb->sb->s_blocksize bytes starting at off. A NULL bh pointer is
+ * rejected with -EINVAL before ocfs_read_bhs() is called.
+ */
+static inline int ocfs_read_bh (ocfs_super * osb, __u64 off,
+				struct buffer_head **bh, int flags,
+				struct inode *inode)
+{
+	int status;
+
+	IO_FUNC_TIMING_DECL
+
+	if (bh != NULL) {
+		status = ocfs_read_bhs(osb, off, osb->sb->s_blocksize, bh,
+				       flags, inode);
+	} else {
+		printk("ocfs2: bh == NULL\n");
+		status = -EINVAL;
+	}
+
+	IO_FUNC_TIMING_PRINT("ocfs_read_bh", status);
+
+	return status;
+}
+
+#endif /* OCFSIO_H */
Added: trunk/src/ocfs_compat.h
===================================================================
--- trunk/src/ocfs_compat.h 2004-06-17 16:59:59 UTC (rev 1127)
+++ trunk/src/ocfs_compat.h 2004-06-17 21:06:33 UTC (rev 1128)
@@ -0,0 +1,116 @@
+/* -*- mode: c; c-basic-offset: 8; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
+ * ocfs_compat.h
+ *
+ * Compatibility stuff for 2.4
+ *
+ * Copyright (C) 2004 Oracle. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License as published by the Free Software Foundation, version
+ * 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ *
+ * Authors: Kurt Hackel, Mark Fasheh, Sunil Mushran, Wim Coekaerts,
+ * Manish Singh, Neeraj Goyal, Suchit Kaura
+ */
+
+#ifndef OCFS_COMPAT_H
+#define OCFS_COMPAT_H
+
+#include <linux/version.h>
+#include <linux/types.h>
+#include <linux/kdev_t.h>
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)
+
+#ifdef __ia64__
+extern inline void prefetch(const void *x);
+extern inline void prefetchw(const void *x);
+#else
+static inline void prefetch(const void *x);
+static inline void prefetchw(const void *x);
+#endif
+extern inline int generic_fls(int x);
+extern inline int get_bitmask_order(unsigned int count);
+/* XXX Hack to avoid warning */
+struct mem_dqinfo;
+extern inline void mark_info_dirty(struct mem_dqinfo *info);
+
+#include <linux/sched.h>
+
+#define OCFS_GENERIC_SB_MEMBER(sb) ((sb)->u.generic_sbp)
+
+#define ocfs_get_seconds(t) (t)
+
+#define wait_on_page_locked(p) wait_on_page(p)
+#define PageUptodate(p) Page_Uptodate(p)
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,4,18)
+#define unlock_page(p) UnlockPage(p)
+
+/*
+ * Fallback for kernels older than 2.4.18: wraps getblk() using the
+ * super block's device and block size.
+ */
+static inline struct buffer_head * sb_getblk(struct super_block *sb, int block)
+{
+ return getblk(sb->s_dev, block, sb->s_blocksize);
+}
+#endif
+
+#define io_schedule schedule
+#define io_schedule_timeout schedule_timeout
+/*
+ * 2.4 stand-in for bdev_hardsect_size(): forwards the block device's
+ * bd_dev to get_hardsect_size(). The expansion deliberately carries no
+ * trailing semicolon so the macro can be used inside expressions.
+ */
+#define bdev_hardsect_size(b) get_hardsect_size((b)->bd_dev)
+
+/*
+ * dequeue_signal_lock(): call dequeue_signal(blocked, info) while
+ * holding the task's signal lock with spin_lock_irq(). Which lock is
+ * taken depends on the kernel flavour: task->sighand->siglock when
+ * HAVE_NPTL is defined, task->sigmask_lock otherwise.
+ * NOTE(review): task is only used to find the lock; dequeue_signal() is
+ * not passed the task -- presumably it acts on current; confirm.
+ */
+#ifdef HAVE_NPTL
+static inline void dequeue_signal_lock(struct task_struct *task,
+ sigset_t *blocked, siginfo_t *info)
+{
+ spin_lock_irq(&task->sighand->siglock);
+ dequeue_signal(blocked, info);
+ spin_unlock_irq(&task->sighand->siglock);
+}
+#else
+static inline void dequeue_signal_lock(struct task_struct *task,
+ sigset_t *blocked, siginfo_t *info)
+{
+ spin_lock_irq(&task->sigmask_lock);
+ dequeue_signal(blocked, info);
+ spin_unlock_irq(&task->sigmask_lock);
+}
+#endif
+
+/*
+ * Build a dev_t from the low 16 bits of val: bits 8-15 become the major
+ * number and bits 0-7 the minor number. Higher bits are ignored.
+ */
+static inline dev_t huge_decode_dev(u64 val)
+{
+	int major = (int)((val >> 8) & 0xFF);
+	int minor = (int)(val & 0xFF);
+
+	return MKDEV(major, minor);
+}
+
+/*
+ * Pack a dev_t into a u64 as (major << 8) | minor, the layout that
+ * huge_decode_dev() reads back.
+ */
+static inline u64 huge_encode_dev(dev_t dev)
+{
+ return (u64)((MAJOR(dev) << 8) | MINOR(dev));
+}
+
+#else /* LINUX_VERSION_CODE < 2.6 */
+
+#define OCFS_GENERIC_SB_MEMBER(sb) ((sb)->s_fs_info)
+
+#define ocfs_get_seconds(t) ((t).tv_sec)
+
+#endif /* LINUX_VERSION_CODE < 2.6 */
+
+/*
+ * Number of longs needed to hold 'bits' bits, rounding up. Only defined
+ * here when no earlier header has provided it.
+ */
+#ifndef BITS_TO_LONGS
+#define BITS_TO_LONGS(bits) \
+ (((bits)+BITS_PER_LONG-1)/BITS_PER_LONG)
+#endif
+
+#endif /* OCFS_COMPAT_H */
+
Added: trunk/src/ocfs_journal.h
===================================================================
--- trunk/src/ocfs_journal.h 2004-06-17 16:59:59 UTC (rev 1127)
+++ trunk/src/ocfs_journal.h 2004-06-17 21:06:33 UTC (rev 1128)
@@ -0,0 +1,391 @@
+/* -*- mode: c; c-basic-offset: 8; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
+ * ocfs_journal.h
+ *
+ * Defines journalling api and structures.
+ *
+ * Copyright (C) 2003, 2004 Oracle. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ *
+ * Authors: Kurt Hackel, Mark Fasheh, Sunil Mushran, Wim Coekaerts,
+ * Manish Singh
+ */
+
+#ifndef _OCFSJOURNAL_H_
+#define _OCFSJOURNAL_H_
+
+#include <linux/fs.h>
+#include <linux/jbd.h>
+
+#define OCFS_JOURNAL_CURRENT_VERSION 1
+#define OCFS_DEFAULT_COMMIT_INTERVAL (HZ * 5)
+#define OCFS_JOURNAL_DEFAULT_SIZE (8 * ONE_MEGA_BYTE)
+
+#undef OCFS_JOURNAL_USE_CB
+
+/* Values taken by the 'state' member of struct _ocfs_journal. */
+enum ocfs_journal_state {
+ OCFS_JOURNAL_FREE = 0,
+ OCFS_JOURNAL_LOADED,
+ OCFS_JOURNAL_IN_SHUTDOWN,
+};
+
+struct _ocfs_super;
+struct _ocfs2_dinode;
+struct _ocfs_journal_handle;
+
+typedef struct _ocfs_journal ocfs_journal;
+/* Per-journal bookkeeping wrapped around the kernel's journal_t. */
+struct _ocfs_journal {
+ enum ocfs_journal_state state; /* Journal's current state */
+
+ journal_t *k_journal; /* The kernel's journal type */
+ struct inode *k_inode; /* Kernel inode pointing to
+ * this journal */
+ __u8 version; /* Journal version */
+ __u64 lock_id; /* Lock id for journal file */
+ struct _ocfs_super *osb; /* pointer to the super
+ * block for the node
+ * we're currently
+ * running on -- not
+ * necessarily the super
+ * block from the node
+ * which we usually run
+ * from (recovery,
+ * etc) */
+ __u32 node_num; /* Whose journal are we? */
+ struct buffer_head *lockbh; /* Journal disk lock, used
+ to access file entry */
+ atomic_t num_trans; /* Number of transactions
+ * currently in the system. */
+ /* locking order: trans_lock -> commit_sem -> journal.curr.list_lock */
+ struct semaphore commit_sem; /* protects *everything*
+ * in the committed list
+ * and also protects
+ * 'curr' from
+ * removal/creation. */
+ struct list_head commited; /* doubly linked list of all
+ * committed handles awaiting
+ * checkpointing. */
+ struct rw_semaphore trans_barrier;
+};
+
+typedef struct _ocfs_journal_lock ocfs_journal_lock;
+/*
+ * One deferred lock release. The type, flags, bh, inode and req_io_sem
+ * members mirror the parameters of ocfs_handle_add_lock().
+ * NOTE(review): lock_list presumably links the entry into the 'locks'
+ * list of an ocfs_journal_handle -- confirm against journal.c.
+ */
+struct _ocfs_journal_lock {
+ __u32 type;
+ __u32 flags;
+ struct buffer_head *bh;
+ struct inode *inode;
+ int req_io_sem;
+ struct list_head lock_list;
+};
+
+typedef struct _ocfs_journal_copyout ocfs_journal_copyout;
+/*
+ * A saved copy of one buffer; element type of the co_buffs array in
+ * struct _ocfs_journal_handle.
+ */
+struct _ocfs_journal_copyout {
+ __u8 forget; /* should we journal_forget this
+ * buffer instead? */
+ unsigned long blocknr; /* what block is this for? */
+ char *data; /* the actual data */
+};
+
+/* State carried by one transaction handle, wrapping the kernel handle_t. */
+struct _ocfs_journal_handle {
+ handle_t *k_handle; /* kernel handle. */
+ ocfs_journal *journal;
+ struct _ocfs_super *osb; /* what super block we belong to */
+ __u32 flags; /* see flags below. */
+ struct list_head h_list; /* points to whatever list we're
+ * on. */
+ int max_buffs; /* Buffs reserved by this handle */
+
+ /* We know how many buffers (max) we'll have for this
+ * transaction so we can just allocate an array of pointers at
+ * the same time as the creation of this handle. */
+ int num_buffs;
+ struct buffer_head **buffs;
+
+ /* The following three fields are for ocfs_handle_add_lock */
+ spinlock_t list_lock; /* Used to protect the 'locks'
+ * list. Only used if the
+ * handle is the same as
+ * journal->curr. otherwise, we
+ * should be in the committed
+ * list in which case we're
+ * protected by commit_sem */
+ int num_locks;
+ struct list_head locks; /* List of locks to release
+ * on commit/abort. */
+
+ int num_co;
+ ocfs_journal_copyout *co_buffs; /* Copy-out buffers. On 1st
+ * journal_access of a buffer
+ * we make a copy of it into
+ * one of these. That way if we
+ * abort we can place the
+ * original copy back into the
+ * buffer. */
+
+ struct _ocfs_bitmap_free_head *commit_bits; /* bits to be
+ * freed ONLY if
+ * we commit the
+ * handle. */
+ __u64 new_file_lockid; /* offset for the
+ * most recently
+ * created file
+ * sitting on this
+ * journal handle */
+ struct list_head inode_list;
+};
+
+/* should we checkpoint this handle on commit? */
+#define OCFS_HANDLE_CHECKPOINT 1
+/* should we sync-commit this handle? */
+#define OCFS_HANDLE_SYNC 2
+/* was local alloc used (should we release the sem?) */
+#define OCFS_HANDLE_LOCAL_ALLOC 4
+
+/*
+ * Release every copy-out buffer held by the handle. Entries are walked
+ * from the last used slot down to slot 0, decrementing num_co as we go;
+ * afterwards the co_buffs array itself is freed and the pointer reset
+ * to NULL.
+ */
+static inline void ocfs_handle_free_all_copyout(ocfs_journal_handle *handle)
+{
+	while (handle->num_co != 0) {
+		char *saved = handle->co_buffs[--handle->num_co].data;
+
+		if (saved)
+			kfree(saved);
+	}
+
+	kfree(handle->co_buffs);
+	handle->co_buffs = NULL;
+}
+
+
+/*
+ * Turn the OCFS_HANDLE_CHECKPOINT flag on (checkpoint != 0) or off
+ * (checkpoint == 0) in handle->flags; other flag bits are untouched.
+ */
+static inline void ocfs_handle_set_checkpoint(ocfs_journal_handle *handle,
+					      int checkpoint)
+{
+	handle->flags = checkpoint
+		? (handle->flags | OCFS_HANDLE_CHECKPOINT)
+		: (handle->flags & ~OCFS_HANDLE_CHECKPOINT);
+}
+
+/*
+ * Turn the OCFS_HANDLE_SYNC flag on (sync != 0) or off (sync == 0) in
+ * handle->flags; other flag bits are untouched.
+ */
+static inline void ocfs_handle_set_sync(ocfs_journal_handle *handle, int sync)
+{
+	handle->flags = sync
+		? (handle->flags | OCFS_HANDLE_SYNC)
+		: (handle->flags & ~OCFS_HANDLE_SYNC);
+}
+
+/*
+ * Queue a range of bits on the handle's commit_bits list. The list head
+ * is allocated on first use; -ENOMEM is returned if that allocation
+ * fails, otherwise the result of ocfs_add_to_bitmap_free_head().
+ */
+static inline int ocfs_handle_add_commit_bits(ocfs_journal_handle *handle,
+					      __u32 len, __u32 fileoff,
+					      __u32 nodenum, __u32 type)
+{
+	if (!handle->commit_bits) {
+		handle->commit_bits = ocfs_alloc_bitmap_free_head();
+		if (!handle->commit_bits)
+			return -ENOMEM;
+	}
+
+	return ocfs_add_to_bitmap_free_head(handle->osb,
+					    handle->commit_bits,
+					    len, fileoff,
+					    nodenum, type);
+}
+
+
+/*
+ * Journal Control:
+ * Initialize, Load, Shutdown, Wipe, Create a journal.
+ *
+ * ocfs_journal_init - Initialize journal structures in the OSB.
+ * ocfs_journal_load - Load the given journal off disk. Replay it if
+ * there's transactions still in there.
+ * ocfs_journal_shutdown - Shutdown a journal, this will flush all
+ * uncommited, uncheckpointed transactions.
+ * ocfs_journal_wipe - Wipe transactions from a journal. Optionally
+ * zero out each block.
+ * ocfs_recovery_thread - Perform recovery on a node. osb is our own osb.
+ */
+int ocfs_journal_init(struct _ocfs_super *osb);
+void ocfs_journal_shutdown(struct _ocfs_super *osb);
+int ocfs_journal_wipe(ocfs_journal *journal, int full);
+int ocfs_journal_load(ocfs_journal *journal);
+void ocfs_recovery_thread(struct _ocfs_super *osb, int node_num);
+int ocfs_journal_new_file_search(struct _ocfs_super *osb, __u64 lockid);
+
+/*
+ * Transaction Handling:
+ * Manage the lifetime of a transaction handle.
+ *
+ * ocfs_start_trans - Begin a transaction. Give it an upper estimate of
+ * the number of blocks that will be changed during
+ * this handle.
+ * ocfs_commit_trans - Complete a handle.
+ * ocfs_abort_trans - Abort a running handle.
+ * ocfs_journal_access - Notify the handle that we want to journal this
+ * buffer. Will have to call ocfs_journal_dirty once
+ * we've actually dirtied it. Type is one of the
+ * OCFS_JOURNAL_ACCESS_* values defined below.
+ * ocfs_journal_dirty - Mark a journalled buffer as having dirty data.
+ * ocfs_handle_add_lock - Sometimes we need to delay lock release
+ * until after a transaction has been completed. Use
+ * ocfs_handle_add_lock to indicate that a lock needs
+ * to be released at the end of that handle. Locks
+ * will be released in the order that they are added.
+ * ocfs_handle_add_inode - Add a locked inode to a transaction.
+ */
+/* You must always start_trans with a number of buffs > 0, but it's
+ * perfectly legal to go through an entire transaction without having
+ * dirtied any buffers. */
+ocfs_journal_handle *ocfs_start_trans(struct _ocfs_super *osb, int max_buffs);
+void ocfs_commit_trans(ocfs_journal_handle *handle);
+void ocfs_abort_trans(ocfs_journal_handle *handle);
+/*
+ * Create access is for when we get a newly created buffer and we're
+ * not gonna read it off disk, but rather fill it ourselves. If it's
+ * not part of an already commiting transaction, we'll mark it to be
+ * passed to journal_forget in case of abort. Otherwise, it's treated
+ * just like a OCFS_JOURNAL_ACCESS_WRITE buffer.
+ *
+ * Write access is for when we read a block off disk and are going to
+ * modify it. This way the journalling layer knows it may need to make
+ * a copy of that block (if it's part of another, uncommited
+ * transaction) before we do so.
+ */
+#define OCFS_JOURNAL_ACCESS_CREATE 0
+#define OCFS_JOURNAL_ACCESS_WRITE 1
+#define OCFS_JOURNAL_ACCESS_UNDO 2
+int ocfs_journal_access(ocfs_journal_handle *handle,
+ struct buffer_head *bh, int type);
+/*
+ * A word about the journal_access/journal_dirty "dance". It is
+ * entirely legal to journal_access a buffer more than once (as long
+ * as the access type is the same -- I'm not sure what will happen if
+ * access type is different but this should never happen anyway) It is
+ * also legal to journal_dirty a buffer more than once. In fact, you
+ * can even journal_access a buffer after you've done a
+ * journal_access/journal_dirty pair. The only thing you cannot do
+ * however, is journal_dirty a buffer which you haven't yet passed to
+ * journal_access at least once.
+ *
+ * That said, 99% of the time this doesn't matter and this is what the
+ * path looks like:
+ *
+ * <read a bh>
+ * ocfs_journal_access(handle, bh, OCFS_JOURNAL_ACCESS_WRITE);
+ * <modify the bh>
+ * ocfs_journal_dirty(handle, bh);
+ */
+int ocfs_journal_dirty(ocfs_journal_handle *handle,
+ struct buffer_head *bh);
+void ocfs_handle_add_lock(ocfs_journal_handle *handle,
+ __u32 type, __u32 flags,
+ struct buffer_head *bh,
+ struct inode *inode,
+ int req_io_sem);
+/*
+ * Some transactions require us to leave inodes in a locked state
+ * until we either commit or abort because the buffer state can change
+ * in abort_trans. Use this function to lock those inodes and put them
+ * on the handle where they'll be cleaned up after the transaction
+ * completes.
+ */
+void ocfs_handle_add_inode(ocfs_journal_handle *handle,
+ struct inode *inode);
+
+/*
+ * Credit Macros:
+ * Convenience macros to calculate number of credits needed.
+ *
+ * For convenience sake, I have a set of macros here which calculate
+ * the *maximum* number of sectors which will be changed for various
+ * metadata updates. I also have a completely arbitrary 'fuzz' value
+ * which I'll add to some of these in case of a miscalculation. The
+ * journal is large enough, and we don't journal as much metadata
+ * that I'm not worried about the 'fuzz' taking up potential credits.
+ */
+#define OCFS_JOURNAL_FUZZ_CREDITS (5)
+#define OCFS_SINGLE_FILE_EXTEND_CREDITS (10)
+
+/* locknode + new fe + dirnode head + new dirnode for parent directory
+ * + extending (diralloc, filealloc, dirallocbitmap, fileallocbitmap)
+ * + a second dirnode for handling mkdir + 2 * 2 = 4 blocks for
+ * setting the bits in the dir alloc bitmap and the metadata alloc
+ * bitmap if dir alloc needs to be extended + some fuzz. */
+#define OCFS_MKNOD_CREDITS (1 + 1 + \
+ (OCFS_SINGLE_FILE_EXTEND_CREDITS * 4) + \
+ OCFS_JOURNAL_FUZZ_CREDITS)
+
+/* single file metadata updates * 3 because we might have to extend
+ * the file alloc and file alloc bitmap files + possible update to
+ * local bitmap. + 2 blocks for bits to set in the metadata alloc
+ * bitmap file */
+#define OCFS_FILE_EXTEND_CREDITS (OCFS_SINGLE_FILE_EXTEND_CREDITS * 3 \
+ + 1 + 2 + OCFS_JOURNAL_FUZZ_CREDITS)
+
+
+/* Now that we journal bitmap writes, this might get a bit more
+ * complicated, use this function to determine how many credits are
+ * needed for an extend. Unfortunately, we're in bytes because the
+ * rest of the file system is.
+ */
+static inline int ocfs_calc_extend_credits(struct super_block *sb,
+					   __u32 bytes_wanted)
+{
+	/* The request expressed in clusters, i.e. bitmap bits. */
+	unsigned int bits_needed = ocfs_clusters_for_bytes(sb, bytes_wanted);
+
+	/*
+	 * Allocation always happens in one large chunk, so the bitmap
+	 * blocks touched are those covering bits_needed, plus one.
+	 */
+	int main_bitmap_credits = ocfs_blocks_for_bits(sb, bits_needed) + 1;
+
+	/*
+	 * Room for extending a bitmap system file. System files grow by
+	 * up to 2mb at a time and the matching bitmap file (all of which
+	 * are blocksize allocators today) must zero the new bitmap data
+	 * for that 2mb. Even with 512 byte sectors that is only 512 bytes
+	 * of bitmap, so never more than two extra blocks; a little more
+	 * is added in case the request spans new and old blocks.
+	 *
+	 * NOTE: this assumption stops holding if an allocator is ever
+	 * added that hands out something other than blocksize chunks, or
+	 * if requests are expected to cover lots of bits at once.
+	 */
+	int sysfile_credits = 4;
+
+	return (main_bitmap_credits + sysfile_credits +
+		OCFS_FILE_EXTEND_CREDITS);
+}
+
+/* fe, anything along new 'edge' of tree + fuzz*/
+#define OCFS_FILE_TRUNCATE_CREDITS (1 + 4 + OCFS_JOURNAL_FUZZ_CREDITS)
+
+/* the file entry + the locknode + possibly the parent dirnode + fuzz */
+#define OCFS_FILE_DELETE_CREDITS (1 + 1 + 1 + OCFS_JOURNAL_FUZZ_CREDITS)
+
+/* need to create a new file and extend it to hold the info for the
+ * symlink we add one for a potential write of the main bitmap. Since
+ * we wind up with twice the fuzz because we reuse some macros so we
+ * subtract one.*/
+#define OCFS_SYMLINK_CREDITS (OCFS_MKNOD_CREDITS + OCFS_FILE_EXTEND_CREDITS \
+ + 1 - OCFS_JOURNAL_FUZZ_CREDITS)
+
+/* fe change, locknode change, dirnode head, times two plus a possible
+ * delete, plus a possible dirnode addition in insert_file, and fuzz */
+#define OCFS_FILE_RENAME_CREDITS (2 * (1 + 1 + 1) + OCFS_FILE_DELETE_CREDITS \
+ + OCFS_JOURNAL_FUZZ_CREDITS)
+
+/* On very rare open operations (it's a first open and we're not in
+ * the open map), we have to change the file entry. */
+#define OCFS_OPEN_CREDITS 1
+
+#endif /* _OCFSJOURNAL_H_ */
Added: trunk/src/ocfs_log.h
===================================================================
--- trunk/src/ocfs_log.h 2004-06-17 16:59:59 UTC (rev 1127)
+++ trunk/src/ocfs_log.h 2004-06-17 21:06:33 UTC (rev 1128)
@@ -0,0 +1,324 @@
+/* -*- mode: c; c-basic-offset: 8; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
+ * ocfs_log.h
+ *
+ * Defines macros and flags used for logging.
+ *
+ * Copyright (C) 2004 Oracle. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License as published by the Free Software Foundation, version
+ * 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ *
+ * Authors: Kurt Hackel, Mark Fasheh, Sunil Mushran, Wim Coekaerts,
+ * Manish Singh, Neeraj Goyal, Suchit Kaura
+ */
+
+#ifndef OCFS_LOG_H
+#define OCFS_LOG_H
+
+extern __u32 debug_context;
+extern __u32 debug_level;
+extern __u32 debug_exclude;
+
+
+
+/* Tracing Levels */
+#define OCFS_DEBUG_LEVEL_ERROR 0x00000001
+#define OCFS_DEBUG_LEVEL_TRACE 0x00000002
+
+#define OCFS_DEBUG_LEVEL_ENTRY 0x00000010
+#define OCFS_DEBUG_LEVEL_EXIT 0x00000020
+
+#define OCFS_DEBUG_LEVEL_TIMING 0x00000100
+#define OCFS_DEBUG_LEVEL_STACK 0x00000200
+
+#define OCFS_DEBUG_LEVEL_PRINTK 0x00001000
+#define OCFS_DEBUG_LEVEL_MALLOC 0x00002000
+
+/* Tracing Contexts */
+#define OCFS_DEBUG_CONTEXT_ALLOC 0x00000001 /* alloc.c */
+#define OCFS_DEBUG_CONTEXT_DIR 0x00000002 /* dir.c */
+#define OCFS_DEBUG_CONTEXT_EXTMAP 0x00000004 /* extmap.c */
+#define OCFS_DEBUG_CONTEXT_HEARTBEAT 0x00000008 /* heartbeat.c */
+#define OCFS_DEBUG_CONTEXT_IOCTL 0x00000010 /* ioctl.c */
+#define OCFS_DEBUG_CONTEXT_NM 0x00000020 /* nm.c */
+#define OCFS_DEBUG_CONTEXT_PROC 0x00000040 /* proc.c */
+#define OCFS_DEBUG_CONTEXT_SYMLINK 0x00000080 /* symlink.c */
+#define OCFS_DEBUG_CONTEXT_BITMAP 0x00000100 /* bitmap.c */
+#define OCFS_DEBUG_CONTEXT_FILE 0x00000200 /* file.c */
+#define OCFS_DEBUG_CONTEXT_INODE 0x00000400 /* inode.c */
+#define OCFS_DEBUG_CONTEXT_JOURNAL 0x00000800 /* journal.c */
+#define OCFS_DEBUG_CONTEXT_UNUSED1 0x00001000 /* */
+#define OCFS_DEBUG_CONTEXT_UNUSED2 0x00002000 /* */
+#define OCFS_DEBUG_CONTEXT_SYSFILE 0x00004000 /* sysfile.c */
+#define OCFS_DEBUG_CONTEXT_VOLCFG 0x00008000 /* volcfg.c */
+#define OCFS_DEBUG_CONTEXT_DCACHE 0x00010000 /* dcache.c */
+#define OCFS_DEBUG_CONTEXT_DLM 0x00020000 /* dlm.c */
+#define OCFS_DEBUG_CONTEXT_HASH 0x00040000 /* hash.c */
+#define OCFS_DEBUG_CONTEXT_IO 0x00080000 /* io.c */
+#define OCFS_DEBUG_CONTEXT_NAMEI 0x00100000 /* namei.c */
+#define OCFS_DEBUG_CONTEXT_OSB 0x00200000 /* osb.c */
+#define OCFS_DEBUG_CONTEXT_SUPER 0x00400000 /* super.c */
+#define OCFS_DEBUG_CONTEXT_UTIL 0x00800000 /* util.c */
+#define OCFS_DEBUG_CONTEXT_VOTE 0x01000000 /* vote.c */
+#define OCFS_DEBUG_CONTEXT_LOCKRES 0x02000000 /* lockres.c */
+
+
+/*
+ * Optional TSC-based timing of the I/O wrappers.
+ *
+ * With OCFS_DBG_TIMING defined, IO_FUNC_TIMING_DECL declares the locals
+ * 'begin' and 'end' and takes the starting rdtsc sample; it carries its
+ * own semicolons and is written without one at the point of use.
+ * IO_FUNC_TIMING_PRINT() takes the ending sample and, when
+ * OCFS_DEBUG_LEVEL_TIMING is enabled, prints the function name, its
+ * return value and the elapsed count. Without OCFS_DBG_TIMING both
+ * macros expand to nothing.
+ */
+#ifdef OCFS_DBG_TIMING
+typedef union _my_timing_t
+{
+ __u64 q;
+ __u32 lohi[2];
+} my_timing_t;
+
+#define IO_FUNC_TIMING_DECL my_timing_t begin, end; rdtsc (begin.lohi[0], begin.lohi[1]);
+
+#define IO_FUNC_TIMING_PRINT(_fn,_ret) \
+ do { \
+ rdtsc (end.lohi[0], end.lohi[1]); \
+ IF_LEVEL_NO_CONTEXT(OCFS_DEBUG_LEVEL_TIMING) \
+ printk("(%d) EXIT : %s() = %d => [%llu]\n", \
+ current->pid, _fn, _ret, \
+ end.q-begin.q); \
+ } while(0)
+#else
+#define IO_FUNC_TIMING_DECL
+#define IO_FUNC_TIMING_PRINT(_fn,_ret)
+#endif
+
+
+/*
+ * Helpers for the LOG_ENTRY/LOG_EXIT macros. With OCFS_DBG_TIMING they
+ * declare, initialise and print a small scratch string that holds
+ * stack/timing text; without it no scratch string exists, so the
+ * declaration and initialisation vanish and PRINT_STRING() only ends
+ * the line. None of the expansions ends in a semicolon; the user of
+ * the macro supplies it.
+ */
+#ifndef OCFS_DBG_TIMING
+#define DECL_U8_ARRAY(__t, __s)
+#define INIT_U8_ARRAY(__s)
+#define PRINT_STRING(__t) printk("\n")
+#define PRINT_ENTRY(__t) printk("(%d) ENTRY: %s", current->pid, __FUNCTION__)
+#else
+#define DECL_U8_ARRAY(__t, __s) __u8 (__t)[(__s)]
+#define INIT_U8_ARRAY(__s) *(__s) = '\0'
+#define PRINT_STRING(__t) printk("%s\n", (__t))
+#define PRINT_ENTRY(__t) printk("(%d) %sENTRY: %s", current->pid, (__t), __FUNCTION__)
+#endif
+
+
+/*
+ * GET_STACK(s): with OCFS_DBG_TIMING defined and OCFS_DEBUG_LEVEL_STACK
+ * enabled, mask the stack pointer with 8191, subtract
+ * sizeof(struct task_struct) and sprintf the result into s as "[n] ".
+ * The inline asm names %esp, so this variant is specific to 32-bit x86.
+ * Without OCFS_DBG_TIMING the macro expands to nothing.
+ */
+#ifndef OCFS_DBG_TIMING
+# define GET_STACK(s)
+#else
+# define GET_STACK(s) \
+ IF_LEVEL(OCFS_DEBUG_LEVEL_STACK) { \
+ __s32 esp; \
+ __asm__ __volatile__("andl %%esp,%0" : "=r" (esp) : \
+ "0" (8191)); \
+ esp -= sizeof(struct task_struct); \
+ sprintf((s), "[%ld] ", esp); \
+ }
+#endif
+
+/* privately used macros */
+/*
+ * IF_LEVEL(level) opens an 'if' that is true when the including file's
+ * OCFS_DEBUG_CONTEXT is enabled in debug_context, at least one bit of
+ * 'level' is enabled in debug_level, and the current pid is not
+ * debug_exclude. IF_LEVEL_NO_CONTEXT() skips the context test.
+ * Both expand to a bare 'if (...)' that governs the statement or block
+ * following the macro. The level argument is parenthesized so that an
+ * expression such as (A | B) is masked as a whole.
+ */
+# define IF_LEVEL(level) \
+	if ((debug_context & OCFS_DEBUG_CONTEXT) && (debug_level & (level)) && \
+	    current->pid!=debug_exclude)
+# define IF_LEVEL_NO_CONTEXT(level) \
+	if ((debug_level & (level)) && current->pid!=debug_exclude)
+
+/*
+ * Entry/exit timing support for the LOG_ENTRY/LOG_EXIT macros.
+ *
+ * ENTRY_TIMING_DECLS declares the locals _HI and _LO that hold the
+ * entry timestamp. GET_TIMING(s, hi, lo) samples rdtsc when
+ * OCFS_DEBUG_LEVEL_TIMING is enabled: with s == NULL the sample is
+ * stored into hi/lo; otherwise the difference between the new sample
+ * and the hi/lo pair is formatted into s as " => [n]".
+ * Without OCFS_DBG_TIMING both macros expand to nothing.
+ */
+#ifndef OCFS_DBG_TIMING
+# define ENTRY_TIMING_DECLS
+# define GET_TIMING(s, hi, lo)
+#else
+# define ENTRY_TIMING_DECLS __u32 _HI = 0, _LO = 0
+# define GET_TIMING(s, hi, lo) \
+ do { \
+ IF_LEVEL(OCFS_DEBUG_LEVEL_TIMING) { \
+ __u32 _lo, _hi; \
+ rdtsc (_lo, _hi); \
+ if ((s) == NULL) { \
+ (hi) = _hi; (lo) = _lo; \
+ } else { \
+ __u64 _b, _e; \
+ _b = hi; _b <<= 32; _b |= lo; \
+ _e = _hi; _e <<= 32; _e |= _lo; \
+ _e -= _b; \
+ sprintf((s), " => [%llu]", _e); \
+ } \
+ } \
+ } while (0)
+#endif
+
+/* IF macro */
+/*
+ * Execute 'func' (an expression or call, expanded as a statement) only
+ * when the including file's OCFS_DEBUG_CONTEXT is enabled in
+ * debug_context and OCFS_DEBUG_LEVEL_TRACE is enabled in debug_level.
+ * Unlike IF_LEVEL(), debug_exclude is not checked.
+ */
+#define IF_TRACE(func) \
+ do { \
+ if ((debug_context & OCFS_DEBUG_CONTEXT) && \
+ (debug_level & OCFS_DEBUG_LEVEL_TRACE)) \
+ func; \
+ } while (0)
+
+/*
+ * Opens an 'if' that is true when OCFS_DEBUG_LEVEL_PRINTK is enabled in
+ * debug_level and the current pid is not debug_exclude. Expands to a
+ * bare 'if (...)' governing whatever statement follows.
+ */
+#define IF_LEVEL_PID() \
+ if ((debug_level & OCFS_DEBUG_LEVEL_PRINTK) && \
+ (current->pid!=debug_exclude))
+
+/*
+ * Accept an int and do nothing with it. Used as the expansion of the
+ * int-valued LOG_* macros when TRACE is not defined, so the argument is
+ * still evaluated and type-checked.
+ */
+static inline void eat_value_int(int val)
+{
+	(void) val;
+}
+
+/*
+ * Accept an unsigned int and do nothing with it; the TRACE-disabled
+ * expansion of LOG_EXIT_UINT().
+ */
+static inline void eat_value_uint(unsigned int val)
+{
+	(void) val;
+}
+
+/*
+ * Accept a long and do nothing with it; the TRACE-disabled expansion of
+ * LOG_EXIT_LONG().
+ */
+static inline void eat_value_long(long val)
+{
+	(void) val;
+}
+
+/*
+ * Accept an unsigned long and do nothing with it; the TRACE-disabled
+ * expansion of LOG_EXIT_ULONG().
+ */
+static inline void eat_value_ulong(unsigned long val)
+{
+	(void) val;
+}
+
+/*
+ * Accept a pointer and do nothing with it; the TRACE-disabled expansion
+ * of LOG_EXIT_PTR().
+ */
+static inline void eat_value_ptr(void *val)
+{
+	(void) val;
+}
+
+/* TRACE disabled. ERROR macros are never disabled. */
+/*
+ * Stub versions of the logging macros. Macros that take a single value
+ * forward it to the matching eat_value_*() helper so the argument is
+ * still evaluated; all the others expand to nothing.
+ */
+#if !defined(TRACE)
+# define LOG_ENTRY()
+# define LOG_EXIT()
+# define LOG_EXIT_STATUS(val) eat_value_int(val)
+# define LOG_EXIT_INT(val) eat_value_int(val)
+# define LOG_EXIT_UINT(val) eat_value_uint(val)
+# define LOG_EXIT_LONG(val) eat_value_long(val)
+# define LOG_EXIT_ULONG(val) eat_value_ulong(val)
+# define LOG_EXIT_PTR(val) eat_value_ptr(val)
+# define LOG_TRACE_STR(str)
+# define LOG_TRACE_STATUS(val) eat_value_int(val)
+# define LOG_ENTRY_ARGS(fmt, arg...)
+# define LOG_EXIT_ARGS(fmt, arg...)
+# define LOG_TRACE_ARGS(fmt, arg...)
+# define LOG_PID_PRINTK(fmt, arg...)
+# define LOG_PID_STR(str)
+#endif /* !defined(TRACE) */
+
+
+
+/* TRACE enabled */
+#if defined(TRACE)
+
+/*
+ * printk() a message prefixed with the current pid and function name,
+ * gated by IF_LEVEL_PID(). LOG_PID_STR() is the fixed-string form and
+ * appends the newline itself.
+ */
+#define LOG_PID_PRINTK(fmt, arg...) \
+ do { \
+ IF_LEVEL_PID() { \
+ printk("(%d) %s(): ", current->pid, \
+ __FUNCTION__); \
+ printk(fmt, ## arg); \
+ } \
+ } while (0)
+
+#define LOG_PID_STR(str) LOG_PID_PRINTK("%s\n", str)
+
+/* ENTRY macros */
+/* LOG_ENTRY_ARGS()
+ *
+ * Note: The macro expects the args to be terminated by a newline.
+ *
+ * The expansion starts with ENTRY_TIMING_DECLS, which declares the _HI
+ * and _LO locals when OCFS_DBG_TIMING is defined; those locals are read
+ * later by LOG_EXIT_ARGS() in the same function. The entry line itself
+ * is printed only when OCFS_DEBUG_LEVEL_ENTRY is enabled.
+ */
+#define LOG_ENTRY_ARGS(fmt, arg...) \
+ ENTRY_TIMING_DECLS; \
+ do { \
+ DECL_U8_ARRAY(_t, 16); \
+ INIT_U8_ARRAY(_t); \
+ GET_STACK(_t); \
+ GET_TIMING(NULL, _HI, _LO); \
+ IF_LEVEL(OCFS_DEBUG_LEVEL_ENTRY) { \
+ PRINT_ENTRY(_t); \
+ printk(fmt, ##arg); \
+ } \
+ } while (0)
+
+#define LOG_ENTRY() LOG_ENTRY_ARGS("() \n")
+
+
+
+/* EXIT macros */
+/* LOG_EXIT_ARGS()
+ *
+ * Prints "(pid) EXIT : func() " when OCFS_DEBUG_LEVEL_EXIT is enabled.
+ * The first character of fmt is inspected: if it is '\n' nothing more
+ * is formatted, otherwise "= " and the formatted arguments follow.
+ * PRINT_STRING() then ends the line (preceded by the elapsed-time text
+ * when OCFS_DBG_TIMING is defined, which relies on the _HI/_LO locals
+ * declared by LOG_ENTRY_ARGS()).
+ */
+#define LOG_EXIT_ARGS(fmt, arg...) \
+ do { \
+ IF_LEVEL(OCFS_DEBUG_LEVEL_EXIT) { \
+ DECL_U8_ARRAY(_t, 50); \
+ INIT_U8_ARRAY(_t); \
+ GET_TIMING(_t, _HI, _LO); \
+ printk("(%d) EXIT : %s() %s", \
+ current->pid, __FUNCTION__, \
+ (*fmt == '\n' ? "" : "= ")); \
+ if (*fmt != '\n') \
+ printk(fmt, ## arg); \
+ PRINT_STRING(_t); \
+ } \
+ } while (0)
+
+#define LOG_EXIT() LOG_EXIT_ARGS("\n") /* I hate you Sunil */
+#define LOG_EXIT_STATUS(val) LOG_EXIT_ARGS("%d ", val)
+#define LOG_EXIT_INT(val) LOG_EXIT_ARGS("%d ", val)
+#define LOG_EXIT_UINT(val) LOG_EXIT_ARGS("%u ", val)
+#define LOG_EXIT_LONG(val) LOG_EXIT_ARGS("%ld ", val)
+#define LOG_EXIT_ULONG(val) LOG_EXIT_ARGS("%lu ", val)
+#define LOG_EXIT_PTR(val) LOG_EXIT_ARGS("0x%p ", val)
+
+
+/* TRACE macros */
+/* LOG_TRACE_ARGS()
+ *
+ * Note: The macro expects the args to be terminated by a newline.
+ *
+ * Prints "(pid) TRACE: func() " followed by the formatted arguments
+ * when OCFS_DEBUG_LEVEL_TRACE is enabled for this file's context.
+ * LOG_TRACE_STR() and LOG_TRACE_STATUS() add the newline themselves.
+ * None of the expansions ends in a semicolon, so each can be used as a
+ * single statement (for example as the body of an if/else) exactly
+ * like its TRACE-disabled counterpart.
+ */
+#define LOG_TRACE_ARGS(fmt, arg...) \
+	do { \
+		IF_LEVEL(OCFS_DEBUG_LEVEL_TRACE) { \
+			printk("(%d) TRACE: %s() ", current->pid, \
+			       __FUNCTION__); \
+			printk(fmt, ## arg); \
+		} \
+	} while (0)
+
+#define LOG_TRACE_STR(str) LOG_TRACE_ARGS("%s\n", str)
+#define LOG_TRACE_STATUS(val) LOG_TRACE_ARGS("%d\n", val)
+
+#endif /* TRACE */
+
+
+
+/* ERROR macros are not compiled out */
+/* LOG_ERROR_ARGS()
+ *
+ * Note: The macro expects the args to be terminated by a newline.
+ *
+ * Always prints, at KERN_ERR, the current pid plus the file and line of
+ * the call site, followed by the formatted arguments. No debug level or
+ * context is consulted. LOG_ERROR_STR() and LOG_ERROR_STATUS() add the
+ * newline themselves.
+ */
+#define LOG_ERROR_ARGS(fmt, arg...) \
+ do { \
+ printk(KERN_ERR "(%d) ERROR at %s, %d: ", current->pid, __FILE__, __LINE__); \
+ printk(fmt, ## arg); \
+ } while (0)
+
+#define LOG_ERROR_STR(str) LOG_ERROR_ARGS("%s\n", str)
+#define LOG_ERROR_STATUS(st) LOG_ERROR_ARGS("status = %d\n", st)
+
+/*
+ * Print the file and line of the call site together with the return
+ * address of the enclosing function, then call ocfs_show_trace(NULL).
+ * The argument p is not used by the expansion.
+ *
+ * __builtin_return_address() yields a void *, so it is cast to
+ * unsigned long to match the %lx conversion.
+ */
+#define ocfs_trace(p) \
+	do { \
+		printk("Trace in %s line %d called from [<%lx>]\n", \
+		       __FILE__, __LINE__, \
+		       (unsigned long) __builtin_return_address(0)); \
+		ocfs_show_trace(NULL); \
+	} while (0)
+
+#endif /* OCFS_LOG_H */
Modified: trunk/src/proc.c
===================================================================
--- trunk/src/proc.c 2004-06-17 16:59:59 UTC (rev 1127)
+++ trunk/src/proc.c 2004-06-17 21:06:33 UTC (rev 1128)
@@ -1,4 +1,6 @@
-/*
+/* -*- mode: c; c-basic-offset: 8; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
* proc.c
*
* proc interface
@@ -26,10 +28,20 @@
#define OCFSPROC_PRIVATE_DECLS
+#include "ocfs_compat.h"
+
+#include <linux/fs.h>
#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/proc_fs.h>
+#include <linux/socket.h>
-#include "inc/ocfs_log.h"
-#include "inc/ocfs.h"
+#include "ocfs_log.h"
+#include "ocfs.h"
+
+#include "proc.h"
+#include "vote.h"
+
extern __u32 comm_voting;
@@ -47,9 +59,6 @@
static int ocfs_proc_device (char *page, char **start, off_t off, int count, int *eof, void *data);
static int ocfs_proc_nodes (char *page, char **start, off_t off, int count, int *eof, void *data);
static int ocfs_proc_net_vote_obj (char *page, char **start, off_t off, int count, int *eof, void *data);
-#ifdef OCFS_LINUX_MEM_DEBUG
-static int ocfs_proc_memallocs (char *page, char **start, off_t off, int count, int *eof, void *data);
-#endif
static int ocfs_proc_alloc_stat(char *page, char **start, off_t off,
int count, int *eof, void *data);
@@ -71,9 +80,6 @@
{ OCFS2_PROC_BASENAME "/nodename", NULL, ocfs_proc_nodename },
{ OCFS2_PROC_BASENAME "/globalctxt", NULL, ocfs_proc_globalctxt },
{ OCFS2_PROC_BASENAME "/lockstat", NULL, ocfs_proc_dlm_stats },
-#ifdef OCFS_LINUX_MEM_DEBUG
- { OCFS2_PROC_BASENAME "/memallocs", NULL, ocfs_proc_memallocs },
-#endif
{ NULL, }
};
@@ -101,9 +107,6 @@
remove_proc_entry (OCFS2_PROC_BASENAME "/nodename", NULL);
remove_proc_entry (OCFS2_PROC_BASENAME "/globalctxt", NULL);
remove_proc_entry (OCFS2_PROC_BASENAME "/lockstat", NULL);
-#ifdef OCFS_LINUX_MEM_DEBUG
- remove_proc_entry (OCFS2_PROC_BASENAME "/memallocs", NULL);
-#endif
remove_proc_entry (OCFS2_PROC_BASENAME, NULL);
LOG_EXIT ();
@@ -136,90 +139,7 @@
return len;
} /* ocfs_proc_calc_metrics */
-#ifdef OCFS_LINUX_MEM_DEBUG
-/*
- * ocfs_proc_memallocs()
- *
- */
-static int ocfs_proc_memallocs (char *page, char **start, off_t off,
- int count, int *eof, void *data)
-{
- int ret = 0;
- int proc_overflow = 0;
- struct list_head *iter;
- struct list_head *temp_iter;
- alloc_item *item;
- int len = 0;
- char *slabname;
- char *tmpstr = NULL;
- LOG_ENTRY ();
-
-#define MEMDBG_LEN 255
- tmpstr = ocfs_malloc(MEMDBG_LEN);
- if (!tmpstr) {
- LOG_ERROR_STATUS (-ENOMEM);
- goto bail;
- }
-
- sprintf (tmpstr, "%-8s %-9s %s\n", "Pointer", "Size/Slab", "Line:File");
- printk("%s", tmpstr);
- ret = sprintf ((char *) (page + len), "%s", tmpstr);
- len += ret;
-
- list_for_each_safe (iter, temp_iter, &OcfsGlobalCtxt.item_list) {
- if (len >= 4096)
- proc_overflow = 1;
- item = list_entry (iter, alloc_item, list);
- switch (item->type) {
- case SLAB_ITEM:
- if (item->u.slab == OcfsGlobalCtxt.fe_cache)
- slabname = "fe";
- else
- slabname = "unknown";
-
- sprintf(tmpstr, "%08x %9s %-40s\n", item->address,
- slabname, item->tag);
-
- printk("%s", tmpstr);
- if (!proc_overflow) {
- ret = snprintf ((char *) (page + len),
- (4096 - len), "%s", tmpstr);
- len += ret;
- }
- break;
- case KMALLOC_ITEM:
- case VMALLOC_ITEM:
- default:
- sprintf(tmpstr, "%08x %9d %s\n", item->address,
- item->u.length, item->tag);
- printk("%s", tmpstr);
- if (!proc_overflow) {
- ret = snprintf ((char *) (page + len),
- (4096 - len), "%s", tmpstr);
- len += ret;
- }
-
- break;
- }
- if (ret < 0)
- proc_overflow = 1;
- }
-
-
- if (proc_overflow)
- LOG_ERROR_STR ("proc output truncated");
-
- ret = ocfs_proc_calc_metrics (page, start, off, count, eof, len);
-
-bail:
- if (tmpstr)
- kfree(tmpstr);
- LOG_EXIT_LONG (ret);
- return ret;
-} /* ocfs_proc_memallocs */
-#endif
-
/*
* ocfs_proc_globalctxt()
*
@@ -232,17 +152,17 @@
LOG_ENTRY ();
- len += sprintf (page + len, "comm voting : %d\n", comm_voting);
- len += sprintf (page + len, "ip addr/port : %s/%d\n",
- OcfsGlobalCtxt.comm_info.ip_addr,
- OcfsGlobalCtxt.comm_info.ip_port);
- len += sprintf (page + len, "guid : ");
- strncat (page + len, OcfsGlobalCtxt.guid.guid, GUID_LEN);
- len += GUID_LEN;
- strncat (page + len, "\n", 1);
+ len += sprintf(page + len, "comm voting : %d\n", comm_voting);
+ /* The address is converted to host byte order and shown in hex so the
+  * digits agree with the literal 0x prefix (it was %u, i.e. decimal). */
+ len += sprintf(page + len, "ip addr/port : 0x%08x/%u\n",
+ ntohl(OcfsGlobalCtxt.comm_info.addr_u.ip_addr4),
+ ntohs(OcfsGlobalCtxt.comm_info.ip_port));
+ len += sprintf(page + len, "guid : ");
+ /* page + len points at the terminating NUL left by sprintf, so
+  * strncat appends the guid in place. */
+ strncat(page + len, OcfsGlobalCtxt.guid.guid, OCFS2_GUID_LEN);
+ len += OCFS2_GUID_LEN;
+ strncat(page + len, "\n", 1);
len++;
- ret = ocfs_proc_calc_metrics (page, start, off, count, eof, len);
+ ret = ocfs_proc_calc_metrics(page, start, off, count, eof, len);
LOG_EXIT_INT (ret);
return ret;
@@ -471,7 +391,8 @@
}
ProcList[0].data = (char *) osb;
- ProcList[1].data = osb->vol_layout.mount_point;
+#warning fix proc mountpoint
+ ProcList[1].data = "unknown";
ProcList[2].data = (char *) osb;
ProcList[3].data = (char *) osb;
ProcList[4].data = (char *) osb;
@@ -561,14 +482,12 @@
int len;
char *pubmap = NULL;
ocfs_super *osb;
- ocfs_vol_layout *vol;
int ret = 0, i;
char *ptr;
LOG_ENTRY ();
osb = data;
- vol = &(osb->vol_layout);
pubmap = ocfs_malloc (100);
if (!pubmap) {
@@ -577,8 +496,8 @@
}
ptr = pubmap;
- for (i = 0; i < 32; i++) {
- if (osb->publ_map & (1 << i))
+ for (i = 0; i < osb->max_nodes; i++) {
+ if (ocfs_node_map_test_bit(&osb->publ_map, i))
ptr += sprintf (ptr, "%d ", i);
}
if (pubmap != ptr)
@@ -588,17 +507,15 @@
"File open count : %lld\n" \
"Publish map : %s\n" \
"Number of nodes : %u\n" \
- "Cluster size : %u\n" \
+ "Cluster size : %d\n" \
"Volume size : %llu\n" \
- "Dir node size : %llu\n" \
- "File node size : %llu\n" \
"Failed Large Allocs : %u\n" \
"Retry Large Allocs : %u\n" \
"Inode Hash Objects : %u\n"
len = sprintf (page, PROC_STATS, osb->file_open_cnt, pubmap,
- vol->num_nodes, vol->cluster_size, vol->size,
- vol->dir_node_size, vol->file_node_size,
+ osb->num_nodes, osb->s_clustersize,
+ (u64)osb->cluster_bitmap.validbits << osb->s_clustersize_bits,
osb->cluster_bitmap.failed,
osb->cluster_bitmap.ok_retries,
osb->inode_hash.num_ents);
@@ -646,7 +563,6 @@
int ret;
ocfs_super *osb;
BARF_BARF_BARF *node;
- __u32 publ_map;
char mount;
LOG_ENTRY ();
@@ -655,19 +571,19 @@
if (osb) {
down (&(osb->cfg_lock));
- publ_map = (__u32)osb->publ_map;
- for (i = 0; i < OCFS_MAXIMUM_NODES; ++i, publ_map >>= 1) {
+ for (i = 0; i < osb->max_nodes; i++) {
node = osb->node_cfg_info[i];
if (!node)
continue;
- mount = (publ_map & 0x1) ? 'M' : ' ';
+ mount = ocfs_node_map_test_bit(&osb->publ_map, i) ? 'M' : ' ';
len += sprintf (page + len,
- "%2d %c %-32s %-15s %-6d ",
+ "%2d %c %-32s 0x%08x %-6u ", /* address in hex to agree with the 0x prefix (was %u) */
i, mount, node->node_name,
- node->ipc_config.ip_addr,
- node->ipc_config.ip_port);
- strncat (page + len, node->guid.guid, GUID_LEN);
- len += GUID_LEN;
+ ntohl(node->ipc_config.addr_u.ip_addr4),
+ ntohs(node->ipc_config.ip_port));
+ strncat(page + len, node->guid.guid,
+ OCFS2_GUID_LEN);
+ len += OCFS2_GUID_LEN;
len += sprintf (page + len, "\n");
}
up (&(osb->cfg_lock));
Added: trunk/src/proc.h
===================================================================
--- trunk/src/proc.h 2004-06-17 16:59:59 UTC (rev 1127)
+++ trunk/src/proc.h 2004-06-17 21:06:33 UTC (rev 1128)
@@ -0,0 +1,37 @@
+/* -*- mode: c; c-basic-offset: 8; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
+ * proc.h
+ *
+ * Function prototypes
+ *
+ * Copyright (C) 2002, 2004 Oracle. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ *
+ * Authors: Kurt Hackel, Mark Fasheh, Sunil Mushran, Wim Coekaerts,
+ * Manish Singh, Neeraj Goyal, Suchit Kaura
+ */
+
+#ifndef OCFS2_PROC_H
+#define OCFS2_PROC_H
+
+void ocfs_proc_add_volume(ocfs_super *osb);
+void ocfs_proc_deinit(void);
+int ocfs_proc_init(void);
+void ocfs_proc_remove_volume(ocfs_super *osb);
+
+#endif /* OCFS2_PROC_H */
Modified: trunk/src/super.c
===================================================================
--- trunk/src/super.c 2004-06-17 16:59:59 UTC (rev 1127)
+++ trunk/src/super.c 2004-06-17 21:06:33 UTC (rev 1128)
@@ -1,4 +1,6 @@
-/*
+/* -*- mode: c; c-basic-offset: 8; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
* super.c
*
* load/unload driver, mount/dismount volumes
@@ -24,13 +26,47 @@
* Manish Singh, Neeraj Goyal, Suchit Kaura
*/
+#include "ocfs_compat.h"
+
+#include <linux/module.h>
+#include <linux/fs.h>
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/highmem.h>
#include <linux/utsname.h>
-#include <linux/types.h>
+#include <linux/init.h>
+#include <linux/sysctl.h>
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
+#include <linux/statfs.h>
+#include <linux/moduleparam.h>
+#endif
+#include <linux/blkdev.h>
+#include <linux/socket.h>
+#include <linux/inet.h>
-#include "inc/ocfs_log.h"
-#include "inc/ocfs.h"
-#include "inc/ocfs_journal.h"
+#include "ocfs_log.h"
+#include "ocfs.h"
+/* this should be the only file to include a version 1 header */
+#include "ocfs1_fs_compat.h"
+
+#include "alloc.h"
+#include "bitmap.h"
+#include "heartbeat.h"
+#include "inode.h"
+#include "journal.h"
+#include "nm.h"
+#include "proc.h"
+#include "super.h"
+#include "sysfile.h"
+#include "util.h"
+#include "ver.h"
+#include "volcfg.h"
+#include "vote.h"
+
+#include "ocfs_journal.h"
+#include "ocfs_buffer_head.h"
+
#define OCFS_DEBUG_CONTEXT OCFS_DEBUG_CONTEXT_SUPER
/*
@@ -145,17 +181,18 @@
static int ocfs_statfs (struct super_block *sb, struct statfs *buf);
#endif
-static int ocfs_init_system_inodes(ocfs_super *osb);
+static int ocfs_init_global_system_inodes(ocfs_super *osb);
+static int ocfs_init_local_system_inodes(ocfs_super *osb);
static int ocfs_release_system_inodes(ocfs_super *osb);
static int ocfs_publish_set_unmounted(ocfs_super *osb, int node_num);
static int ocfs_publish_set_mounted(ocfs_super *osb, int node_num);
static int ocfs_publish_toggle_mounted(ocfs_super *osb, int node_num,
int value);
static int ocfs_check_volume(ocfs_super * osb);
-static int ocfs_verify_volume(ocfs_vol_disk_hdr *vdh);
-static int ocfs_initialize_osb(ocfs_super *osb, ocfs_vol_disk_hdr *vdh,
- ocfs_vol_label *vol_label,
- __u32 sect_size);
+static int ocfs_verify_volume(ocfs2_dinode *di, struct buffer_head *bh,
+ __u32 sectsize);
+static int ocfs_initialize_osb(ocfs_super *osb, struct buffer_head *bh);
+static int ocfs2_get_sector(struct super_block *sb, struct buffer_head **bh, int block, int sect_size);
static struct super_operations ocfs_sops = {
.statfs = ocfs_statfs,
@@ -194,78 +231,71 @@
}
/*
- * ocfs_init_system_inodes()
+ * ocfs_init_global_system_inodes()
*/
-static int ocfs_init_system_inodes(ocfs_super *osb)
+static int ocfs_init_global_system_inodes(ocfs_super *osb)
{
struct inode *new = NULL;
- __u64 sys_off = 0;
int status = 0;
+ int i;
LOG_ENTRY();
- /* the vol bitmap */
- sys_off = OCFS_BITMAP_LOCK_OFFSET;
- new = ocfs_iget(osb, sys_off);
+ new = ocfs_iget(osb, osb->root_blkno << osb->sb->s_blocksize_bits);
if (!new) {
- status = -EINVAL;
- LOG_ERROR_STATUS(status);
+ LOG_ERROR_STATUS(status = -EINVAL);
goto bail;
}
- osb->system_inodes[GLOBAL_BITMAP_SYSTEM_INODE] = new;
+ osb->root_inode = new;
- /* file alloc bitmap */
- sys_off = osb->vol_layout.root_int_off +
- ((OCFS_FILE_FILE_ALLOC_BITMAP + osb->node_num)
- * osb->sect_size);
- new = ocfs_iget(osb, sys_off);
+ new = ocfs_iget(osb, osb->system_dir_blkno << osb->sb->s_blocksize_bits);
if (!new) {
- status = -EINVAL;
- LOG_ERROR_STATUS(status);
+ LOG_ERROR_STATUS(status = -EINVAL);
goto bail;
}
- osb->system_inodes[FILE_ALLOC_BITMAP_SYSTEM_INODE] = new;
+ osb->sys_root_inode = new;
- /* inode alloc bitmap */
- sys_off = osb->vol_layout.root_int_off +
- ((OCFS_INODE_BITMAP + osb->node_num)
- * osb->sect_size);
- new = ocfs_iget(osb, sys_off);
- if (!new) {
- status = -EINVAL;
- LOG_ERROR_STATUS(status);
- goto bail;
+ for (i = 0; i <= OCFS2_LAST_GLOBAL_SYSTEM_INODE; i++) {
+ new = ocfs_get_system_file_inode(osb, i, osb->node_num);
+ if (!new) {
+ ocfs_release_system_inodes(osb);
+ LOG_ERROR_STATUS(status = -EINVAL);
+ goto bail;
+ }
+ osb->system_inodes[i] = new;
}
- osb->system_inodes[INODE_ALLOC_BITMAP_SYSTEM_INODE] = new;
- /* journal file */
- sys_off = osb->vol_layout.root_int_off +
- ((OCFS_JOURNAL_FILE + osb->node_num) * osb->sect_size);
- new = ocfs_iget(osb, sys_off);
- if (!new) {
- status = -EINVAL;
- LOG_ERROR_STATUS(status);
- goto bail;
- }
- osb->system_inodes[JOURNAL_SYSTEM_INODE] = new;
+bail:
+ LOG_EXIT_STATUS(status);
+ return(status);
+} /* ocfs_init_global_system_inodes */
- /* orphan inode dir */
- sys_off = osb->vol_layout.root_int_off +
- (OCFS_ORPHAN_DIR * osb->sect_size);
- new = ocfs_iget(osb, sys_off);
- if (!new) {
- status = -EINVAL;
- LOG_ERROR_STATUS(status);
- goto bail;
+/*
+ * ocfs_init_local_system_inodes()
+ */
+static int ocfs_init_local_system_inodes(ocfs_super *osb)
+{
+ struct inode *new = NULL;
+ int status = 0;
+ int i;
+
+ LOG_ENTRY();
+
+ for (i = OCFS2_LAST_GLOBAL_SYSTEM_INODE + 1; i < NUM_SYSTEM_INODES ; i++) {
+ new = ocfs_get_system_file_inode(osb, i, osb->node_num);
+ if (!new) {
+ ocfs_release_system_inodes(osb);
+ LOG_ERROR_STATUS(status = -EINVAL);
+ goto bail;
+ }
+ osb->system_inodes[i] = new;
}
- osb->system_inodes[ORPHAN_DIR_SYSTEM_INODE] = new;
bail:
LOG_EXIT_STATUS(status);
return(status);
-} /* ocfs_init_system_inodes */
+} /* ocfs_init_local_system_inodes */
-
/*
* ocfs_release_system_inodes()
*/
@@ -275,9 +305,18 @@
LOG_ENTRY();
- for (i=0; i<NUM_SYSTEM_INODES; i++) {
- iput(osb->system_inodes[i]);
+ for (i = 0; i < NUM_SYSTEM_INODES; i++) {
+ if (osb->system_inodes[i]) {
+ iput(osb->system_inodes[i]);
+ osb->system_inodes[i] = NULL;
+ }
}
+
+ if (osb->sys_root_inode) {
+ iput(osb->sys_root_inode);
+ osb->sys_root_inode = NULL;
+ }
+
LOG_EXIT_STATUS(status);
return(status);
} /* ocfs_release_system_inodes */
@@ -469,15 +508,16 @@
*/
static int __init ocfs_driver_entry (void)
{
- int status = 0;
+ int status;
LOG_ENTRY ();
ocfs_version_print();
ocfs_hostname = kmalloc(strlen(system_utsname.nodename) + 1, GFP_KERNEL);
- if (ocfs_hostname==NULL)
+ if (ocfs_hostname == NULL)
return -EINVAL;
+
strcpy(ocfs_hostname, system_utsname.nodename);
printk("ocfs2: hostname is %s\n", ocfs_hostname);
@@ -490,9 +530,6 @@
memset (&OcfsGlobalCtxt, 0, sizeof (ocfs_global_ctxt));
memset (&OcfsIpcCtxt, 0, sizeof (ocfs_ipc_ctxt));
-#ifdef OCFS_LINUX_MEM_DEBUG
- INIT_LIST_HEAD (&(OcfsGlobalCtxt.item_list));
-#endif
INIT_LIST_HEAD (&(OcfsGlobalCtxt.osb_next));
INIT_LIST_HEAD (&(OcfsGlobalCtxt.osb_next));
@@ -515,8 +552,6 @@
goto leave;
}
- OcfsGlobalCtxt.hbm = DISK_HBEAT_COMM_ON;
-
spin_lock_init (&osb_id_lock);
spin_lock (&osb_id_lock);
osb_id = 0;
@@ -575,24 +610,25 @@
/* Read remaining insmod params */
if (node_number != OCFS_INVALID_NODE_NUM) {
- if (node_number >= 0 && node_number < OCFS_MAXIMUM_NODES) {
- OcfsGlobalCtxt.pref_node_num = node_number;
- LOG_TRACE_ARGS("Preferred node number: %d\n",
- node_number);
- }
- else {
- status = -EINVAL;
- LOG_ERROR_STR("'node_number' must be between 0 and 31");
- }
+ // this will be validated later
+ OcfsGlobalCtxt.pref_node_num = node_number;
+ LOG_TRACE_ARGS("Preferred node number: %d\n", node_number);
}
if (ip_port_v2 == 0)
- OcfsGlobalCtxt.comm_info.ip_port = OCFS_IPC_DEFAULT_PORT;
+ OcfsGlobalCtxt.comm_info.ip_port =
+ htons(OCFS_IPC_DEFAULT_PORT);
+ else if (ip_port_v2 & 0xFFFF0000) {
+ status = -EINVAL;
+ LOG_ERROR_STR("'ip_port_v2' is too large'");
+ }
else
- OcfsGlobalCtxt.comm_info.ip_port = ip_port_v2;
- LOG_TRACE_ARGS("IP port: %d\n", OcfsGlobalCtxt.comm_info.ip_port);
+ OcfsGlobalCtxt.comm_info.ip_port =
+ htons((u16)ip_port_v2);
+ LOG_TRACE_ARGS("IP port: %d\n",
+ ntohs(OcfsGlobalCtxt.comm_info.ip_port));
- if (node_name && strlen (node_name) < MAX_NODE_NAME_LENGTH) {
+ if (node_name && strlen(node_name) < MAX_NODE_NAME_LENGTH) {
OcfsGlobalCtxt.node_name = node_name;
LOG_TRACE_ARGS ("Node name: %s\n", OcfsGlobalCtxt.node_name);
} else {
@@ -600,16 +636,18 @@
LOG_ERROR_STR ("'node_name' not set or too long");
}
- if (ip_address && strlen (ip_address) < MAX_IP_ADDR_LEN) {
- OcfsGlobalCtxt.comm_info.ip_addr = ip_address;
+#define MAX_IPv4_ADDR_STR_LEN 15 /* 4x '255' + 3x '.', not counting the NUL */
+ /* A full dotted quad such as 255.255.255.255 is exactly
+  * MAX_IPv4_ADDR_STR_LEN characters long, so that length must be
+  * accepted rather than rejected as too long. */
+ if (ip_address && strlen (ip_address) <= MAX_IPv4_ADDR_STR_LEN) {
+ OcfsGlobalCtxt.comm_info.addr_u.ip_addr4 =
+ in_aton(ip_address);
LOG_TRACE_ARGS ("IP address: %s\n", ip_address);
} else {
status = -EINVAL;
LOG_ERROR_STR ("'ip_address' not set or too long");
}
- if (guid && strlen (guid) == GUID_LEN) {
- memcpy (&OcfsGlobalCtxt.guid.guid, guid, GUID_LEN);
+ if (guid && strlen (guid) == OCFS2_GUID_LEN) {
+ memcpy(&OcfsGlobalCtxt.guid.guid, guid, OCFS2_GUID_LEN);
LOG_TRACE_ARGS ("Node guid: %s\n", guid);
} else {
status = -EINVAL;
@@ -617,7 +655,7 @@
}
if (status == 0) {
- for (i = 0; i < GUID_LEN; ++i)
+ for (i = 0; i < OCFS2_GUID_LEN; ++i)
check_sum += (__u32) guid[i];
if (cs != check_sum) {
status = -EINVAL;
@@ -626,56 +664,13 @@
}
/* hardcoding... not used yet */
- OcfsGlobalCtxt.comm_info.type = OCFS_UDP;
- OcfsGlobalCtxt.comm_info.ip_mask = NULL;
+ OcfsGlobalCtxt.comm_info.ip_version = htons(4);
return status;
} /* ocfs_read_params */
-#ifdef OCFS_LINUX_MEM_DEBUG
/*
- * ocfs_memcheck()
- *
- */
-static void ocfs_memcheck (void)
-{
- struct list_head *iter;
- struct list_head *temp_iter;
- alloc_item *item;
- char *memtype;
- char sizeinfo[20];
-
- list_for_each_safe (iter, temp_iter, &OcfsGlobalCtxt.item_list) {
- item = list_entry (iter, alloc_item, list);
- switch (item->type)
- {
- case SLAB_ITEM:
- /* TODO: use the actual slab name */
- memtype = "SLAB";
- snprintf(sizeinfo, 20, "slab=%p", item->u.slab);
- break;
- case KMALLOC_ITEM:
- memtype = "KMALLOC";
- snprintf(sizeinfo, 20, "size=%d", item->u.length);
- break;
- case VMALLOC_ITEM:
- memtype = "VMALLOC";
- snprintf(sizeinfo, 20, "size=%d", item->u.length);
- break;
- default:
- memtype = "UNKNOWN";
- snprintf(sizeinfo, 20, "size=%d", item->u.length);
- break;
- }
- LOG_ERROR_ARGS ("unfreed %s mem %x: %s tag='%s'", memtype,
- item->address, sizeinfo, item->tag);
- }
-} /* ocfs_memcheck */
-
-#endif /* OCFS_LINUX_MEM_DEBUG */
-
-/*
* ocfs_driver_exit()
*
* Called on rmmod
@@ -702,10 +697,6 @@
unregister_filesystem (&ocfs_fs_type);
-#ifdef OCFS_LINUX_MEM_DEBUG
- ocfs_memcheck ();
-#endif
-
printk("Unloaded OCFS Driver module\n");
LOG_EXIT ();
return;
@@ -737,57 +728,68 @@
*
*/
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
-static int ocfs_statfs (struct super_block *sb, struct kstatfs *buf)
+static int ocfs_statfs(struct super_block *sb, struct kstatfs *buf)
#else
-static int ocfs_statfs (struct super_block *sb, struct statfs *buf)
+static int ocfs_statfs(struct super_block *sb, struct statfs *buf)
#endif
{
- ocfs_super *osb = NULL;
- __u32 numbits, freebits = 0;
+ ocfs_super *osb;
+ __u32 numbits, freebits;
unsigned int reserved_bits;
- int status = 0;
- ocfs_file_entry *bm_lock = NULL;
+ int status;
+ ocfs2_dinode *bm_lock;
struct buffer_head *bh = NULL;
+ struct inode *inode = NULL;
LOG_ENTRY_ARGS ("(%p, %p)\n", sb, buf);
osb = OCFS_SB(sb);
- numbits = osb->cluster_bitmap.validbits;
-
- status = ocfs_read_bh (osb, OCFS_BITMAP_LOCK_OFFSET, &bh, 0, NULL);
+
+ inode = ocfs_get_system_file_inode(osb, GLOBAL_BITMAP_SYSTEM_INODE, -1);
+ if (!inode) {
+ LOG_ERROR_STR("failed to get bitmap inode");
+ return -EIO;
+ }
+
+ /* must this be a non cached read? */
+ status = ocfs_read_bh (osb, GET_INODE_FEOFF(inode), &bh, 0, inode);
if (status < 0) {
LOG_ERROR_STR("failed to read bitmap data");
return -EIO;
}
+
bm_lock = OCFS_BH_GET_DATA_READ(bh);
- if (numbits >= bm_lock->u.bitinfo.used_bits)
- freebits = numbits - bm_lock->u.bitinfo.used_bits;
+ freebits = numbits = osb->cluster_bitmap.validbits;
+
+ if (numbits >= bm_lock->id1.bitmap1.i_used)
+ freebits -= bm_lock->id1.bitmap1.i_used;
+
/* take out the space reserved for system files */
reserved_bits = ocfs_clusters_for_bytes(sb, 8 * ONE_MEGA_BYTE);
freebits -= reserved_bits;
buf->f_type = OCFS_MAGIC;
buf->f_bsize = sb->s_blocksize;
- buf->f_namelen = OCFS_MAX_FILENAME_LENGTH;
+ buf->f_namelen = OCFS2_MAX_FILENAME_LENGTH;
buf->f_bavail = buf->f_bfree;
#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)
buf->f_blocks =
(unsigned long) ((unsigned long) (numbits) *
- (unsigned long) (osb->vol_layout.cluster_size >> osb->sect_size_bits) -
+ (unsigned long) (osb->s_clustersize >> osb->sb->s_blocksize_bits) -
reserved_bits);
buf->f_bfree =
- (unsigned long) (freebits * (osb->vol_layout.cluster_size >> osb->sect_size_bits));
+ (unsigned long) (freebits * (osb->s_clustersize >> osb->sb->s_blocksize_bits));
buf->f_bavail = buf->f_bfree;
buf->f_files = (unsigned long) (numbits);
buf->f_ffree = (unsigned long) (numbits) - freebits;
#else
buf->f_blocks =
(sector_t) ((unsigned long) (numbits) *
- (unsigned long) (osb->vol_layout.cluster_size >> osb->sect_size_bits) -
+ (unsigned long) (osb->s_clustersize >> osb->sb->s_blocksize_bits) -
reserved_bits);
buf->f_bfree =
- (sector_t) (freebits * (osb->vol_layout.cluster_size >> osb->sect_size_bits));
+ (sector_t) (freebits * (osb->s_clustersize >> osb->sb->s_blocksize_bits));
buf->f_files = (sector_t) (numbits);
buf->f_ffree = (sector_t) (numbits) - freebits;
#endif
@@ -810,9 +812,6 @@
sizeof(ocfs_inode_private), 0, SLAB_NO_REAP | SLAB_HWCACHE_ALIGN,
NULL, NULL);
- OcfsGlobalCtxt.fe_cache = kmem_cache_create ("ocfs2_fileentry",
- 512, 0, SLAB_NO_REAP | SLAB_HWCACHE_ALIGN, NULL, NULL);
-
OcfsGlobalCtxt.extent_cache = kmem_cache_create ("ocfs2_extent",
sizeof(ocfs_extent) + OCFS_POINTER_SIZE, 0, SLAB_NO_REAP | SLAB_HWCACHE_ALIGN,
NULL, NULL);
@@ -829,60 +828,137 @@
static void ocfs_free_mem_lists (void)
{
kmem_cache_destroy (OcfsGlobalCtxt.inode_cache);
- kmem_cache_destroy (OcfsGlobalCtxt.fe_cache);
kmem_cache_destroy (OcfsGlobalCtxt.extent_cache);
OCFS_CLEAR_FLAG (OcfsGlobalCtxt.flags, OCFS_FLAG_MEM_LISTS_INITIALIZED);
} /* ocfs_free_mem_lists */
-
-/*
- * ocfs_mount_volume()
- *
- */
-static int ocfs_mount_volume (struct super_block *sb, int reclaim_id, struct inode *root)
+static int ocfs2_sb_probe(struct super_block *sb,
+ struct buffer_head **bh,
+ int *sector_size)
{
- int status = 0;
- ocfs_super *osb = NULL;
- ocfs_vol_disk_hdr *vol_header = NULL;
- ocfs_vol_label *vol_label = NULL;
- int child_pid, i, sect_size;
- struct buffer_head *bhs[] = { NULL, NULL };
- struct inode *inode = NULL;
+ int status = 0, tmpstat;
+ ocfs1_vol_disk_hdr *hdr;
+ ocfs2_dinode *di;
+ int blksize;
- LOG_ENTRY ();
+ *bh = NULL;
- /* Force 512B sector size for vol_header reads */
- status = ocfs_set_blocksize(sb, 512, 9);
+ /* may be > 512 */
+ *sector_size = bdev_hardsect_size(sb->s_bdev);
+ if (*sector_size > 4096) {
+ LOG_ERROR_ARGS("Hardware sector size too large: %d (max=4096)\n", *sector_size);
+ status = -EINVAL;
+ goto bail;
+ }
+
+ /* check block zero for old format */
+ status = ocfs2_get_sector(sb, bh, 0, *sector_size);
if (status < 0) {
- LOG_ERROR_STR("unable to set vol_header blocksize");
- goto leave;
+ LOG_ERROR_STATUS(status);
+ goto bail;
}
+ hdr = OCFS_BH_GET_DATA_READ(*bh);
+ if (hdr->major_version == OCFS1_MAJOR_VERSION) {
+ LOG_ERROR_ARGS("incompatible version: %u.%u\n",
+ hdr->major_version, hdr->minor_version);
+ status = -EINVAL;
+ }
+ if (memcmp (hdr->signature, OCFS1_VOLUME_SIGNATURE,
+ strlen (OCFS1_VOLUME_SIGNATURE)) == 0) {
+ LOG_ERROR_ARGS("incompatible volume signature: %8s\n",
+ hdr->signature);
+ status = -EINVAL;
+ }
+ OCFS_BH_PUT_DATA(*bh);
+ brelse(*bh);
+ *bh = NULL;
+ if (status < 0) {
+ LOG_ERROR_STR("This is an ocfs v1 filesystem which must be upgraded "
+ "before mounting with ocfs v2");
+ goto bail;
+ }
- /* get first two blocks */
- for (i=0; i<2; i++) {
- bhs[i] = getblk (OCFS_GET_BLOCKDEV(sb), i, 512);
- if (bhs[i] == NULL) {
- LOG_ERROR_STATUS(status = -EIO);
- goto leave;
+ /*
+ * Now check at magic offset for 512, 1024, 2048, 4096
+ * blocksizes. 4096 is the maximum blocksize because it is
+ * the minimum clustersize.
+ */
+ status = -EINVAL;
+ for (blksize = *sector_size; blksize <= OCFS2_MAX_BLOCKSIZE; blksize <<= 1) {
+ tmpstat = ocfs2_get_sector(sb, bh,
+ OCFS2_SUPER_BLOCK_BLKNO,
+ blksize);
+ if (tmpstat < 0) {
+ LOG_ERROR_STATUS(status = tmpstat);
+ goto bail;
}
+ di = (ocfs2_dinode *) OCFS_BH_GET_DATA_READ(*bh);
+ status = ocfs_verify_volume(di, *bh, blksize);
+ OCFS_BH_PUT_DATA(*bh);
+ if (status >= 0)
+ goto bail;
+ brelse(*bh);
+ *bh = NULL;
+ if (status != -EAGAIN)
+ break;
+ }
- lock_buffer(bhs[i]);
- if (!buffer_dirty(bhs[i]))
- clear_buffer_uptodate(bhs[i]);
- unlock_buffer(bhs[i]);
+bail:
+ return status;
+}
+
+/*
+ * ocfs2_get_sector()
+ *
+ * Set the superblock's block size to sect_size and read block number
+ * 'block' from the device into a buffer head.
+ *
+ * sb:        superblock whose device is read
+ * bh:        out parameter; on success holds the buffer, which the
+ *            caller must brelse().  On an I/O error it is set to NULL.
+ * block:     block number, in units of sect_size
+ * sect_size: block size in bytes; must be a power of two greater than 1
+ *
+ * Returns 0 on success, -EINVAL if sect_size is not a usable power of
+ * two, the ocfs_set_blocksize() error if that call fails, or -EIO if the
+ * buffer cannot be obtained or the read does not complete successfully.
+ */
+static int ocfs2_get_sector(struct super_block *sb, struct buffer_head **bh, int block, int sect_size)
+{
+ int i, bits=0, status = 0;
+
+ /* Find log2(sect_size).  Stop at bit 30: shifting a signed int by
+  * 31 or more is undefined behaviour.  Bit 0 is skipped because a
+  * result of 0 is treated as failure below anyway. */
+ for (i = 30; i > 0; i--) {
+ if ((1 << i) == sect_size)
+ bits = i;
}
+ if (!bits) {
+ LOG_ERROR_STR("unable to get blocksize bits");
+ return -EINVAL;
+ }
+
+ status = ocfs_set_blocksize(sb, sect_size, bits);
+ if (status < 0) {
+ LOG_ERROR_STR("unable to set blocksize");
+ return status;
+ }
- ll_rw_block(READ, 2, bhs);
- for (i=0; i<2; i++)
- wait_on_buffer(bhs[i]);
+ *bh = sb_getblk(sb, block);
+ if (!*bh) {
+ LOG_ERROR_STATUS(-EIO);
+ return -EIO;
+ }
+ /* Drop the uptodate flag (unless the buffer is dirty) so that
+  * ll_rw_block() really goes to disk instead of trusting the cache. */
+ lock_buffer(*bh);
+ if (!buffer_dirty(*bh))
+ clear_buffer_uptodate(*bh);
+ unlock_buffer(*bh);
+ ll_rw_block(READ, 1, bh);
+ wait_on_buffer(*bh);
+ /* A failed read leaves the buffer not uptodate; do not hand stale
+  * contents back to the caller. */
+ if (!buffer_uptodate(*bh)) {
+ LOG_ERROR_STATUS(-EIO);
+ brelse(*bh);
+ *bh = NULL;
+ return -EIO;
+ }
+ return 0;
+}
- vol_header = OCFS_BH_GET_DATA_READ(bhs[0]);
- vol_label = OCFS_BH_GET_DATA_READ(bhs[1]);
- LOG_TRACE_STR ("ocfs_verify_volume...");
- status = ocfs_verify_volume (vol_header);
+/*
+ * ocfs_mount_volume()
+ *
+ */
+static int ocfs_mount_volume (struct super_block *sb, int reclaim_id, struct inode *root)
+{
+ int status;
+ ocfs_super *osb;
+ int child_pid;
+ struct buffer_head *bh = NULL;
+ int sector_size;
+
+ LOG_ENTRY ();
+
+ /* probe for superblock */
+ status = ocfs2_sb_probe(sb, &bh, &sector_size);
if (status < 0) {
- LOG_ERROR_STATUS (status);
+ LOG_ERROR_STR("superblock probe failed!");
goto leave;
}
@@ -896,30 +972,20 @@
osb->sb = sb;
+ /* Save off for ocfs_rw_direct */
+ osb->s_sectsize_bits = ocfs_get_right_shift_bits(sector_size);
+ if (!osb->s_sectsize_bits)
+ BUG();
+
osb->reclaim_id = reclaim_id;
- /* FIXME: here it should use the actual blocksize */
- sect_size = OCFS_SECTOR_SIZE;
- status = ocfs_initialize_osb(osb, vol_header, vol_label,
- sect_size);
+ /* s_blocksize was set in the probe */
+ status = ocfs_initialize_osb(osb, bh);
if (status < 0) {
LOG_ERROR_STATUS (status);
goto leave;
}
- /* Now set real blocksize */
- status = ocfs_set_blocksize(sb, osb->sect_size,
- osb->sect_size_bits);
- if (status < 0) {
- LOG_ERROR_STR("unable to set final blocksize");
- goto leave;
- }
-
- OCFS_BH_PUT_DATA(bhs[0]);
- vol_header = NULL;
- OCFS_BH_PUT_DATA(bhs[1]);
- vol_label = NULL;
-
down(&(osb->osb_res));
/* Launch the NM thread for the mounted volume */
@@ -948,7 +1014,8 @@
OcfsIpcCtxt.dlm_msg_size = OCFS_DLM_MAX_MSG_SIZE;
OcfsIpcCtxt.version = OCFS_IPC_DLM_VERSION;
/* start the listener thread */
- status = ocfs_init_udp_sock (&OcfsIpcCtxt.send_sock, &OcfsIpcCtxt.recv_sock);
+ status = ocfs_init_udp_sock(&OcfsIpcCtxt.send_sock,
+ &OcfsIpcCtxt.recv_sock);
if (status < 0) {
LOG_ERROR_STATUS (status);
goto leave;
@@ -977,31 +1044,14 @@
ocfs_wait (osb->nm_init_event, (atomic_read (&osb->nm_init) >= OCFS_HEARTBEAT_INIT ), 0);
down(&(osb->osb_res));
- osb->hbm = DISK_HBEAT_COMM_ON;
-
down (&(osb->publish_lock));
ocfs_nm_heart_beat (osb, HEARTBEAT_METHOD_DISK, 1);
up (&(osb->publish_lock));
-
- osb->publ_map |= (1 << osb->node_num);
+
+ ocfs_node_map_set_bit(&osb->publ_map, osb->node_num);
osb->vol_state = VOLUME_ENABLED;
up (&(osb->osb_res));
- inode = ocfs_iget(osb, OCFS_ROOT_INODE_FE_OFF(osb));
- if (!inode) {
- status = -EIO;
- LOG_ERROR_STATUS (status);
- goto leave;
- }
-
- osb->root_inode = inode;
-
- status = ocfs_init_system_inodes(osb);
- if (status < 0) {
- LOG_ERROR_STATUS(status);
- goto leave;
- }
-
/* Read the publish sector for this node and cleanup dirent being */
/* modified when we crashed. */
LOG_TRACE_STR ("ocfs_check_volume...");
@@ -1040,16 +1090,8 @@
osb->vol_state = VOLUME_MOUNTED;
leave:
- if (bhs[0] != NULL) {
- if (vol_header)
- OCFS_BH_PUT_DATA(bhs[0]);
- brelse(bhs[0]);
- }
- if (bhs[1] != NULL) {
- if (vol_label)
- OCFS_BH_PUT_DATA(bhs[1]);
- brelse(bhs[1]);
- }
+ if (bh != NULL)
+ brelse(bh);
LOG_EXIT_STATUS (status);
return status;
} /* ocfs_mount_volume */
@@ -1061,11 +1103,9 @@
*/
int ocfs_dismount_volume (struct super_block *sb)
{
- int status = 0;
+ int status;
int AcquiredOSB = 0;
ocfs_super *osb = NULL;
- __u32 nodemap;
- __u32 tempmap;
int i;
LOG_ENTRY_ARGS ("(0x%p)\n", sb);
@@ -1077,7 +1117,7 @@
osb = OCFS_SB(sb);
- if ( osb == NULL) {
+ if (osb == NULL) {
LOG_ERROR_STATUS (status = -EFAIL);
goto leave;
}
@@ -1135,13 +1175,9 @@
}
- /* create map of all active nodes except self */
- nodemap = (__u32)osb->publ_map;
- tempmap = (1 << osb->node_num);
- nodemap &= (~tempmap);
/* send dismount msg to all */
- status = ocfs_send_dismount_msg (osb, (__u64)nodemap);
+ status = ocfs_send_dismount_msg (osb);
if (status < 0)
LOG_ERROR_STATUS (status);
@@ -1175,7 +1211,7 @@
osb->node_cfg_info[osb->node_num]->node_name, osb->node_num);
/* Free all nodecfgs */
- for (i = 0; i < OCFS_MAXIMUM_NODES; ++i) {
+ for (i = 0; i < osb->max_nodes; ++i) {
BARF_BARF_BARF *p;
p = osb->node_cfg_info[i];
@@ -1212,18 +1248,17 @@
/* true if mounted, false otherwise */
int ocfs_publish_get_mount_state(ocfs_super *osb, int node_num)
{
- int status = 0;
- ocfs_publish *publish = NULL;
+ int status;
+ ocfs_publish *publish;
struct buffer_head *publish_bh = NULL;
- __u64 offset = 0;
+ __u64 offset;
int retval = 0;
int flags = 0;
LOG_ENTRY();
/* read it in */
- offset = osb->vol_layout.publ_sect_off +
- (node_num * osb->sect_size);
+ offset = (osb->publish_blkno + node_num) << osb->sb->s_blocksize_bits;
/* we may be called during mount in which case our publish
* sector might be dirty. */
if (node_num == osb->node_num)
@@ -1249,16 +1284,15 @@
static int ocfs_publish_toggle_mounted(ocfs_super *osb, int node_num, int value)
{
- int status = 0;
- ocfs_publish *publish = NULL;
+ int status;
+ ocfs_publish *publish;
struct buffer_head * publish_bh = NULL;
- __u64 offset = 0;
+ __u64 offset;
LOG_ENTRY_ARGS("(node_num=%d, value=%d)\n", node_num, value);
/* read it in */
- offset = osb->vol_layout.publ_sect_off +
- (node_num * osb->sect_size);
+ offset = (osb->publish_blkno + node_num) << osb->sb->s_blocksize_bits;
status = ocfs_read_bh(osb, offset, &publish_bh, 0, NULL);
if (status < 0) {
LOG_ERROR_STATUS (status);
@@ -1327,26 +1361,45 @@
return(retval);
}
-
/*
* ocfs_initialize_osb()
*
*/
-static int ocfs_initialize_osb(ocfs_super *osb, ocfs_vol_disk_hdr *vdh,
- ocfs_vol_label *vol_label,
- __u32 sect_size)
+static int ocfs_initialize_osb(ocfs_super *osb, struct buffer_head *bh)
{
int status = 0;
ocfs_publish *publish = NULL;
- __u32 bitmap_bits, length;
- __u64 offset;
- ocfs_vol_layout *vol_layout;
+ __u32 length;
+ __u64 offset, ret;
struct buffer_head *publish_bh = NULL; /* our own publish sector */
- struct buffer_head *publish_bhs[OCFS_MAXIMUM_NODES]; /* all the publish sectors */
+ struct buffer_head **publish_bhs = NULL; /* all the publish sectors */
int i;
+ ocfs2_dinode *di = NULL;
+ struct inode *inode = NULL;
LOG_ENTRY ();
+
+ osb->vol_label = kmalloc(64, GFP_KERNEL);
+ if (!osb->vol_label) {
+ LOG_ERROR_STR("unable to alloc vol label");
+ status = -ENOMEM;
+ goto done_nojournal;
+ }
+ osb->uuid = kmalloc(MAX_VOL_ID_LENGTH, GFP_KERNEL);
+ if (!osb->uuid) {
+ LOG_ERROR_STR("unable to alloc uuid");
+ status = -ENOMEM;
+ goto done_nojournal;
+ }
+
+
+ /* this needs to be done before most other initializations */
+ di = OCFS_BH_GET_DATA_READ(bh);
+ osb->max_nodes = le32_to_cpu(di->id2.i_super.s_max_nodes);
+ printk("max_nodes for this device: %u\n", osb->max_nodes);
+ OCFS_BH_PUT_DATA(bh);
+
/* FIXME
* This should be done in ocfs_journal_init(), but unknown
* ordering issues will cause the filesystem to crash.
@@ -1355,6 +1408,7 @@
* be my guest.
*/
/* initialize our journal structure */
+
osb->journal = kmalloc(sizeof(ocfs_journal), GFP_KERNEL);
if (!osb->journal) {
LOG_ERROR_STR("unable to alloc journal");
@@ -1363,17 +1417,46 @@
}
memset(osb->journal, 0, sizeof(ocfs_journal));
- memset(publish_bhs, 0, OCFS_MAXIMUM_NODES * sizeof (struct buffer_head *));
- if (osb == NULL) {
- LOG_ERROR_STATUS(status = -EFAIL);
+ publish_bhs = kmalloc(sizeof(struct buffer_head *) * osb->max_nodes, GFP_KERNEL);
+ if (publish_bhs == NULL) {
+ LOG_ERROR_STATUS(status = -ENOMEM);
goto finally;
}
+ memset(publish_bhs, 0, sizeof(struct buffer_head *) * osb->max_nodes);
+ osb->vol_node_map = kmalloc(sizeof(ocfs_vol_node_map) * osb->max_nodes, GFP_KERNEL);
+ if (!osb->vol_node_map) {
+ LOG_ERROR_STATUS(status = -ENOMEM);
+ goto bail;
+ }
+ memset(osb->vol_node_map, 0, sizeof(ocfs_vol_node_map) * osb->max_nodes);
+
+ osb->lock_recovery_lists = kmalloc(sizeof(struct list_head) * osb->max_nodes, GFP_KERNEL);
+ if (!osb->lock_recovery_lists) {
+ LOG_ERROR_STATUS(status = -ENOMEM);
+ goto bail;
+ }
+ memset(osb->lock_recovery_lists, 0, sizeof(struct list_head) * osb->max_nodes);
+
+ osb->last_publ_seq_num = kmalloc(sizeof(__u64) * osb->max_nodes, GFP_KERNEL);
+ if (!osb->last_publ_seq_num) {
+ LOG_ERROR_STATUS(status = -ENOMEM);
+ goto bail;
+ }
+ memset(osb->last_publ_seq_num, 0, sizeof(__u64) * osb->max_nodes);
+
+ osb->node_cfg_info = kmalloc(sizeof(BARF_BARF_BARF *) * osb->max_nodes, GFP_KERNEL);
+ if (!osb->node_cfg_info) {
+ LOG_ERROR_STATUS(status = -ENOMEM);
+ goto bail;
+ }
+ memset(osb->node_cfg_info, 0, sizeof(BARF_BARF_BARF *) * osb->max_nodes);
+
+ ocfs_node_map_init(osb, &osb->publ_map);
+
+
OCFS_CLEAR_FLAG (osb->osb_flags, OCFS_OSB_FLAGS_SHUTDOWN);
- vol_layout = &(osb->vol_layout);
-
- vol_layout->cluster_size = (__u32) (vdh->cluster_size);
osb->obj_id.type = OCFS_TYPE_OSB;
osb->obj_id.size = sizeof (ocfs_super);
INIT_LIST_HEAD (&(osb->osb_next));
@@ -1390,7 +1473,7 @@
init_MUTEX (&(osb->local_alloc_sem));
spin_lock_init(&osb->recovery_map_lock);
- osb->recovery_map = 0;
+ ocfs_node_map_init(osb, &osb->recovery_map);
osb->needs_flush = 0;
osb->disable_recovery = 0;
@@ -1413,89 +1496,162 @@
INIT_LIST_HEAD (&(osb->vote_obj_queue));
INIT_LIST_HEAD (&(osb->cache_lock_list));
INIT_LIST_HEAD (&(osb->needs_flush_head));
- for (i=0; i<32; i++) {
+ for (i=0; i<osb->max_nodes; i++) {
INIT_LIST_HEAD(&(osb->lock_recovery_lists[i]));
}
- osb->sect_size = sect_size;
-
osb->node_num = OCFS_INVALID_NODE_NUM;
- memcpy (vol_layout->mount_point, vdh->mount_point, strlen (vdh->mount_point));
- vol_layout->serial_num = vdh->serial_num;
- vol_layout->size = vdh->device_size;
- vol_layout->start_off = vdh->start_off;
- vol_layout->bitmap_off = vdh->bitmap_off;
- vol_layout->publ_sect_off = vdh->publ_off;
- vol_layout->vote_sect_off = vdh->vote_off;
- vol_layout->root_bitmap_off = vdh->root_bitmap_off;
- vol_layout->root_start_off = vdh->root_off;
- vol_layout->root_int_off = vdh->internal_off;
- vol_layout->root_size = vdh->root_size;
- vol_layout->cluster_size = (__u32) vdh->cluster_size;
- vol_layout->num_nodes = (__u32) vdh->num_nodes;
- vol_layout->data_start_off = vdh->data_start_off;
- vol_layout->root_bitmap_size = vdh->root_bitmap_size;
- vol_layout->num_clusters = vdh->num_clusters;
- vol_layout->dir_node_size = vdh->dir_node_size;
- vol_layout->file_node_size = vdh->file_node_size;
- vol_layout->node_cfg_off = vdh->node_cfg_off;
- vol_layout->node_cfg_size = vdh->node_cfg_size;
- vol_layout->new_cfg_off = vdh->new_cfg_off;
- vol_layout->prot_bits = vdh->prot_bits;
- vol_layout->uid = vdh->uid;
- vol_layout->gid = vdh->gid;
+ /* not using any of these sb fields yet */
+#if 0
+di->i_ctime = cpu_to_le64(format_time); // use this as s_wtime (write time)
+di->i_mtime = cpu_to_le64(format_time); // use this as s_mtime (mount time)
+di->i_blkno = cpu_to_le64(super_off >> blocksize_bits);
+di->id2.i_super.s_mnt_count = 0;
+di->id2.i_super.s_max_mnt_count = cpu_to_le16(OCFS2_DFL_MAX_MNT_COUNT);
+di->id2.i_super.s_state = 0;
+di->id2.i_super.s_errors = 0;
+di->id2.i_super.s_lastcheck = cpu_to_le64(format_time);
+di->id2.i_super.s_checkinterval = cpu_to_le32(OCFS2_DFL_CHECKINTERVAL);
+di->id2.i_super.s_creator_os = cpu_to_le32(OCFS2_OS_LINUX);
+#endif
- memcpy (vol_layout->vol_id, vol_label->vol_id, MAX_VOL_ID_LENGTH);
+ di = OCFS_BH_GET_DATA_READ(bh);
- if (vol_layout->dir_node_size == 0)
- vol_layout->dir_node_size = OCFS_DEFAULT_DIR_NODE_SIZE;
+ /* get some pseudo constants for clustersize bits */
+ osb->s_clustersize_bits = le32_to_cpu(di->id2.i_super.s_clustersize_bits);
+ osb->s_clustersize = 1 << osb->s_clustersize_bits;
+ printk("clusterbits=%d\n", osb->s_clustersize_bits);
+ OCFS_ASSERT(osb->s_clustersize_bits);
- if (vol_layout->file_node_size == 0)
- vol_layout->file_node_size = OCFS_DEFAULT_FILE_NODE_SIZE;
+ strncpy(osb->vol_label, di->id2.i_super.s_label, 63);
+ osb->vol_label[63] = '\0';
+ memcpy(osb->uuid, di->id2.i_super.s_uuid, MAX_VOL_ID_LENGTH);
+ osb->root_blkno = le64_to_cpu(di->id2.i_super.s_root_blkno);
+ osb->system_dir_blkno = le64_to_cpu(di->id2.i_super.s_system_dir_blkno);
+ printk("vol_label: %s\n", osb->vol_label);
+ {
+ int ttt;
+ printk("uuid: ");
+ for (ttt=0; ttt<16; ttt++)
+ printk("%02x ", di->id2.i_super.s_uuid[ttt]);
+ printk("\n");
+ }
+ printk("root_blkno=%llu, system_dir_blkno=%llu\n", osb->root_blkno, osb->system_dir_blkno);
- osb->inode_size = OCFS_DEFAULT_INODE_SIZE;
+ OCFS_BH_PUT_DATA(bh);
- /* get some pseudo constants for >> bits */
- osb->sect_size_bits = ocfs_get_right_shift_bits(sect_size);
- osb->cluster_size_bits = ocfs_get_right_shift_bits(vol_layout->cluster_size);
- osb->dir_alloc_bits = ocfs_get_right_shift_bits(vol_layout->dir_node_size);
- osb->file_alloc_bits = ocfs_get_right_shift_bits(vol_layout->file_node_size);
- osb->inode_alloc_bits = ocfs_get_right_shift_bits(osb->inode_size);
- printk("sectbits=%d, clusterbits=%d, dirbits=%d, filebits=%d, inodebits=%d\n",
- osb->sect_size_bits, osb->cluster_size_bits,
- osb->dir_alloc_bits, osb->file_alloc_bits, osb->inode_alloc_bits);
+ /* load root, system_dir, and all global system inodes */
+ status = ocfs_init_global_system_inodes(osb);
+ if (status < 0) {
+ LOG_ERROR_STATUS(status);
+ goto bail;
+ }
+
+ /*
+ * an inode for the whole DLM area
+ * autoconfig, new autoconfig, publish, vote
+ */
+ inode = ocfs_get_system_file_inode(osb, DLM_SYSTEM_INODE, -1);
+ if (!inode) {
+ LOG_ERROR_STATUS(status = -EINVAL);
+ goto bail;
+ }
+ /* We're in the mount path, pretend locked=1 */
+ status = ocfs_lookup_file_allocation(osb, 0ULL, &ret, 1, NULL,
+ inode, 1);
+ if (status < 0) {
+ LOG_ERROR_STATUS(status);
+ goto bail;
+ }
+ // i_size must be at least
+ // (2 + osb->max_nodes + 4) + osb->max_nodes + osb->max_nodes
+ if (inode->i_size >> osb->sb->s_blocksize_bits <
+ (OCFS_VOLCFG_HDR_SECTORS + osb->max_nodes) + // autoconfig
+ OCFS_VOLCFG_NEWCFG_SECTORS + // new autoconfig
+ osb->max_nodes + // publish
+ osb->max_nodes ) { // vote
+ LOG_ERROR_ARGS("dlm area size incorrect: "
+ "found=%llu, need=%u\n",
+ inode->i_size,
+ (OCFS_VOLCFG_HDR_SECTORS +
+ OCFS_VOLCFG_NEWCFG_SECTORS +
+ (osb->max_nodes*3)) <<
+ osb->sb->s_blocksize_bits);
+ status = -EINVAL;
+ goto bail;
+
+ }
+ osb->autoconfig_blkno = ret >> osb->sb->s_blocksize_bits;
+ osb->autoconfig_blocks = OCFS_VOLCFG_HDR_SECTORS + osb->max_nodes;
- OCFS_ASSERT(osb->sect_size_bits);
- OCFS_ASSERT(osb->cluster_size_bits);
- OCFS_ASSERT(osb->dir_alloc_bits);
- OCFS_ASSERT(osb->file_alloc_bits);
- OCFS_ASSERT(osb->inode_alloc_bits);
+ osb->new_autoconfig_blkno = osb->autoconfig_blkno + osb->autoconfig_blocks;
+ osb->new_autoconfig_blocks = OCFS_VOLCFG_NEWCFG_SECTORS;
+ osb->total_autoconfig_blocks = OCFS_VOLCFG_NEWCFG_SECTORS + osb->max_nodes;
+
+ osb->publish_blkno = osb->new_autoconfig_blkno + osb->new_autoconfig_blocks;
+ osb->publish_blocks = osb->max_nodes;
+
+ osb->vote_blkno = osb->publish_blkno + osb->publish_blocks;
+ osb->vote_blocks = osb->max_nodes;
+ printk("autoconfig: blkno=%llu, blocks=%llu newblkno=%llu newblocks=%llu\n",
+ osb->autoconfig_blkno, osb->autoconfig_blocks,
+ osb->new_autoconfig_blkno, osb->new_autoconfig_blocks);
+ printk("publish: blkno=%llu, blocks=%llu\n", osb->publish_blkno,
+ osb->publish_blocks);
+ printk("vote: blkno=%llu, blocks=%llu\n", osb->vote_blkno, osb->vote_blocks);
- osb->max_dir_node_ent = (__u32) (vol_layout->dir_node_size >> osb->sect_size_bits) - 2;
- bitmap_bits = (__u32) vol_layout->num_clusters;
+ osb->autoconfig_bhs = ocfs_malloc (osb->total_autoconfig_blocks
+ * sizeof(struct buffer_head *));
+ if (!osb->autoconfig_bhs) {
+ LOG_ERROR_STATUS (status = -ENOMEM);
+ goto bail;
+ }
+ memset(osb->autoconfig_bhs, 0,
+ osb->total_autoconfig_blocks * sizeof(struct buffer_head *));
- ocfs_initialize_bitmap (&osb->cluster_bitmap, bitmap_bits, ONE_MEGA_BYTE * 8);
+ iput(inode);
+
+
+
+ /*
+ * global bitmap
+ */
+ inode = ocfs_get_system_file_inode(osb, GLOBAL_BITMAP_SYSTEM_INODE, -1);
+ if (!inode) {
+ LOG_ERROR_STATUS(status = -EINVAL);
+ goto bail;
+ }
+ /* We're in the mount path, pretend locked=1 */
+ status = ocfs_lookup_file_allocation(osb, 0ULL, &ret, 1, NULL,
+ inode, 1);
+ if (status < 0) {
+ LOG_ERROR_STATUS(status);
+ goto bail;
+ }
+ /* for now, just one extent... but in the future... */
+ osb->bitmap_blkno = ret >> osb->sb->s_blocksize_bits;
+ osb->bitmap_blocks = OCFS_I(inode)->alloc_size >> osb->sb->s_blocksize_bits;
+ osb->num_clusters = OCFS_I(inode)->u.ip_bitinfo.total_bits;
+ printk("bitmap_blkno=%llu, bitmap_blocks=%llu, num_clusters=%llu\n",
+ osb->bitmap_blkno, osb->bitmap_blocks, osb->num_clusters);
+
+ ocfs_initialize_bitmap(osb->sb, &osb->cluster_bitmap,
+ osb->num_clusters,
+ OCFS_I(inode)->alloc_size << 3);
/* read the whole cluster bitmap off disk, even though we only
* need the beginning of it. */
- status = ocfs_read_bhs(osb, vol_layout->bitmap_off, ONE_MEGA_BYTE, osb->cluster_bitmap.chunk, 0, NULL);
+ status = ocfs_read_bhs(osb, ret, OCFS_I(inode)->alloc_size,
+ osb->cluster_bitmap.chunk, 0, inode);
if (status < 0) {
LOG_ERROR_STATUS (status);
goto bail;
}
+ iput(inode);
+
osb->prealloc_lock = 0;
- osb->cfg_numblocks = OCFS_MAXIMUM_NODES + OCFS_VOLCFG_NEWCFG_SECTORS;
- osb->cfg_len = osb->cfg_numblocks * osb->sect_size;
- osb->cfg_bhs = ocfs_malloc (osb->cfg_numblocks
- * sizeof(struct buffer_head *));
- if (!osb->cfg_bhs) {
- LOG_ERROR_STATUS (status = -ENOMEM);
- goto bail;
- }
- memset(osb->cfg_bhs, 0,
- osb->cfg_numblocks * sizeof(struct buffer_head *));
status = ocfs_get_config (osb);
if (status < 0) {
@@ -1504,7 +1660,7 @@
}
/* Read the Publish Sector of local Node */
- offset = vol_layout->publ_sect_off + (osb->node_num * osb->sect_size);
+ offset = (osb->publish_blkno + osb->node_num) << osb->sb->s_blocksize_bits;
status = ocfs_read_bh(osb, offset, &publish_bh, 0, NULL);
if (status < 0) {
LOG_ERROR_STATUS (status);
@@ -1512,7 +1668,14 @@
}
publish = OCFS_BH_GET_DATA_WRITE(publish_bh);
- publish->time = ocfs_get_publish_time();
+ /*
+ * FIXME: This really ought to be something exported by the
+ * identical code in heartbeat.c
+ */
+ publish->time = jiffies;
+ /* Disallow 0 */
+ if (!publish->time)
+ publish->time = 1;
OCFS_BH_PUT_DATA(publish_bh);
publish = NULL;
@@ -1525,19 +1688,27 @@
brelse(publish_bh);
/* Read disk for all Publish Sectors */
- length = OCFS_MAXIMUM_NODES * osb->sect_size;
- status = ocfs_read_bhs(osb, vol_layout->publ_sect_off, length,
- publish_bhs, 0, NULL);
+ length = osb->max_nodes << osb->sb->s_blocksize_bits;
+ offset = osb->publish_blkno << osb->sb->s_blocksize_bits;
+ status = ocfs_read_bhs(osb, offset, length, publish_bhs, 0, NULL);
if (status < 0) {
LOG_ERROR_STATUS (status);
goto finally;
}
- ocfs_update_publish_map (osb, publish_bhs, 1);
+ ocfs_update_publish_map(osb, publish_bhs, 1);
- for(i = 0; i < OCFS_MAXIMUM_NODES; i++)
+ for(i = 0; i < osb->max_nodes; i++)
osb->last_publ_seq_num[i] = (__u64) (-1);
+
+ /* load all node-local system inodes */
+ status = ocfs_init_local_system_inodes(osb);
+ if (status < 0) {
+ LOG_ERROR_STATUS(status);
+ goto bail;
+ }
+
/* We might need to add a variable in Global List of osb to */
/* delay any creation, if any other node is already creating a file */
@@ -1567,9 +1738,16 @@
goto finally;
bail:
- if (osb->cfg_bhs)
- kfree(osb->cfg_bhs);
-
+ if (osb->autoconfig_bhs)
+ kfree(osb->autoconfig_bhs);
+ if (osb->vol_node_map)
+ kfree(osb->vol_node_map);
+ if (osb->lock_recovery_lists)
+ kfree(osb->lock_recovery_lists);
+ if (osb->last_publ_seq_num)
+ kfree(osb->last_publ_seq_num);
+ if (osb->node_cfg_info)
+ kfree(osb->node_cfg_info);
finally:
if (publish) {
if (publish_bh) {
@@ -1579,7 +1757,7 @@
}
if (publish_bhs[0]) {
int i;
- for(i = 0; i < OCFS_MAXIMUM_NODES; i++)
+ for(i = 0; i < osb->max_nodes; i++)
if (publish_bhs[i])
brelse(publish_bhs[i]);
}
@@ -1591,53 +1769,59 @@
/*
* ocfs_verify_volume()
- *
+ * Decide whether the dinode 'di' is a usable OCFS2 superblock.
+ *
+ * di: candidate superblock dinode (on-disk, little-endian fields)
+ * bh: buffer head the candidate was read from; bh->b_blocknr must
+ * equal the block number the superblock records in di->i_blkno
+ * sectsize: block size the candidate must declare, i.e. it must equal
+ * 1 << s_blocksize_bits
+ *
+ * will return: -EAGAIN if it is ok to keep searching for superblocks
+ * -EINVAL if there is a bad superblock
+ * 0 on success
*/
-static int ocfs_verify_volume(ocfs_vol_disk_hdr *vdh)
+static int ocfs_verify_volume(ocfs2_dinode *di, struct buffer_head *bh,
+ __u32 sectsize)
{
- int status = 0;
+ int status = -EAGAIN; /* stays -EAGAIN when the signature does not match */
LOG_ENTRY ();
- if (vdh == NULL) {
- LOG_ERROR_STATUS (status = -EFAIL);
- goto bail;
- }
-
- /* Compare the Signature with the one we read from disk */
- if (memcmp (vdh->signature, OCFS_VOLUME_SIGNATURE,
- strlen (OCFS_VOLUME_SIGNATURE)) != 0) {
- LOG_ERROR_STR ("Invalid volume signature");
+ if (memcmp(di->i_signature, OCFS2_SUPER_BLOCK_SIGNATURE,
+ strlen(OCFS2_SUPER_BLOCK_SIGNATURE)) == 0) { /* signature matched: any failed check below leaves -EINVAL */
status = -EINVAL;
- goto bail;
+ if ((1 << le32_to_cpu(di->id2.i_super.s_blocksize_bits)) != sectsize) { /* NOTE(review): shift count is read from disk and is not range-checked here */
+ LOG_ERROR_ARGS("found superblock with incorrect sectorsize: "
+ "found %u, should be %u\n",
+ 1 << le32_to_cpu(di->id2.i_super.s_blocksize_bits),
+ sectsize);
+ } else if (le16_to_cpu(di->id2.i_super.s_major_rev_level) !=
+ OCFS2_MAJOR_REV_LEVEL ||
+ le16_to_cpu(di->id2.i_super.s_minor_rev_level) !=
+ OCFS2_MINOR_REV_LEVEL) { /* both major and minor revision must match exactly */
+#warning dont know what is appropriate on minor rev difference and also need to check compat flags
+ LOG_ERROR_ARGS("found superblock with bad version: "
+ "found %u.%u, should be %u.%u\n",
+ le16_to_cpu(di->id2.i_super.s_major_rev_level),
+ le16_to_cpu(di->id2.i_super.s_minor_rev_level),
+ OCFS2_MAJOR_REV_LEVEL,
+ OCFS2_MINOR_REV_LEVEL);
+ } else if (bh->b_blocknr != le64_to_cpu(di->i_blkno)) { /* block must sit where it says it does */
+ LOG_ERROR_ARGS("bad block number on superblock: "
+ "found %llu, should be %lu\n",
+ le64_to_cpu(di->i_blkno), bh->b_blocknr);
+ } else if (le32_to_cpu(di->id2.i_super.s_clustersize_bits) < 12 ||
+ le32_to_cpu(di->id2.i_super.s_clustersize_bits) > 20) { /* cluster size must be 1 << 12 .. 1 << 20 bytes */
+ LOG_ERROR_ARGS("bad cluster size found: %u\n",
+ 1 << le32_to_cpu(di->id2.i_super.s_clustersize_bits));
+ } else if (!le64_to_cpu(di->id2.i_super.s_root_blkno)) {
+ LOG_ERROR_STR("bad root_blkno: 0");
+ } else if (!le64_to_cpu(di->id2.i_super.s_system_dir_blkno)) {
+ LOG_ERROR_STR("bad system_dir_blkno: 0");
+ } else if (le32_to_cpu(di->id2.i_super.s_max_nodes) > OCFS2_MAX_NODES) { /* NOTE(review): only the upper bound is checked; s_max_nodes == 0 passes */
+ LOG_ERROR_ARGS("max_nodes found greater than maximum: "
+ "found %u, max %u\n",
+ le32_to_cpu(di->id2.i_super.s_max_nodes),
+ OCFS2_MAX_NODES);
+ } else {
+ /* found it! */
+ status = 0;
+ }
}
- /* Check the Volume Length and the ClusterSize. */
- if (vdh->device_size == 0) {
- LOG_ERROR_STR ("Device size cannot be zero");
- status = -EINVAL;
- goto bail;
- }
-
- if (vdh->cluster_size == 0) {
- LOG_ERROR_STR ("Cluster size cannot be zero");
- status = -EINVAL;
- goto bail;
- }
-
- if (vdh->major_version != OCFS_MAJOR_VERSION) {
- LOG_ERROR_ARGS ("Version number not compatible: %u.%u",
- vdh->major_version, vdh->minor_version);
- status = -EINVAL;
- goto bail;
- }
-
- if (vdh->root_off == 0) {
- LOG_ERROR_STR("No OCFS version 2 root directory found");
- status = -EINVAL;
- goto bail;
- }
-bail:
LOG_EXIT_STATUS (status);
return status;
} /* ocfs_verify_volume */
@@ -1658,8 +1842,7 @@
LOG_ENTRY ();
/* Read the node's publish sector */
- offset = osb->vol_layout.publ_sect_off +
- (osb->node_num * osb->sect_size);
+ offset = (osb->publish_blkno + osb->node_num) << osb->sb->s_blocksize_bits;
status = ocfs_read_bh(osb, offset, &publish_bh, 0, NULL);
if (status < 0) {
LOG_ERROR_STATUS (status);
@@ -1750,16 +1933,24 @@
list_del (&(osb->osb_next));
up (&(OcfsGlobalCtxt.global_res));
- for (i=0; i<32; i++)
+ for (i=0; i<osb->max_nodes; i++)
ocfs_recover_oin_locks(osb, i);
- if (osb->cfg_bhs) {
- for(i = 0; i < osb->cfg_numblocks; i++)
- if (osb->cfg_bhs[i])
- brelse(osb->cfg_bhs[i]);
- kfree(osb->cfg_bhs);
- }
+ for(i = 0; i < osb->total_autoconfig_blocks; i++)
+ if (osb->autoconfig_bhs[i])
+ brelse(osb->autoconfig_bhs[i]);
+ if (osb->autoconfig_bhs)
+ kfree(osb->autoconfig_bhs);
+ if (osb->vol_node_map)
+ kfree(osb->vol_node_map);
+ if (osb->lock_recovery_lists)
+ kfree(osb->lock_recovery_lists);
+ if (osb->last_publ_seq_num)
+ kfree(osb->last_publ_seq_num);
+ if (osb->node_cfg_info)
+ kfree(osb->node_cfg_info);
+
/* FIXME
* This belongs in journal shutdown, but because we have to
* allocate osb->journal at the start of ocfs_initialize_osb(),
Added: trunk/src/super.h
===================================================================
--- trunk/src/super.h 2004-06-17 16:59:59 UTC (rev 1127)
+++ trunk/src/super.h 2004-06-17 21:06:33 UTC (rev 1128)
@@ -0,0 +1,36 @@
+/* -*- mode: c; c-basic-offset: 8; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
+ * super.h
+ *
+ * Function prototypes
+ *
+ * Copyright (C) 2002, 2004 Oracle. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ *
+ * Authors: Kurt Hackel, Mark Fasheh, Sunil Mushran, Wim Coekaerts,
+ * Manish Singh, Neeraj Goyal, Suchit Kaura
+ */
+
+#ifndef OCFS2_SUPER_H
+#define OCFS2_SUPER_H
+
+void ocfs_delete_osb(ocfs_super *osb);
+int ocfs_dismount_volume(struct super_block *sb);
+int ocfs_publish_get_mount_state(ocfs_super *osb, int node_num);
+
+#endif /* OCFS2_SUPER_H */
Modified: trunk/src/symlink.c
===================================================================
--- trunk/src/symlink.c 2004-06-17 16:59:59 UTC (rev 1127)
+++ trunk/src/symlink.c 2004-06-17 21:06:33 UTC (rev 1128)
@@ -1,4 +1,6 @@
-/*
+/* -*- mode: c; c-basic-offset: 8; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
* linux/cluster/ssi/cfs/symlink.c
*
* This program is free software; you can redistribute it and/or
@@ -30,12 +32,19 @@
* for CDSL support
*/
+#include "ocfs_compat.h"
+
+#include <linux/fs.h>
#include <linux/types.h>
-
-#include "inc/ocfs_log.h"
-#include "inc/ocfs.h"
+#include <linux/slab.h>
+#include <linux/pagemap.h>
#include <linux/utsname.h>
+#include "ocfs_log.h"
+#include "ocfs.h"
+
+#include "symlink.h"
+
#define OCFS_DEBUG_CONTEXT OCFS_DEBUG_CONTEXT_SYMLINK
struct ocfs_symlink_ops {
@@ -157,15 +166,8 @@
NULL);
if (IS_ERR(page))
goto sync_fail;
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
wait_on_page_locked(page);
if (!PageUptodate(page))
-
-#else
- wait_on_page(page);
- if (!Page_Uptodate(page))
-
-#endif
goto async_fail;
*ppage = page;
return kmap(page);
Added: trunk/src/symlink.h
===================================================================
--- trunk/src/symlink.h 2004-06-17 16:59:59 UTC (rev 1127)
+++ trunk/src/symlink.h 2004-06-17 21:06:33 UTC (rev 1128)
@@ -0,0 +1,34 @@
+/* -*- mode: c; c-basic-offset: 8; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
+ * symlink.h
+ *
+ * Function prototypes
+ *
+ * Copyright (C) 2002, 2004 Oracle. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ *
+ * Authors: Kurt Hackel, Mark Fasheh, Sunil Mushran, Wim Coekaerts,
+ * Manish Singh, Neeraj Goyal, Suchit Kaura
+ */
+
+#ifndef OCFS2_SYMLINK_H
+#define OCFS2_SYMLINK_H
+
+int ocfs_follow_link(struct dentry *dentry, struct nameidata *nd);
+
+#endif /* OCFS2_SYMLINK_H */
Modified: trunk/src/sysfile.c
===================================================================
--- trunk/src/sysfile.c 2004-06-17 16:59:59 UTC (rev 1127)
+++ trunk/src/sysfile.c 2004-06-17 21:06:33 UTC (rev 1128)
@@ -1,4 +1,6 @@
-/*
+/* -*- mode: c; c-basic-offset: 8; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
* sysfile.c
*
* Initialize, read, write, etc. system files.
@@ -24,608 +26,166 @@
* Manish Singh, Neeraj Goyal, Suchit Kaura
*/
-#ifdef __KERNEL__
+#include "ocfs_compat.h"
+
+#include <linux/fs.h>
#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/highmem.h>
-#include "inc/ocfs_log.h"
-#include "inc/ocfs.h"
-#include "inc/ocfs_journal.h"
-#else
-#include <dummy.h>
-#include <ocfsmalloc.h>
-#endif
+#include "ocfs_log.h"
+#include "ocfs.h"
+#include "alloc.h"
+#include "dir.h"
+#include "inode.h"
+#include "sysfile.h"
+#include "util.h"
+
+#include "ocfs_journal.h"
+#include "ocfs_buffer_head.h"
+
/* Tracing */
#define OCFS_DEBUG_CONTEXT OCFS_DEBUG_CONTEXT_SYSFILE
-static int ocfs_find_extents_of_system_file (ocfs_super * osb, __u64 file_off, __u64 Length, ocfs_file_entry * fe, void **Buffer, __u32 * NumEntries);
+static struct inode * _ocfs_get_system_file_inode(ocfs_super *osb, int type, __u32 node);
+static inline int is_global_system_inode(int type);
+static inline int is_in_system_inode_array(ocfs_super *osb, int type, __u32 node);
-/*
- * ocfs_read_system_file()
- *
- * make sure that the bhs array is either all NULL'd out or you really
- * know what you're doing! Also, those bh's will have to be brelse'd
- * after you're done with them!
- *
- */
-int ocfs_read_system_file (ocfs_super * osb, __u32 FileId, struct buffer_head *bhs[], __u64 Length, __u64 Offset)
+static inline int is_global_system_inode(int type) /* nonzero iff type is a global (not per-node) system inode type */
{
- int status = 0;
- ocfs_file_entry *fe = NULL;
- void *extentBuffer = NULL;
- __u32 numExts = 0, i, j, numblocks;
- ocfs_io_runs *IoRuns = NULL;
- __u64 tmplen;
- __u64 tmpoff = 0;
- struct buffer_head * fe_bh = NULL;
+ return (type >= GLOBAL_BITMAP_SYSTEM_INODE && /* global types are the inclusive range */
+ type <= ORPHAN_DIR_SYSTEM_INODE); /* GLOBAL_BITMAP_SYSTEM_INODE .. ORPHAN_DIR_SYSTEM_INODE */
+}
- LOG_ENTRY_ARGS ("(FileId = %u, offset = (%llu), "
- "Length = (%llu))\n", FileId, Offset, Length);
+/*
+ * is_in_system_inode_array()
+ *
+ * Nonzero when the system inode identified by (type, node) is either a
+ * global system inode or one that belongs to this node, i.e. when
+ * node == osb->node_num.
+ */
+static inline int is_in_system_inode_array(ocfs_super *osb, int type, __u32 node)
+{
+ /* global system inodes qualify no matter which node was asked for */
+ if (is_global_system_inode(type))
+ return 1;
+
+ /* otherwise only this node's own system inodes qualify */
+ return node == osb->node_num;
+}
- status = ocfs_read_bh(osb, (FileId * osb->sect_size) +
- osb->vol_layout.root_int_off, &fe_bh, OCFS_BH_CACHED, NULL);
- if (status < 0) {
- LOG_ERROR_STATUS (status);
- goto leave;
- }
-
- fe = OCFS_BH_GET_DATA_READ(fe_bh);
-
- if (!IS_VALID_FILE_ENTRY (fe)) {
- LOG_ERROR_STATUS(status = -EINVAL);
- goto leave;
- }
-
- status = ocfs_find_extents_of_system_file (osb, Offset, Length,
- fe, &extentBuffer, &numExts);
- if (status < 0) {
- LOG_ERROR_STATUS (status);
- goto leave;
- }
-
- OCFS_ASSERT (extentBuffer);
-
- if (numExts < 1) {
- LOG_ERROR_ARGS("numExts < 1 (= %d)\n", numExts);
- goto leave;
- }
- IoRuns = extentBuffer;
- tmpoff = 0;
- tmplen = 0;
- j = 0;
- numblocks = 0;
-
- for (i = 0; i < numExts; i++) {
- //tempBuffer += tmplen;
- /* ?? need to align both the length and buffer and also */
- /* offset ( atleast the starting one) */
- tmpoff = IoRuns[i].disk_off;
- tmplen = IoRuns[i].byte_cnt;
-
- status = ocfs_read_bhs(osb, tmpoff, tmplen, &(bhs[j]), OCFS_BH_CACHED, NULL);
- if (status < 0) {
- LOG_ERROR_STATUS (status);
- goto leave;
- }
-
- numblocks = tmplen >> osb->sect_size_bits;
-
- j += numblocks;
- }
-
-leave:
- if (fe) {
- OCFS_BH_PUT_DATA(fe_bh);
- brelse(fe_bh);
- }
-
- if (extentBuffer)
- vfree(extentBuffer);
-
- LOG_EXIT_STATUS (status);
- return status;
-} /* ocfs_read_system_file */
-
-/*
- * ocfs_file_to_disk_off()
- *
- */
-__u64 ocfs_file_to_disk_off (ocfs_super * osb, __u32 FileId, __u64 Offset)
+/*
+ * ocfs_get_system_file_inode()
+ *
+ * Return the inode of system file 'type' for node 'node', or NULL when
+ * the on-disk lookup fails. Inodes accepted by
+ * is_in_system_inode_array() are cached in osb->system_inodes[type];
+ * the cache holds its own reference, and every successful call hands
+ * the caller one more.
+ */
+struct inode *ocfs_get_system_file_inode(ocfs_super *osb, int type,
+ __u32 node)
{
- int status = 0;
- __u64 StartOffset = 0;
- void *Buffer = NULL;
- ocfs_file_entry *fe = NULL;
- ocfs_io_runs *IoRuns;
- __u32 NumExts = 0;
- struct buffer_head *fe_bh = NULL;
+ struct inode *inode = NULL;
+ struct inode **arr = NULL;
- LOG_ENTRY_ARGS ("(FileId = %u)\n", FileId);
+ /* avoid the lookup if cached in local system file array */
+ if (is_in_system_inode_array(osb, type, node))
+ arr = &(osb->system_inodes[type]);
- /* Read the File Entry corresponding to File Id */
- status = ocfs_read_bh (osb, (FileId * osb->sect_size) +
- osb->vol_layout.root_int_off, &fe_bh, OCFS_BH_CACHED, NULL);
- if (status < 0) {
- LOG_ERROR_STATUS (status);
- goto leave;
+ if (arr && (inode = *arr) != NULL) {
+ /* get a ref in addition to the array ref */
+ igrab(inode);
+ return inode;
}
+
+ /* this gets one ref thru iget; it is NULL if the lookup failed */
+ inode = _ocfs_get_system_file_inode(osb, type, node);
- fe = OCFS_BH_GET_DATA_READ(fe_bh);
+ /*
+ * add one more if putting into array for first time.
+ * igrab() dereferences its argument, so a failed lookup must
+ * not be cached: leave the slot empty and just return NULL.
+ */
+ if (arr && inode)
+ *arr = igrab(inode);
+ return inode;
+}
- if (!IS_VALID_FILE_ENTRY (fe)) {
- LOG_ERROR_STATUS(status = -EINVAL);
- goto leave;
- }
-
- status = ocfs_find_extents_of_system_file (osb, Offset, osb->sect_size,
- fe, &Buffer, &NumExts);
- if (status < 0) {
- LOG_ERROR_STATUS (status);
- goto leave;
- }
-
- OCFS_ASSERT (Buffer);
-
- IoRuns = Buffer;
- /* Return the disk offset of first run . */
- StartOffset = (IoRuns[0].disk_off);
-
-leave:
- if (fe_bh) {
- OCFS_BH_PUT_DATA(fe_bh);
- brelse(fe_bh);
- }
-
- if (Buffer)
- vfree(Buffer);
-
- LOG_EXIT_ARGS ("%llu", StartOffset);
- return StartOffset;
-} /* ocfs_file_to_disk_off */
-
-
-/*
- * ocfs_get_system_file_size()
- *
- */
-int ocfs_get_system_file_size (ocfs_super * osb, __u32 FileId, __u64 * Length, __u64 * AllocSize)
+static struct inode * _ocfs_get_system_file_inode(ocfs_super *osb, int type, __u32 node) /* uncached lookup; returns NULL on any failure */
{
+ char namebuf[40];
+ struct inode *inode = NULL; /* stays NULL unless ocfs_iget() succeeds */
+ __u64 fe_off = 0;
+ struct buffer_head *dirent_bh = NULL;
+ struct ocfs2_dir_entry *de = NULL;
int status = 0;
- ocfs_file_entry *fe = NULL;
- __u64 offset;
- struct buffer_head *fe_bh = NULL;
- LOG_ENTRY_ARGS ("(FileId = %u)\n", FileId);
-
- *AllocSize = *Length = 0;
-
- offset = (FileId * osb->sect_size) + osb->vol_layout.root_int_off;
-
- status = ocfs_read_bh (osb, offset, &fe_bh, 0, NULL);
+ ocfs2_sprintf_system_inode_name(namebuf,
+ sizeof(namebuf),
+ type, node); /* presumably formats the directory name for (type, node) - confirm against the helper */
+
+ status = ocfs_find_files_on_disk(osb, namebuf, strlen(namebuf),
+ &fe_off, osb->sys_root_inode,
+ 0, &dirent_bh, &de); /* search osb->sys_root_inode for that name; fe_off is handed to ocfs_iget() below */
if (status < 0) {
- LOG_ERROR_STATUS (status);
- goto leave;
+ goto bail; /* lookup failed: no error is logged here and inode is still NULL */
}
- fe = OCFS_BH_GET_DATA_READ(fe_bh);
-
- if (!IS_VALID_FILE_ENTRY (fe)) {
- LOG_ERROR_ARGS("offset=%llu", offset);
- status = -EINVAL;
- goto leave;
+ inode = ocfs_iget(osb, fe_off);
+ if (!inode) {
+ LOG_ERROR_STR("Could not create inode!");
+ goto bail;
}
+bail:
+ if (dirent_bh)
+ brelse(dirent_bh); /* reached on every exit path, success included */
+ return inode;
+}
- *Length = fe->file_size;
- *AllocSize = fe->alloc_size;
-leave:
- if (fe) {
- OCFS_BH_PUT_DATA(fe_bh);
- brelse(fe_bh);
- }
-
- LOG_EXIT_STATUS (status);
- return status;
-} /* ocfs_get_system_file_size */
-
-#ifdef __KERNEL__
/*
- * ocfs_extend_system_file()
- *
- * fe_bh can be null and we'll just read it off disk.
- * of course, if you've already read it off disk, then give us fe_bh to avoid
- * an extra read. We always do the write out of the new fe.
+ * ocfs_read_system_file()
*
- * You need to be holding node_alloc_sem!
+ * make sure that the bhs array is either all NULL'd out or you really
+ * know what you're doing! Also, those bh's will have to be brelse'd
+ * after you're done with them!
+ *
*/
-int ocfs_extend_system_file (ocfs_super * osb, __u32 FileId, __u64 FileSize, struct buffer_head *fe_bh, ocfs_journal_handle *handle, int zero)
+int ocfs_read_system_file(ocfs_super *osb, int type, __u32 node,
+ struct buffer_head *bhs[], __u64 Length)
{
- int status = 0;
- __u64 actualDiskOffset = 0, actualLength = 0;
- int local_fe = 0;
- ocfs_file_entry *fe = NULL;
- __u64 alloc_size;
- int numbhs, i;
- char *data;
- struct buffer_head **bhs;
- struct inode *ext_alloc_inode = NULL;
+ int status = 0, i;
+ __u64 ret = 0;
+ struct inode *inode = NULL;
+ __u32 blocks;
+ __u64 off, contig_blocks, contig_bytes;
- LOG_ENTRY_ARGS ("(FileId = %u, Size = %llu)\n", FileId, FileSize);
+ LOG_ENTRY_ARGS ("type=%d, node=%u, Length = %llu\n", type, node, Length);
- OCFS_ASSERT (osb);
-
- if (!fe_bh) {
- local_fe = 1;
- status = ocfs_read_bh(osb, (FileId * osb->sect_size) + osb->vol_layout.root_int_off, &fe_bh, OCFS_BH_COND_CACHED, NULL);
- if (status < 0) {
- LOG_ERROR_STATUS (status);
- goto leave;
- }
- }
- fe = OCFS_BH_GET_DATA_READ(fe_bh);
-
- if (!IS_VALID_FILE_ENTRY (fe)) {
- OCFS_BH_PUT_DATA(fe_bh);
- LOG_ERROR_STATUS (status = -EINVAL);
+ inode = ocfs_get_system_file_inode(osb, type, node);
+ if (!inode) {
+ LOG_ERROR_STATUS(status=-EINVAL);
goto leave;
}
- alloc_size = fe->alloc_size;
- OCFS_BH_PUT_DATA(fe_bh);
- fe = NULL;
-
- status = ocfs_journal_access(handle, fe_bh,
- OCFS_JOURNAL_ACCESS_WRITE);
- if (status < 0) {
- LOG_ERROR_STATUS (status);
- goto leave;
- }
-
- if (FileSize > alloc_size) {
- /* We need to allocate from bitmap */
- __u64 numClusterAlloc = 0, BitmapOffset = 0;
-
- status =
- ocfs_find_contiguous_space_from_bitmap (osb, handle,
- FileSize - alloc_size,
- &BitmapOffset,
- &numClusterAlloc, 1,
- NULL, NULL);
- if (status < 0) {
- LOG_ERROR_STATUS (status);
+
+ off = 0;
+ i = 0;
+ blocks = Length >> osb->sb->s_blocksize_bits;
+ while (blocks > 0) {
+ status = ocfs_lookup_file_allocation(osb, off, &ret,
+ blocks,
+ &contig_bytes,
+ inode, 1);
+ contig_blocks =
+ contig_bytes >> osb->sb->s_blocksize_bits;
+ if (contig_bytes !=
+ (contig_blocks << osb->sb->s_blocksize_bits)) {
+ LOG_ERROR_ARGS("unaligned system file read! off=%llu, "
+ "contig_bytes=%llu\n", off, contig_bytes);
+ LOG_ERROR_STATUS(status=-EINVAL);
goto leave;
}
-
- actualDiskOffset =
- (BitmapOffset * osb->vol_layout.cluster_size) +
- osb->vol_layout.data_start_off;
- actualLength = numClusterAlloc * osb->vol_layout.cluster_size;
-
- ext_alloc_inode = igrab(osb->system_inodes[FILE_ALLOC_BITMAP_SYSTEM_INODE]);
- if (!ext_alloc_inode) {
- status = -EFAIL;
- LOG_ERROR_STATUS(status);
+ if (status == 0) {
+ // found all remaining
+ } else if (status == -EFAIL && contig_blocks > 0) {
+ // found some
+ } else {
+ // failed
+ LOG_ERROR_STATUS(status = -EIO);
goto leave;
}
-
- ocfs_handle_add_inode(handle, ext_alloc_inode);
- status = ocfs_allocate_extent (osb, fe_bh, handle,
- actualDiskOffset, actualLength, NULL);
+ status = ocfs_read_bhs(osb, ret, contig_bytes, &(bhs[i]), OCFS_BH_CACHED, inode);
if (status < 0) {
LOG_ERROR_STATUS (status);
goto leave;
}
-
- if (zero) {
- /* I think at this point, this can be journalled too */
- numbhs = actualLength >> osb->sect_size_bits;
-
- bhs = ocfs_malloc(numbhs * sizeof(struct buffer_head *));
- if (!bhs) {
- status = -ENOMEM;
- LOG_ERROR_STATUS(status);
- goto leave;
- }
- memset(bhs, 0, numbhs * sizeof(struct buffer_head *));
-
- status = ocfs_read_bhs(osb, actualDiskOffset,
- actualLength, bhs, 0, NULL);
- if (status < 0) {
- ocfs_free(bhs);
- LOG_ERROR_STATUS(status);
- goto leave;
- }
-
- for(i = 0; i < numbhs; i++) {
- data = OCFS_BH_GET_DATA_WRITE(bhs[i]);
- memset(data, 0, osb->sect_size);
- OCFS_BH_PUT_DATA(bhs[i]);
- }
-
- status = ocfs_write_bhs(osb, bhs, numbhs, 0, NULL);
- for(i = 0; i < numbhs; i++)
- brelse(bhs[i]);
- ocfs_free(bhs);
- if (status < 0) {
- LOG_ERROR_STATUS(status);
- goto leave;
- }
- }
-
- atomic_inc (&osb->alloc_stats.bitmap_meta);
- } else
- actualLength = 0;
-
- fe = OCFS_BH_GET_DATA_WRITE(fe_bh);
- fe->alloc_size += actualLength;
- fe->file_size = FileSize;
-
- DISK_LOCK_CURRENT_MASTER (fe) = osb->node_num;
- DISK_LOCK_FILE_LOCK (fe) = OCFS_DLM_ENABLE_CACHE_LOCK;
-
- OCFS_BH_PUT_DATA(fe_bh);
- fe = NULL;
-
- status = ocfs_journal_dirty(handle, fe_bh);
- if (status < 0)
- LOG_ERROR_STATUS (status);
-
-leave:
- if (fe)
- OCFS_BH_PUT_DATA(fe_bh);
- if (local_fe)
- brelse(fe_bh);
- if (ext_alloc_inode)
- iput(ext_alloc_inode);
-
- LOG_EXIT_STATUS (status);
- return status;
-} /* ocfs_extend_system_file */
-
-#endif /* __KERNEL__ */
-
-/*
- * ocfs_find_extents_of_system_file()
- *
- * Searches for the extents in the file entry passed starting from
- * file offset up to the length specified.
- */
-static int ocfs_find_extents_of_system_file (ocfs_super * osb, __u64 file_off, __u64 Length, ocfs_file_entry * fe, void **Buffer, __u32 * NumEntries)
-{
- int status = -EFAIL;
- __u32 size;
- __u32 k = 0, j;
- __u32 Runs, Runoffset;
- struct buffer_head *extent_bh = NULL;
- ocfs_extent_group *extent = NULL;
- ocfs_io_runs *IoRuns;
- __u64 newOffset = 0, searchVbo, remainingLength = 0;
-
-
- LOG_ENTRY ();
-
- OCFS_ASSERT (osb);
-
- if (!IS_VALID_FILE_ENTRY (fe)) {
- LOG_ERROR_STATUS (status = -EFAIL);
- goto leave;
+ i += contig_blocks;
+ blocks -= contig_blocks;
+ off += contig_bytes;
}
- if (fe->local_ext)
- {
- size = OCFS_MAX_FILE_ENTRY_EXTENTS * sizeof (ocfs_io_runs);
- }
- else
- {
- int pow = fe->granularity + 1;
- /* extent tree looks like
- * fe[0] fe[1] fe[2]
- * hdr[0]...hdr[17] .....
- * dat[0]..dat[17]
- *
- * granularity of fe is tree height
- * so max runs (total of all leaves) is
- * 3 x 18 ^ (granularity+1)
- * (OCFS_MAX_DATA_EXTENTS = 18)
- *
- * g=0: 1296 bytes
- * g=1: 23328 bytes
- * g=2: 419904 bytes!
- */
- size = 3;
- while (pow)
- {
- size *= OCFS_MAX_DATA_EXTENTS;
- pow--;
- }
- size *= sizeof (ocfs_io_runs);
- }
- size = OCFS_SECTOR_ALIGN (size);
- IoRuns = vmalloc(size);
-
- if (IoRuns == NULL) {
- LOG_ERROR_STATUS (status = -ENOMEM);
- goto leave;
- }
-
- memset (IoRuns, 0, size);
-
- remainingLength = Length;
- Runs = 1;
- Runoffset = 0;
- newOffset = file_off;
-
- if (fe->local_ext) {
- for (j = 0; j < OCFS_MAX_FILE_ENTRY_EXTENTS; j++) {
- if ((fe->extents[j].file_off +
- fe->extents[j].num_bytes) > newOffset) {
- IoRuns[Runoffset].disk_off =
- fe->extents[j].disk_off +
- (newOffset - fe->extents[j].file_off);
- IoRuns[Runoffset].byte_cnt =
- (__u32) ((fe->extents[j].file_off +
- fe->extents[j].num_bytes) -
- newOffset);
- if (IoRuns[Runoffset].byte_cnt >=
- remainingLength) {
- IoRuns[Runoffset].byte_cnt =
- (__u32) remainingLength;
- status = 0;
- break;
- } else {
- newOffset += IoRuns[Runoffset].byte_cnt;
- remainingLength -=
- IoRuns[Runoffset].byte_cnt;
- Runs++;
- Runoffset++;
- }
- }
- }
-
- *NumEntries = Runs;
- *Buffer = IoRuns;
- goto leave;
- } else {
- /* Extents are branched and we are no longer using Local
- * Extents for this File Entry. */
-
- while (1) {
- /* Keep going downwards looking for the Entry, till
- * we hit the last Data entry */
- for (k = 0; k < OCFS_MAX_FILE_ENTRY_EXTENTS; k++) {
- if ((__s64) (fe->extents[k].file_off +
- fe->extents[k].num_bytes) >
- newOffset) {
- break;
- }
- }
-
- if (k == OCFS_MAX_FILE_ENTRY_EXTENTS) {
- LOG_ERROR_STR ("data extents maxed");
- }
-
- if (fe->extents[k].disk_off == 0) {
- LOG_ERROR_STR ("disk_off=0");
- }
-
- status = ocfs_read_bh(osb, fe->extents[k].disk_off,
- &extent_bh, OCFS_BH_COND_CACHED, NULL);
- if (status < 0) {
- LOG_ERROR_STATUS (status);
- goto leave;
- }
-
- extent = OCFS_BH_GET_DATA_READ(extent_bh);
- while (extent->type != OCFS_EXTENT_DATA) {
- __u64 diskoffset;
-
- if (!IS_VALID_EXTENT_HEADER (extent)) {
- LOG_ERROR_STATUS(status = -EFAIL);
- goto leave;
- }
-
- OCFS_GET_EXTENT ((__s64) newOffset, extent, k);
- if (k == OCFS_MAX_DATA_EXTENTS) {
- LOG_ERROR_STR ("data extents maxed");
- }
-
- if (extent->extents[k].disk_off == 0) {
- LOG_ERROR_STR ("disk_off=0");
- }
-
- diskoffset = extent->extents[k].disk_off;
-
- OCFS_BH_PUT_DATA(extent_bh);
- brelse(extent_bh);
- extent = NULL; extent_bh = NULL;
-
- status = ocfs_read_bh(osb, diskoffset,
- &extent_bh,
- OCFS_BH_CACHED, NULL);
- if (status < 0) {
- LOG_ERROR_STATUS (status);
- goto leave;
- }
- extent = OCFS_BH_GET_DATA_READ(extent_bh);
- }
-
- searchVbo = newOffset;
-
- OCFS_ASSERT (extent->type ==
- OCFS_EXTENT_DATA);
-
- if (!IS_VALID_EXTENT_DATA (extent)) {
- LOG_ERROR_STATUS(status = -EFAIL);
- goto leave;
- }
-
- for (j = 0; j < OCFS_MAX_DATA_EXTENTS; j++) {
- if ((extent->extents[j].file_off +
- extent->extents[j].
- num_bytes) > newOffset) {
- IoRuns[Runoffset].disk_off =
- extent->extents[j].
- disk_off + (newOffset -
- extent->
- extents[j].
- file_off);
- IoRuns[Runoffset].byte_cnt =
- (__u32) ((extent->
- extents[j].
- file_off +
- extent->
- extents[j].
- num_bytes) -
- newOffset);
-
- if (IoRuns[Runoffset].
- byte_cnt >=
- remainingLength) {
- IoRuns[Runoffset].
- byte_cnt = (__u32)
- remainingLength;
- status = 0;
- break;
- } else {
- newOffset +=
- IoRuns[Runoffset].
- byte_cnt;
- remainingLength -=
- IoRuns[Runoffset].
- byte_cnt;
- Runs++;
- Runoffset++;
- if (Runs >=
- OCFS_MAX_DATA_EXTENTS)
- {
- LOG_ERROR_ARGS ("Runs=%d", Runs);
- }
- }
- }
- }
-
- if (j == OCFS_MAX_DATA_EXTENTS) {
- OCFS_BH_PUT_DATA(extent_bh);
- brelse(extent_bh);
- extent = NULL; extent_bh = NULL;
- continue;
- } else {
- *NumEntries = Runs;
- *Buffer = IoRuns;
- goto leave;
- }
- } /* while (1) */
- }
-
leave:
- /* Don't free the IoRuns Memory here */
- if (extent_bh) {
- if (extent)
- OCFS_BH_PUT_DATA(extent_bh);
- brelse(extent_bh);
- }
+ if (inode)
+ iput(inode);
LOG_EXIT_STATUS (status);
return status;
-} /* ocfs_find_extents_of_system_file */
-
+} /* ocfs_read_system_file */
Added: trunk/src/sysfile.h
===================================================================
--- trunk/src/sysfile.h 2004-06-17 16:59:59 UTC (rev 1127)
+++ trunk/src/sysfile.h 2004-06-17 21:06:33 UTC (rev 1128)
@@ -0,0 +1,36 @@
+/* -*- mode: c; c-basic-offset: 8; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
+ * sysfile.h
+ *
+ * Function prototypes
+ *
+ * Copyright (C) 2002, 2004 Oracle. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ *
+ * Authors: Kurt Hackel, Mark Fasheh, Sunil Mushran, Wim Coekaerts,
+ * Manish Singh, Neeraj Goyal, Suchit Kaura
+ */
+
+#ifndef OCFS2_SYSFILE_H
+#define OCFS2_SYSFILE_H
+
+struct inode * ocfs_get_system_file_inode(ocfs_super *osb, int type, __u32 node);
+int ocfs_read_system_file (ocfs_super *osb, int type, __u32 node,
+ struct buffer_head *bhs[], __u64 Length);
+
+#endif /* OCFS2_SYSFILE_H */
Modified: trunk/src/util.c
===================================================================
--- trunk/src/util.c 2004-06-17 16:59:59 UTC (rev 1127)
+++ trunk/src/util.c 2004-06-17 21:06:33 UTC (rev 1128)
@@ -1,4 +1,6 @@
-/*
+/* -*- mode: c; c-basic-offset: 8; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
* util.c
*
* General purpose code
@@ -24,15 +26,18 @@
* Manish Singh
*/
-#ifdef __KERNEL__
+#include "ocfs_compat.h"
+
+#include <linux/fs.h>
#include <linux/types.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
-#include "inc/ocfs_log.h"
-#include "inc/ocfs.h"
-#else
-#include <debugocfs.h>
-#endif
+#include "ocfs_log.h"
+#include "ocfs.h"
+#include "util.h"
+
/* Tracing */
#define OCFS_DEBUG_CONTEXT OCFS_DEBUG_CONTEXT_UTIL
@@ -119,10 +124,6 @@
return 0;
} /* ocfs_sleep */
-#ifdef OCFS_LINUX_MEM_DEBUG
-#define SUPER_VERBOSE_MEM_DEBUG 1
-#endif
-
/*
* ocfs_dbg_slab_alloc()
*
@@ -131,23 +132,10 @@
{
void *m;
m = kmem_cache_alloc(slab, GFP_NOFS);
-#ifdef OCFS_LINUX_MEM_DEBUG
- if (m == NULL) {
+#if 0 /* FIXME: Should we check? */
+ if (m == NULL)
LOG_ERROR_ARGS("failed to alloc from slab = %p", slab);
- } else {
- alloc_item *new;
- new = kmalloc (sizeof (alloc_item), GFP_NOFS);
- new->type = SLAB_ITEM;
- new->address = m;
- new->u.slab = slab;
- snprintf (new->tag, 30, "%d:%s", line, file);
- new->tag[29] = '\0';
- list_add (&new->list, &OcfsGlobalCtxt.item_list);
-#ifdef SUPER_VERBOSE_MEM_DEBUG
- LOG_TRACE_ARGS (" + %x (%p, '%s')\n", m, slab, new->tag);
#endif
- }
-#endif
return m;
} /* ocfs_dbg_slab_alloc */
@@ -157,105 +145,10 @@
*/
void ocfs_dbg_slab_free (kmem_cache_t *slab, void *m)
{
-
-#ifdef OCFS_LINUX_MEM_DEBUG
- struct list_head *iter, *tmpiter;
- alloc_item *item = NULL;
- int do_free = 0;
-
- list_for_each_safe (iter, tmpiter, &OcfsGlobalCtxt.item_list) {
- item = list_entry (iter, alloc_item, list);
-
- if (item->address == m && item->type == SLAB_ITEM) {
-#ifdef SUPER_VERBOSE_MEM_DEBUG
- LOG_TRACE_ARGS (" - %x (%p, '%s')\n", m, item->u.slab, item->tag);
-#endif
- list_del (&item->list);
- do_free = 1;
- break;
- }
- }
-
- if (do_free) {
- kmem_cache_free(slab, m);
- kfree (item);
- return;
- }
- LOG_ERROR_ARGS ("tried to free mem never allocated: %x", m);
-#endif
-#ifndef OCFS_LINUX_MEM_DEBUG
kmem_cache_free(slab, m);
-#endif
} /* ocfs_dbg_slab_free */
-
-/*
- * ocfs_linux_dbg_alloc()
- *
- */
-void *ocfs_linux_dbg_alloc (int Size, char *file, int line)
-{
- void *m;
-
- m = kmalloc (Size, GFP_NOFS);
-#ifdef OCFS_LINUX_MEM_DEBUG
- if (m == NULL) {
- LOG_ERROR_ARGS ("failed! (size=%d)", Size);
- } else {
- alloc_item *new;
- new = kmalloc (sizeof (alloc_item), GFP_NOFS);
- new->type = KMALLOC_ITEM;
- new->address = m;
- new->u.length = Size;
- snprintf (new->tag, 30, "%d:%s", line, file);
- new->tag[29] = '\0';
- list_add (&new->list, &OcfsGlobalCtxt.item_list);
-#ifdef SUPER_VERBOSE_MEM_DEBUG
- LOG_TRACE_ARGS (" + %x (%d, '%s')\n", m, Size, new->tag);
-#endif
- }
-#endif
- return m;
-} /* ocfs_linux_dbg_alloc */
-
-/*
- * ocfs_linux_dbg_free()
- *
- */
-void ocfs_linux_dbg_free (const void *Buffer)
-{
-
-#ifdef OCFS_LINUX_MEM_DEBUG
- struct list_head *iter, *tmpiter;
- alloc_item *item = NULL;
- int do_free = 0;
-
- list_for_each_safe (iter, tmpiter, &OcfsGlobalCtxt.item_list) {
- item = list_entry (iter, alloc_item, list);
-
- if (item->address == Buffer && item->type == KMALLOC_ITEM) {
-#ifdef SUPER_VERBOSE_MEM_DEBUG
- LOG_TRACE_ARGS (" - %x (%d, '%s')\n", Buffer,
- item->u.length, item->tag);
-#endif
- list_del (&item->list);
- do_free = 1;
- break;
- }
- }
- if (do_free) {
- kfree (Buffer);
- kfree (item);
- return;
- }
- LOG_ERROR_ARGS ("tried to free mem never allocated: %x", Buffer);
-#endif
-#ifndef OCFS_LINUX_MEM_DEBUG
- kfree (Buffer);
-#endif
-} /* ocfs_linux_dbg_free */
-
/* prefetch has been declared to allow to build in debug mode */
#ifdef DEBUG
#ifndef ARCH_HAS_PREFETCH
Added: trunk/src/util.h
===================================================================
--- trunk/src/util.h 2004-06-17 16:59:59 UTC (rev 1127)
+++ trunk/src/util.h 2004-06-17 21:06:33 UTC (rev 1128)
@@ -0,0 +1,46 @@
+/* -*- mode: c; c-basic-offset: 8; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
+ * util.h
+ *
+ * Function prototypes
+ *
+ * Copyright (C) 2002, 2004 Oracle. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ *
+ * Authors: Kurt Hackel, Mark Fasheh, Sunil Mushran, Wim Coekaerts,
+ * Manish Singh, Neeraj Goyal, Suchit Kaura
+ */
+
+#ifndef OCFS2_UTIL_H
+#define OCFS2_UTIL_H
+
+int ocfs_compare_qstr(struct qstr *s1, struct qstr *s2);
+void ocfs_clear_timeout(ocfs_timeout *to);
+void ocfs_daemonize(char *name, int len);
+void *ocfs_dbg_slab_alloc(kmem_cache_t *slab, char *file, int line);
+void ocfs_dbg_slab_free(kmem_cache_t *slab, void *m);
+void ocfs_init_timeout(ocfs_timeout *to);
+void *ocfs_linux_dbg_alloc(int Size, char *file, int line);
+void ocfs_linux_dbg_free(const void *Buffer);
+void ocfs_set_timeout(ocfs_timeout *to, __u32 timeout);
+void ocfs_show_stack(unsigned long *esp);
+void ocfs_show_trace(unsigned long *stack);
+int ocfs_sleep(__u32 ms);
+void ocfs_truncate_inode_pages(struct inode *inode, loff_t off);
+
+#endif /* OCFS2_UTIL_H */
Modified: trunk/src/ver.c
===================================================================
--- trunk/src/ver.c 2004-06-17 16:59:59 UTC (rev 1127)
+++ trunk/src/ver.c 2004-06-17 21:06:33 UTC (rev 1128)
@@ -1,4 +1,6 @@
-/*
+/* -*- mode: c; c-basic-offset: 8; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
* ver.c
*
* version string
@@ -24,14 +26,20 @@
* Manish Singh
*/
+#include "ocfs_compat.h"
+
+#include <linux/module.h>
+#include <linux/fs.h>
#include <linux/types.h>
-
-#include "inc/ocfs_log.h"
-#include "inc/ocfs.h"
+#include <linux/slab.h>
#include <linux/kernel.h>
-#include <linux/module.h>
+#include "ocfs_log.h"
+#include "ocfs.h"
+#include "ver.h"
+
+
MODULE_DESCRIPTION("The Oracle Cluster Filesystem (version "
OCFS_BUILD_VERSION
")");
Added: trunk/src/ver.h
===================================================================
--- trunk/src/ver.h 2004-06-17 16:59:59 UTC (rev 1127)
+++ trunk/src/ver.h 2004-06-17 21:06:33 UTC (rev 1128)
@@ -0,0 +1,34 @@
+/* -*- mode: c; c-basic-offset: 8; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
+ * ver.h
+ *
+ * Function prototypes
+ *
+ * Copyright (C) 2002, 2004 Oracle. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ *
+ * Authors: Kurt Hackel, Mark Fasheh, Sunil Mushran, Wim Coekaerts,
+ * Manish Singh, Neeraj Goyal, Suchit Kaura
+ */
+
+#ifndef OCFS2_VER_H
+#define OCFS2_VER_H
+
+void ocfs_version_print(void);
+
+#endif /* OCFS2_VER_H */
Modified: trunk/src/volcfg.c
===================================================================
--- trunk/src/volcfg.c 2004-06-17 16:59:59 UTC (rev 1127)
+++ trunk/src/volcfg.c 2004-06-17 21:06:33 UTC (rev 1128)
@@ -1,4 +1,6 @@
-/*
+/* -*- mode: c; c-basic-offset: 8; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
* volcfg.c
*
* Auto configuration, namely, node number.
@@ -24,11 +26,21 @@
* Manish Singh, Neeraj Goyal, Suchit Kaura
*/
+#include "ocfs_compat.h"
+
+#include <linux/fs.h>
#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/highmem.h>
-#include "inc/ocfs_log.h"
-#include "inc/ocfs.h"
+#include "ocfs_log.h"
+#include "ocfs.h"
+#include "util.h"
+#include "volcfg.h"
+
+#include "ocfs_buffer_head.h"
+
/* Tracing */
#define OCFS_DEBUG_CONTEXT OCFS_DEBUG_CONTEXT_VOLCFG
@@ -68,7 +80,7 @@
/* Obtain the volume for which we need to reiterate the lock */
osb = cfg_task->osb;
bh = cfg_task->bh;
- length = osb->sect_size;
+ length = osb->sb->s_blocksize;
offset = cfg_task->lock_off;
/* Write the sector back */
@@ -132,21 +144,22 @@
ocfs_node_config_info *disk_node = NULL;
char *buf;
__u32 node_num;
- __u32 sect_size;
__u32 size;
- struct buffer_head *cfg_bhs[OCFS_MAXIMUM_NODES];
+ struct buffer_head **cfg_bhs = NULL;
int done = 0;
LOG_ENTRY ();
- memset(cfg_bhs, 0, OCFS_MAXIMUM_NODES * sizeof(*cfg_bhs));
+ cfg_bhs = kmalloc(sizeof(struct buffer_head *) * osb->max_nodes, GFP_KERNEL);
+ if (cfg_bhs == NULL) {
+ LOG_ERROR_STATUS(status = -ENOMEM);
+ goto finally;
+ }
+ memset(cfg_bhs, 0, osb->max_nodes * sizeof(struct buffer_head *));
- sect_size = osb->sect_size;
-
/* Read the nodecfg info for all nodes from disk */
- size = OCFS_VOLCFG_HDR_SECTORS * sect_size;
- offset = osb->vol_layout.node_cfg_off + size;
- size = osb->vol_layout.node_cfg_size - size;
+ offset = (osb->autoconfig_blkno + OCFS_VOLCFG_HDR_SECTORS) << osb->sb->s_blocksize_bits;
+ size = (osb->autoconfig_blocks - OCFS_VOLCFG_HDR_SECTORS) << osb->sb->s_blocksize_bits;
status = ocfs_read_bhs(osb, offset, size, cfg_bhs, 0, NULL);
if (status < 0) {
@@ -156,7 +169,7 @@
/* Check if preferred node num is available */
node_num = OCFS_INVALID_NODE_NUM;
- if (pref_node_num >= 0 && pref_node_num < OCFS_MAXIMUM_NODES) {
+ if (pref_node_num >= 0 && pref_node_num < osb->max_nodes) {
disk_node = OCFS_BH_GET_DATA_READ(cfg_bhs[pref_node_num]);
if (disk_node->node_name[0] == '\0')
node_num = pref_node_num;
@@ -165,7 +178,7 @@
/* if not, find the first available empty slot */
if (node_num == OCFS_INVALID_NODE_NUM) {
- for (node_num = 0; node_num < OCFS_MAXIMUM_NODES; node_num++) {
+ for (node_num = 0; node_num < osb->max_nodes; node_num++) {
disk_node = OCFS_BH_GET_DATA_READ(cfg_bhs[node_num]);
if (disk_node->node_name[0] == '\0')
done = 1;
@@ -176,7 +189,7 @@
}
/* If no free slots, error out */
- if (node_num >= OCFS_MAXIMUM_NODES) {
+ if (node_num >= osb->max_nodes) {
LOG_ERROR_STR ("Unable to allocate node number as no slots " \
"are available");
status = -ENOSPC;
@@ -185,7 +198,7 @@
/* Copy the new nodecfg into the memory buffer */
buf = OCFS_BH_GET_DATA_WRITE(cfg_bhs[node_num]);
- memcpy (buf, new_disk_node, sect_size);
+ memcpy (buf, new_disk_node, osb->sb->s_blocksize);
OCFS_BH_PUT_DATA(cfg_bhs[node_num]);
/* Write the new node details on disk */
@@ -203,9 +216,11 @@
}
finally:
- for (i = 0; i < OCFS_MAXIMUM_NODES; i++)
+ for (i = 0; i < osb->max_nodes; i++)
if (cfg_bhs[i])
brelse(cfg_bhs[i]);
+ if (cfg_bhs)
+ kfree(cfg_bhs);
LOG_EXIT_STATUS (status);
return status;
@@ -227,14 +242,14 @@
node_cfg_bhs[0] = node_cfg_bhs[1] = NULL;
/* Read the nodecfg header */
- offset = osb->vol_layout.node_cfg_off;
+ offset = osb->autoconfig_blkno << osb->sb->s_blocksize_bits;
status = ocfs_read_bh(osb, offset, &node_cfg_bhs[0], 0, NULL);
if (status < 0) {
LOG_ERROR_STATUS (status);
goto bail;
}
- offset = osb->vol_layout.new_cfg_off + osb->sect_size;
+ offset = (osb->new_autoconfig_blkno + 1) << osb->sb->s_blocksize_bits;
status = ocfs_read_bh(osb, offset, &node_cfg_bhs[1], 0, NULL);
if (status < 0) {
LOG_ERROR_STATUS (status);
@@ -250,7 +265,7 @@
/* Increment the seq# to trigger other nodes to re-read node cfg */
hdr->cfg_seq_num++;
- memcpy(hdr_copy, hdr, osb->sect_size);
+ memcpy(hdr_copy, hdr, osb->sb->s_blocksize);
OCFS_BH_PUT_DATA(node_cfg_bhs[0]);
OCFS_BH_PUT_DATA(node_cfg_bhs[1]);
/* Write the nodecfg header */
@@ -303,17 +318,14 @@
int tried_acq = 0;
int break_lock = 0;
int bh_locked = 0;
- ocfs_disk_lock *disk_lock;
+ ocfs2_disk_lock *disk_lock;
ocfs_cfg_task *cfg_task;
- __u32 sect_size;
__u64 lock_node_num = OCFS_INVALID_NODE_NUM;
struct buffer_head *bh = NULL;
int i;
LOG_ENTRY ();
- sect_size = osb->sect_size;
-
cfg_task = ocfs_malloc (sizeof (ocfs_cfg_task));
if (cfg_task == NULL)
{
@@ -353,21 +365,21 @@
lock_buf = OCFS_BH_GET_DATA_WRITE(bh);
bh_locked = 1;
- disk_lock = (ocfs_disk_lock *) lock_buf;
- lock_node_num = DISK_LOCK_CURRENT_MASTER(disk_lock);
+ disk_lock = (ocfs2_disk_lock *)lock_buf;
+ lock_node_num = disk_lock->dl_master;
- if (DISK_LOCK_FILE_LOCK (disk_lock) == 0 || break_lock) {
- if (DISK_LOCK_FILE_LOCK (disk_lock) != 0)
+ if (disk_lock->dl_level == 0 || break_lock) {
+ if (disk_lock->dl_level != 0)
LOG_TRACE_STR ("Try to break node config lock");
else
LOG_TRACE_STR ("Lock node config");
/* Attempt to lock volcfg */
- memcpy(disk_lock, cfg_buf, osb->sect_size);
+ memcpy(disk_lock, cfg_buf, osb->sb->s_blocksize);
- DISK_LOCK_CURRENT_MASTER (disk_lock) = osb->node_num;
- DISK_LOCK_FILE_LOCK (disk_lock) = 1;
- memcpy(cfg_buf, disk_lock, osb->sect_size);
+ disk_lock->dl_master = osb->node_num;
+ disk_lock->dl_level = 1;
+ memcpy(cfg_buf, disk_lock, osb->sb->s_blocksize);
/* Write into volcfg lock sector... */
OCFS_BH_PUT_DATA(bh);
@@ -396,8 +408,8 @@
lock_buf = OCFS_BH_GET_DATA_WRITE(bh);
/* If we tried to acquire and we still own it we take it... */
- if ((tried_acq) && (memcmp (lock_buf, cfg_buf, sect_size) == 0)) {
- memcpy (lock_buf, cfg_buf, sect_size);
+ if ((tried_acq) && (memcmp (lock_buf, cfg_buf, osb->sb->s_blocksize) == 0)) {
+ memcpy (lock_buf, cfg_buf, osb->sb->s_blocksize);
OCFS_BH_PUT_DATA(bh);
/* Set timer to reiterate lock every few jiffies */
@@ -410,10 +422,9 @@
add_timer(&osb->lock_timer);
/* Write the config info into the disk */
- disk_lock = (ocfs_disk_lock *) cfg_buf;
- DISK_LOCK_CURRENT_MASTER (disk_lock) =
- OCFS_INVALID_NODE_NUM;
- DISK_LOCK_FILE_LOCK (disk_lock) = 0;
+ disk_lock = (ocfs2_disk_lock *)cfg_buf;
+ disk_lock->dl_master = OCFS_INVALID_NODE_NUM;
+ disk_lock->dl_level = 0;
if (op == OCFS_VOLCFG_ADD)
status = ocfs_add_to_disk_config (osb, node_num,
@@ -429,12 +440,12 @@
}
break;
} else {
- disk_lock = (ocfs_disk_lock *) lock_buf;
- if (DISK_LOCK_CURRENT_MASTER (disk_lock) == lock_node_num)
+ disk_lock = (ocfs2_disk_lock *)lock_buf;
+ if (disk_lock->dl_master == lock_node_num)
break_lock = 1;
else {
LOG_TRACE_ARGS ("Node config locked by node: %d\n",
- DISK_LOCK_CURRENT_MASTER (disk_lock));
+ disk_lock->dl_master);
ocfs_sleep (OCFS_VOLCFG_LOCK_TIME);
}
OCFS_BH_PUT_DATA(bh);
@@ -468,7 +479,6 @@
{
int status = 0;
__s8 *buffer = NULL;
- __u32 sect_size = osb->sect_size;
struct buffer_head *bh = NULL;
struct super_block *sb;
__u64 blocknum;
@@ -477,14 +487,8 @@
sb = osb->sb;
- if (sect_size != sb->s_blocksize) {
- LOG_ERROR_STR("sector_size != sb->s_blocksize!");
- status = -999;
- goto finally;
- }
-
blocknum = lock_off >> sb->s_blocksize_bits;
- bh = getblk(OCFS_GET_BLOCKDEV(sb), blocknum, sb->s_blocksize);
+ bh = sb_getblk(sb, blocknum);
if (bh == NULL) {
LOG_ERROR_STATUS (status = -EIO);
goto finally;
@@ -493,7 +497,7 @@
buffer = OCFS_BH_GET_DATA_WRITE(bh);
/* reset lock... */
- memset (buffer, 0, sect_size);
+ memset (buffer, 0, osb->sb->s_blocksize);
OCFS_BH_PUT_DATA(bh);
/* Release the lock */
@@ -512,7 +516,7 @@
buffer = OCFS_BH_GET_DATA_WRITE(bh);
/* reset lock... */
- memset (buffer, 0, sect_size);
+ memset (buffer, 0, osb->sb->s_blocksize);
OCFS_BH_PUT_DATA(bh);
/* Release the lock */
@@ -539,16 +543,15 @@
ocfs_node_config_info *disk;
void *buffer;
__u64 offset;
- __u32 sect_size = osb->sect_size;
LOG_ENTRY ();
- buffer = ocfs_malloc (sect_size);
+ buffer = ocfs_malloc (osb->sb->s_blocksize);
if (buffer == NULL) {
LOG_ERROR_STATUS (status = -ENOMEM);
goto bail;
}
- memset (buffer, 0, sect_size);
+ memset (buffer, 0, osb->sb->s_blocksize);
disk = buffer;
@@ -556,7 +559,7 @@
ocfs_volcfg_gblctxt_to_disknode (disk);
/* Write this nodes config onto disk */
- offset = osb->vol_layout.new_cfg_off;
+ offset = (osb->new_autoconfig_blkno << osb->sb->s_blocksize_bits);
status = ocfs_config_with_disk_lock (osb, offset, (__u8 *) disk,
OcfsGlobalCtxt.pref_node_num,
OCFS_VOLCFG_ADD);
@@ -600,14 +603,12 @@
strncpy ((*node)->node_name, disk->node_name, MAX_NODE_NAME_LENGTH);
- memcpy((*node)->guid.guid, disk->guid.guid, GUID_LEN);
+ memcpy((*node)->guid.guid, disk->guid.guid, OCFS2_GUID_LEN);
- (*node)->ipc_config.type = disk->ipc_config.type;
(*node)->ipc_config.ip_port = disk->ipc_config.ip_port;
- strncpy((*node)->ipc_config.ip_addr, disk->ipc_config.ip_addr,
- MAX_IP_ADDR_LEN);
- strncpy((*node)->ipc_config.ip_mask, disk->ipc_config.ip_mask,
- MAX_IP_ADDR_LEN);
+ (*node)->ipc_config.addr_u.ip_addr4 =
+ disk->ipc_config.addr_u.ip_addr4;
+ (*node)->ipc_config.ip_version = disk->ipc_config.ip_version;
bail:
LOG_EXIT_STATUS (status);
@@ -621,7 +622,6 @@
static int ocfs_update_disk_config (ocfs_super * osb, __u32 node_num, ocfs_node_config_info * disk)
{
int status = 0;
- __u64 offset;
__u32 blocknum;
struct buffer_head *bh = NULL;
struct super_block *sb = NULL;
@@ -631,11 +631,9 @@
sb = osb->sb;
/* Write the node details */
- offset = osb->vol_layout.node_cfg_off +
- ((node_num + OCFS_VOLCFG_HDR_SECTORS) * osb->sect_size);
- blocknum = offset >> sb->s_blocksize_bits;
+ blocknum = osb->autoconfig_blkno + OCFS_VOLCFG_HDR_SECTORS + node_num;
- bh = getblk(OCFS_GET_BLOCKDEV(sb), blocknum, sb->s_blocksize);
+ bh = sb_getblk(sb, blocknum);
if (bh == NULL) {
status = -EIO;
LOG_ERROR_STATUS(status);
@@ -643,7 +641,7 @@
}
buf = OCFS_BH_GET_DATA_WRITE(bh);
- memcpy(buf, disk, osb->sect_size);
+ memcpy(buf, disk, osb->sb->s_blocksize);
OCFS_BH_PUT_DATA(bh);
status = ocfs_write_bh(osb, bh, 0, NULL);
@@ -684,14 +682,12 @@
strncpy (disk->node_name, OcfsGlobalCtxt.node_name,
MAX_NODE_NAME_LENGTH);
- memcpy(disk->guid.guid, OcfsGlobalCtxt.guid.guid, GUID_LEN);
+ memcpy(disk->guid.guid, OcfsGlobalCtxt.guid.guid,
+ OCFS2_GUID_LEN);
- ipc->type = g_ipc->type;
ipc->ip_port = g_ipc->ip_port;
- if (g_ipc->ip_addr)
- strncpy (ipc->ip_addr, g_ipc->ip_addr, MAX_IP_ADDR_LEN);
- if (g_ipc->ip_mask)
- strncpy (ipc->ip_mask, g_ipc->ip_mask, MAX_IP_ADDR_LEN);
+ ipc->ip_version = g_ipc->ip_version;
+ ipc->addr_u.ip_addr4 = g_ipc->addr_u.ip_addr4;
LOG_EXIT ();
return ;
@@ -715,14 +711,12 @@
strncpy (node->node_name, OcfsGlobalCtxt.node_name,
MAX_NODE_NAME_LENGTH);
- memcpy(node->guid.guid, OcfsGlobalCtxt.guid.guid, GUID_LEN);
+ memcpy(node->guid.guid, OcfsGlobalCtxt.guid.guid,
+ OCFS2_GUID_LEN);
- ipc->type = g_ipc->type;
ipc->ip_port = g_ipc->ip_port;
- if (g_ipc->ip_addr)
- strncpy (ipc->ip_addr, g_ipc->ip_addr, MAX_IP_ADDR_LEN);
- if (g_ipc->ip_mask)
- strncpy (ipc->ip_mask, g_ipc->ip_mask, MAX_IP_ADDR_LEN);
+ ipc->ip_version = g_ipc->ip_version;
+ ipc->addr_u.ip_addr4 = g_ipc->addr_u.ip_addr4;
LOG_EXIT ();
return ;
@@ -746,8 +740,8 @@
LOG_ENTRY ();
/* Read in the config on the disk */
- offset = osb->vol_layout.node_cfg_off;
- numblocks = osb->vol_layout.node_cfg_size >> sb->s_blocksize_bits;
+ offset = osb->autoconfig_blkno << sb->s_blocksize_bits;
+ numblocks = osb->autoconfig_blocks;
cfg_bhs = ocfs_malloc(numblocks * sizeof(*cfg_bhs));
if (cfg_bhs == NULL) {
@@ -757,7 +751,8 @@
}
memset(cfg_bhs, 0, numblocks * sizeof(*cfg_bhs));
- status = ocfs_read_bhs(osb, offset, osb->vol_layout.node_cfg_size, cfg_bhs, 0, NULL);
+ status = ocfs_read_bhs(osb, offset, numblocks << sb->s_blocksize_bits,
+ cfg_bhs, 0, NULL);
if (status < 0) {
LOG_ERROR_STATUS (status);
goto finally;
@@ -766,18 +761,18 @@
/* 1st block in buffer is the NodeCfgHdr */
hdr = OCFS_BH_GET_DATA_READ(cfg_bhs[0]);
- if (strncmp (hdr->signature, NODE_CONFIG_HDR_SIGN,
- NODE_CONFIG_SIGN_LEN)) {
+ if (strncmp (hdr->signature, OCFS2_NODE_CONFIG_HDR_SIGN,
+ OCFS2_NODE_CONFIG_SIGN_LEN)) {
LOG_ERROR_STR ("Invalid node config signature");
status = -EINVAL;
goto finally;
}
- if (hdr->version < NODE_MIN_SUPPORTED_VER ||
- hdr->version > NODE_CONFIG_VER) {
+ if (hdr->version < OCFS2_NODE_MIN_SUPPORTED_VER ||
+ hdr->version > OCFS2_NODE_CONFIG_VER) {
LOG_ERROR_ARGS ("Node config version mismatch, (%d) < minimum" \
" (%d) or > current (%d)", hdr->version,
- NODE_MIN_SUPPORTED_VER, NODE_CONFIG_VER);
+ OCFS2_NODE_MIN_SUPPORTED_VER, OCFS2_NODE_CONFIG_VER);
status = -EINVAL;
goto finally;
}
@@ -791,7 +786,7 @@
/* Read the nodecfg for all possible nodes as there may be holes */
/* i.e., node numbers need not be doled out in sequence */
- for (i = 0; i < OCFS_MAXIMUM_NODES; i++) {//, p += sect_size) {
+ for (i = 0; i < osb->max_nodes; i++) {
int which;
which = i + OCFS_VOLCFG_HDR_SECTORS;
disk = OCFS_BH_GET_DATA_READ(cfg_bhs[which]);
@@ -821,14 +816,15 @@
* the volume with the reclaimid option. Else, error.
*/
if (!memcmp(&OcfsGlobalCtxt.guid.guid, disk->guid.guid,
- GUID_LEN)) {
+ OCFS2_GUID_LEN)) {
osb->node_num = i;
goto loop;
}
/* If the hostid does not match, goto next... */
if (memcmp(&OcfsGlobalCtxt.guid.id.host_id,
- disk->guid.id.host_id, HOSTID_LEN))
+ disk->guid.id.host_id,
+ OCFS2_GUID_HOSTID_LEN))
goto loop;
/* ...else allow node to reclaim the number if reclaimid set */
@@ -941,20 +937,15 @@
MAX_NODE_NAME_LENGTH))
chg = 1;
- if (!chg && ipc->type != g_ipc->type)
+ if (!chg && (ipc->ip_version != g_ipc->ip_version))
chg = 1;
- if (!chg && ipc->ip_port != g_ipc->ip_port)
+ if (!chg && (ipc->ip_port != g_ipc->ip_port))
chg = 1;
- if (!chg && g_ipc->ip_addr &&
- strncmp (ipc->ip_addr, g_ipc->ip_addr, MAX_IP_ADDR_LEN))
+ if (!chg && (ipc->addr_u.ip_addr4 != g_ipc->addr_u.ip_addr4))
chg = 1;
- if (!chg && g_ipc->ip_mask &&
- strncmp (ipc->ip_mask, g_ipc->ip_mask, MAX_IP_ADDR_LEN))
- chg = 1;
-
LOG_EXIT_INT (chg);
return chg;
} /* ocfs_has_node_config_changed */
@@ -973,13 +964,13 @@
LOG_ENTRY ();
- buffer = ocfs_malloc (osb->sect_size);
+ buffer = ocfs_malloc (osb->sb->s_blocksize);
if (buffer == NULL) {
LOG_ERROR_STATUS (status = -ENOMEM);
goto bail;
}
- memset (buffer, 0, osb->sect_size);
+ memset (buffer, 0, osb->sb->s_blocksize);
disk = (ocfs_node_config_info *) buffer;
/* populate the nodecfg info in disk from global context */
@@ -990,7 +981,7 @@
ocfs_volcfg_gblctxt_to_node (node);
/* Update the nodecfg on disk with the new info */
- offset = osb->vol_layout.new_cfg_off;
+ offset = (osb->new_autoconfig_blkno << osb->sb->s_blocksize_bits);
status = ocfs_config_with_disk_lock (osb, offset, (__u8 *) disk,
osb->node_num, OCFS_VOLCFG_UPD);
if (status < 0) {
@@ -1014,15 +1005,17 @@
BARF_BARF_BARF *node;
__u32 i;
- for (i = 0; i < OCFS_MAXIMUM_NODES; i++) {
+ for (i = 0; i < osb->max_nodes; i++) {
node = osb->node_cfg_info[i];
if (!node || node->node_name[0] == '\0')
continue;
- LOG_TRACE_ARGS ("Node (%u) is (%s)\n", i, node->node_name);
- LOG_TRACE_ARGS ("ip=%s, port=%d\n", node->ipc_config.ip_addr,
- node->ipc_config.ip_port);
+ LOG_TRACE_ARGS ("Node (%u) is (%s)\n", i,
+ node->node_name);
+ LOG_TRACE_ARGS ("ip=0x%08u, port=%d\n",
+ ntohl(node->ipc_config.addr_u.ip_addr4),
+ ntohs(node->ipc_config.ip_port));
}
return;
Added: trunk/src/volcfg.h
===================================================================
--- trunk/src/volcfg.h 2004-06-17 16:59:59 UTC (rev 1127)
+++ trunk/src/volcfg.h 2004-06-17 21:06:33 UTC (rev 1128)
@@ -0,0 +1,35 @@
+/* -*- mode: c; c-basic-offset: 8; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
+ * volcfg.h
+ *
+ * Function prototypes
+ *
+ * Copyright (C) 2002, 2004 Oracle. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ *
+ * Authors: Kurt Hackel, Mark Fasheh, Sunil Mushran, Wim Coekaerts,
+ * Manish Singh, Neeraj Goyal, Suchit Kaura
+ */
+
+#ifndef OCFS2_VOLCFG_H
+#define OCFS2_VOLCFG_H
+
+int ocfs_chk_update_config(ocfs_super *osb); /* ocfs_super must already be declared by the includer: this header includes nothing itself */
+int ocfs_get_config(ocfs_super *osb); /* NOTE(review): presumably defined in volcfg.c together with the prototype above -- confirm */
+
+#endif /* OCFS2_VOLCFG_H */
Modified: trunk/src/vote.c
===================================================================
--- trunk/src/vote.c 2004-06-17 16:59:59 UTC (rev 1127)
+++ trunk/src/vote.c 2004-06-17 21:06:33 UTC (rev 1128)
@@ -1,4 +1,6 @@
-/*
+/* -*- mode: c; c-basic-offset: 8; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
* vote.c
*
* netdlm listener, receive, verify and send messages
@@ -24,11 +26,25 @@
* Manish Singh
*/
+#include "ocfs_compat.h"
+
+#include <linux/fs.h>
#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/inet.h>
+#include <linux/net.h>
+#include <linux/in.h>
-#include "inc/ocfs_log.h"
-#include "inc/ocfs.h"
+#include <asm/uaccess.h>
+#include "ocfs_log.h"
+#include "ocfs.h"
+
+#include "dlm.h"
+#include "nm.h"
+#include "util.h"
+#include "vote.h"
+
/* Tracing */
#define OCFS_DEBUG_CONTEXT OCFS_DEBUG_CONTEXT_VOTE
@@ -38,13 +54,14 @@
static const char vote_state_str[] = { 'U', 'S', 'P', 'F', 'D' };
-static ocfs_vote_obj * ocfs_alloc_vote_obj (int bytes, __u32 reqlock, __u64 votemap);
+static ocfs_vote_obj * ocfs_alloc_vote_obj (ocfs_super *osb, int bytes, __u32 reqlock, ocfs_node_map * votemap);
static void ocfs_dlm_recv_msg (void *val);
static int ocfs_check_ipc_msg (__u8 * msg, __u32 msg_len);
static int ocfs_comm_process_vote_reply (ocfs_super * osb, ocfs_dlm_msg * dlm_msg);
static int ocfs_comm_process_msg (__u8 * msg);
static void ocfs_init_dlm_msg (ocfs_super * osb, ocfs_dlm_msg * dlm_msg, __u32 msg_len, __u32 type);
+static int ocfs_send_bcast (ocfs_super * osb, ocfs_node_map *votemap, ocfs_dlm_msg * dlm_msg);
static spinlock_t vote_obj_lock = SPIN_LOCK_UNLOCKED;
@@ -134,42 +151,57 @@
return status;
} /* ocfs_recv_udp_msg */
+
+/*
+ * ocfs2_comm_ip_version_to_family()
+ *
+ * Map an IP version number held in network byte order (4 or 6) to the
+ * matching protocol family constant.  Any other value is a BUG().
+ */
+static inline int ocfs2_comm_ip_version_to_family(u16 ip_version)
+{
+	switch (ntohs(ip_version)) {
+	case 4:
+		return PF_INET;
+	case 6:
+		return PF_INET6;
+	default:
+		BUG();
+	}
+
+	/*
+	 * Only reached if BUG() returns.  Hand back a real "no family"
+	 * value instead of the raw version number 4, which is not a
+	 * PF_* constant the callers could meaningfully pass on.
+	 */
+	return PF_UNSPEC;
+}
+
/*
* ocfs_send_bcast()
*
*/
-static int ocfs_send_bcast (ocfs_super * osb, __u64 votemap, ocfs_dlm_msg * dlm_msg)
+static int ocfs_send_bcast (ocfs_super * osb, ocfs_node_map *votemap, ocfs_dlm_msg * dlm_msg)
{
int status = 0, error;
- __u32 map, num;
+ __u32 num;
BARF_BARF_BARF *node;
struct sockaddr_in sin;
mm_segment_t oldfs;
- LOG_ENTRY_ARGS ("(votemap=0x%x)\n",
- (__u32)(votemap & 0xFFFFFFFFULL));
+ LOG_ENTRY ();
oldfs = get_fs ();
- for (map = (__u32)(votemap & 0xFFFFFFFFULL), num = 0; map != 0; map >>= 1, num++) {
+ for (num=0; num<osb->max_nodes; num++) {
if (num == osb->node_num)
continue;
- if (!(map & 0x1))
+ if (!ocfs_node_map_test_bit(votemap, num))
continue;
node = osb->node_cfg_info[num];
if (!node)
continue;
- LOG_TRACE_ARGS ("Sending msg to node=%u, name=%s\n",
- num, node->node_name);
+ LOG_TRACE_ARGS("Sending msg to node=%u, name=%s\n",
+ num, node->node_name);
memset (&sin, 0, sizeof (sin));
- sin.sin_family = AF_INET;
- sin.sin_addr.s_addr = in_aton(node->ipc_config.ip_addr);
- sin.sin_port = htons(node->ipc_config.ip_port);
+ sin.sin_family = ocfs2_comm_ip_version_to_family(node->ipc_config.ip_version);
+ sin.sin_addr.s_addr = node->ipc_config.addr_u.ip_addr4;
+ sin.sin_port = node->ipc_config.ip_port;
- LOG_TRACE_ARGS ("about to send to %s:%u\n", node->ipc_config.ip_addr,
- node->ipc_config.ip_port);
+ LOG_TRACE_ARGS("about to send to 0x%08u:%u\n",
+ ntohl(node->ipc_config.addr_u.ip_addr4),
+ ntohs(node->ipc_config.ip_port));
status = -EFAIL;
if (OcfsIpcCtxt.send_sock) {
@@ -210,7 +242,8 @@
* ocfs_init_udp_sock()
*
*/
-int ocfs_init_udp_sock (struct socket **send_sock, struct socket **recv_sock)
+int ocfs_init_udp_sock(struct socket **send_sock,
+ struct socket **recv_sock)
{
struct sockaddr_in sin;
int status = -EFAIL;
@@ -218,43 +251,50 @@
LOG_ENTRY ();
+ comm = &(OcfsGlobalCtxt.comm_info);
+
/* Create Send Socket */
- status = sock_create (PF_INET, SOCK_DGRAM, IPPROTO_UDP, send_sock);
+ status = sock_create(ocfs2_comm_ip_version_to_family(comm->ip_version),
+ SOCK_DGRAM, IPPROTO_UDP,
+ send_sock);
if (status < 0) {
LOG_ERROR_ARGS ("unable to create socket, error=%d", status);
goto bail;
}
/* Bind Send Socket */
- memset (&sin, 0, sizeof (sin));
- sin.sin_family = AF_INET;
+ memset(&sin, 0, sizeof (sin));
+ sin.sin_family = ocfs2_comm_ip_version_to_family(comm->ip_version);
sin.sin_addr.s_addr = htonl (INADDR_ANY);
- sin.sin_port = htons (0);
+ sin.sin_port = htons(0);
- status = (*send_sock)->ops->bind (*send_sock, (struct sockaddr *) &sin,
- sizeof (sin));
+ status = (*send_sock)->ops->bind(*send_sock,
+ (struct sockaddr *)&sin,
+ sizeof(sin));
if (status < 0) {
LOG_ERROR_ARGS ("unable to bind socket, error=%d", status);
goto bail;
}
/* Create Receive Socket */
- status = sock_create (PF_INET, SOCK_DGRAM, IPPROTO_UDP, recv_sock);
+ status = sock_create(ocfs2_comm_ip_version_to_family(comm->ip_version),
+ SOCK_DGRAM, IPPROTO_UDP,
+ recv_sock);
if (status < 0) {
LOG_ERROR_ARGS ("unable to create socket, error=%d", status);
goto bail;
}
- comm = &(OcfsGlobalCtxt.comm_info);
/* Bind Receive Socket */
- memset (&sin, 0, sizeof (sin));
- sin.sin_family = AF_INET;
- sin.sin_addr.s_addr = htonl (INADDR_ANY);
- sin.sin_port = htons (comm->ip_port);
+ memset(&sin, 0, sizeof(sin));
+ sin.sin_family = ocfs2_comm_ip_version_to_family(comm->ip_version);
+ sin.sin_addr.s_addr = htonl(INADDR_ANY);
+ sin.sin_port = comm->ip_port;
- status = (*recv_sock)->ops->bind (*recv_sock, (struct sockaddr *) &sin,
- sizeof (sin));
+ status = (*recv_sock)->ops->bind(*recv_sock,
+ (struct sockaddr *)&sin,
+ sizeof(sin));
if (status < 0) {
LOG_ERROR_ARGS ("unable to bind socket, error=%d", status);
goto bail;
@@ -276,19 +316,21 @@
ocfs_dlm_reply_master *reply_master;
ocfs_dlm_msg *send_dlm_msg;
ocfs_vote_obj *obj;
- __u64 vote_map;
int status = 0;
__u8 *buf = NULL;
__u32 msg_len, obj_len;
+ ocfs_node_map vote_map;
LOG_ENTRY ();
+ ocfs_node_map_init(osb, &vote_map);
+
req_master = (ocfs_dlm_req_master *) dlm_msg->msg_buf;
msg_len = sizeof (ocfs_dlm_msg) + sizeof (ocfs_dlm_reply_master);
obj_len = sizeof (ocfs_vote_obj) + sizeof (ocfs_dlm_reply_master);
- obj = ocfs_alloc_vote_obj (obj_len, 0, 0ULL);
+ obj = ocfs_alloc_vote_obj (osb, obj_len, 0, NULL);
if (obj == NULL) {
LOG_ERROR_STATUS (status = -ENOMEM);
goto finally;
@@ -304,14 +346,15 @@
reply_master->h.lock_seq_num = req_master->lock_seq_num;
reply_master->h.open_handle = inode_open;
reply_master->h.flags = req_master->flags;
- vote_map = (1 << dlm_msg->src_node);
- obj->req_vote_map = vote_map;
+ ocfs_node_map_set_bit(&vote_map, dlm_msg->src_node);
+ ocfs_node_map_set(&obj->req_vote_map, &vote_map);
+
spin_lock(&osb->vote_obj_queue_lock);
list_add_tail(&obj->list, &osb->vote_obj_queue);
spin_unlock(&osb->vote_obj_queue_lock);
- ocfs_send_bcast (osb, vote_map, send_dlm_msg);
+ ocfs_send_bcast (osb, &vote_map, send_dlm_msg);
spin_lock (&obj->lock);
obj->vote_state = VOTE_OBJ_STATE_SENT;
spin_unlock (&obj->lock);
@@ -363,12 +406,6 @@
goto bail;
}
- if ((dlm_msg->src_node < 0) ||
- (dlm_msg->src_node > OCFS_MAXIMUM_NODES)) {
- LOG_TRACE_ARGS ("Invalid source node in netdlm message: %d\n",
- dlm_msg->src_node);
- goto bail;
- }
ret = 1;
@@ -388,6 +425,7 @@
int max = data->u.proc.max - *len;
char *p = data->u.proc.page + *len;
int ret = 0;
+ char *reqstr=NULL, *gotstr=NULL, *openstr=NULL;
/* just run thru everything to populate /proc */
/* return -ENOENT to keep going */
@@ -396,23 +434,32 @@
switch (dlm_msg->msg_type) {
case OCFS_VOTE_REQUEST:
request = (ocfs_dlm_msg_hdr *) dlm_msg->msg_buf;
- ret = snprintf(p, max, "REQST: %d %c %3d %08x %21llu %21llu %08x | %08x %08x\n",
+
+ if (ocfs_node_map_stringify(&obj->req_vote_map, &reqstr) < 0)
+ break;
+ if (ocfs_node_map_stringify(&obj->got_vote_map, &gotstr) < 0)
+ break;
+ if (ocfs_node_map_stringify(&obj->tmp_openmap, &openstr) < 0)
+ break;
+ ret = snprintf(p, max, "REQST: %d %c %3d %s %21llu %21llu %08x | %s %s\n",
obj->pid,
vote_state_str[obj->vote_state],
obj->vote_status,
- (__u32)(obj->req_vote_map & 0xFFFFFFFFULL),
+ reqstr,
request->lock_id,
- request->lock_seq_num, request->flags,
- (__u32)(obj->got_vote_map & 0xFFFFFFFFULL),
- (__u32)(obj->tmp_openmap & 0xFFFFFFFFULL));
+ request->lock_seq_num,
+ request->flags,
+ gotstr, openstr);
break;
case OCFS_VOTE_REPLY:
reply = (ocfs_dlm_reply_master *) dlm_msg->msg_buf;
- ret = snprintf(p, max, "REPLY: %d %c %3d %08x %21llu %21llu %08x | %3d %c\n",
+ if (ocfs_node_map_stringify(&obj->req_vote_map, &reqstr) < 0)
+ break;
+ ret = snprintf(p, max, "REPLY: %d %c %3d %s %21llu %21llu %08x | %3d %c\n",
obj->pid,
vote_state_str[obj->vote_state],
obj->vote_status,
- (__u32)(obj->req_vote_map & 0xFFFFFFFFULL),
+ reqstr,
reply->h.lock_id,
reply->h.lock_seq_num,
reply->h.flags,
@@ -429,6 +476,13 @@
}
(*len) += ret;
p[max-1] = '\0';
+
+ if (reqstr)
+ kfree(reqstr);
+ if (gotstr)
+ kfree(gotstr);
+ if (openstr)
+ kfree(openstr);
return status;
}
@@ -568,7 +622,7 @@
ocfs_process_one_vote_reply(osb, &ctxt, dlm_msg->src_node);
- if (obj->got_vote_map == obj->req_vote_map)
+ if (ocfs_node_map_is_equal(&obj->got_vote_map, &obj->req_vote_map))
obj->vote_state = VOTE_OBJ_STATE_FULL_REPLY;
else
obj->vote_state = VOTE_OBJ_STATE_PARTIAL_REPLY;
@@ -623,7 +677,6 @@
int status = 0;
ocfs_super *osb = NULL;
ocfs_dlm_msg *dlm_msg;
- __u64 nodemap;
ocfs_dlm_req_master *req_master;
struct list_head *iter_osb, *temp_iter;
ocfs_vote_request_ctxt ctxt;
@@ -636,7 +689,7 @@
down (&(OcfsGlobalCtxt.global_res));
list_for_each_safe (iter_osb, temp_iter, &(OcfsGlobalCtxt.osb_next)) {
osb = list_entry (iter_osb, ocfs_super, osb_next);
- if (!memcmp (osb->vol_layout.vol_id, dlm_msg->vol_id,
+ if (!memcmp (osb->uuid, dlm_msg->vol_id,
MAX_VOL_ID_LENGTH))
break;
osb = NULL;
@@ -644,12 +697,18 @@
up (&(OcfsGlobalCtxt.global_res));
if (osb == NULL) {
- LOG_TRACE_STR("Ignoring netdlm message with invalid volume id");
+ LOG_ERROR_STR("Ignoring netdlm message with invalid volume id");
goto bail;
}
- nodemap = (1 << dlm_msg->src_node);
- if (!(osb->publ_map & nodemap)) {
+ if ((dlm_msg->src_node < 0) ||
+ (dlm_msg->src_node > osb->max_nodes)) {
+ LOG_ERROR_ARGS ("Invalid source node in netdlm message: %d\n",
+ dlm_msg->src_node);
+ goto bail;
+ }
+
+ if (!ocfs_node_map_test_bit(&osb->publ_map, dlm_msg->src_node)) {
LOG_TRACE_STR("Ignoring netdlm message from dead node");
goto bail;
}
@@ -674,7 +733,7 @@
"from %s (node %d)\n", MAJOR(osb->sb->s_dev),
MINOR(osb->sb->s_dev), osb->node_cfg_info[src_node]->node_name,
src_node);
- atomic_set (&(osb->vol_node_map.dismount[src_node]), 1);
+ atomic_set (&(osb->vol_node_map[src_node].dismount), 1);
break;
default:
@@ -687,25 +746,29 @@
} /* ocfs_comm_process_msg */
+
/*
* ocfs_send_dismount_msg()
*
*/
-int ocfs_send_dismount_msg (ocfs_super * osb, __u64 vote_map)
+int ocfs_send_dismount_msg (ocfs_super * osb)
{
int status = 0;
ocfs_dlm_msg *dlm_msg = NULL;
ocfs_dlm_msg_hdr *req;
ocfs_vote_obj *obj;
__u32 msg_len, obj_len;
+ ocfs_node_map map;
- LOG_ENTRY_ARGS ("(osb=0x%p, vm=0x%08x)\n", osb,
- (__u32)(vote_map & 0xFFFFFFFFULL));
+ LOG_ENTRY_ARGS ("(osb=0x%p)\n", osb);
+ ocfs_node_map_dup(osb, &map, &osb->publ_map);
+ ocfs_node_map_clear_bit(&map, osb->node_num);
+
msg_len = sizeof (ocfs_dlm_msg) + sizeof (ocfs_dlm_req_master);
obj_len = sizeof (ocfs_vote_obj) + sizeof (ocfs_dlm_req_master);
- obj = ocfs_alloc_vote_obj (obj_len, 0, 0ULL);
+ obj = ocfs_alloc_vote_obj (osb, obj_len, 0, NULL);
if (obj == NULL) {
LOG_ERROR_STATUS (status = -ENOMEM);
goto finally;
@@ -722,7 +785,7 @@
list_add_tail(&obj->list, &osb->vote_obj_queue);
spin_unlock(&osb->vote_obj_queue_lock);
- ocfs_send_bcast (osb, vote_map, dlm_msg);
+ ocfs_send_bcast (osb, &map, dlm_msg);
spin_lock (&obj->lock);
obj->vote_state = VOTE_OBJ_STATE_SENT;
spin_unlock (&obj->lock);
@@ -754,17 +817,17 @@
dlm_msg->msg_len = msg_len;
dlm_msg->src_node = osb->node_num;
dlm_msg->msg_type = type;
- memcpy (dlm_msg->vol_id, osb->vol_layout.vol_id, MAX_VOL_ID_LENGTH);
+ memcpy (dlm_msg->vol_id, osb->uuid, MAX_VOL_ID_LENGTH);
LOG_EXIT ();
return;
} /* ocfs_init_dlm_msg */
-static ocfs_vote_obj * ocfs_alloc_vote_obj (int bytes, __u32 reqlock, __u64 votemap)
+static ocfs_vote_obj * ocfs_alloc_vote_obj (ocfs_super *osb, int bytes, __u32 reqlock, ocfs_node_map *votemap)
{
ocfs_vote_obj *obj = NULL;
-
+
obj = ocfs_malloc (bytes);
if (obj == NULL)
return NULL;
@@ -777,13 +840,17 @@
init_waitqueue_head (&obj->voted_event);
INIT_LIST_HEAD (&obj->list);
- obj->req_vote_map = votemap;
- obj->got_vote_map = 0ULL;
- obj->tmp_openmap = 0ULL;
+ if (votemap)
+ ocfs_node_map_dup(osb, &obj->req_vote_map, votemap);
+ else
+ ocfs_node_map_init(osb, &obj->req_vote_map);
+ ocfs_node_map_init(osb, &obj->got_vote_map);
+ ocfs_node_map_init(osb, &obj->tmp_openmap);
+
obj->seq_num = 0ULL;
obj->req_lock_type = reqlock;
obj->vote_status = 0;
- obj->pid = ocfs_getpid();
+ obj->pid = current->pid;
return obj;
}
@@ -794,7 +861,7 @@
* ocfs_send_dlm_request_msg()
* inode is definitely non NULL
*/
-int ocfs_send_dlm_request_msg (ocfs_super * osb, __u64 lock_id, __u32 lock_type, __u32 flags, __u64 * vote_map, __u64 *openmap, struct inode *inode, int *vote_status)
+int ocfs_send_dlm_request_msg (ocfs_super * osb, __u64 lock_id, __u32 lock_type, __u32 flags, ocfs_node_map *votemap, ocfs_node_map *openmap, struct inode *inode, int *vote_status)
{
int status = 0;
ocfs_dlm_msg *dlm_msg = NULL;
@@ -802,13 +869,13 @@
ocfs_vote_obj *obj;
__u32 msg_len, obj_len;
- LOG_ENTRY_ARGS ("(osb=0x%p, id=%llu, ty=%u, fl=%u, vm=0x%08x)\n",
- osb, lock_id, lock_type, flags, (__u32)(*vote_map & 0xFFFFFFFFULL));
+ LOG_ENTRY_ARGS ("(osb=0x%p, id=%llu, ty=%u, fl=%u)\n",
+ osb, lock_id, lock_type, flags);
msg_len = sizeof (ocfs_dlm_msg) + sizeof (ocfs_dlm_req_master);
obj_len = sizeof (ocfs_vote_obj) + sizeof (ocfs_dlm_req_master);
- obj = ocfs_alloc_vote_obj (obj_len, lock_type, *vote_map);
+ obj = ocfs_alloc_vote_obj (osb, obj_len, lock_type, votemap);
if (obj == NULL) {
LOG_ERROR_STATUS (status = -ENOMEM);
goto finally;
@@ -834,7 +901,7 @@
list_add_tail(&obj->list, &osb->vote_obj_queue);
spin_unlock(&osb->vote_obj_queue_lock);
- ocfs_send_bcast (osb, *vote_map, dlm_msg);
+ ocfs_send_bcast (osb, votemap, dlm_msg);
spin_lock (&obj->lock);
obj->vote_state = VOTE_OBJ_STATE_SENT;
spin_unlock (&obj->lock);
@@ -847,19 +914,15 @@
// extend and update need to update the openmap
if (flags & (FLAG_FILE_EXTEND | FLAG_FILE_UPDATE))
if (openmap)
- *openmap = obj->tmp_openmap;
- LOG_TRACE_ARGS ("OK vote, lockid=%llu, map: 0x%08x\n",
- lock_id, (__u32)(obj->got_vote_map & 0xFFFFFFFFULL));
+ ocfs_node_map_set(openmap, &obj->tmp_openmap);
+ LOG_TRACE_ARGS ("OK vote, lockid=%llu\n", lock_id);
} else {
- LOG_ERROR_ARGS("vote_status=%d, vote_state=%d, lockid=%llu, map=0x%08x, got=0x%08x\n",
- obj->vote_status, obj->vote_state,
- lock_id,
- (__u32)(obj->req_vote_map & 0xFFFFFFFFULL),
- (__u32)(obj->got_vote_map & 0xFFFFFFFFULL));
+ LOG_ERROR_ARGS("vote_status=%d, vote_state=%d, lockid=%llu\n",
+ obj->vote_status, obj->vote_state, lock_id);
}
*vote_status = obj->vote_status;
obj->vote_state = VOTE_OBJ_STATE_DESTROYING;
- *vote_map = (*vote_map) & ~(obj->got_vote_map);
+ ocfs_node_map_clear_bits(votemap, &obj->got_vote_map);
spin_unlock (&obj->lock);
spin_lock(&osb->vote_obj_queue_lock);
@@ -884,10 +947,14 @@
int status;
int reply_status;
int open_handle = 0;
- __u64 mask = 0;
if (ctxt->reply_method == DISK_VOTE) {
- reply_status = ctxt->u.vote->vote[osb->node_num];
+ if (ctxt->u.vote->node != osb->node_num) {
+ LOG_ERROR_ARGS("vote->node(%hu) != this(%u)\n",
+ ctxt->u.vote->node, osb->node_num);
+ reply_status = 0;
+ } else
+ reply_status = ctxt->u.vote->type;
open_handle = ctxt->u.vote->open_handle;
} else {
reply_status = ctxt->u.reply->status;
@@ -895,24 +962,23 @@
}
status = 0;
- mask = 1 << node_num;
switch (reply_status) {
case FLAG_VOTE_NODE:
- *(ctxt->got_vote_map) |= mask;
+ ocfs_node_map_set_bit(ctxt->got_vote_map, node_num);
if (ctxt->flags & (FLAG_FILE_EXTEND|FLAG_FILE_UPDATE) &&
open_handle && ctxt->open_map)
- *(ctxt->open_map) |= mask;
+ ocfs_node_map_set_bit(ctxt->open_map, node_num);
break;
case FLAG_VOTE_OIN_ALREADY_INUSE:
- *(ctxt->got_vote_map) |= mask;
+ ocfs_node_map_set_bit(ctxt->got_vote_map, node_num);
status = -EFAIL;
if (ctxt->flags & FLAG_FILE_DELETE)
status = -EBUSY;
break;
case FLAG_VOTE_OIN_UPDATED:
status = 0;
- *(ctxt->got_vote_map) |= mask;
+ ocfs_node_map_set_bit(ctxt->got_vote_map, node_num);
break;
case FLAG_VOTE_UPDATE_RETRY:
status = -EAGAIN;
@@ -923,3 +989,238 @@
}
*(ctxt->status) = status;
}
+
+/*
+ * ocfs_node_map_init()
+ *
+ * Reset a node map: zero the bitmap and record this volume's node count.
+ * The whole bitmap is cleared (sized by OCFS_NODE_MAP_MAX_NODES), not
+ * only the first osb->max_nodes bits.
+ *
+ * The set_bit/clear_bit helpers below special case -1 for now.
+ * TODO: should *really* make sure the calling func never passes -1!!
+ */
+void ocfs_node_map_init(ocfs_super *osb, ocfs_node_map *map)
+{
+	const size_t map_bytes = BITS_TO_LONGS(OCFS_NODE_MAP_MAX_NODES) *
+				 sizeof(unsigned long);
+
+	memset(map->map, 0, map_bytes);
+	map->num_nodes = osb->max_nodes;
+}
+
+/*
+ * ocfs_node_map_set_bit()
+ *
+ * Set "bit" in "map" via set_bit().  A bit of -1 is silently ignored;
+ * any other value must be below map->num_nodes or OCFS_ASSERT fires.
+ */
+void ocfs_node_map_set_bit(ocfs_node_map *map, int bit)
+{
+	if (bit != -1) {
+		OCFS_ASSERT(bit < map->num_nodes);
+		set_bit(bit, map->map);
+	}
+}
+
+/*
+ * ocfs_node_map_clear_bit()
+ *
+ * Clear "bit" in "map" via clear_bit().  A bit of -1 is silently ignored;
+ * any other value must be below map->num_nodes or OCFS_ASSERT fires.
+ */
+void ocfs_node_map_clear_bit(ocfs_node_map *map, int bit)
+{
+	if (bit != -1) {
+		OCFS_ASSERT(bit < map->num_nodes);
+		clear_bit(bit, map->map);
+	}
+}
+
+// clear all the bits in "target" which are set in "mask"
+void ocfs_node_map_clear_bits(ocfs_node_map *target, ocfs_node_map *mask)
+{
+	int bit, prev = 0;
+
+	while (1) {
+		bit = find_next_bit(mask->map, mask->num_nodes, prev);
+		/*
+		 * find_next_bit() returns the search size (num_nodes) when
+		 * no further bit is set, so the loop must stop on ">=".
+		 * With ">" the value num_nodes itself was passed on and
+		 * tripped the range assert in ocfs_node_map_clear_bit().
+		 */
+		if (bit >= mask->num_nodes)
+			break;
+		ocfs_node_map_clear_bit(target, bit);
+		prev = bit + 1;
+	}
+}
+
+// set all the bits in "target" which are set in "mask"
+void ocfs_node_map_set_bits(ocfs_node_map *target, ocfs_node_map *mask)
+{
+	int bit, prev = 0;
+
+	while (1) {
+		bit = find_next_bit(mask->map, mask->num_nodes, prev);
+		/*
+		 * find_next_bit() returns the search size (num_nodes) when
+		 * no further bit is set, so the loop must stop on ">=".
+		 * With ">" the value num_nodes itself was passed on and
+		 * tripped the range assert in ocfs_node_map_set_bit().
+		 */
+		if (bit >= mask->num_nodes)
+			break;
+		ocfs_node_map_set_bit(target, bit);
+		prev = bit + 1;
+	}
+}
+
+/*
+ * ocfs_node_map_test_bit()
+ *
+ * Returns nonzero if "bit" is set in "map".
+ *
+ * A bit of -1 is reported as "not set", matching the -1 special case in
+ * ocfs_node_map_set_bit()/ocfs_node_map_clear_bit().  Any other index
+ * outside [0, map->num_nodes) is logged and is a BUG(); previously a
+ * negative index slipped past the upper-bound check and test_bit() read
+ * in front of the bitmap.
+ */
+int ocfs_node_map_test_bit(ocfs_node_map *map, int bit)
+{
+	if (bit == -1)
+		return 0;
+	if (bit < 0 || bit >= map->num_nodes) {
+		LOG_ERROR_ARGS("bit=%d map->num_nodes=%d\n", bit, map->num_nodes);
+		BUG();
+	}
+	return test_bit(bit, map->map);
+}
+
+/*
+ * ocfs_node_map_stringify()
+ *
+ * Render the set bits of "map" as a string of "%3d " fields, one per set
+ * node number, into a freshly kmalloc'ed buffer returned through "str".
+ *
+ * Returns 0 on success; the caller owns *str and must kfree() it.
+ * Returns -ENOMEM on allocation failure or if a field does not format to
+ * the expected width; in both cases *str is NULL, so a caller that does
+ * "if (str) kfree(str)" on its way out cannot free the buffer twice.
+ */
+int ocfs_node_map_stringify(ocfs_node_map *map, char **str)
+{
+	int i, n;
+	char *s;
+
+	OCFS_ASSERT(map->num_nodes > 0);
+
+	/* one "%3d " field per possible node, plus the terminating NUL */
+	*str = kmalloc(strlen("123 ") * map->num_nodes + 1, GFP_KERNEL);
+	if (!(*str))
+		return -ENOMEM;
+
+	s = *str;
+	*s = '\0';	/* a map with no bits set yields a valid empty string */
+	for (i = 0; i < map->num_nodes; i++) {
+		if (ocfs_node_map_test_bit(map, i)) {
+			n = sprintf(s, "%3d ", i);
+			if (n != strlen("123 ")) {
+				kfree(*str);
+				*str = NULL;
+				return -ENOMEM;
+			}
+			s += n;
+		}
+	}
+	return 0;
+}
+
+/*
+ * ocfs_node_map_is_empty()
+ *
+ * Returns 1 if none of the first map->num_nodes bits is set, 0 otherwise.
+ */
+int ocfs_node_map_is_empty(ocfs_node_map *map)
+{
+	int first_set;
+
+	OCFS_ASSERT(map->num_nodes > 0);
+
+	first_set = find_first_bit(map->map, map->num_nodes);
+	return (first_set < map->num_nodes) ? 0 : 1;
+}
+
+/*
+ * ocfs_node_map_is_equal()
+ *
+ * Returns 1 if both maps hold identical words over
+ * BITS_TO_LONGS(num_nodes) longs, 0 at the first difference.  Both maps
+ * must have the same, non-zero num_nodes (asserted).
+ */
+int ocfs_node_map_is_equal(ocfs_node_map *map1, ocfs_node_map *map2)
+{
+	int word, nwords;
+
+	OCFS_ASSERT(map1->num_nodes == map2->num_nodes);
+	OCFS_ASSERT(map1->num_nodes > 0);
+
+	nwords = BITS_TO_LONGS(map1->num_nodes);
+	word = 0;
+	while (word < nwords && map1->map[word] == map2->map[word])
+		word++;
+
+	/* the scan ran off the end only if every word matched */
+	return word == nwords;
+}
+
+
+/*
+ * ocfs_node_map_and()
+ *
+ * target &= mask, word by word over BITS_TO_LONGS(num_nodes) longs.
+ * Both maps must have the same, non-zero num_nodes (asserted).
+ */
+void ocfs_node_map_and(ocfs_node_map *target, ocfs_node_map *mask)
+{
+	int word;
+
+	OCFS_ASSERT(target->num_nodes == mask->num_nodes);
+	OCFS_ASSERT(target->num_nodes > 0);
+
+	word = BITS_TO_LONGS(target->num_nodes);
+	while (word-- > 0)
+		target->map[word] &= mask->map[word];
+}
+
+/*
+ * ocfs_node_map_set()
+ *
+ * Overwrite target's bitmap with from's, word by word over
+ * BITS_TO_LONGS(num_nodes) longs.  Both maps must have the same,
+ * non-zero num_nodes (asserted).
+ */
+void ocfs_node_map_set(ocfs_node_map *target, ocfs_node_map *from)
+{
+	int word;
+
+	OCFS_ASSERT(target->num_nodes == from->num_nodes);
+	OCFS_ASSERT(target->num_nodes > 0);
+
+	for (word = BITS_TO_LONGS(target->num_nodes) - 1; word >= 0; word--)
+		target->map[word] = from->map[word];
+}
+
+
+void ocfs_node_map_dup(ocfs_super *osb, ocfs_node_map *target, ocfs_node_map *from)
+{ /* Make "target" a fresh copy of "from"; "target" need not be initialised beforehand. */
+ OCFS_ASSERT(from->num_nodes > 0);
+ ocfs_node_map_init(osb, target); /* sets target->num_nodes = osb->max_nodes and zeroes the bitmap */
+ ocfs_node_map_set(target, from); /* asserts both num_nodes match, then copies the words */
+}
+
+// expects that "target" is already inited with correct
+// num_nodes, and that "from" is of course wide enough
+/* Force le32 as the endian format for on-disk node maps
+ *
+ * ocfs_node_map_set_from_disk() loads target's bitmap from "from", read
+ * as an array of little-endian 32-bit words; enough words are copied to
+ * cover target->num_nodes bits (rounded up to a multiple of 32).
+ *
+ * NOTE(review): the in-memory map is addressed here as u32 words, while
+ * ocfs_node_map_init() sizes it in unsigned longs.  On a 64-bit
+ * big-endian machine the two 32-bit halves of each long would presumably
+ * end up swapped relative to set_bit()/test_bit() numbering -- confirm
+ * whether such targets matter.
+ */
+#define OCFS_NODE_MAP_DISK_BITS_PER_LONG 32
+#define OCFS_NODE_MAP_DISK_BITS_TO_LONGS(bits) \
+ (((bits)+OCFS_NODE_MAP_DISK_BITS_PER_LONG-1)/OCFS_NODE_MAP_DISK_BITS_PER_LONG)
+void ocfs_node_map_set_from_disk(ocfs_node_map *target, void *from)
+{
+ int i, num_u32;
+ u32 *t = (u32 *)target->map; /* in-memory bitmap viewed as 32-bit words */
+ u32 *f = from; /* on-disk words, little-endian */
+
+ OCFS_ASSERT(target->num_nodes > 0);
+
+ num_u32 = OCFS_NODE_MAP_DISK_BITS_TO_LONGS(target->num_nodes);
+ for (i = 0; i < num_u32; i++)
+ t[i] = le32_to_cpu(f[i]);
+}
+
+void ocfs_node_map_set_to_disk(void *target, ocfs_node_map *from)
+{ /* Store from's bitmap into "target" as little-endian 32-bit words covering from->num_nodes bits (rounded up to 32). */
+ int i, num_u32;
+ u32 *t = target; /* destination buffer; must be wide enough for num_u32 words */
+ u32 *f = (u32 *)from->map; /* in-memory bitmap viewed as 32-bit words */
+
+ OCFS_ASSERT(from->num_nodes > 0);
+
+ num_u32 = OCFS_NODE_MAP_DISK_BITS_TO_LONGS(from->num_nodes);
+ for (i=0; i<num_u32; i++)
+ t[i] = cpu_to_le32(f[i]);
+}
+
+/*
+ * ocfs_node_map_is_only()
+ *
+ * Returns 1 when no bit other than "bit" is set in "target" (so an
+ * empty map also yields 1), 0 otherwise.  "target" itself is not
+ * modified.
+ */
+int ocfs_node_map_is_only(ocfs_super *osb, ocfs_node_map *target, int bit)
+{
+	ocfs_node_map scratch;
+
+	/* work on a private copy so the caller's map stays untouched */
+	ocfs_node_map_dup(osb, &scratch, target);
+	ocfs_node_map_clear_bit(&scratch, bit);
+
+	return ocfs_node_map_is_empty(&scratch);
+}
+
+/*
+** Publish map helper: a thin wrapper that sets bit "num" in "pubmap"
+** by calling ocfs_node_map_set_bit(), which ignores num == -1 and
+** asserts that any other value is below pubmap->num_nodes.
+*/
+void ocfs_publish_map_set(ocfs_node_map *pubmap, int num)
+{
+ ocfs_node_map_set_bit(pubmap, num);
+}
+
+void ocfs_publish_map_clear(ocfs_node_map *pubmap, int num)
+{ /* Thin wrapper: clears bit "num" in "pubmap"; ocfs_node_map_clear_bit() ignores num == -1. */
+ ocfs_node_map_clear_bit(pubmap, num);
+}
+
+/* update the recovery map here
+ *
+ * With osb->recovery_map_lock held: mark the volume VOLUME_IN_RECOVERY
+ * and set bit "num" in osb->recovery_map.
+ */
+void ocfs_recovery_map_set(ocfs_super *osb, int num)
+{
+ spin_lock(&osb->recovery_map_lock);
+ osb->vol_state = VOLUME_IN_RECOVERY;
+ ocfs_node_map_set_bit(&osb->recovery_map, num);
+ spin_unlock(&osb->recovery_map_lock);
+}
+
+void ocfs_recovery_map_clear(ocfs_super *osb, int num)
+{ /* With osb->recovery_map_lock held: clear bit "num" in osb->recovery_map. */
+ spin_lock(&osb->recovery_map_lock);
+ ocfs_node_map_clear_bit(&osb->recovery_map, num);
+ if (ocfs_node_map_is_empty(&osb->recovery_map)) /* no set bits remain in the recovery map */
+ osb->vol_state = VOLUME_ENABLED;
+ spin_unlock(&osb->recovery_map_lock);
+}
+
+/*
+ * ocfs_node_is_recovering()
+ *
+ * Returns nonzero if bit "num" is set in osb->recovery_map; a "num" of -1
+ * always yields 0.  The map is read here without taking
+ * osb->recovery_map_lock.
+ */
+int ocfs_node_is_recovering(ocfs_super *osb, int num)
+{
+	return (num == -1) ? 0 : ocfs_node_map_test_bit(&osb->recovery_map, num);
+}
+
+/*
+ * ocfs_node_is_alive()
+ *
+ * Returns nonzero if bit "index" is set in the publish map "pubmap"; an
+ * "index" of -1 always yields 0.
+ */
+int ocfs_node_is_alive(ocfs_node_map *pubmap, int index)
+{
+	return (index == -1) ? 0 : ocfs_node_map_test_bit(pubmap, index);
+}
+
Added: trunk/src/vote.h
===================================================================
--- trunk/src/vote.h 2004-06-17 16:59:59 UTC (rev 1127)
+++ trunk/src/vote.h 2004-06-17 21:06:33 UTC (rev 1128)
@@ -0,0 +1,83 @@
+/* -*- mode: c; c-basic-offset: 8; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
+ * vote.h
+ *
+ * Function prototypes
+ *
+ * Copyright (C) 2002, 2004 Oracle. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ *
+ * Authors: Kurt Hackel, Mark Fasheh, Sunil Mushran, Wim Coekaerts,
+ * Manish Singh, Neeraj Goyal, Suchit Kaura
+ */
+
+#ifndef OCFS2_VOTE_H
+#define OCFS2_VOTE_H
+
+/*
+ * This header includes nothing itself: ocfs_super, ocfs_vote_obj,
+ * ocfs_node_map and the other types used below must already be declared
+ * by the includer.
+ */
+
+/* vote objects and netdlm messaging */
+void ocfs_get_vote_obj(ocfs_vote_obj *obj);
+int ocfs_init_udp_sock(struct socket **send_sock,
+		       struct socket **recv_sock);
+int ocfs_lookup_obj_for_proc(ocfs_vote_obj *obj,
+			     ocfs_vote_obj_lookup_data *data);
+int ocfs_lookup_obj_by_lockid(ocfs_vote_obj *obj,
+			      ocfs_vote_obj_lookup_data *data);
+/* declared once; the original header repeated this prototype further down */
+int ocfs_lookup_vote_request_obj(ocfs_super *osb,
+				 ocfs_vote_obj_lookup_data *data);
+void ocfs_process_one_vote_reply(ocfs_super *osb,
+				 ocfs_vote_reply_ctxt *ctxt,
+				 __u32 node_num);
+int ocfs_recv_udp_msg(ocfs_recv_ctxt *recv_ctxt);
+int ocfs_send_dismount_msg(ocfs_super *osb);
+int ocfs_send_dlm_request_msg (ocfs_super * osb, __u64 lock_id,
+			       __u32 lock_type, __u32 flags,
+			       ocfs_node_map *votemap, ocfs_node_map *openmap,
+			       struct inode *inode, int *vote_status);
+int ocfs_send_vote_reply(ocfs_super *osb, ocfs_dlm_msg *dlm_msg,
+			 __u32 vote_status, int inode_open);
+
+/* generic node map (per-node bitmap) operations */
+void ocfs_node_map_init(ocfs_super *osb, ocfs_node_map *map);
+void ocfs_node_map_set_bit(ocfs_node_map *map, int bit);
+void ocfs_node_map_clear_bit(ocfs_node_map *map, int bit);
+// clear all the bits in "target" which are set in "mask"
+void ocfs_node_map_clear_bits(ocfs_node_map *target, ocfs_node_map *mask);
+// set all the bits in "target" which are set in "mask"
+void ocfs_node_map_set_bits(ocfs_node_map *target, ocfs_node_map *mask);
+int ocfs_node_map_test_bit(ocfs_node_map *map, int bit);
+/* on success *str is kmalloc'ed and must be kfree'd by the caller */
+int ocfs_node_map_stringify(ocfs_node_map *map, char **str);
+int ocfs_node_map_is_empty(ocfs_node_map *map);
+int ocfs_node_map_is_equal(ocfs_node_map *map1, ocfs_node_map *map2);
+void ocfs_node_map_and(ocfs_node_map *target, ocfs_node_map *mask);
+void ocfs_node_map_set(ocfs_node_map *target, ocfs_node_map *from);
+void ocfs_node_map_dup(ocfs_super *osb, ocfs_node_map *target, ocfs_node_map *from);
+// expects that "target" is already inited with correct
+// num_nodes, and that "from" is of course wide enough
+void ocfs_node_map_set_from_disk(ocfs_node_map *target, void *from);
+void ocfs_node_map_set_to_disk(void *target, ocfs_node_map *from);
+/* returns 1 if bit is the only bit set in target, 0 otherwise */
+int ocfs_node_map_is_only(ocfs_super *osb, ocfs_node_map *target, int bit);
+
+/* publish map and recovery map wrappers */
+int ocfs_node_is_recovering(ocfs_super *osb, int num);
+int ocfs_node_is_alive(ocfs_node_map *pubmap, int index);
+void ocfs_publish_map_set(ocfs_node_map *pubmap, int num);
+void ocfs_publish_map_clear(ocfs_node_map *pubmap, int num);
+void ocfs_recovery_map_set(ocfs_super *osb, int num);
+void ocfs_recovery_map_clear(ocfs_super *osb, int num);
+
+#endif /* OCFS2_VOTE_H */
More information about the Ocfs2-commits
mailing list