[Ocfs2-commits] jlbec commits r1584 - trunk/src
svn-commits at oss.oracle.com
Thu Oct 21 18:20:18 CDT 2004
Author: jlbec
Date: 2004-10-21 18:20:17 -0500 (Thu, 21 Oct 2004)
New Revision: 1584
Added:
trunk/src/extent_map.c
trunk/src/extent_map.h
Modified:
trunk/src/24io.c
trunk/src/Makefile
trunk/src/alloc.c
trunk/src/aops.c
trunk/src/dir.c
trunk/src/file.c
trunk/src/inode.c
trunk/src/namei.c
trunk/src/nm.c
trunk/src/ocfs.h
trunk/src/ocfs2.h
trunk/src/ocfs_compat.h
trunk/src/super.c
Log:
o Add new rbtree-based extent map.
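The new map caches on-disk extent records in a per-inode rbtree keyed by
cluster ranges; callers translate virtual (file-relative) block numbers to
physical blocks with ocfs2_extent_map_get_blocks().  For now every call site
also cross-checks the result against the legacy ocfs_lookup_file_allocation()
path and logs any mismatch.  A minimal sketch of that caller pattern (v_blkno,
lbo and the message text are illustrative; the real call sites are in the
hunks below):

	u64 p_blkno;
	int status;

	/* Map one virtual (file-relative) block to its physical block. */
	status = ocfs2_extent_map_get_blocks(inode, v_blkno, 1, &p_blkno, NULL);
	if (status < 0) {
		LOG_ERROR_STATUS(status);
		return status;
	}

	/* Cross-check against the legacy lookup's byte offset (lbo). */
	if (p_blkno != (lbo >> inode->i_sb->s_blocksize_bits))
		LOG_ERROR_ARGS("get_blocks() returned %llu, expected %llu\n",
			       (unsigned long long)p_blkno,
			       (unsigned long long)(lbo >> inode->i_sb->s_blocksize_bits));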
Modified: trunk/src/24io.c
===================================================================
--- trunk/src/24io.c 2004-10-19 22:45:10 UTC (rev 1583)
+++ trunk/src/24io.c 2004-10-21 23:20:17 UTC (rev 1584)
@@ -14,6 +14,7 @@
#include "ocfs.h"
#include "alloc.h"
+#include "extent_map.h"
#include "inode.h"
@@ -29,6 +30,10 @@
ocfs_super *osb;
__s64 vbo = 0;
__s64 lbo = 0;
+ u64 p_blkno;
+ u64 p_sector;
+ u64 v_blkno;
+ int s_to_b_bits, offset;
LOG_ENTRY_ARGS ("(0x%p, %ld)\n", inode, iblock);
@@ -59,6 +64,24 @@
OCFS_I(inode)->ip_blkno);
}
+ s_to_b_bits = osb->sb->s_blocksize_bits - osb->s_sectsize_bits;
+ v_blkno = (u64)iblock >> s_to_b_bits;
+ offset = (int)((u64)iblock & ((1ULL << s_to_b_bits) - 1));
+ err = ocfs2_extent_map_get_blocks(inode, v_blkno, 1, &p_blkno,
+ NULL);
+ if (err) {
+ LOG_ERROR_STATUS(err);
+ goto bail;
+ }
+
+ p_sector = p_blkno << s_to_b_bits;
+ p_sector += offset;
+ if (p_sector != *oblock) {
+ err = -EIO;
+ LOG_ERROR_ARGS("p_sector = %llu, *oblock = %llu\n",
+ p_sector, (unsigned long long)*oblock);
+ }
+
bail:
if (err < 0)
err = -EIO;
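For reference, the sector/block conversion in the hunk above, assuming
4096-byte filesystem blocks and 512-byte hardware sectors (illustrative
sizes only), works out to:

	s_to_b_bits = 12 - 9 = 3                 (8 sectors per fs block)
	iblock = 35  =>  v_blkno = 35 >> 3 = 4,  offset = 35 & 7 = 3
	p_sector = (p_blkno << 3) + 3,  which must equal *oblock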
Modified: trunk/src/Makefile
===================================================================
--- trunk/src/Makefile 2004-10-19 22:45:10 UTC (rev 1583)
+++ trunk/src/Makefile 2004-10-21 23:20:17 UTC (rev 1584)
@@ -72,6 +72,7 @@
dir.c \
dlm.c \
extmap.c \
+ extent_map.c \
file.c \
heartbeat.c \
inode.c \
@@ -106,6 +107,7 @@
dir.h \
dlm.h \
extmap.h \
+ extent_map.h \
file.h \
heartbeat.h \
inode.h \
Modified: trunk/src/alloc.c
===================================================================
--- trunk/src/alloc.c 2004-10-19 22:45:10 UTC (rev 1583)
+++ trunk/src/alloc.c 2004-10-21 23:20:17 UTC (rev 1584)
@@ -38,6 +38,7 @@
#include "bitmap.h"
#include "dlm.h"
#include "extmap.h"
+#include "extent_map.h"
#include "inode.h"
#include "localalloc.h"
#include "util.h"
@@ -716,6 +717,17 @@
el->l_recs[i].e_cpos = fe->i_clusters;
el->l_next_free_rec++;
}
+
+ /*
+ * extent_map errors are not fatal, so they are ignored outside
+ * of flushing the thing.
+ */
+ status = ocfs2_extent_map_append(inode, &el->l_recs[i],
+ new_clusters);
+ if (status) {
+ LOG_ERROR_STATUS(status);
+ ocfs2_extent_map_drop(inode, fe->i_clusters);
+ }
status = ocfs_journal_dirty(handle, fe_bh);
if (status < 0)
@@ -1766,6 +1778,10 @@
}
status = 0;
bail:
+ if (!status)
+ ocfs2_extent_map_trunc(inode, fe->i_clusters);
+ else
+ ocfs2_extent_map_drop(inode, 0);
LOG_EXIT_STATUS(status);
return(status);
}
Modified: trunk/src/aops.c
===================================================================
--- trunk/src/aops.c 2004-10-19 22:45:10 UTC (rev 1583)
+++ trunk/src/aops.c 2004-10-21 23:20:17 UTC (rev 1584)
@@ -32,6 +32,7 @@
#include "alloc.h"
#include "buffer_head_io.h"
+#include "extent_map.h"
#include "file.h"
#include "inode.h"
#include "ocfs_journal.h"
@@ -127,8 +128,10 @@
struct buffer_head *bh_result, int create)
{
int err = -EIO;
+ int err2 = -EIO;
__s64 vbo = 0;
__s64 lbo = 0;
+ u64 p_blkno;
__u32 len;
int open_direct;
@@ -166,6 +169,9 @@
err = ocfs_lookup_file_allocation(OCFS2_SB(inode->i_sb),
vbo, &lbo, len, NULL,
inode);
+
+ err2 = ocfs2_extent_map_get_blocks(inode, iblock, 1, &p_blkno,
+ NULL);
if (!open_direct)
up_read(&OCFS_I(inode)->ip_node_extend_sem);
@@ -174,6 +180,20 @@
goto bail;
}
+ if (err2 < 0) {
+ err = err2;
+ LOG_ERROR_ARGS("Error %d from get_blocks(0x%p, %llu, 1, %llu, NULL)\n",
+ err2,
+ inode, (unsigned long long)iblock,
+ p_blkno);
+ goto bail;
+ }
+
+ if (p_blkno != (lbo >> inode->i_sb->s_blocksize_bits)) {
+ LOG_ERROR_ARGS("get_blocks() returned %llu, expected %lld\n",
+ p_blkno, lbo >> inode->i_sb->s_blocksize_bits);
+ }
+
map_bh(bh_result, inode->i_sb, lbo >> inode->i_sb->s_blocksize_bits);
err = 0;
@@ -204,6 +224,7 @@
OCFS_I(inode)->ip_mmu_private += inode->i_sb->s_blocksize;
bail:
+#warning Any error will crash kunmap_high() from cont_prepare_write()
if (err < 0)
err = -EIO;
@@ -434,12 +455,20 @@
static int ocfs_bmap(struct address_space *mapping, long block)
#endif
{
+ /* Why do two #ifs? Because Mark is an EMACS user. */
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
+ sector_t disk_block = 0;
+ sector_t status;
+#else
int disk_block = 0;
+ int status;
+#endif
ocfs_super *osb = OCFS_SB(mapping->host->i_sb);
__s64 vbo = 0;
__s64 lbo = 0;
__u32 len;
- int err = 0, status;
+ u64 p_blkno;
+ int err = 0;
struct inode *inode = mapping->host;
LOG_SET_CONTEXT(BMAP);
@@ -463,12 +492,33 @@
goto bail;
}
+ err = ocfs2_extent_map_get_blocks(inode, block, 1, &p_blkno,
+ NULL);
+ if (err) {
+ LOG_ERROR_ARGS("get_blocks() failed, block = %llu\n",
+ (unsigned long long)block);
+ LOG_ERROR_STATUS(err);
+ goto bail;
+ }
+
disk_block = lbo >> inode->i_sb->s_blocksize_bits;
+
+ /* FIXME size of items */
+ if (p_blkno != disk_block) {
+ LOG_ERROR_ARGS("get_blocks() returned %llu, expected %llu\n",
+ p_blkno,
+ (unsigned long long)disk_block);
+ }
bail:
+ /* "Gross" - MarkF (While Joel was inserting the #if) */
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
+ status = err ? 0 : disk_block;
+#else
status = err ? err : disk_block;
+#endif
- LOG_EXIT_STATUS(status);
+ LOG_EXIT_STATUS((int)status);
LOG_CLEAR_CONTEXT();
return status;
}
@@ -499,6 +549,8 @@
__s64 vbo; /* file offset */
__s64 lbo; /* logical (disk) offset */
__s64 vbo_max; /* file offset, max_blocks from iblock */
+ u64 p_blkno;
+ int contig_blocks;
int set_new = 0; /* flag */
__u64 new_size; /* In bytes, the size of the contiguous block */
unsigned char blocksize_bits;
@@ -544,9 +596,34 @@
/* This figures out the size of the next contiguous block, and
* our logical offset */
/* TODO: Try our damndest to give sizes in multiples of PAGE_SIZE */
+ /* FIXME: nice bug, fail to check status. Which will fail if
+ * max_blocks > the contiguousness. */
status = ocfs_lookup_file_allocation(osb, vbo, &lbo, max_blocks << blocksize_bits,
&new_size, inode);
+ status = ocfs2_extent_map_get_blocks(inode, iblock, 1, &p_blkno,
+ &contig_blocks);
+ if (status) {
+ LOG_ERROR_ARGS("get_blocks() failed iblock=%llu\n",
+ (unsigned long long)iblock);
+ status = -EIO;
+ goto bail;
+ }
+
+ if (p_blkno != (lbo >> blocksize_bits)) {
+ LOG_ERROR_ARGS("get_blocks() returned %llu, expected %lld\n",
+ p_blkno, lbo >> blocksize_bits);
+ }
+
+ if (contig_blocks != (new_size >> blocksize_bits)) {
+ LOG_ERROR_ARGS("get_blocks() returned contig = %u, expected %lld\n",
+ contig_blocks, new_size >> blocksize_bits);
+ }
+
+ if (max_blocks < contig_blocks)
+ contig_blocks = max_blocks;
+
+
/* Do whatever we need to the buffer_head */
if (set_new) {
set_buffer_new(bh_result);
Modified: trunk/src/dir.c
===================================================================
--- trunk/src/dir.c 2004-10-19 22:45:10 UTC (rev 1583)
+++ trunk/src/dir.c 2004-10-21 23:20:17 UTC (rev 1584)
@@ -50,6 +50,7 @@
#include "alloc.h"
#include "dir.h"
#include "dlm.h"
+#include "extent_map.h"
#include "file.h"
#include "inode.h"
#include "ocfs_journal.h"
@@ -349,6 +350,7 @@
int status;
s64 vbo, lbo;
int extend;
+ u64 p_blkno;
spin_lock(&OCFS_I(dir)->ip_lock);
extend = (dir->i_size == OCFS_I(dir)->ip_alloc_size);
@@ -375,6 +377,17 @@
goto bail;
}
+ status = ocfs2_extent_map_get_blocks(dir, dir->i_blocks, 1,
+ &p_blkno, NULL);
+ if (status < 0) {
+ LOG_ERROR_STATUS(status);
+ goto bail;
+ }
+ if (p_blkno != (lbo >> sb->s_blocksize_bits)) {
+ LOG_ERROR_ARGS("Bad get_blocks(), got %llu, expected %llu\n",
+ p_blkno, (lbo >> sb->s_blocksize_bits));
+ }
+
*new_bh = sb_getblk(sb, lbo >> sb->s_blocksize_bits);
if (!*new_bh) {
status = -EIO;
Added: trunk/src/extent_map.c
===================================================================
--- trunk/src/extent_map.c 2004-10-19 22:45:10 UTC (rev 1583)
+++ trunk/src/extent_map.c 2004-10-21 23:20:17 UTC (rev 1584)
@@ -0,0 +1,962 @@
+/* -*- mode: c; c-basic-offset: 8; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
+ * extent_map.c
+ *
+ * In-memory extent map for OCFS2. Man, this code was prettier in
+ * the library.
+ *
+ * Copyright (C) 2004 Oracle. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License, version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+
+#define _XOPEN_SOURCE 600 /* Triggers magic in features.h */
+#define _LARGEFILE64_SOURCE
+
+#include "ocfs_compat.h"
+
+#include <linux/fs.h>
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/rbtree.h>
+
+#include "ocfs_log.h"
+#include "ocfs.h"
+#include "ocfs2.h"
+
+#include "extent_map.h"
+
+#include "buffer_head_io.h"
+
+
+/*
+ * SUCK SUCK SUCK
+ * Our headers are so bad that struct ocfs2_extent_map is in ocfs.h
+ */
+
+struct ocfs2_extent_map_entry {
+ struct rb_node e_node;
+ int e_tree_depth;
+ ocfs2_extent_rec e_rec;
+};
+
+struct ocfs2_em_insert_context {
+ int need_left;
+ int need_right;
+ struct ocfs2_extent_map_entry *new_ent;
+ struct ocfs2_extent_map_entry *old_ent;
+ struct ocfs2_extent_map_entry *left_ent;
+ struct ocfs2_extent_map_entry *right_ent;
+};
+
+static kmem_cache_t *ocfs2_em_ent_cachep = NULL;
+
+
+static struct ocfs2_extent_map_entry *
+ocfs2_extent_map_lookup(struct ocfs2_extent_map *em,
+ u32 cpos, u32 clusters,
+ struct rb_node ***ret_p,
+ struct rb_node **ret_parent);
+static int ocfs2_extent_map_insert_entry(struct ocfs2_extent_map *em,
+ struct ocfs2_extent_map_entry *ent);
+static int ocfs2_extent_map_find_leaf(struct inode *inode,
+ u32 cpos, u32 clusters,
+ ocfs2_extent_list *el);
+static int ocfs2_extent_map_lookup_read(struct inode *inode,
+ u32 cpos, u32 clusters,
+ struct ocfs2_extent_map_entry **ret_ent);
+static int ocfs2_extent_map_try_insert(struct inode *inode,
+ ocfs2_extent_rec *rec,
+ int tree_depth,
+ struct ocfs2_em_insert_context *ctxt);
+
+
+
+/*
+ * Find an entry in the tree that intersects the region passed in.
+ * Note that this will find straddled intervals, it is up to the
+ * callers to enforce any boundary conditions.
+ *
+ * Callers must hold ip_lock. This lookup is not guaranteed to return
+ * a tree_depth 0 match, and as such can race inserts if the lock
+ * were not held.
+ *
+ * The rb_node garbage lets insertion share the search. Trivial
+ * callers pass NULL.
+ */
+static struct ocfs2_extent_map_entry *
+ocfs2_extent_map_lookup(struct ocfs2_extent_map *em,
+ u32 cpos, u32 clusters,
+ struct rb_node ***ret_p,
+ struct rb_node **ret_parent)
+{
+ struct rb_node **p =
+#ifdef rb_node
+#undef rb_node
+ &em->em_extents.rb_node;
+#define rb_node rb_node_s /* I HATE YOU 2.4 */
+#else
+ &em->em_extents.rb_node;
+#endif
+ struct rb_node *parent = NULL;
+ struct ocfs2_extent_map_entry *ent = NULL;
+
+ while (*p)
+ {
+ parent = *p;
+ ent = rb_entry(parent, struct ocfs2_extent_map_entry,
+ e_node);
+ if ((cpos + clusters) <= ent->e_rec.e_cpos) {
+ p = &(*p)->rb_left;
+ ent = NULL;
+ } else if (cpos >= (ent->e_rec.e_cpos +
+ ent->e_rec.e_clusters)) {
+ p = &(*p)->rb_right;
+ ent = NULL;
+ } else
+ break;
+ }
+
+ if (ret_p != NULL)
+ *ret_p = p;
+ if (ret_parent != NULL)
+ *ret_parent = parent;
+ return ent;
+}
+
+/*
+ * Find the leaf containing the interval we want. While we're on our
+ * way down the tree, fill in every record we see at any depth, because
+ * we might want it later.
+ *
+ * Note that this code is run without ip_lock. That's because it
+ * sleeps while reading. If someone is also filling the extent list at
+ * the same time we are, we might have to restart.
+ */
+static int ocfs2_extent_map_find_leaf(struct inode *inode,
+ u32 cpos, u32 clusters,
+ ocfs2_extent_list *el)
+{
+ int i, ret;
+ struct buffer_head *eb_bh = NULL;
+ u64 blkno;
+ ocfs2_extent_block *eb;
+ ocfs2_extent_rec *rec;
+
+ /*
+ * The bh data containing the el cannot change here, because
+ * we hold alloc_sem. So we can do this without other
+ * locks.
+ */
+ while (el->l_tree_depth)
+ {
+ blkno = 0;
+ for (i = 0; i < el->l_next_free_rec; i++) {
+ rec = &el->l_recs[i];
+
+ ret = -EBADR;
+ if ((rec->e_cpos + rec->e_clusters) >
+ (OCFS_I(inode)->ip_alloc_size >> OCFS_SB(inode->i_sb)->s_clustersize_bits))
+ goto out_free;
+
+ if ((rec->e_cpos + rec->e_clusters) <= cpos) {
+ ret = ocfs2_extent_map_insert(inode,
+ rec,
+ el->l_tree_depth);
+ if (ret && (ret != -EEXIST))
+ goto out_free;
+ continue;
+ }
+ if ((cpos + clusters) <= rec->e_cpos) {
+ ret = ocfs2_extent_map_insert(inode,
+ rec,
+ el->l_tree_depth);
+ if (ret && (ret != -EEXIST))
+ goto out_free;
+ continue;
+ }
+
+ /*
+ * We've found a record that matches our
+ * interval. We don't insert it because we're
+ * about to traverse it.
+ */
+
+ /* Check to see if we're straddling */
+ ret = -ESRCH;
+ if ((rec->e_cpos > cpos) ||
+ ((cpos + clusters) >
+ (rec->e_cpos + rec->e_clusters)))
+ goto out_free;
+
+ /*
+ * If we've already found a record, the el has
+ * two records covering the same interval.
+ * EEEK!
+ */
+ ret = -EBADR;
+ if (blkno)
+ goto out_free;
+
+ blkno = rec->e_blkno;
+ }
+
+ /*
+ * We don't support holes, and we're still up
+ * in the branches, so we'd better have found someone
+ */
+ ret = -EBADR;
+ if (!blkno)
+ goto out_free;
+
+ if (eb_bh) {
+ brelse(eb_bh);
+ eb_bh = NULL;
+ }
+ ret = ocfs_read_block(OCFS_SB(inode->i_sb),
+ blkno, &eb_bh, OCFS_BH_CACHED,
+ inode);
+ if (ret)
+ goto out_free;
+ eb = (ocfs2_extent_block *)eb_bh->b_data;
+ OCFS_ASSERT_RO(IS_VALID_EXTENT_BLOCK(eb));
+ el = &eb->h_list;
+ }
+
+ if (el->l_tree_depth)
+ BUG();
+
+ for (i = 0; i < el->l_next_free_rec; i++) {
+ rec = &el->l_recs[i];
+ ret = ocfs2_extent_map_insert(inode, rec,
+ el->l_tree_depth);
+ if (ret)
+ goto out_free;
+ }
+
+ ret = 0;
+
+out_free:
+ if (eb_bh)
+ brelse(eb_bh);
+
+ return ret;
+}
+
+/*
+ * This lookup actually will read from disk. It has one invariant:
+ * It will never re-traverse blocks. This means that all inserts should
+ * be new regions or more granular regions (both allowed by insert).
+ */
+static int ocfs2_extent_map_lookup_read(struct inode *inode,
+ u32 cpos, u32 clusters,
+ struct ocfs2_extent_map_entry **ret_ent)
+{
+ int ret;
+ u64 blkno;
+ struct ocfs2_extent_map *em = &OCFS_I(inode)->ip_map;
+ struct ocfs2_extent_map_entry *ent;
+ struct buffer_head *bh = NULL;
+ ocfs2_extent_block *eb;
+ ocfs2_dinode *di;
+ ocfs2_extent_list *el;
+
+ spin_lock(&OCFS_I(inode)->ip_lock);
+ ent = ocfs2_extent_map_lookup(em, cpos, clusters, NULL, NULL);
+ if (ent) {
+ if (!ent->e_tree_depth) {
+ spin_unlock(&OCFS_I(inode)->ip_lock);
+ *ret_ent = ent;
+ return 0;
+ }
+ blkno = ent->e_rec.e_blkno;
+ spin_unlock(&OCFS_I(inode)->ip_lock);
+
+ ret = ocfs_read_block(OCFS_SB(inode->i_sb), blkno, &bh,
+ OCFS_BH_CACHED, inode);
+ if (ret) {
+ if (bh)
+ brelse(bh);
+ return ret;
+ }
+ eb = (ocfs2_extent_block *)bh->b_data;
+ OCFS_ASSERT_RO(IS_VALID_EXTENT_BLOCK(eb));
+ el = &eb->h_list;
+ } else {
+ spin_unlock(&OCFS_I(inode)->ip_lock);
+
+ ret = ocfs_read_block(OCFS_SB(inode->i_sb),
+ OCFS_I(inode)->ip_blkno, &bh,
+ OCFS_BH_CACHED, inode);
+ if (ret) {
+ if (bh)
+ brelse(bh);
+ return ret;
+ }
+ di = (ocfs2_dinode *)bh->b_data;
+ OCFS_ASSERT_RO(IS_VALID_FILE_ENTRY(di));
+ el = &di->id2.i_list;
+ }
+
+ ret = ocfs2_extent_map_find_leaf(inode, cpos, clusters, el);
+ brelse(bh);
+ if (ret)
+ return ret;
+
+ ent = ocfs2_extent_map_lookup(em, cpos, clusters, NULL, NULL);
+ if (!ent)
+ return -ESRCH;
+
+ if (ent->e_tree_depth)
+ BUG(); /* FIXME: Make sure this isn't a corruption */
+
+ *ret_ent = ent;
+
+ return 0;
+}
+
+/*
+ * Callers must hold ip_lock. This can insert pieces of the tree,
+ * thus racing lookup if the lock weren't held.
+ */
+static int ocfs2_extent_map_insert_entry(struct ocfs2_extent_map *em,
+ struct ocfs2_extent_map_entry *ent)
+{
+ struct rb_node **p, *parent;
+ struct ocfs2_extent_map_entry *old_ent;
+
+ old_ent = ocfs2_extent_map_lookup(em, ent->e_rec.e_cpos,
+ ent->e_rec.e_clusters,
+ &p, &parent);
+ if (old_ent)
+ return -EEXIST;
+
+ rb_link_node(&ent->e_node, parent, p);
+ rb_insert_color(&ent->e_node, &em->em_extents);
+
+ return 0;
+}
+
+
+/*
+ * Simple rule: on any return code other than -EAGAIN, anything left
+ * in the insert_context will be freed.
+ */
+static int ocfs2_extent_map_try_insert(struct inode *inode,
+ ocfs2_extent_rec *rec,
+ int tree_depth,
+ struct ocfs2_em_insert_context *ctxt)
+{
+ int ret;
+ struct ocfs2_extent_map *em = &OCFS_I(inode)->ip_map;
+ struct ocfs2_extent_map_entry *old_ent;
+
+ ctxt->need_left = 0;
+ ctxt->need_right = 0;
+ ctxt->old_ent = NULL;
+
+ spin_lock(&OCFS_I(inode)->ip_lock);
+ ret = ocfs2_extent_map_insert_entry(em, ctxt->new_ent);
+ if (!ret) {
+ ctxt->new_ent = NULL;
+ goto out_unlock;
+ }
+
+ old_ent = ocfs2_extent_map_lookup(em, rec->e_cpos,
+ rec->e_clusters, NULL, NULL);
+
+ if (!old_ent)
+ BUG();
+
+ ret = -EEXIST;
+ if (old_ent->e_tree_depth < tree_depth)
+ goto out_unlock;
+
+ if (old_ent->e_tree_depth == tree_depth) {
+ if (!memcmp(rec, &old_ent->e_rec,
+ sizeof(ocfs2_extent_rec)))
+ ret = 0;
+
+ /* FIXME: Should this be ESRCH/EBADR??? */
+ goto out_unlock;
+ }
+
+ /*
+ * We do it in this order specifically so that no actual tree
+ * changes occur until we have all the pieces we need. We
+ * don't want malloc failures to leave an inconsistent tree.
+ * Whenever we drop the lock, another process could be
+ * inserting. Also note that, if another process just beat us
+ * to an insert, we might not need the same pieces we needed
+ * the first go round. In the end, the pieces we need will
+ * be used, and the pieces we don't will be freed.
+ */
+ ctxt->need_left = !!(rec->e_cpos > old_ent->e_rec.e_cpos);
+ ctxt->need_right = !!((old_ent->e_rec.e_cpos +
+ old_ent->e_rec.e_clusters) >
+ (rec->e_cpos + rec->e_clusters));
+ ret = -EAGAIN;
+ if (ctxt->need_left) {
+ if (!ctxt->left_ent)
+ goto out_unlock;
+ *(ctxt->left_ent) = *old_ent;
+ ctxt->left_ent->e_rec.e_clusters =
+ rec->e_cpos - ctxt->left_ent->e_rec.e_cpos;
+ }
+ if (ctxt->need_right) {
+ if (!ctxt->right_ent)
+ goto out_unlock;
+ *(ctxt->right_ent) = *old_ent;
+ ctxt->right_ent->e_rec.e_cpos =
+ rec->e_cpos + rec->e_clusters;
+ ctxt->right_ent->e_rec.e_clusters =
+ (old_ent->e_rec.e_cpos +
+ old_ent->e_rec.e_clusters) -
+ ctxt->right_ent->e_rec.e_cpos;
+ }
+
+ rb_erase(&old_ent->e_node, &em->em_extents);
+ /* Now that he's erased, set him up for deletion */
+ ctxt->old_ent = old_ent;
+
+ if (ctxt->need_left) {
+ ret = ocfs2_extent_map_insert_entry(em,
+ ctxt->left_ent);
+ if (ret)
+ goto out_unlock;
+ ctxt->left_ent = NULL;
+ }
+
+ if (ctxt->need_right) {
+ ret = ocfs2_extent_map_insert_entry(em,
+ ctxt->right_ent);
+ if (ret)
+ goto out_unlock;
+ ctxt->right_ent = NULL;
+ }
+
+ ret = ocfs2_extent_map_insert_entry(em, ctxt->new_ent);
+
+ if (!ret)
+ ctxt->new_ent = NULL;
+
+out_unlock:
+ spin_unlock(&OCFS_I(inode)->ip_lock);
+
+ return ret;
+}
+
+
+int ocfs2_extent_map_insert(struct inode *inode, ocfs2_extent_rec *rec,
+ int tree_depth)
+{
+ int ret;
+ struct ocfs2_em_insert_context ctxt = {0, };
+
+ if ((rec->e_cpos + rec->e_clusters) >
+ OCFS_I(inode)->ip_map.em_clusters)
+ return -EBADR;
+
+ /* Zero e_clusters means a truncated tail record. It better be EOF */
+ if (!rec->e_clusters) {
+ if ((rec->e_cpos + rec->e_clusters) !=
+ OCFS_I(inode)->ip_map.em_clusters)
+ return -EBADR;
+
+ /* Ignore the truncated tail */
+ return 0;
+ }
+
+ ret = -ENOMEM;
+ ctxt.new_ent = kmem_cache_alloc(ocfs2_em_ent_cachep,
+ GFP_KERNEL);
+ if (!ctxt.new_ent)
+ return ret;
+
+ ctxt.new_ent->e_rec = *rec;
+ ctxt.new_ent->e_tree_depth = tree_depth;
+
+ do {
+ ret = -ENOMEM;
+ if (ctxt.need_left && !ctxt.left_ent) {
+ ctxt.left_ent =
+ kmem_cache_alloc(ocfs2_em_ent_cachep,
+ GFP_KERNEL);
+ if (!ctxt.left_ent)
+ break;
+ }
+ if (ctxt.need_right && !ctxt.right_ent) {
+ ctxt.right_ent =
+ kmem_cache_alloc(ocfs2_em_ent_cachep,
+ GFP_KERNEL);
+ if (!ctxt.right_ent)
+ break;
+ }
+
+ ret = ocfs2_extent_map_try_insert(inode, rec,
+ tree_depth, &ctxt);
+ } while (ret == -EAGAIN);
+
+ if (ctxt.left_ent)
+ kmem_cache_free(ocfs2_em_ent_cachep, ctxt.left_ent);
+ if (ctxt.right_ent)
+ kmem_cache_free(ocfs2_em_ent_cachep, ctxt.right_ent);
+ if (ctxt.old_ent)
+ kmem_cache_free(ocfs2_em_ent_cachep, ctxt.old_ent);
+ if (ctxt.new_ent)
+ kmem_cache_free(ocfs2_em_ent_cachep, ctxt.new_ent);
+
+ return ret;
+}
+
+/*
+ * Append this record to the tail of the extent map. It must be
+ * tree_depth 0. The record might be an extension of an existing
+ * record, and as such that needs to be handled. eg:
+ *
+ * Existing record in the extent map:
+ *
+ * cpos = 10, len = 10
+ * |---------|
+ *
+ * New Record:
+ *
+ * cpos = 10, len = 20
+ * |------------------|
+ *
+ * The passed record is the new on-disk record. The new_clusters value
+ * is how many clusters were added to the file. If the append is a
+ * contiguous append, the new_clusters has been added to
+ * rec->e_clusters. If the append is an entirely new extent, then
+ * rec->e_clusters is == new_clusters.
+ */
+int ocfs2_extent_map_append(struct inode *inode, ocfs2_extent_rec *rec,
+ u32 new_clusters)
+{
+ int ret;
+ struct ocfs2_extent_map *em = &OCFS_I(inode)->ip_map;
+ struct ocfs2_extent_map_entry *ent;
+ ocfs2_extent_rec *old;
+
+ OCFS_ASSERT(new_clusters);
+ OCFS_ASSERT(rec->e_clusters >= new_clusters);
+
+ if (em->em_clusters <
+ (OCFS_I(inode)->ip_alloc_size >> OCFS_SB(inode->i_sb)->s_clustersize_bits)) {
+ /*
+ * Size changed underneath us on disk. Drop any
+ * straddling records and update our idea of
+ * i_clusters
+ */
+ ocfs2_extent_map_drop(inode, em->em_clusters - 1);
+ em->em_clusters = OCFS_I(inode)->ip_alloc_size >> OCFS_SB(inode->i_sb)->s_clustersize_bits;
+ }
+
+ OCFS_ASSERT((rec->e_cpos + rec->e_clusters) ==
+ (em->em_clusters + new_clusters));
+
+ em->em_clusters += new_clusters;
+
+ ret = -ENOENT;
+ if (rec->e_clusters > new_clusters) {
+ /* This is a contiguous append */
+ ent = ocfs2_extent_map_lookup(em, rec->e_cpos, 1,
+ NULL, NULL);
+ if (ent) {
+ old = &ent->e_rec;
+ OCFS_ASSERT((rec->e_cpos + rec->e_clusters) ==
+ (old->e_cpos + old->e_clusters +
+ new_clusters));
+ if (!ent->e_tree_depth) {
+ OCFS_ASSERT(old->e_cpos == rec->e_cpos);
+ OCFS_ASSERT(old->e_blkno ==
+ rec->e_blkno);
+ ret = 0;
+ }
+ /*
+ * Let non-leafs fall through as -ENOENT to
+ * force insertion of the new leaf.
+ */
+ old->e_clusters += new_clusters;
+ }
+ }
+
+ if (ret == -ENOENT)
+ ret = ocfs2_extent_map_insert(inode, rec, 0);
+
+ return ret;
+}
+
+/*
+ * Look up the record containing this cluster offset. This record is
+ * part of the extent map. Do not free it. Any changes you make to
+ * it will reflect in the extent map. So, if your last extent
+ * is (cpos = 10, clusters = 10) and you truncate the file by 5
+ * clusters, you can do:
+ *
+ * ret = ocfs2_extent_map_get_rec(em, orig_size - 5, &rec);
+ * rec->e_clusters -= 5;
+ *
+ * The lookup does not read from disk. If the map isn't filled in for
+ * an entry, you won't find it.
+ *
+ * Also note that the returned record is valid until alloc_sem is
+ * dropped. After that, truncate and extend can happen. Caveat Emptor.
+ */
+int ocfs2_extent_map_get_rec(struct inode *inode, u32 cpos,
+ ocfs2_extent_rec **rec,
+ int *tree_depth)
+{
+ int ret = -ENOENT;
+ struct ocfs2_extent_map *em = &OCFS_I(inode)->ip_map;
+ struct ocfs2_extent_map_entry *ent;
+
+ *rec = NULL;
+
+ if (cpos >=
+ (OCFS_I(inode)->ip_alloc_size >> OCFS_SB(inode->i_sb)->s_clustersize_bits))
+ return -EINVAL;
+
+ if (cpos >= em->em_clusters) {
+ /*
+ * Size changed underneath us on disk. Drop any
+ * straddling records and update our idea of
+ * i_clusters
+ */
+ ocfs2_extent_map_drop(inode, em->em_clusters - 1);
+ em->em_clusters = OCFS_I(inode)->ip_alloc_size >> OCFS_SB(inode->i_sb)->s_clustersize_bits;
+ }
+
+ ent = ocfs2_extent_map_lookup(&OCFS_I(inode)->ip_map, cpos, 1,
+ NULL, NULL);
+
+ if (ent) {
+ *rec = &ent->e_rec;
+ if (tree_depth)
+ *tree_depth = ent->e_tree_depth;
+ ret = 0;
+ }
+
+ return ret;
+}
+
+int ocfs2_extent_map_get_clusters(struct inode *inode,
+ u32 v_cpos, int count,
+ u32 *p_cpos, int *ret_count)
+{
+ int ret;
+ u32 coff, ccount;
+ struct ocfs2_extent_map *em = &OCFS_I(inode)->ip_map;
+ struct ocfs2_extent_map_entry *ent = NULL;
+
+ *p_cpos = ccount = 0;
+
+ if ((v_cpos + count) >
+ (OCFS_I(inode)->ip_alloc_size >> OCFS_SB(inode->i_sb)->s_clustersize_bits))
+ return -EINVAL;
+
+ if ((v_cpos + count) > em->em_clusters) {
+ /*
+ * Size changed underneath us on disk. Drop any
+ * straddling records and update our idea of
+ * i_clusters
+ */
+ ocfs2_extent_map_drop(inode, em->em_clusters - 1);
+ em->em_clusters = OCFS_I(inode)->ip_alloc_size >> OCFS_SB(inode->i_sb)->s_clustersize_bits;
+ }
+
+
+ ret = ocfs2_extent_map_lookup_read(inode, v_cpos, count, &ent);
+ if (ret)
+ return ret;
+
+ if (ent) {
+ /* We should never find ourselves straddling an interval */
+ if ((ent->e_rec.e_cpos > v_cpos) ||
+ ((v_cpos + count) >
+ (ent->e_rec.e_cpos + ent->e_rec.e_clusters)))
+ return -ESRCH;
+
+ coff = v_cpos - ent->e_rec.e_cpos;
+ *p_cpos = ocfs2_blocks_to_clusters(inode->i_sb,
+ ent->e_rec.e_blkno) +
+ coff;
+
+ if (ret_count)
+ *ret_count = ent->e_rec.e_clusters - coff;
+
+ return 0;
+ }
+
+
+ return -ENOENT;
+}
+
+int ocfs2_extent_map_get_blocks(struct inode *inode,
+ u64 v_blkno, int count,
+ u64 *p_blkno, int *ret_count)
+{
+ int ret;
+ u64 boff;
+ u32 cpos, clusters;
+ int bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1);
+ struct ocfs2_extent_map_entry *ent = NULL;
+ struct ocfs2_extent_map *em = &OCFS_I(inode)->ip_map;
+ ocfs2_extent_rec *rec;
+
+ *p_blkno = 0;
+
+ cpos = ocfs2_blocks_to_clusters(inode->i_sb, v_blkno);
+ clusters = ocfs2_blocks_to_clusters(inode->i_sb,
+ (u64)count + bpc - 1);
+ if ((cpos + clusters) >
+ (OCFS_I(inode)->ip_alloc_size >> OCFS_SB(inode->i_sb)->s_clustersize_bits))
+ return -EINVAL;
+
+ if ((cpos + clusters) > em->em_clusters) {
+ /*
+ * Size changed underneath us on disk. Drop any
+ * straddling records and update our idea of
+ * i_clusters
+ */
+ ocfs2_extent_map_drop(inode, em->em_clusters - 1);
+ em->em_clusters = OCFS_I(inode)->ip_alloc_size >> OCFS_SB(inode->i_sb)->s_clustersize_bits;
+ }
+
+ ret = ocfs2_extent_map_lookup_read(inode, cpos, clusters, &ent);
+ if (ret)
+ return ret;
+
+ if (ent)
+ {
+ rec = &ent->e_rec;
+
+ /* We should never find ourselves straddling an interval */
+ if ((rec->e_cpos > cpos) ||
+ ((cpos + clusters) >
+ (rec->e_cpos + rec->e_clusters)))
+ return -ESRCH;
+
+ boff = ocfs2_clusters_to_blocks(inode->i_sb,
+ cpos - rec->e_cpos);
+ boff += (v_blkno & (u64)(bpc - 1));
+ *p_blkno = rec->e_blkno + boff;
+
+ if (ret_count) {
+ *ret_count = ocfs2_clusters_to_blocks(inode->i_sb,
+ rec->e_clusters) - boff;
+ }
+
+ return 0;
+ }
+
+ return -ENOENT;
+}
+
+int ocfs2_extent_map_init(struct inode *inode)
+{
+ struct ocfs2_extent_map *em = &OCFS_I(inode)->ip_map;
+
+ em->em_extents = RB_ROOT;
+ em->em_clusters = 0;
+
+ return 0;
+}
+
+/*
+ * Not in mainline at all
+ */
+static struct rb_node *rb_last(struct rb_root *root)
+{
+ struct rb_node *n;
+
+#ifdef rb_node
+#undef rb_node
+ n = root->rb_node;
+#define rb_node rb_node_s
+#else
+ n = root->rb_node;
+#endif
+ if (!n)
+ return NULL;
+ while (n->rb_right)
+ n = n->rb_right;
+ return n;
+}
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)
+static struct rb_node *rb_prev(struct rb_node *node)
+{
+ /* If we have a left-hand child, go down and then right as far
+ as we can. */
+ if (node->rb_left) {
+ node = node->rb_left;
+ while (node->rb_right)
+ node=node->rb_right;
+ return node;
+ }
+
+ /* No left-hand children. Go up till we find an ancestor which
+ is a right-hand child of its parent */
+ while (node->rb_parent && node == node->rb_parent->rb_left)
+ node = node->rb_parent;
+
+ return node->rb_parent;
+}
+#endif /* LINUX_VERSION_CODE */
+
+
+/* Needs the lock */
+static void __ocfs2_extent_map_drop(struct inode *inode,
+ u32 new_clusters,
+ struct rb_node **free_head,
+ struct ocfs2_extent_map_entry **tail_ent)
+{
+ struct rb_node *node, *next;
+ struct ocfs2_extent_map *em = &OCFS_I(inode)->ip_map;
+ struct ocfs2_extent_map_entry *ent;
+
+ *free_head = NULL;
+
+ ent = NULL;
+ node = rb_last(&em->em_extents);
+ while (node)
+ {
+ next = rb_prev(node);
+
+ ent = rb_entry(node, struct ocfs2_extent_map_entry,
+ e_node);
+ if (ent->e_rec.e_cpos < new_clusters)
+ break;
+
+ rb_erase(&ent->e_node, &em->em_extents);
+
+ node->rb_right = *free_head;
+ *free_head = node;
+
+ ent = NULL;
+ node = next;
+ }
+
+ /* Do we have an entry straddling new_clusters? */
+ if (tail_ent) {
+ if (ent &&
+ ((ent->e_rec.e_cpos + ent->e_rec.e_clusters) >
+ new_clusters))
+ *tail_ent = ent;
+ else
+ *tail_ent = NULL;
+ }
+
+ return;
+}
+
+static void __ocfs2_extent_map_drop_cleanup(struct rb_node *free_head)
+{
+ struct rb_node *node;
+ struct ocfs2_extent_map_entry *ent;
+
+ while (free_head) {
+ node = free_head;
+ free_head = node->rb_right;
+
+ ent = rb_entry(node, struct ocfs2_extent_map_entry,
+ e_node);
+ kmem_cache_free(ocfs2_em_ent_cachep, ent);
+ }
+}
+
+
+/*
+ * Remove all entries past new_clusters, inclusive of an entry that
+ * contains new_clusters. This is effectively a cache forget.
+ *
+ * If you want to also clip the last extent by some number of clusters,
+ * you need to call ocfs2_extent_map_trunc().
+ * This code does not check or modify ip_alloc_size.
+ */
+int ocfs2_extent_map_drop(struct inode *inode, u32 new_clusters)
+{
+ struct rb_node *free_head = NULL;
+ struct ocfs2_extent_map *em = &OCFS_I(inode)->ip_map;
+ struct ocfs2_extent_map_entry *ent;
+
+ spin_lock(&OCFS_I(inode)->ip_lock);
+
+ __ocfs2_extent_map_drop(inode, new_clusters, &free_head, &ent);
+
+ if (ent) {
+ rb_erase(&ent->e_node, &em->em_extents);
+ ent->e_node.rb_right = free_head;
+ free_head = &ent->e_node;
+ }
+
+ spin_unlock(&OCFS_I(inode)->ip_lock);
+
+ if (free_head)
+ __ocfs2_extent_map_drop_cleanup(free_head);
+
+ return 0;
+}
+
+/*
+ * Remove all entries past new_clusters and also clip any extent
+ * straddling new_clusters, if there is one. This does not check
+ * or modify ip_alloc_size.
+ */
+int ocfs2_extent_map_trunc(struct inode *inode, u32 new_clusters)
+{
+ struct rb_node *free_head = NULL;
+ struct ocfs2_extent_map_entry *ent = NULL;
+
+ spin_lock(&OCFS_I(inode)->ip_lock);
+
+ __ocfs2_extent_map_drop(inode, new_clusters, &free_head, &ent);
+
+ if (ent)
+ ent->e_rec.e_clusters =
+ new_clusters - ent->e_rec.e_cpos;
+
+ OCFS_I(inode)->ip_map.em_clusters = new_clusters;
+
+ spin_unlock(&OCFS_I(inode)->ip_lock);
+
+ if (free_head)
+ __ocfs2_extent_map_drop_cleanup(free_head);
+
+ return 0;
+}
+
+
+int __init init_ocfs2_extent_maps(void)
+{
+ ocfs2_em_ent_cachep =
+ kmem_cache_create("ocfs2_em_ent",
+ sizeof(struct ocfs2_extent_map_entry),
+ 0, SLAB_HWCACHE_ALIGN, NULL, NULL);
+ if (!ocfs2_em_ent_cachep)
+ return -ENOMEM;
+
+ return 0;
+}
+
+void __exit exit_ocfs2_extent_maps(void)
+{
+ kmem_cache_destroy(ocfs2_em_ent_cachep);
+
+ return;
+}
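When ocfs2_extent_map_try_insert() is handed a finer-grained (smaller
tree_depth) record that overlaps an existing coarser entry, it carves the
old entry into at most three pieces.  A worked example with hypothetical
numbers:

	existing entry:  e_cpos = 0,  e_clusters = 100, tree_depth = 1
	new record:      e_cpos = 40, e_clusters = 10,  tree_depth = 0

	after insertion the tree holds:
	  left:   e_cpos = 0,  e_clusters = 40, tree_depth = 1
	  new:    e_cpos = 40, e_clusters = 10, tree_depth = 0
	  right:  e_cpos = 50, e_clusters = 50, tree_depth = 1

The left/right pieces are allocated outside ip_lock; if one is still missing
once the lock is held, try_insert returns -EAGAIN and
ocfs2_extent_map_insert() allocates it and retries.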
Added: trunk/src/extent_map.h
===================================================================
--- trunk/src/extent_map.h 2004-10-19 22:45:10 UTC (rev 1583)
+++ trunk/src/extent_map.h 2004-10-21 23:20:17 UTC (rev 1584)
@@ -0,0 +1,55 @@
+/* -*- mode: c; c-basic-offset: 8; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
+ * extent_map.h
+ *
+ * In-memory file extent mappings for OCFS2.
+ *
+ * Copyright (C) 2004 Oracle. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License, version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ *
+ * Authors: Joel Becker
+ */
+
+#ifndef _EXTENT_MAP_H
+#define _EXTENT_MAP_H
+
+int init_ocfs2_extent_maps(void);
+void exit_ocfs2_extent_maps(void);
+
+/*
+ * EVERY CALL here except _init, _trunc, and _drop expects alloc_sem
+ * to be held. The allocation cannot change at all while the map is
+ * in the process of being updated.
+ */
+int ocfs2_extent_map_init(struct inode *inode);
+int ocfs2_extent_map_insert(struct inode *inode, ocfs2_extent_rec *rec,
+ int tree_depth);
+int ocfs2_extent_map_append(struct inode *inode, ocfs2_extent_rec *rec,
+ u32 new_clusters);
+int ocfs2_extent_map_get_rec(struct inode *inode, u32 cpos,
+ ocfs2_extent_rec **rec,
+ int *tree_depth);
+int ocfs2_extent_map_get_clusters(struct inode *inode,
+ u32 v_cpos, int count,
+ u32 *p_cpos, int *ret_count);
+int ocfs2_extent_map_get_blocks(struct inode *inode,
+ u64 v_blkno, int count,
+ u64 *p_blkno, int *ret_count);
+int ocfs2_extent_map_drop(struct inode *inode, u32 new_clusters);
+int ocfs2_extent_map_trunc(struct inode *inode, u32 new_clusters);
+
+#endif /* _EXTENT_MAP_H */
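The call sites added elsewhere in this commit use the API roughly in this
order (a summary of the surrounding hunks, not new interfaces; v_blkno,
p_blkno, rec and new_clusters come from the respective callers):

	init_ocfs2_extent_maps();                  /* module load, super.c      */
	ocfs2_extent_map_init(inode);              /* per-inode init, inode.c   */
	ocfs2_extent_map_get_blocks(inode, v_blkno, 1, &p_blkno, NULL);
	ocfs2_extent_map_append(inode, rec, new_clusters); /* on alloc, alloc.c */
	ocfs2_extent_map_trunc(inode, new_clusters);       /* truncate, alloc.c */
	ocfs2_extent_map_drop(inode, 0);           /* cache forget, nm.c        */
	exit_ocfs2_extent_maps();                  /* module unload, super.c    */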
Modified: trunk/src/file.c
===================================================================
--- trunk/src/file.c 2004-10-19 22:45:10 UTC (rev 1583)
+++ trunk/src/file.c 2004-10-21 23:20:17 UTC (rev 1584)
@@ -40,6 +40,7 @@
#include "dir.h"
#include "dlm.h"
#include "extmap.h"
+#include "extent_map.h"
#include "file.h"
#include "sysfile.h"
#include "inode.h"
@@ -821,12 +822,13 @@
struct buffer_head *fe_bh,
u64 new_i_size)
{
- int status;
+ int status, grow;
struct super_block *sb = inode->i_sb;
ocfs_inode_private *oip = OCFS_I(inode);
LOG_ENTRY();
+ grow = new_i_size > inode->i_size;
inode->i_size = new_i_size;
OCFS_SET_INODE_TIME(inode, i_mtime, OCFS_CURRENT_TIME);
inode->i_blocks = (new_i_size + sb->s_blocksize - 1)
@@ -837,11 +839,16 @@
goto bail;
}
+ /* FIXME: I think this should all be in the caller */
spin_lock(&oip->ip_lock);
- oip->ip_mmu_private = inode->i_size;
+ if (!grow)
+ oip->ip_mmu_private = inode->i_size;
/* do we really need to do an extent_map_trunc here? */
ocfs_extent_map_trunc(&oip->ip_ext_map);
spin_unlock(&oip->ip_lock);
+
+ ocfs2_extent_map_drop(inode,
+ ocfs2_clusters_for_bytes(sb, new_i_size));
bail:
LOG_EXIT_STATUS(status);
return status;
Modified: trunk/src/inode.c
===================================================================
--- trunk/src/inode.c 2004-10-19 22:45:10 UTC (rev 1583)
+++ trunk/src/inode.c 2004-10-21 23:20:17 UTC (rev 1584)
@@ -41,6 +41,7 @@
#include "alloc.h"
#include "dlm.h"
#include "extmap.h"
+#include "extent_map.h"
#include "file.h"
#include "inode.h"
#include "lockres.h"
@@ -311,6 +312,7 @@
i->ip_open_cnt = 0;
spin_lock_init(&i->ip_lock);
ocfs_extent_map_init (&i->ip_ext_map);
+ ocfs2_extent_map_init(inode);
INIT_LIST_HEAD(&i->ip_recovery_list);
INIT_LIST_HEAD(&i->ip_handle_list);
i->ip_handle = NULL;
@@ -819,6 +821,7 @@
}
ocfs_extent_map_trunc(&OCFS_I(inode)->ip_ext_map);
+ ocfs2_extent_map_drop(inode, 0);
down(&recovery_list_sem);
list_del(&OCFS_I(inode)->ip_recovery_list);
@@ -854,6 +857,7 @@
int tmperr;
ocfs_super *osb;
__s64 vbo, lbo;
+ u64 p_blkno;
int readflags = OCFS_BH_CACHED;
osb = OCFS_SB(inode->i_sb);
@@ -870,17 +874,22 @@
return(NULL);
/* do we need extend sem? no extend dlm message for dirs */
- /*
- * UGLY: last argument to lookup_file_allocation() (locked) is
- * forced to '1' here, even though we don't have the lock. This
- * is to force fast, unlocked operation. Get A Real DLM.
- */
tmperr = ocfs_lookup_file_allocation(osb, vbo, &lbo,
osb->sb->s_blocksize, NULL,
inode);
if (tmperr < 0)
goto fail;
+ tmperr = ocfs2_extent_map_get_blocks(inode, block, 1,
+ &p_blkno, NULL);
+ if (tmperr < 0)
+ goto fail;
+
+ if (p_blkno != (lbo >> osb->sb->s_blocksize_bits)) {
+ LOG_ERROR_ARGS("get_blocks() expected %llu, got %lld\n",
+ p_blkno, lbo >> osb->sb->s_blocksize_bits);
+ }
+
tmperr = ocfs_read_block(osb, lbo >> osb->sb->s_blocksize_bits,
&bh, readflags, inode);
if (tmperr < 0)
@@ -1037,6 +1046,7 @@
ocfs2_dinode *fe)
{
int status = 0;
+ int drop_map = 0;
ocfs_super *osb = OCFS2_SB(inode->i_sb);
spin_lock(&OCFS_I(inode)->ip_lock);
@@ -1086,6 +1096,7 @@
OCFS_I(inode)->ip_alloc_size,
fe->i_clusters);
ocfs_extent_map_trunc (&OCFS_I(inode)->ip_ext_map);
+ drop_map = 1; /* Because we have the lock here */
}
if (le32_to_cpu(fe->i_flags) & OCFS2_BITMAP_FL) {
@@ -1133,6 +1144,15 @@
spin_unlock(&OCFS_I(inode)->ip_lock);
+ if (drop_map) {
+ /*
+ * If we could trust the ordering of truncate
+ * notification, we could some day do:
+ * ocfs2_extent_map_trunc(inode, fe->i_clusters)
+ */
+ ocfs2_extent_map_trunc(inode, fe->i_clusters);
+ }
+
return(status);
} /* ocfs_refresh_inode */
Modified: trunk/src/namei.c
===================================================================
--- trunk/src/namei.c 2004-10-19 22:45:10 UTC (rev 1583)
+++ trunk/src/namei.c 2004-10-21 23:20:17 UTC (rev 1584)
@@ -51,6 +51,7 @@
#include "dcache.h"
#include "dir.h"
#include "dlm.h"
+#include "extent_map.h"
#include "file.h"
#include "sysfile.h"
#include "inode.h"
@@ -1415,6 +1416,8 @@
const char *c;
struct super_block *sb = osb->sb;
s64 logical, contig;
+ u64 p_blkno;
+ int p_blocks;
int virtual, blocks, status, i, bytes_left;
bytes_left = inode->i_size + 1;
@@ -1461,6 +1464,23 @@
/* right now lookup_file_allocation returns bytes, but that
* changes soon so shift back to blocks. */
logical = logical >> sb->s_blocksize_bits;
+
+ status = ocfs2_extent_map_get_blocks(inode, 0, 1, &p_blkno,
+ &p_blocks);
+ if (status < 0) {
+ LOG_ERROR_STATUS(status);
+ goto bail;
+ }
+
+ if (logical != p_blkno) {
+ LOG_ERROR_ARGS("Bad get_blocks(): expected %llu, got %llu\n",
+ p_blkno, logical);
+ }
+ if (p_blocks != (contig >> sb->s_blocksize_bits)) {
+ LOG_ERROR_ARGS("Bad get_blocks() length: expected %u, got %lld\n",
+ p_blocks, (contig >> sb->s_blocksize_bits));
+ }
+
virtual = 0;
while(bytes_left > 0) {
c = &symname[virtual * sb->s_blocksize];
Modified: trunk/src/nm.c
===================================================================
--- trunk/src/nm.c 2004-10-19 22:45:10 UTC (rev 1583)
+++ trunk/src/nm.c 2004-10-21 23:20:17 UTC (rev 1584)
@@ -38,14 +38,15 @@
#include "alloc.h"
#include "dlm.h"
+#include "extmap.h"
+#include "extent_map.h"
+#include "file.h"
#include "heartbeat.h"
#include "inode.h"
#include "lockres.h"
#include "nm.h"
#include "util.h"
#include "vote.h"
-#include "extmap.h"
-#include "file.h"
#include "ocfs_journal.h"
#include "buffer_head_io.h"
@@ -444,6 +445,7 @@
spin_lock(&OCFS_I(inode)->ip_lock);
ocfs_extent_map_trunc(&OCFS_I(inode)->ip_ext_map);
spin_unlock(&OCFS_I(inode)->ip_lock);
+ ocfs2_extent_map_drop(inode, 0);
}
@@ -695,11 +697,14 @@
ocfs_truncate_inode_pages(inode, 0);
spin_lock(&OCFS_I(inode)->ip_lock);
ocfs_extent_map_trunc(&OCFS_I(inode)->ip_ext_map);
-
+
/* truncate may send this */
if (flags & FLAG_FILE_UPDATE_OIN)
atomic_set(&OCFS_I(inode)->ip_needs_verification, 1);
spin_unlock(&OCFS_I(inode)->ip_lock);
+
+ /* Do we need this? */
+ ocfs2_extent_map_drop(inode, 0);
}
return 0;
}
Modified: trunk/src/ocfs.h
===================================================================
--- trunk/src/ocfs.h 2004-10-19 22:45:10 UTC (rev 1583)
+++ trunk/src/ocfs.h 2004-10-21 23:20:17 UTC (rev 1584)
@@ -35,6 +35,7 @@
#include <linux/sched.h>
#include <linux/wait.h>
#include <linux/list.h>
+#include <linux/rbtree.h>
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
# include <linux/workqueue.h>
#else
@@ -367,6 +368,12 @@
struct _ocfs_journal_handle;
+/* I hate our includes */
+struct ocfs2_extent_map {
+ u32 em_clusters;
+ struct rb_root em_extents;
+};
+
/* OCFS2 Inode Private Data */
typedef struct _ocfs_inode_private
{
@@ -386,6 +393,7 @@
__s64 ip_mmu_private;
__u32 ip_open_flags;
ocfs_extent_map ip_ext_map;
+ struct ocfs2_extent_map ip_map;
atomic_t ip_needs_verification;
Modified: trunk/src/ocfs2.h
===================================================================
--- trunk/src/ocfs2.h 2004-10-19 22:45:10 UTC (rev 1583)
+++ trunk/src/ocfs2.h 2004-10-21 23:20:17 UTC (rev 1584)
@@ -45,7 +45,7 @@
}
static inline unsigned int ocfs2_clusters_for_bytes(struct super_block *sb,
- __u64 bytes)
+ u64 bytes)
{
int cl_bits = OCFS_SB(sb)->s_clustersize_bits;
unsigned int clusters;
Modified: trunk/src/ocfs_compat.h
===================================================================
--- trunk/src/ocfs_compat.h 2004-10-19 22:45:10 UTC (rev 1583)
+++ trunk/src/ocfs_compat.h 2004-10-21 23:20:17 UTC (rev 1584)
@@ -60,6 +60,9 @@
#define generic_file_write_nolock do_generic_file_write
#endif
+#define rb_root rb_root_s
+#define rb_node rb_node_s
+
typedef long sector_t;
#define map_bh(bh, sb, blk) \
Modified: trunk/src/super.c
===================================================================
--- trunk/src/super.c 2004-10-19 22:45:10 UTC (rev 1583)
+++ trunk/src/super.c 2004-10-21 23:20:17 UTC (rev 1584)
@@ -51,6 +51,7 @@
#include "alloc.h"
#include "bitmap.h"
+#include "extent_map.h"
#include "heartbeat.h"
#include "inode.h"
#include "journal.h"
@@ -516,10 +517,15 @@
LOG_ENTRY ();
ocfs_version_print();
+
+ if (init_ocfs2_extent_maps())
+ return -ENOMEM;
ocfs_hostname = kmalloc(strlen(system_utsname.nodename) + 1, GFP_KERNEL);
- if (ocfs_hostname == NULL)
- return -EINVAL;
+ if (ocfs_hostname == NULL) {
+ status = -EINVAL;
+ goto leave;
+ }
strcpy(ocfs_hostname, system_utsname.nodename);
printk("ocfs2: hostname is %s\n", ocfs_hostname);
@@ -583,6 +589,8 @@
if (ocfs_table_header)
unregister_sysctl_table(ocfs_table_header);
+
+ exit_ocfs2_extent_maps();
}
LOG_EXIT_STATUS (status);
@@ -693,6 +701,8 @@
unregister_filesystem (&ocfs_fs_type);
+ exit_ocfs2_extent_maps();
+
printk("Unloaded OCFS Driver module\n");
LOG_EXIT ();
return;
@@ -1330,6 +1340,7 @@
int status = 0;
ocfs_publish *publish = NULL;
__u64 ret;
+ u64 p_blkno;
struct buffer_head *publish_bh = NULL; /* our own publish sector */
struct buffer_head **publish_bhs = NULL; /* all the publish sectors */
int i;
@@ -1544,13 +1555,26 @@
LOG_ERROR_STATUS(status = -EINVAL);
goto bail;
}
- /* We're in the mount path, pretend locked=1 */
status = ocfs_lookup_file_allocation(osb, 0ULL, &ret, osb->sb->s_blocksize, NULL,
inode);
if (status < 0) {
LOG_ERROR_STATUS(status);
goto bail;
}
+
+ status = ocfs2_extent_map_get_blocks(inode, 0ULL, 1, &p_blkno,
+ NULL);
+ if (status < 0) {
+ LOG_ERROR_STATUS(status);
+ goto bail;
+ }
+
+ if (p_blkno != (ret >> osb->sb->s_blocksize_bits)) {
+ LOG_ERROR_ARGS("get_blocks() returned %llu, expected %lld\n",
+ p_blkno,
+ ret >> osb->sb->s_blocksize_bits);
+ }
+
// i_size must be at least
// (2 + osb->max_nodes + 4) + osb->max_nodes + osb->max_nodes
if (inode->i_size >> osb->sb->s_blocksize_bits <
@@ -1617,6 +1641,20 @@
LOG_ERROR_STATUS(status);
goto bail;
}
+
+ status = ocfs2_extent_map_get_blocks(inode, 0ULL, 1, &p_blkno,
+ NULL);
+ if (status < 0) {
+ LOG_ERROR_STATUS(status);
+ goto bail;
+ }
+
+ if (p_blkno != (ret >> osb->sb->s_blocksize_bits)) {
+ LOG_ERROR_ARGS("get_blocks() returned %llu, expected %lld\n",
+ p_blkno,
+ ret >> osb->sb->s_blocksize_bits);
+ }
+
/* for now, just one extent... but in the future... */
osb->bitmap_blkno = ret >> osb->sb->s_blocksize_bits;
osb->bitmap_blocks = OCFS_I(inode)->ip_alloc_size >> osb->sb->s_blocksize_bits;