[Ocfs2-tools-commits] taoma commits r1319 - in
branches/sparse-files: debugfs.ocfs2 extras fsck.ocfs2
libocfs2 libocfs2/include mkfs.ocfs2
svn-commits at oss.oracle.com
svn-commits at oss.oracle.com
Sun Mar 18 22:47:37 PDT 2007
Author: taoma
Date: 2007-03-18 22:47:26 -0700 (Sun, 18 Mar 2007)
New Revision: 1319
Added:
branches/sparse-files/extras/sparse.c
Modified:
branches/sparse-files/debugfs.ocfs2/commands.c
branches/sparse-files/debugfs.ocfs2/dump.c
branches/sparse-files/debugfs.ocfs2/find_block_inode.c
branches/sparse-files/extras/Makefile
branches/sparse-files/fsck.ocfs2/extent.c
branches/sparse-files/fsck.ocfs2/pass1.c
branches/sparse-files/libocfs2/extend_file.c
branches/sparse-files/libocfs2/extent_map.c
branches/sparse-files/libocfs2/extents.c
branches/sparse-files/libocfs2/fileio.c
branches/sparse-files/libocfs2/include/ocfs2.h
branches/sparse-files/libocfs2/include/ocfs2_fs.h
branches/sparse-files/libocfs2/truncate.c
branches/sparse-files/mkfs.ocfs2/mkfs.c
Log:
The 1st version of sparse file support in ocfs2-tools.
The mechanism for finding blocks in a hole is still the extent map. This will be removed in a future
version.
Signed-off-by: jlbec
Modified: branches/sparse-files/debugfs.ocfs2/commands.c
===================================================================
--- branches/sparse-files/debugfs.ocfs2/commands.c 2007-03-16 23:57:45 UTC (rev 1318)
+++ branches/sparse-files/debugfs.ocfs2/commands.c 2007-03-19 05:47:26 UTC (rev 1319)
@@ -434,13 +434,24 @@
for (i = 0; i < el->l_next_free_rec; ++i) {
rec = &(el->l_recs[i]);
- /* TODO Fix to handle sparse trees */
+ /* For a sparse file, we may find an empty record.
+ * Just skip it.
+ */
+ if (!rec->e_clusters)
+ continue;
+
if (clstoff >= (rec->e_cpos + rec->e_clusters))
continue;
if (!el->l_tree_depth) {
- tmp = blkoff - ocfs2_clusters_to_blocks(fs, clstoff);
- dump_logical_blkno(out, rec->e_blkno + tmp);
+ if (clstoff < rec->e_cpos) {
+ dump_logical_blkno(out, 0);
+ } else {
+ tmp = blkoff -
+ ocfs2_clusters_to_blocks(fs,
+ rec->e_cpos);
+ dump_logical_blkno(out, rec->e_blkno + tmp);
+ }
goto bail;
}
@@ -487,6 +498,15 @@
for (i = 0; i < el->l_next_free_rec; ++i) {
rec = &(el->l_recs[i]);
+
+ /* In an unsuccessful insertion, we may shift the tree and
+ * add a new branch for it without doing any insertion. So we
+ * may meet an extent block which has
+ * rec->e_clusters == 0; this should only happen
+ * in the last extent rec. */
+ if (!rec->e_clusters && i == el->l_next_free_rec - 1)
+ break;
+
if (el->l_tree_depth) {
ret = ocfs2_malloc_block(gbls.fs->fs_io, &buf);
if (ret)
Modified: branches/sparse-files/debugfs.ocfs2/dump.c
===================================================================
--- branches/sparse-files/debugfs.ocfs2/dump.c 2007-03-16 23:57:45 UTC (rev 1318)
+++ branches/sparse-files/debugfs.ocfs2/dump.c 2007-03-19 05:47:26 UTC (rev 1319)
@@ -301,6 +301,11 @@
for (i = 0; i < ext->l_next_free_rec; ++i) {
rec = &(ext->l_recs[i]);
+ /* For a sparse file, we may find an empty record.
+ * Just skip it.
+ */
+ if (!rec->e_clusters)
+ continue;
fprintf(out, "\t%-2d %-11u %-12u %"PRIu64"\n",
i, rec->e_cpos, rec->e_clusters, rec->e_blkno);
}
Modified: branches/sparse-files/debugfs.ocfs2/find_block_inode.c
===================================================================
--- branches/sparse-files/debugfs.ocfs2/find_block_inode.c 2007-03-16 23:57:45 UTC (rev 1318)
+++ branches/sparse-files/debugfs.ocfs2/find_block_inode.c 2007-03-19 05:47:26 UTC (rev 1319)
@@ -63,6 +63,13 @@
for (i = 0; i < el->l_next_free_rec; ++i) {
rec = &(el->l_recs[i]);
+
+ /* For a sparse file, we may find an empty record.
+ * Just skip it.
+ */
+ if (!rec->e_clusters)
+ continue;
+
if (el->l_tree_depth) {
ret = ocfs2_read_extent_block(fs, rec->e_blkno, buf);
if (ret) {
Modified: branches/sparse-files/extras/Makefile
===================================================================
--- branches/sparse-files/extras/Makefile 2007-03-16 23:57:45 UTC (rev 1318)
+++ branches/sparse-files/extras/Makefile 2007-03-19 05:47:26 UTC (rev 1319)
@@ -11,7 +11,7 @@
CFLAGS = $(OPTS) $(WARNINGS)
-UNINST_PROGRAMS = find_hardlinks find_dup_extents find_inode_paths set_random_bits decode_lockres encode_lockres mark_journal_dirty find_allocation_fragments compute_groups
+UNINST_PROGRAMS = find_hardlinks find_dup_extents find_inode_paths set_random_bits decode_lockres encode_lockres mark_journal_dirty find_allocation_fragments compute_groups sparse_file_test
INCLUDES = -I../libocfs2/include -I$(TOPDIR)/libo2dlm/include -I$(TOPDIR)/libo2cb/include
@@ -30,6 +30,7 @@
MARK_JOURNAL_DIRTY_CFILES = mark_journal_dirty.c
FIND_ALLOC_FRAG_CFILES = find_allocation_fragments.c
COMPUTE_GROUPS_CFILES = compute_groups.c
+SPARSE_FILE_TEST_CFILES = sparse.c
DIST_FILES = $(FIND_HARDLINKS_CFILES) $(FIND_DUP_EXTENTS_CFILES) $(FIND_INODE_PATHS_CFILES) $(SET_RANDOM_BITS_CFILES) $(DECODE_LOCKRES_CFILES) $(ENCODE_LOCKRES_CFILES) $(MARK_JOURNAL_DIRTY_CFILES) $(FIND_ALLOC_FRAG_CFILES) $(COMPUTE_GROUPS_CFILES)
@@ -42,6 +43,7 @@
MARK_JOURNAL_DIRTY_OBJS = $(subst .c,.o,$(MARK_JOURNAL_DIRTY_CFILES))
FIND_ALLOC_FRAG_OBJS = $(subst .c,.o,$(FIND_ALLOC_FRAG_CFILES))
COMPUTE_GROUPS_OBJS = $(subst .c,.o,$(COMPUTE_GROUPS_CFILES))
+SPARSE_FILE_TEST_OBJS = $(subst .c,.o,$(SPARSE_FILE_TEST_CFILES))
LIBOCFS2 = ../libocfs2/libocfs2.a
EXTRAS_LIBS = $(LIBOCFS2) $(COM_ERR_LIBS)
@@ -73,4 +75,7 @@
compute_groups: $(COMPUTE_GROUPS_OBJS) $(LIBOCFS2)
$(LINK) $(EXTRAS_LIBS)
+sparse_file_test: $(SPARSE_FILE_TEST_OBJS) $(LIBOCFS2)
+ $(LINK) $(EXTRAS_LIBS)
+
include $(TOPDIR)/Postamble.make
Added: branches/sparse-files/extras/sparse.c
===================================================================
--- branches/sparse-files/extras/sparse.c 2007-03-16 23:57:45 UTC (rev 1318)
+++ branches/sparse-files/extras/sparse.c 2007-03-19 05:47:26 UTC (rev 1319)
@@ -0,0 +1,378 @@
+/*
+ * sparse.c
+ *
+ * test file for sparse files
+ *
+ * Copyright (C) 2007 Oracle. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ *
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <pwd.h>
+#include <grp.h>
+#include <time.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <netinet/in.h>
+#include <signal.h>
+#include <sys/raw.h>
+#include <linux/kdev_t.h>
+#include <inttypes.h>
+
+#include <linux/types.h>
+
+#include "ocfs2.h"
+#include "ocfs2_fs.h"
+#include "ocfs1_fs_compat.h"
+
+
+char *progname = NULL;
+
+static void handle_signal (int sig)
+{
+ switch (sig) {
+ case SIGTERM:
+ case SIGINT:
+ exit(1);
+ }
+
+ return ;
+}
+
+enum operations {
+ READ_FILE=1,
+ WRITE_FILE,
+ SET_SIZE
+};
+
+struct{
+ char file_name[OCFS2_MAX_FILENAME_LEN];
+ uint64_t block_offset;
+ enum operations ops;
+ uint64_t bytes;
+} options;
+
+static char *device = NULL;
+
+static void usage (const char *progname)
+{
+ fprintf(stderr, "usage: %s [-f file-name] [-b block-offset]\n"
+ "\t\t[-w write-bytes] [-r read-bytes] [-s new-size] device\n",
+ progname);
+
+ exit (0);
+}
+
+static int read_options(int argc, char **argv)
+{
+ int c;
+
+ progname = basename(argv[0]);
+
+ if (argc < 2) {
+ return 1;
+ }
+
+ while(1) {
+ c = getopt(argc, argv, "f:b:w:r:s:");
+ if (c == -1)
+ break;
+
+ switch (c) {
+ case 'f': /* file name */
+ strcpy(options.file_name, optarg);
+ break;
+
+ case 'b': /* block_offset */
+ options.block_offset = strtoull(optarg, NULL, 0);
+ break;
+
+ case 'w':
+ options.ops = WRITE_FILE;
+ options.bytes = strtoull(optarg, NULL, 0);
+ break;
+
+ case 'r':
+ options.ops = READ_FILE;
+ options.bytes = strtoull(optarg, NULL, 0);
+ break;
+
+ case 's':
+ options.ops = SET_SIZE;
+ options.bytes = strtoull(optarg, NULL, 0);
+ break;
+
+ default:
+ break;
+ }
+ }
+
+ if (optind < argc && argv[optind])
+ device = argv[optind];
+
+ return 0;
+}
+
+static errcode_t open_test_inode(ocfs2_filesys *fs, char *name, uint64_t *ino)
+{
+ errcode_t ret = 0;
+ uint64_t tmp_blkno = 0;
+ int namelen = strlen(name);
+
+ ret = ocfs2_lookup(fs, fs->fs_root_blkno, name, namelen,
+ NULL, &tmp_blkno);
+ if (!ret) {
+ *ino = tmp_blkno;
+ return 0;
+ } else if (ret != OCFS2_ET_FILE_NOT_FOUND)
+ return ret;
+
+ ret = ocfs2_new_inode(fs, &tmp_blkno, S_IFREG | 0755);
+ if (ret)
+ return ret;
+
+ ret = ocfs2_link(fs, fs->fs_root_blkno, name,
+ tmp_blkno, OCFS2_FT_REG_FILE);
+ if (ret == OCFS2_ET_DIR_NO_SPACE) {
+ ret = ocfs2_expand_dir(fs, fs->fs_root_blkno,
+ fs->fs_root_blkno);
+ if (ret)
+ return ret;
+
+ ret = ocfs2_link(fs, fs->fs_root_blkno, name,
+ tmp_blkno, OCFS2_FT_REG_FILE);
+ if (ret)
+ return ret;
+ } else if (ret)
+ return ret;
+
+ *ino = tmp_blkno;
+
+ return 0;
+}
+
+static void output(char *buf, int bytes)
+{
+ int i;
+
+ for (i = 0; i < bytes; i++) {
+ printf("%d,", buf[i]);
+ }
+ printf("\n");
+}
+
+static errcode_t test_file_read(ocfs2_filesys *fs, uint64_t ino, uint64_t block_offset,
+ int read_bytes)
+{
+ errcode_t ret;
+ uint64_t offset;
+ uint32_t got;
+ ocfs2_cached_inode *ci = NULL;
+ int block_num, buflen;
+ char *buf = NULL;
+
+ ret = ocfs2_read_cached_inode(fs, ino, &ci);
+ if (ret) {
+ com_err(progname, ret, "while reading inode %"PRIu64, ino);
+ goto bail;
+ }
+
+ ret = ocfs2_extent_map_init(fs, ci);
+ if (ret) {
+ com_err(progname, ret, "while initializing extent map");
+ goto bail;
+ }
+
+ block_num = (read_bytes + fs->fs_blocksize - 1) / fs->fs_blocksize;
+ buflen = block_num * fs->fs_blocksize;
+
+ ret = ocfs2_malloc_blocks(fs->fs_io, block_num, &buf);
+ if (ret) {
+ com_err(progname, ret, "while allocating %u bytes", buflen);
+ goto bail;
+ }
+
+ offset = block_offset * fs->fs_blocksize;
+ ret = ocfs2_file_read(ci, buf, buflen, offset, &got);
+ if (ret) {
+ com_err(progname, ret, "while reading %u bytes", buflen);
+ goto bail;
+ }
+
+ output(buf, read_bytes);
+bail:
+ if (buf)
+ ocfs2_free(&buf);
+ return ret;
+}
+
+static unsigned long get_rand(unsigned long min, unsigned long max)
+{
+ if (min == 0 && max == 0)
+ return 0;
+
+ return min + (rand() % (max - min));
+}
+
+static void create_data(char *data, int len)
+{
+ int i;
+
+ for (i = 0; i < len - 1; i ++) {
+ data[i] = 'A' + get_rand(0,52);
+ }
+ data[i] = '\0';
+}
+
+static errcode_t test_file_write(ocfs2_filesys *fs, uint64_t ino,
+ uint64_t block_offset, int write_byte)
+{
+ errcode_t ret;
+ uint64_t offset, p_blkno;
+ uint32_t got;
+ uint32_t found;
+ ocfs2_cached_inode *ci = NULL;
+ int buflen, block_num;
+ char *buf = NULL;
+
+ ret = ocfs2_read_cached_inode(fs, ino, &ci);
+ if (ret) {
+ com_err(progname, ret, "while reading inode %"PRIu64, ino);
+ goto bail;
+ }
+
+ offset = block_offset * fs->fs_blocksize;
+ if (ci->ci_inode->i_size < offset + write_byte) {
+ ret = ocfs2_extend_file(fs, ino, offset+write_byte);
+ if (ret) {
+ com_err(progname, ret, "while extending the size of"
+ " inode %"PRIu64, ino);
+ goto bail;
+ }
+ ocfs2_free_cached_inode(fs, ci);
+ ocfs2_read_cached_inode(fs, ino, &ci);
+ }
+
+ block_num = (write_byte + fs->fs_blocksize - 1) / fs->fs_blocksize;
+ buflen = block_num * fs->fs_blocksize;
+
+ ret = ocfs2_malloc_blocks(fs->fs_io,block_num, &buf);
+ if (ret) {
+ com_err(progname, ret, "while allocating %u blocks", block_num);
+ goto bail;
+ }
+
+ create_data(buf, write_byte);
+
+ ret = ocfs2_extent_map_init(fs, ci);
+ if (ret) {
+ com_err(progname, ret, "while initializing extent map");
+ goto bail;
+ }
+
+ /* In order to test CONTIG_LEFT, we need to test like this:
+ * 1. Remove the comments around the two calls below, i.e.
+ * call ocfs2_new_clusters to allocate a cluster first and release
+ * it after the ocfs2_file_write.
+ * 2. Comment out ocfs2_new_clusters again. So the old cluster will be
+ * used, and CONTIG_LEFT will be used when ocfs2_insert_extent is
+ * called.
+ */
+ /* ocfs2_new_clusters(fs, 1, 1, &p_blkno, &found); */
+
+ ret = ocfs2_file_write(ci, buf, buflen, offset, &got);
+ if (ret) {
+ com_err(progname, ret, "while reading %u bytes", buflen);
+ goto bail;
+ }
+
+ /* ocfs2_free_clusters(fs, 1, p_blkno); */
+
+ output(buf, write_byte);
+
+bail:
+ if (ci)
+ ocfs2_free_cached_inode(fs, ci);
+ if (buf)
+ ocfs2_free(&buf);
+ return ret;
+}
+
+int main (int argc, char **argv)
+{
+ ocfs2_filesys *fs = NULL;
+ errcode_t ret = 1;
+ uint64_t inode;
+
+ initialize_ocfs_error_table();
+
+#define INSTALL_SIGNAL(sig) \
+ do { \
+ if (signal(sig, handle_signal) == SIG_ERR) { \
+ printf("Could not set " #sig "\n"); \
+ goto bail; \
+ } \
+ } while (0)
+
+ INSTALL_SIGNAL(SIGTERM);
+ INSTALL_SIGNAL(SIGINT);
+
+ memset(&options, 0, sizeof(options));
+ if (read_options(argc, argv)) {
+ usage(progname);
+ goto bail;
+ }
+
+ if (!device || !options.ops) {
+ goto bail;
+ }
+
+ ret = ocfs2_open(device, OCFS2_FLAG_RW, 0, 0, &fs);
+ if (ret) {
+ com_err(progname, ret, "while opening \"%s\"", device);
+ goto bail;
+ }
+
+ ret = open_test_inode(fs, options.file_name, &inode);
+ if (ret) {
+ com_err(progname, ret, "while open test inode");
+ goto bail;
+ }
+
+ if (options.ops == READ_FILE)
+ ret = test_file_read(fs, inode, options.block_offset,
+ options.bytes);
+ else if (options.ops == WRITE_FILE)
+ ret = test_file_write(fs, inode, options.block_offset,
+ options.bytes);
+ else
+ ret = ocfs2_truncate(fs, inode, options.bytes);
+
+bail:
+ if (fs)
+ ocfs2_close(fs);
+
+ return ret;
+}
Modified: branches/sparse-files/fsck.ocfs2/extent.c
===================================================================
--- branches/sparse-files/fsck.ocfs2/extent.c 2007-03-16 23:57:45 UTC (rev 1318)
+++ branches/sparse-files/fsck.ocfs2/extent.c 2007-03-19 05:47:26 UTC (rev 1319)
@@ -287,6 +287,12 @@
for (i = 0; i < max_recs; i++) {
er = &el->l_recs[i];
+ /* For a sparse file, the leftmost record may be
+ * empty. Just skip it.
+ */
+ if (el->l_tree_depth && !i && !er->e_clusters)
+ continue;
+
/* returns immediately if blkno is out of range.
* descends into eb. checks that data er doesn't
* reference past the volume or anything crazy. */
Modified: branches/sparse-files/fsck.ocfs2/pass1.c
===================================================================
--- branches/sparse-files/fsck.ocfs2/pass1.c 2007-03-16 23:57:45 UTC (rev 1318)
+++ branches/sparse-files/fsck.ocfs2/pass1.c 2007-03-19 05:47:26 UTC (rev 1319)
@@ -739,7 +739,7 @@
static errcode_t o2fsck_check_blocks(ocfs2_filesys *fs, o2fsck_state *ost,
uint64_t blkno, struct ocfs2_dinode *di)
{
- uint64_t expected = 0;
+ uint64_t expected = 0, unexpected = 0;
errcode_t ret;
struct verifying_blocks vb = {
.vb_ost = ost,
@@ -804,21 +804,33 @@
goto out;
}
- if (vb.vb_num_blocks > 0)
- expected = (vb.vb_last_block + 1) * fs->fs_blocksize;
+ /* For a sparse file, the file size can be greater than the real
+ * last block offset recorded in the extent list, but it shouldn't be
+ * less than that cluster offset since we have already allocated some
+ * blocks at that offset. So if the size is too small, fix it to the
+ * end of the last visible cluster.
+ *
+ * It is also valid for a file to have no allocated blocks but any
+ * size in bytes, so we don't need to check its size in that case either.
+ */
+ if (vb.vb_num_blocks > 0) {
+ expected = ocfs2_blocks_to_clusters(fs, vb.vb_last_block + 1);
+ expected *= fs->fs_clustersize;
+ unexpected = expected - fs->fs_clustersize;
- /* i_size is checked for symlinks elsewhere */
- if (!S_ISLNK(di->i_mode) && di->i_size > expected &&
- prompt(ost, PY, PR_INODE_SIZE, "Inode %"PRIu64" has a size of "
- "%"PRIu64" but has %"PRIu64" bytes of actual data. "
- "Correct the file size?",
- di->i_blkno, di->i_size, expected)) {
- di->i_size = expected;
- o2fsck_write_inode(ost, blkno, di);
+ /* i_size is checked for symlinks elsewhere */
+ if (!S_ISLNK(di->i_mode) && di->i_size <= unexpected &&
+ prompt(ost, PY, PR_INODE_SIZE, "Inode %"PRIu64" has a size of "
+ "%"PRIu64" but has %"PRIu64" blocks of actual data. "
+ "Correct the file size?",
+ di->i_blkno, di->i_size, vb.vb_last_block + 1)) {
+ di->i_size = expected;
+ o2fsck_write_inode(ost, blkno, di);
+ }
}
if (vb.vb_num_blocks > 0)
- expected = ocfs2_clusters_in_blocks(fs, vb.vb_last_block + 1);
+ expected = ocfs2_clusters_in_blocks(fs, vb.vb_num_blocks);
if (di->i_clusters < expected &&
prompt(ost, PY, PR_INODE_CLUSTERS,
Modified: branches/sparse-files/libocfs2/extend_file.c
===================================================================
--- branches/sparse-files/libocfs2/extend_file.c 2007-03-16 23:57:45 UTC (rev 1318)
+++ branches/sparse-files/libocfs2/extend_file.c 2007-03-19 05:47:26 UTC (rev 1319)
@@ -30,221 +30,1213 @@
#include <unistd.h>
#endif
+#include <inttypes.h>
+#include <errno.h>
+#include <assert.h>
#include "ocfs2.h"
+/*
+ * Structures which describe a path through a btree, and functions to
+ * manipulate them.
+ *
+ * The idea here is to be as generic as possible with the tree
+ * manipulation code.
+ */
+struct ocfs2_path_item {
+ uint64_t blkno;
+ char *buf;
+ struct ocfs2_extent_list *el;
+};
+#define OCFS2_MAX_PATH_DEPTH 5
+
+struct ocfs2_path {
+ int p_tree_depth;
+ struct ocfs2_path_item p_node[OCFS2_MAX_PATH_DEPTH];
+};
+
+#define path_root_blkno(_path) ((_path)->p_node[0].blkno)
+#define path_root_buf(_path) ((_path)->p_node[0].buf)
+#define path_root_el(_path) ((_path)->p_node[0].el)
+#define path_leaf_blkno(_path) ((_path)->p_node[(_path)->p_tree_depth].blkno)
+#define path_leaf_buf(_path) ((_path)->p_node[(_path)->p_tree_depth].buf)
+#define path_leaf_el(_path) ((_path)->p_node[(_path)->p_tree_depth].el)
+#define path_num_items(_path) ((_path)->p_tree_depth + 1)
+
struct insert_ctxt {
ocfs2_filesys *fs;
struct ocfs2_dinode *di;
struct ocfs2_extent_rec rec;
};
+/*
+ * Reset the actual path elements so that we can re-use the structure
+ * to build another path. Generally, this involves freeing the buffer
+ * heads.
+ */
+static void ocfs2_reinit_path(struct ocfs2_path *path, int keep_root)
+{
+ int i, start = 0, depth = 0;
+ struct ocfs2_path_item *node;
-static errcode_t insert_extent_eb(struct insert_ctxt *ctxt,
- uint64_t eb_blkno);
+ if (keep_root)
+ start = 1;
+ for(i = start; i < path_num_items(path); i++) {
+ node = &path->p_node[i];
+ if (!node->buf)
+ continue;
+
+ ocfs2_free(&node->buf);
+ node->blkno = 0;
+ node->buf = NULL;
+ node->el = NULL;
+ }
+
+ /*
+ * Tree depth may change during truncate, or insert. If we're
+ * keeping the root extent list, then make sure that our path
+ * structure reflects the proper depth.
+ */
+ if (keep_root)
+ depth = path_root_el(path)->l_tree_depth;
+
+ path->p_tree_depth = depth;
+}
+
+static void ocfs2_free_path(struct ocfs2_path *path)
+{
+ /* We don't free the root because often in libocfs2 the root is a
+ * shared buffer such as the inode. Caller must be responsible for
+ * handling the root of the path.
+ */
+ if (path) {
+ ocfs2_reinit_path(path, 1);
+ ocfs2_free(&path);
+ }
+}
+
/*
- * Update the leaf pointer from the previous last_eb_blk to the new
- * last_eb_blk. Also updates the dinode's ->last_eb_blk.
+ * Make the *dest path the same as src and re-initialize src path to
+ * have a root only.
*/
-static errcode_t update_last_eb_blk(struct insert_ctxt *ctxt,
- struct ocfs2_extent_block *eb)
+static void ocfs2_mv_path(struct ocfs2_path *dest, struct ocfs2_path *src)
{
+ int i;
+
+ assert(path_root_blkno(dest) == path_root_blkno(src));
+
+ for(i = 1; i < OCFS2_MAX_PATH_DEPTH; i++) {
+ ocfs2_free(&dest->p_node[i].buf);
+
+ dest->p_node[i].blkno = src->p_node[i].blkno;
+ dest->p_node[i].buf = src->p_node[i].buf;
+ dest->p_node[i].el = src->p_node[i].el;
+
+ src->p_node[i].blkno = 0;
+ src->p_node[i].buf = NULL;
+ src->p_node[i].el = NULL;
+ }
+}
+
+/*
+ * Insert an extent block at given index.
+ *
+ * This will not take an additional reference on eb_bh.
+ */
+static inline void ocfs2_path_insert_eb(struct ocfs2_path *path, int index,
+ char *buf)
+{
+ struct ocfs2_extent_block *eb = (struct ocfs2_extent_block *) buf;
+ /*
+ * Right now, no root bh is an extent block, so this helps
+ * catch code errors with dinode trees. The assertion can be
+ * safely removed if we ever need to insert extent block
+ * structures at the root.
+ */
+ assert(index);
+
+ path->p_node[index].blkno = eb->h_blkno;
+ path->p_node[index].buf = (char *)buf;
+ path->p_node[index].el = &eb->h_list;
+}
+
+static struct ocfs2_path *ocfs2_new_path(ocfs2_filesys* fs, char *buf,
+ struct ocfs2_extent_list *root_el)
+{
+ errcode_t ret = 0;
+ struct ocfs2_path *path = NULL;
+ struct ocfs2_dinode *di = (struct ocfs2_dinode *)buf;
+
+ assert(root_el->l_tree_depth < OCFS2_MAX_PATH_DEPTH);
+
+ ret = ocfs2_malloc0(sizeof(*path), &path);
+ if (path) {
+ path->p_tree_depth = root_el->l_tree_depth;
+ path->p_node[0].blkno = di->i_blkno;
+ path->p_node[0].buf = buf;
+ path->p_node[0].el = root_el;
+ }
+
+ return path;
+}
+
+/*
+ * Allocate and initialize a new path based on a disk inode tree.
+ */
+static struct ocfs2_path *ocfs2_new_inode_path(ocfs2_filesys *fs,
+ struct ocfs2_dinode *di)
+{
+ struct ocfs2_extent_list *el = &di->id2.i_list;
+
+ return ocfs2_new_path(fs, (char *)di, el);
+}
+
+/* Write all the extent block information to the disk.
+ * We write all path nodes further down than sub_index.
+ * The caller will handle writing the node at sub_index itself.
+ */
+static errcode_t ocfs2_write_path_eb(ocfs2_filesys *fs,
+ struct ocfs2_path *path, int sub_index)
+{
errcode_t ret;
- char *buf;
- struct ocfs2_extent_block *last_eb;
+ int i;
- if (!ctxt->di->i_last_eb_blk)
- return OCFS2_ET_INTERNAL_FAILURE;
+ for (i = path->p_tree_depth; i > sub_index; i--) {
+ ret = ocfs2_write_extent_block(fs,
+ path->p_node[i].blkno,
+ path->p_node[i].buf);
+ if (ret)
+ return ret;
+ }
- ret = ocfs2_malloc_block(ctxt->fs->fs_io, &buf);
- if (ret)
- return ret;
+ return 0;
+}
- ret = ocfs2_read_extent_block(ctxt->fs, ctxt->di->i_last_eb_blk,
- buf);
- if (ret)
- goto out;
+/* Some extent blocks have been modified and we need to synchronize them to
+ * the disk accordingly.
+ *
+ * We will update the inode only if the subtree_index indicates the inode is
+ * the sub_root for the change.
+ */
+static errcode_t ocfs2_sync_path_to_disk(ocfs2_filesys *fs,
+ struct ocfs2_path *left_path,
+ struct ocfs2_path *right_path,
+ int subtree_index)
+{
+ errcode_t ret;
+ uint64_t blkno = right_path->p_node[subtree_index].blkno;
+ char *sub_root = right_path->p_node[subtree_index].buf;
- last_eb = (struct ocfs2_extent_block *)buf;
- last_eb->h_next_leaf_blk = eb->h_blkno;
+ assert(right_path);
- ret = ocfs2_write_extent_block(ctxt->fs, last_eb->h_blkno,
- buf);
+ if (left_path) {
+ ret = ocfs2_write_path_eb(fs, left_path, subtree_index);
+ if (ret)
+ goto bail;
+ }
+
+ ret = ocfs2_write_path_eb(fs, right_path, subtree_index);
if (ret)
- goto out;
+ goto bail;
- /* This is written at the end by insert_extent() */
- ctxt->di->i_last_eb_blk = eb->h_blkno;
+ if (subtree_index) {
+ /* subtree_index indicates an extent block. */
+ ret = ocfs2_write_extent_block(fs, blkno, sub_root);
+ if (ret)
+ goto bail;
+ } else {
+ /* Here the subtree_index indicates the inode, so we have to
+ * update the extent list in the ocfs2_dinode accordingly.
+ */
+ ret = ocfs2_write_inode(fs, blkno, sub_root);
+ if (ret)
+ goto bail;
+ }
+bail:
+ return ret;
+}
-out:
- ocfs2_free(&buf);
+enum ocfs2_contig_type {
+ CONTIG_NONE = 0,
+ CONTIG_LEFT,
+ CONTIG_RIGHT
+};
- return ret;
+static inline int ocfs2_block_extent_contig(ocfs2_filesys *fs,
+ struct ocfs2_extent_rec *ext,
+ uint64_t blkno)
+{
+ return blkno == (ext->e_blkno +
+ ocfs2_clusters_to_blocks(fs, ext->e_clusters));
}
+static inline int ocfs2_extents_adjacent(struct ocfs2_extent_rec *left,
+ struct ocfs2_extent_rec *right)
+{
+ return (left->e_cpos + left->e_clusters == right->e_cpos);
+}
+
+static enum ocfs2_contig_type
+ ocfs2_extent_contig(ocfs2_filesys *fs,
+ struct ocfs2_extent_rec *ext,
+ struct ocfs2_extent_rec *insert_rec)
+{
+ uint64_t blkno = insert_rec->e_blkno;
+
+ if (ocfs2_extents_adjacent(ext, insert_rec) &&
+ ocfs2_block_extent_contig(fs, ext, blkno))
+ return CONTIG_RIGHT;
+
+ blkno = ext->e_blkno;
+ if (ocfs2_extents_adjacent(insert_rec, ext) &&
+ ocfs2_block_extent_contig(fs, insert_rec, blkno))
+ return CONTIG_LEFT;
+
+ return CONTIG_NONE;
+}
+
/*
- * Add a child extent_block to a non-leaf extent list.
+ * NOTE: We can have pretty much any combination of contiguousness and
+ * appending.
+ *
+ * The usefulness of APPEND_TAIL is more in that it lets us know that
+ * we'll have to update the path to that leaf.
*/
-static errcode_t append_eb(struct insert_ctxt *ctxt,
- struct ocfs2_extent_list *el)
+enum ocfs2_append_type {
+ APPEND_NONE = 0,
+ APPEND_TAIL,
+};
+
+struct ocfs2_insert_type {
+ enum ocfs2_append_type ins_appending;
+ enum ocfs2_contig_type ins_contig;
+ int ins_contig_index;
+ int ins_free_records;
+ int ins_tree_depth;
+};
+
+/*
+ * Helper function for ocfs2_add_branch() and shift_tree_depth().
+ *
+ * Returns the sum of the rightmost extent rec logical offset and
+ * cluster count.
+ *
+ * ocfs2_add_branch() uses this to determine what logical cluster
+ * value should be populated into the leftmost new branch records.
+ *
+ * shift_tree_depth() uses this to determine the # clusters
+ * value for the new topmost tree record.
+ */
+static inline uint32_t ocfs2_sum_rightmost_rec(struct ocfs2_extent_list *el)
{
+ int i = el->l_next_free_rec - 1;
+
+ return el->l_recs[i].e_cpos + el->l_recs[i].e_clusters;
+}
+
+/*
+ * Add an entire tree branch to our inode. eb_buf is the extent block
+ * to start at, if we don't want to start the branch at the dinode
+ * structure.
+ *
+ * last_eb_buf is required as we have to update its next_leaf pointer
+ * for the new last extent block.
+ *
+ * the new branch will be 'empty' in the sense that every block will
+ * contain a single record with e_clusters == 0.
+ */
+static int ocfs2_add_branch(ocfs2_filesys *fs,
+ struct ocfs2_dinode *fe,
+ char *eb_buf,
+ char *last_eb_buf)
+{
errcode_t ret;
- char *buf;
- uint64_t blkno;
+ int new_blocks, i;
+ uint64_t next_blkno, new_last_eb_blk;
struct ocfs2_extent_block *eb;
- struct ocfs2_extent_rec *rec;
+ struct ocfs2_extent_list *eb_el;
+ struct ocfs2_extent_list *el;
+ uint32_t new_cpos;
+ uint64_t *new_blknos = NULL;
+ char **new_eb_bufs = NULL;
+ char *buf = NULL;
- ret = ocfs2_malloc_block(ctxt->fs->fs_io, &buf);
- if (ret)
- return ret;
+ assert(last_eb_buf);
- ret = ocfs2_new_extent_block(ctxt->fs, &blkno);
+ if (eb_buf) {
+ eb = (struct ocfs2_extent_block *) eb_buf;
+ el = &eb->h_list;
+ } else
+ el = &fe->id2.i_list;
+
+ /* we never add a branch to a leaf. */
+ assert(el->l_tree_depth);
+
+ new_blocks = el->l_tree_depth;
+
+ /* Allocate the bookkeeping for the new extent blocks; new_blocks of
+ * them are needed, one per tree level below el. */
+ ret = ocfs2_malloc0(sizeof(uint64_t) * new_blocks, &new_blknos);
if (ret)
- goto out;
+ goto bail;
+ memset(new_blknos, 0, sizeof(uint64_t) * new_blocks);
- ret = ocfs2_read_extent_block(ctxt->fs, blkno, buf);
+ ret = ocfs2_malloc0(sizeof(char *) * new_blocks, &new_eb_bufs);
if (ret)
- goto out;
+ goto bail;
+ memset(new_eb_bufs, 0, sizeof(char *) * new_blocks);
- eb = (struct ocfs2_extent_block *)buf;
- eb->h_list.l_tree_depth = el->l_tree_depth - 1;
+ for (i = 0; i < new_blocks; i++) {
+ ret = ocfs2_malloc_block(fs->fs_io, &buf);
+ if (ret)
+ return ret;
+ new_eb_bufs[i] = buf;
- if (!eb->h_list.l_tree_depth) {
- ret = update_last_eb_blk(ctxt, eb);
+ ret = ocfs2_new_extent_block(fs, &new_blknos[i]);
if (ret)
- goto out;
+ goto bail;
+
+ ret = ocfs2_read_extent_block(fs, new_blknos[i], buf);
+ if (ret)
+ goto bail;
}
- if (el->l_next_free_rec) {
- rec = &el->l_recs[el->l_next_free_rec - 1];
- if (!rec->e_blkno) {
- rec->e_blkno = blkno;
- goto out;
- }
+ eb = (struct ocfs2_extent_block *)last_eb_buf;
+ new_cpos = ocfs2_sum_rightmost_rec(&eb->h_list);
+
+ /* Note: new_eb_bufs[new_blocks - 1] is the guy which will be
+ * linked with the rest of the tree.
+ * Conversely, new_eb_bufs[0] is the new bottommost leaf.
+ *
+ * when we leave the loop, new_last_eb_blk will point to the
+ * newest leaf, and next_blkno will point to the topmost extent
+ * block.
+ */
+ next_blkno = new_last_eb_blk = 0;
+ for(i = 0; i < new_blocks; i++) {
+ buf = new_eb_bufs[i];
+ eb = (struct ocfs2_extent_block *) buf;
+ eb_el = &eb->h_list;
+
+ eb->h_next_leaf_blk = 0;
+ eb_el->l_tree_depth = i;
+ eb_el->l_next_free_rec = 1;
+ /*
+ * This actually counts as an empty extent as
+ * e_clusters == 0
+ */
+ eb_el->l_recs[0].e_cpos = new_cpos;
+ eb_el->l_recs[0].e_blkno = next_blkno;
+ eb_el->l_recs[0].e_clusters = 0;
+ if (!eb_el->l_tree_depth)
+ new_last_eb_blk = eb->h_blkno;
+
+ next_blkno = eb->h_blkno;
}
- rec = &el->l_recs[el->l_next_free_rec];
- rec->e_blkno = blkno;
- rec->e_cpos = ctxt->rec.e_cpos;
+
+ /* Link the new branch into the rest of the tree (el will
+ * either be on the fe, or the extent block passed in).
+ */
+ i = el->l_next_free_rec;
+ el->l_recs[i].e_blkno = next_blkno;
+ el->l_recs[i].e_cpos = new_cpos;
+ el->l_recs[i].e_clusters = 0;
el->l_next_free_rec++;
-out:
- ocfs2_free(&buf);
+ /* fe needs a new last extent block pointer, as does the
+ * next_leaf on the previously last-extent-block.
+ */
+ fe->i_last_eb_blk = new_last_eb_blk;
+ eb = (struct ocfs2_extent_block *) last_eb_buf;
+ eb->h_next_leaf_blk = new_last_eb_blk;
+
+ /* Write the new extent blocks, the modified parent extent block (if
+ * one was passed in) and the updated inode back to disk.
+ */
+ for(i = 0; i < new_blocks; i++) {
+ buf = new_eb_bufs[i];
+ ret = ocfs2_write_extent_block(fs, new_blknos[i], buf);
+ if (ret)
+ goto bail;
+ }
+
+ if (eb_buf) {
+ eb = (struct ocfs2_extent_block *)eb_buf;
+ ret = ocfs2_write_extent_block(fs, eb->h_blkno, eb_buf);
+ if (ret)
+ goto bail;
+ }
+
+ ret = ocfs2_write_inode(fs, fe->i_blkno, (char *)fe);
+ if (ret)
+ goto bail;
+
+ ret = 0;
+bail:
+ if (new_eb_bufs) {
+ for (i = 0; i < new_blocks; i++)
+ if (new_eb_bufs[i])
+ ocfs2_free(&new_eb_bufs[i]);
+ ocfs2_free(&new_eb_bufs);
+ }
+
+ if (ret && new_blknos)
+ for (i = 0; i < new_blocks; i++)
+ if (new_blknos[i])
+ ocfs2_delete_extent_block(fs, new_blknos[i]);
+
+ if (new_blknos)
+ ocfs2_free(&new_blknos);
+
return ret;
}
/*
- * Insert a new extent into an extent list. If this list is a leaf,
- * add it where appropriate. Otherwise, recurse down the appropriate
- * branch, updating this list on the way back up.
+ * Should only be called when there is no space left in any of the
+ * leaf nodes. What we want to do is find the lowest tree depth
+ * non-leaf extent block with room for new records. There are three
+ * valid results of this search:
+ *
+ * 1) a lowest extent block is found, then we pass it back in
+ * *target_buf and return '0'
+ *
+ * 2) the search fails to find anything, but the dinode has room. We
+ * pass NULL back in *target_buf, but still return '0'
+ *
+ * 3) the search fails to find anything AND the dinode is full, in
+ * which case we return > 0
+ *
+ * return status < 0 indicates an error.
*/
-static errcode_t insert_extent_el(struct insert_ctxt *ctxt,
- struct ocfs2_extent_list *el)
+static errcode_t ocfs2_find_branch_target(ocfs2_filesys *fs,
+ struct ocfs2_dinode *fe,
+ char **target_buf)
{
- errcode_t ret;
- struct ocfs2_extent_rec *rec = NULL;
+ errcode_t ret = 0;
+ int i;
+ uint64_t blkno;
+ struct ocfs2_extent_block *eb;
+ struct ocfs2_extent_list *el;
+ char *buf = NULL, *lowest_buf = NULL;
- if (!el->l_tree_depth) {
- /* A leaf extent_list can do one of three things: */
- if (el->l_next_free_rec) {
- /* It has at least one valid entry and... */
- rec = &el->l_recs[el->l_next_free_rec - 1];
+ *target_buf = NULL;
- /* (1) That entry is contiguous with the new
- * one, so just enlarge the entry. */
- if ((rec->e_blkno +
- ocfs2_clusters_to_blocks(ctxt->fs, rec->e_clusters)) ==
- ctxt->rec.e_blkno) {
- rec->e_clusters += ctxt->rec.e_clusters;
- return 0;
- }
+ el = &fe->id2.i_list;
- /* (2) That entry is zero length, so just fill
- * it in with the new one. */
- if (!rec->e_clusters) {
- *rec = ctxt->rec;
- return 0;
- }
+ ret = ocfs2_malloc_block(fs->fs_io, &buf);
+ if (ret)
+ return ret;
- if (el->l_next_free_rec == el->l_count)
- return OCFS2_ET_NO_SPACE;
+ while(el->l_tree_depth > 1) {
+ if (el->l_next_free_rec == 0) {
+ fprintf(stderr, "Dinode %"PRIu64" has empty "
+ "extent list (next_free_rec == 0)",
+ fe->i_blkno);
+ ret = OCFS2_ET_CORRUPT_EXTENT_BLOCK;
+ goto bail;
}
+ i = el->l_next_free_rec - 1;
+ blkno = el->l_recs[i].e_blkno;
+ if (!blkno) {
+ fprintf(stderr,"Dinode %"PRIu64" has extent "
+ "list where extent # %d has no physical "
+ "block start", fe->i_blkno, i);
+ ret = OCFS2_ET_CORRUPT_EXTENT_BLOCK;
+ goto bail;
+ }
- /* (3) The new entry can't use an existing slot, so
- * put it in a new slot. */
- rec = &el->l_recs[el->l_next_free_rec];
- *rec = ctxt->rec;
- el->l_next_free_rec++;
- return 0;
+ ret = ocfs2_read_extent_block(fs, blkno, buf);
+ if (ret)
+ goto bail;
+
+ eb = (struct ocfs2_extent_block *) buf;
+ el = &eb->h_list;
+
+ if (el->l_next_free_rec < el->l_count)
+ lowest_buf = buf;
}
- /* We're a branch node */
- ret = OCFS2_ET_NO_SPACE;
- if (el->l_next_free_rec) {
- /* If there exists a valid record, and it is not an
- * empty record (e_blkno points to a valid child),
- * try to fill along that branch. */
- rec = &el->l_recs[el->l_next_free_rec - 1];
- if (rec->e_blkno)
- ret = insert_extent_eb(ctxt, rec->e_blkno);
+ /* If we didn't find one and the fe doesn't have any room,
+ * then return '1' */
+ if (!lowest_buf
+ && (fe->id2.i_list.l_next_free_rec == fe->id2.i_list.l_count))
+ ret = 1;
+
+ *target_buf = lowest_buf;
+bail:
+ if (buf && !*target_buf)
+ ocfs2_free(&buf);
+
+ return ret;
+}
+
+static inline int ocfs2_is_empty_extent(struct ocfs2_extent_rec *rec)
+{
+ return !rec->e_clusters;
+}
+
+/*
+ * This function will discard the rightmost extent record.
+ */
+static void ocfs2_shift_records_right(struct ocfs2_extent_list *el)
+{
+ int next_free = el->l_next_free_rec;
+ int count = el->l_count;
+ unsigned int num_bytes;
+
+ assert(next_free);
+ /* This will cause us to go off the end of our extent list. */
+ assert(next_free < count);
+
+ num_bytes = sizeof(struct ocfs2_extent_rec) * next_free;
+
+ memmove(&el->l_recs[1], &el->l_recs[0], num_bytes);
+}
+
+static void ocfs2_rotate_leaf(struct ocfs2_extent_list *el,
+ struct ocfs2_extent_rec *insert_rec)
+{
+ int i, insert_index, next_free, has_empty, num_bytes;
+ uint32_t insert_cpos = insert_rec->e_cpos;
+ struct ocfs2_extent_rec *rec;
+
+ next_free = el->l_next_free_rec;
+ has_empty = ocfs2_is_empty_extent(&el->l_recs[0]);
+
+ assert(next_free);
+
+ /* The tree code before us didn't allow enough room in the leaf. */
+ if (el->l_next_free_rec == el->l_count && !has_empty)
+ assert(0);
+
+ /*
+ * The easiest way to approach this is to just remove the
+ * empty extent and temporarily decrement next_free.
+ */
+ if (has_empty) {
+ /*
+ * If next_free was 1 (only an empty extent), this
+ * loop won't execute, which is fine. We still want
+ * the decrement below to happen.
+ */
+ for(i = 0; i < (next_free - 1); i++)
+ el->l_recs[i] = el->l_recs[i+1];
+
+ next_free--;
}
- if (ret) {
- if (ret != OCFS2_ET_NO_SPACE)
- return ret;
-
- if ((el->l_next_free_rec == el->l_count) &&
- (el->l_recs[el->l_next_free_rec - 1].e_blkno))
- return OCFS2_ET_NO_SPACE;
- /* If there wasn't an existing child we insert to and
- * there are free slots, add a new child. */
- ret = append_eb(ctxt, el);
+ /* Figure out what the new record index should be. */
+ for(i = 0; i < next_free; i++) {
+ rec = &el->l_recs[i];
+
+ if (insert_cpos < le32_to_cpu(rec->e_cpos))
+ break;
+ }
+ insert_index = i;
+
+ assert(insert_index >= 0);
+ assert(insert_index < el->l_count);
+ assert(insert_index <= next_free);
+
+ /* No need to memmove if we're just adding to the tail. */
+ if (insert_index != next_free) {
+ assert(next_free < el->l_count);
+
+ num_bytes = next_free - insert_index;
+ num_bytes *= sizeof(struct ocfs2_extent_rec);
+ memmove(&el->l_recs[insert_index + 1],
+ &el->l_recs[insert_index],
+ num_bytes);
+ }
+
+ /*
+ * Either we had an empty extent, and need to re-increment or
+ * there was no empty extent on a non full rightmost leaf node,
+ * in which case we still need to increment.
+ */
+ next_free++;
+ el->l_next_free_rec = next_free;
+ /* Make sure none of the math above just messed up our tree. */
+ assert(el->l_next_free_rec <= el->l_count);
+
+ el->l_recs[insert_index] = *insert_rec;
+}
+
+/*
+ * Create an empty extent record.
+ *
+ * l_next_free_rec may be updated.
+ *
+ * If an empty extent already exists do nothing.
+ */
+static void ocfs2_create_empty_extent(struct ocfs2_extent_list *el)
+{
+ int next_free = el->l_next_free_rec;
+
+ if (next_free == 0)
+ goto set_and_inc;
+
+ if (ocfs2_is_empty_extent(&el->l_recs[0]))
+ return;
+
+ ocfs2_shift_records_right(el);
+
+set_and_inc:
+ el->l_next_free_rec += 1;
+ memset(&el->l_recs[0], 0, sizeof(struct ocfs2_extent_rec));
+}
+
+/*
+ * For a rotation which involves two leaf nodes, the "root node" is
+ * the lowest level tree node which contains a path to both leaves. This
+ * resulting set of information can be used to form a complete "subtree"
+ *
+ * This function is passed two full paths from the dinode down to a
+ * pair of adjacent leaves. Its task is to figure out which path
+ * index contains the subtree root - this can be the root index itself
+ * in a worst-case rotation.
+ *
+ * The array index of the subtree root is passed back.
+ */
+static int ocfs2_find_subtree_root(struct ocfs2_path *left,
+ struct ocfs2_path *right)
+{
+ int i = 0;
+
+ /* Check that the caller passed in two paths from the same tree. */
+ assert(path_root_blkno(left) == path_root_blkno(right));
+
+ do {
+ i++;
+
+ /* The caller didn't pass two adjacent paths. */
+ assert(i <= left->p_tree_depth);
+ } while (left->p_node[i].blkno == right->p_node[i].blkno);
+
+ return i - 1;
+}
+
+typedef void (path_insert_t)(void *, char *);
+
+/*
+ * Traverse a btree path in search of cpos, starting at root_el.
+ *
+ * This code can be called with a cpos larger than the tree, in which
+ * case it will return the rightmost path.
+ */
+static errcode_t __ocfs2_find_path(ocfs2_filesys *fs,
+ struct ocfs2_extent_list *root_el,
+ uint32_t cpos,
+ path_insert_t *func,
+ void *data)
+{
+ int i, ret = 0;
+ uint32_t range;
+ uint64_t blkno;
+ char *buf = NULL;
+ struct ocfs2_extent_block *eb;
+ struct ocfs2_extent_list *el;
+ struct ocfs2_extent_rec *rec;
+
+ el = root_el;
+ while (el->l_tree_depth) {
+ for(i = 0; i < el->l_next_free_rec - 1; i++) {
+ rec = &el->l_recs[i];
+
+ /*
+ * In the case that cpos is off the allocation
+ * tree, this should just wind up returning the
+ * rightmost record.
+ */
+ range = rec->e_cpos + rec->e_clusters;
+ if (cpos >= rec->e_cpos && cpos < range)
+ break;
+ }
+
+ blkno = el->l_recs[i].e_blkno;
+ assert(blkno);
+
+ ret = ocfs2_malloc_block(fs->fs_io, &buf);
if (ret)
return ret;
- /* append_eb() put a new record here, insert on it.
- * If the new child isn't a leaf, this recursion
- * will do the append_eb() again, all the way down to
- * the leaf. */
- rec = &el->l_recs[el->l_next_free_rec - 1];
- ret = insert_extent_eb(ctxt, rec->e_blkno);
+ ret = ocfs2_read_extent_block(fs, blkno, buf);
if (ret)
- return ret;
+ goto out;
+
+ eb = (struct ocfs2_extent_block *) buf;
+ el = &eb->h_list;
+
+ if (func)
+ func(data, buf);
+ buf = NULL;
}
- /* insert_extent_eb() doesn't update e_clusters so that
- * all updates are on the path up, not the path down. Do the
- * update now. */
- rec->e_clusters += ctxt->rec.e_clusters;
- return 0;
+out:
+ /* Catch any trailing buf that the loop didn't handle. */
+ if (buf)
+ ocfs2_free(&buf);
+
+ return ret;
}
/*
- * Insert a new extent into this extent_block. That means
- * reading the block, calling insert_extent_el() on the contained
- * extent list, and then writing out the updated block.
+ * Given an initialized path (that is, it has a valid root extent
+ * list), this function will traverse the btree in search of the path
+ * which would contain cpos.
+ *
+ * The path traveled is recorded in the path structure.
+ *
+ * Note that this will not do any comparisons on leaf node extent
+ * records, so it will work fine in the case that we just added a tree
+ * branch.
*/
-static errcode_t insert_extent_eb(struct insert_ctxt *ctxt,
- uint64_t eb_blkno)
+struct find_path_data {
+ int index;
+ struct ocfs2_path *path;
+};
+
+static void find_path_ins(void *data, char *eb)
{
+ struct find_path_data *fp = data;
+
+ ocfs2_path_insert_eb(fp->path, fp->index, eb);
+ fp->index++;
+}
+
+static int ocfs2_find_path(ocfs2_filesys *fs, struct ocfs2_path *path,
+ uint32_t cpos)
+{
+ struct find_path_data data;
+
+ data.index = 1;
+ data.path = path;
+ return __ocfs2_find_path(fs, path_root_el(path), cpos,
+ find_path_ins, &data);
+}
+
+/*
+ * Adjust the adjacent records (left_rec, right_rec) involved in a rotation.
+ *
+ * Basically, we've moved stuff around at the bottom of the tree and
+ * we need to fix up the extent records above the changes to reflect
+ * the new changes.
+ *
+ * left_rec: the record on the left.
+ * left_child_el: is the child list pointed to by left_rec
+ * right_rec: the record to the right of left_rec
+ * right_child_el: is the child list pointed to by right_rec
+ *
+ * By definition, this only works on interior nodes.
+ */
+static void ocfs2_adjust_adjacent_records(struct ocfs2_extent_rec *left_rec,
+ struct ocfs2_extent_list *left_child_el,
+ struct ocfs2_extent_rec *right_rec,
+ struct ocfs2_extent_list *right_child_el)
+{
+ uint32_t left_clusters, right_end;
+
+ /*
+ * Interior nodes never have holes. Their cpos is the cpos of
+ * the leftmost record in their child list. Their cluster
+ * count covers the full theoretical range of their child list
+ * - the range between their cpos and the cpos of the record
+ * immediately to their right.
+ */
+ left_clusters = right_child_el->l_recs[0].e_cpos;
+ left_clusters -= left_rec->e_cpos;
+ left_rec->e_clusters = left_clusters;
+
+ /*
+ * Calculate the rightmost cluster count boundary before
+ * moving cpos - we will need to adjust e_clusters after
+ * updating e_cpos to keep the same highest cluster count.
+ */
+ right_end = right_rec->e_cpos;
+ right_end += right_rec->e_clusters;
+
+ right_rec->e_cpos = left_rec->e_cpos;
+ right_rec->e_cpos += left_clusters;
+
+ right_end -= right_rec->e_cpos;
+ right_rec->e_clusters = right_end;
+}
+
+/*
+ * Adjust the adjacent root node records involved in a
+ * rotation. left_el_blkno is passed in as a key so that we can easily
+ * find its index in the root list.
+ */
+static void ocfs2_adjust_root_records(struct ocfs2_extent_list *root_el,
+ struct ocfs2_extent_list *left_el,
+ struct ocfs2_extent_list *right_el,
+ uint64_t left_el_blkno)
+{
+ int i;
+
+ assert(root_el->l_tree_depth > left_el->l_tree_depth);
+
+ for(i = 0; i < root_el->l_next_free_rec - 1; i++) {
+ if (root_el->l_recs[i].e_blkno == left_el_blkno)
+ break;
+ }
+
+ /*
+ * The path walking code should have never returned a root and
+ * two paths which are not adjacent.
+ */
+ assert(i < (root_el->l_next_free_rec - 1));
+
+ ocfs2_adjust_adjacent_records(&root_el->l_recs[i], left_el,
+ &root_el->l_recs[i + 1], right_el);
+}
+
+/*
+ * We've changed a leaf block (in right_path) and need to reflect that
+ * change back up the subtree.
+ *
+ * This happens in two places:
+ * - When we've moved an extent record from the left path leaf to the right
+ * path leaf to make room for an empty extent in the left path leaf.
+ * - When we've inserted a new record into the right path leaf.
+ */
+static void ocfs2_complete_rotation(ocfs2_filesys *fs,
+ struct ocfs2_path *left_path,
+ struct ocfs2_path *right_path,
+ int subtree_index)
+{
+ int i, idx;
+ uint64_t blkno;
+ struct ocfs2_extent_list *el, *left_el, *right_el;
+ struct ocfs2_extent_rec *left_rec, *right_rec;
+
+ /*
+ * Update the counts and position values within all the
+ * interior nodes to reflect the leaf rotation we just did.
+ *
+ * The root node is handled below the loop.
+ *
+ * We begin the loop with right_el and left_el pointing to the
+ * leaf lists and work our way up.
+ *
+ * NOTE: within this loop, left_el and right_el always refer
+ * to the *child* lists.
+ */
+ left_el = path_leaf_el(left_path);
+ right_el = path_leaf_el(right_path);
+ for(i = left_path->p_tree_depth - 1; i > subtree_index; i--) {
+
+ /*
+ * One nice property of knowing that all of these
+ * nodes are below the root is that we only deal with
+ * the leftmost right node record and the rightmost
+ * left node record.
+ */
+ el = left_path->p_node[i].el;
+ idx = left_el->l_next_free_rec - 1;
+ left_rec = &el->l_recs[idx];
+
+ el = right_path->p_node[i].el;
+ right_rec = &el->l_recs[0];
+
+ ocfs2_adjust_adjacent_records(left_rec, left_el, right_rec,
+ right_el);
+
+ /*
+ * Setup our list pointers now so that the current
+ * parents become children in the next iteration.
+ */
+ left_el = left_path->p_node[i].el;
+ right_el = right_path->p_node[i].el;
+ }
+
+ /*
+ * At the root node, adjust the two adjacent records which
+ * begin our path to the leaves.
+ */
+
+ el = left_path->p_node[subtree_index].el;
+ left_el = left_path->p_node[subtree_index + 1].el;
+ right_el = right_path->p_node[subtree_index + 1].el;
+ blkno = left_path->p_node[subtree_index + 1].blkno;
+
+ ocfs2_adjust_root_records(el, left_el, right_el, blkno);
+
+ /* ocfs2_adjust_root_records only updates the extent block in the left
+ * path. right_path->p_node[subtree_index] refers to the same extent
+ * block, so copy the left buffer over it to keep the contents identical.
+ */
+ memcpy(right_path->p_node[subtree_index].buf,
+ left_path->p_node[subtree_index].buf, fs->fs_blocksize);
+}
+
+/* Rotate the subtree to right.
+ *
+ * Note: After successful rotation, the extent blocks will be flushed
+ * to disk accordingly.
+ */
+static errcode_t ocfs2_rotate_subtree_right(ocfs2_filesys *fs,
+ struct ocfs2_path *left_path,
+ struct ocfs2_path *right_path,
+ int subtree_index)
+{
errcode_t ret;
- char *buf;
+ int i;
+ char *right_leaf_eb;
+ char *left_leaf_eb = NULL;
+ struct ocfs2_extent_list *right_el, *left_el;
+ struct ocfs2_extent_rec move_rec;
struct ocfs2_extent_block *eb;
- ret = ocfs2_malloc_block(ctxt->fs->fs_io, &buf);
+ left_leaf_eb = path_leaf_buf(left_path);
+ eb = (struct ocfs2_extent_block *)left_leaf_eb;
+ left_el = path_leaf_el(left_path);
+
+ if (left_el->l_next_free_rec != left_el->l_count) {
+ fprintf(stderr,"Inode %"PRIu64" has non-full interior leaf node"
+ " %"PRIu64"(next free = %u)",
+ path_root_blkno(left_path), eb->h_blkno,
+ left_el->l_next_free_rec);
+ return OCFS2_ET_CORRUPT_EXTENT_BLOCK;
+ }
+
+ /*
+ * This extent block may already have an empty record, so we
+ * return early if so.
+ */
+ if (ocfs2_is_empty_extent(&left_el->l_recs[0]))
+ return 0;
+
+ assert(left_path->p_node[subtree_index].blkno ==
+ right_path->p_node[subtree_index].blkno);
+
+ right_leaf_eb = path_leaf_buf(right_path);
+ right_el = path_leaf_el(right_path);
+
+ ocfs2_create_empty_extent(right_el);
+
+ /* Do the copy now. */
+ i = left_el->l_next_free_rec - 1;
+ move_rec = left_el->l_recs[i];
+ right_el->l_recs[0] = move_rec;
+
+ /*
+ * Clear out the record we just copied and shift everything
+ * over, leaving an empty extent in the left leaf.
+ *
+ * We temporarily subtract from next_free_rec so that the
+ * shift will lose the tail record (which is now defunct).
+ */
+ left_el->l_next_free_rec -= 1;
+ ocfs2_shift_records_right(left_el);
+ memset(&left_el->l_recs[0], 0, sizeof(struct ocfs2_extent_rec));
+ left_el->l_next_free_rec += 1;
+
+ ocfs2_complete_rotation(fs, left_path, right_path, subtree_index);
+
+ ret = ocfs2_sync_path_to_disk(fs, left_path, right_path, subtree_index);
+
+ return ret;
+}
+
+/*
+ * Given a full path, determine what cpos value would return us a path
+ * containing the leaf immediately to the left of the current one.
+ *
+ * Will return zero if the path passed in is already the leftmost path.
+ */
+static int ocfs2_find_cpos_for_left_leaf(struct ocfs2_path *path,
+ uint32_t *cpos)
+{
+ int i, j, ret = 0;
+ uint64_t blkno;
+ struct ocfs2_extent_list *el;
+
+ *cpos = 0;
+
+ blkno = path_leaf_blkno(path);
+
+ /* Start at the tree node just above the leaf and work our way up. */
+ i = path->p_tree_depth - 1;
+ while (i >= 0) {
+ el = path->p_node[i].el;
+
+ /* Find the extent record just before the one in our path. */
+ for(j = 0; j < el->l_next_free_rec; j++) {
+ if (el->l_recs[j].e_blkno == blkno) {
+ if (j == 0) {
+ if (i == 0) {
+ /*
+ * We've determined that the
+ * path specified is already
+ * the leftmost one - return a
+ * cpos of zero.
+ */
+ goto out;
+ }
+ /*
+ * The leftmost record points to our
+ * leaf - we need to travel up the
+ * tree one level.
+ */
+ goto next_node;
+ }
+
+ *cpos = el->l_recs[j - 1].e_cpos;
+ *cpos = *cpos + el->l_recs[j - 1].e_clusters - 1;
+ goto out;
+ }
+ }
+
+ /*
+ * If we got here, we never found a valid node where
+ * the tree indicated one should be.
+ */
+ fprintf(stderr, "Invalid extent tree at extent block"
+ " %"PRIu64"\n", blkno);
+ ret = OCFS2_ET_CORRUPT_EXTENT_BLOCK;
+ goto out;
+
+next_node:
+ blkno = path->p_node[i].blkno;
+ i--;
+ }
+
+out:
+ return ret;
+}
+
+/*
+ * Trap the case where we're inserting into the theoretical range past
+ * the _actual_ left leaf range. Otherwise, we'll rotate a record
+ * whose cpos is less than ours into the right leaf.
+ *
+ * It's only necessary to look at the rightmost record of the left
+ * leaf because the logic that calls us should ensure that the
+ * theoretical ranges in the path components above the leaves are
+ * correct.
+ */
+static int ocfs2_rotate_requires_path_adjustment(struct ocfs2_path *left_path,
+ uint32_t insert_cpos)
+{
+ struct ocfs2_extent_list *left_el;
+ struct ocfs2_extent_rec *rec;
+ int next_free;
+
+ left_el = path_leaf_el(left_path);
+ next_free = left_el->l_next_free_rec;
+ rec = &left_el->l_recs[next_free - 1];
+
+ if (insert_cpos > rec->e_cpos)
+ return 1;
+ return 0;
+}
+
+/*
+ * Rotate all the records in a btree right one record, starting at insert_cpos.
+ *
+ * The path to the rightmost leaf should be passed in.
+ *
+ * The array is assumed to be large enough to hold an entire path (tree depth).
+ *
+ * Upon successful return from this function:
+ *
+ * - The 'right_path' array will contain a path to the leaf block
+ * whose range contains e_cpos.
+ * - That leaf block will have a single empty extent in list index 0.
+ * - In the case that the rotation requires a post-insert update,
+ * *ret_left_path will contain a valid path which can be passed to
+ * ocfs2_insert_path().
+ */
+static int ocfs2_rotate_tree_right(ocfs2_filesys *fs,
+ uint32_t insert_cpos,
+ struct ocfs2_path **ret_left_path,
+ struct ocfs2_path *right_path)
+{
+ int ret, start;
+ uint32_t cpos;
+ struct ocfs2_path *left_path = NULL;
+
+ *ret_left_path = NULL;
+
+ ret = ocfs2_find_cpos_for_left_leaf(right_path, &cpos);
if (ret)
- return ret;
+ goto out;
- ret = ocfs2_read_extent_block(ctxt->fs, eb_blkno, buf);
- if (!ret) {
- eb = (struct ocfs2_extent_block *)buf;
- ret = insert_extent_el(ctxt, &eb->h_list);
+ left_path = ocfs2_new_path(fs, path_root_buf(right_path),
+ path_root_el(right_path));
+ if (!left_path) {
+ ret = OCFS2_ET_NO_MEMORY;
+ goto out;
}
- if (!ret)
- ret = ocfs2_write_extent_block(ctxt->fs, eb_blkno, buf);
+ /*
+ * What we want to do here is:
+ *
+ * 1) Start with the rightmost path.
+ *
+ * 2) Determine a path to the leaf block directly to the left
+ * of that leaf.
+ *
+ * 3) Determine the 'subtree root' - the lowest level tree node
+ * which contains a path to both leaves.
+ *
+ * 4) Rotate the subtree.
+ *
+ * 5) Find the next subtree by considering the left path to be
+ * the new right path.
+ */
+ while (insert_cpos <= cpos) {
+ start = 0;
- ocfs2_free(&buf);
+ ret = ocfs2_find_path(fs, left_path, cpos);
+ if (ret)
+ goto out;
+
+ start = ocfs2_find_subtree_root(left_path, right_path);
+
+ if (ocfs2_rotate_requires_path_adjustment(left_path,
+ insert_cpos)) {
+ /*
+ * We've rotated the tree as much as we
+ * should. The rest is up to
+ * ocfs2_insert_path() to complete, after the
+ * record insertion. We indicate this
+ * situation by returning the left path.
+ *
+ * The reason we don't adjust the records here
+ * before the record insert is that an error
+ * later might break the rule where a parent
+ * record e_cpos will reflect the actual
+ * e_cpos of the 1st nonempty record of the
+ * child list.
+ */
+ *ret_left_path = left_path;
+ goto out_ret_path;
+ }
+
+ ret = ocfs2_rotate_subtree_right(fs, left_path, right_path,
+ start);
+ if (ret)
+ goto out;
+
+ /*
+ * There is no need to re-read the next right path
+ * as we know that it'll be our current left
+ * path. Optimize by copying values instead.
+ */
+ ocfs2_mv_path(right_path, left_path);
+
+ ret = ocfs2_find_cpos_for_left_leaf(right_path, &cpos);
+ if (ret)
+ goto out;
+ }
+
+out:
+ ocfs2_free_path(left_path);
+
+out_ret_path:
return ret;
}
@@ -253,13 +1245,15 @@
* copying all extent records from the dinode into the extent block,
* and then pointing the dinode to the new extent_block.
*/
-static errcode_t shift_tree_depth(struct insert_ctxt *ctxt)
+static errcode_t shift_tree_depth(struct insert_ctxt *ctxt, char **last_eb)
{
errcode_t ret;
- char *buf;
+ char *buf = NULL;
uint64_t blkno;
struct ocfs2_extent_block *eb;
struct ocfs2_extent_list *el;
+ uint32_t new_clusters;
+ int i;
el = &ctxt->di->id2.i_list;
if (el->l_next_free_rec != el->l_count)
@@ -283,66 +1277,520 @@
memcpy(eb->h_list.l_recs, el->l_recs,
sizeof(struct ocfs2_extent_rec) * el->l_count);
+ new_clusters = ocfs2_sum_rightmost_rec(&eb->h_list);
+
el->l_tree_depth++;
memset(el->l_recs, 0,
sizeof(struct ocfs2_extent_rec) * el->l_count);
el->l_recs[0].e_cpos = 0;
el->l_recs[0].e_blkno = blkno;
- el->l_recs[0].e_clusters = ctxt->di->i_clusters;
+ el->l_recs[0].e_clusters = new_clusters;
+ for(i = 1; i < el->l_next_free_rec; i++) {
+ el->l_recs[i].e_cpos = 0;
+ el->l_recs[i].e_clusters = 0;
+ el->l_recs[i].e_blkno = 0;
+ }
el->l_next_free_rec = 1;
if (el->l_tree_depth == 1)
ctxt->di->i_last_eb_blk = blkno;
ret = ocfs2_write_extent_block(ctxt->fs, blkno, buf);
+ if (!ret)
+ *last_eb = buf;
out:
- ocfs2_free(&buf);
+ if (buf && !*last_eb)
+ ocfs2_free(&buf);
return ret;
}
+static void ocfs2_figure_contig_type(ocfs2_filesys *fs,
+ struct ocfs2_insert_type *insert,
+ struct ocfs2_extent_list *el,
+ struct ocfs2_extent_rec *insert_rec)
+{
+ int i;
+ enum ocfs2_contig_type contig_type = CONTIG_NONE;
+
+ for(i = 0; i < el->l_next_free_rec; i++) {
+ contig_type = ocfs2_extent_contig(fs, &el->l_recs[i],
+ insert_rec);
+ if (contig_type != CONTIG_NONE) {
+ insert->ins_contig_index = i;
+ break;
+ }
+ }
+ insert->ins_contig = contig_type;
+}
+
/*
- * Takes a new contiguous extend, defined by (blkno, clusters), and
- * inserts it into the tree of dinode ino. This follows the driver's
- * allocation pattern. It tries to insert on the existing tree, and
- * if that tree is completely full, then shifts the tree depth.
+ * This should only be called against the rightmost leaf extent list.
+ *
+ * ocfs2_figure_appending_type() will figure out whether we'll have to
+ * insert at the tail of the rightmost leaf.
+ *
+ * This should also work against the dinode list for trees with 0
+ * depth. If we consider the dinode list to be the rightmost leaf node
+ * then the logic here makes sense.
*/
-errcode_t ocfs2_insert_extent(ocfs2_filesys *fs, uint64_t ino,
+static void ocfs2_figure_appending_type(struct ocfs2_insert_type *insert,
+ struct ocfs2_extent_list *el,
+ struct ocfs2_extent_rec *insert_rec)
+{
+ int i;
+ uint32_t cpos = insert_rec->e_cpos;
+ struct ocfs2_extent_rec *rec;
+
+ insert->ins_appending = APPEND_NONE;
+
+ assert(!el->l_tree_depth);
+
+ if (!el->l_next_free_rec)
+ goto set_tail_append;
+
+ if (ocfs2_is_empty_extent(&el->l_recs[0])) {
+ /* Were all records empty? */
+ if (el->l_next_free_rec == 1)
+ goto set_tail_append;
+ }
+
+ i = el->l_next_free_rec - 1;
+ rec = &el->l_recs[i];
+
+ if (cpos >= (rec->e_cpos + rec->e_clusters))
+ goto set_tail_append;
+
+ return;
+
+set_tail_append:
+ insert->ins_appending = APPEND_TAIL;
+}
+
+/*
+ * Helper function called at the beginning of an insert.
+ *
+ * This computes a few things that are commonly used in the process of
+ * inserting into the btree:
+ * - Whether the new extent is contiguous with an existing one.
+ * - The current tree depth.
+ * - Whether the insert is an appending one.
+ * - The total # of free records in the tree.
+ *
+ * All of the information is stored on the ocfs2_insert_type
+ * structure.
+ */
+static int ocfs2_figure_insert_type(struct insert_ctxt *ctxt,
+ char **last_eb_buf,
+ struct ocfs2_insert_type *insert)
+{
+ int ret;
+ struct ocfs2_extent_block *eb;
+ struct ocfs2_extent_list *el;
+ struct ocfs2_dinode *di = ctxt->di;
+ struct ocfs2_extent_rec *insert_rec = &ctxt->rec;
+ ocfs2_filesys *fs = ctxt->fs;
+ struct ocfs2_path *path = NULL;
+ char *buf = NULL;
+
+ el = &di->id2.i_list;
+ insert->ins_tree_depth = el->l_tree_depth;
+
+ if (el->l_tree_depth) {
+ /*
+ * If we have tree depth, we read in the
+ * rightmost extent block ahead of time as
+ * ocfs2_figure_insert_type() and ocfs2_add_branch()
+ * may want it later.
+ */
+ ret = ocfs2_malloc_block(fs->fs_io, &buf);
+ if (ret)
+ return ret;
+
+ ret = ocfs2_read_extent_block(fs, di->i_last_eb_blk, buf);
+ if (ret)
+ goto out;
+
+ eb = (struct ocfs2_extent_block *) buf;
+ el = &eb->h_list;
+ }
+ /*
+ * Unless we have a contiguous insert, we'll need to know if
+ * there is room left in our allocation tree for another
+ * extent record.
+ *
+ * XXX: This test is simplistic, we can search for empty
+ * extent records too.
+ */
+ insert->ins_free_records = el->l_count - el->l_next_free_rec;
+
+ if (!insert->ins_tree_depth) {
+ insert->ins_free_records = el->l_count - el->l_next_free_rec;
+ ocfs2_figure_contig_type(fs, insert, el, insert_rec);
+ ocfs2_figure_appending_type(insert, el, insert_rec);
+ return 0;
+ }
+
+ path = ocfs2_new_inode_path(fs, di);
+ if (!path) {
+ ret = OCFS2_ET_NO_MEMORY;
+ goto out;
+ }
+ /*
+ * In the case that we're inserting past what the tree
+ * currently accounts for, ocfs2_find_path() will return for
+ * us the rightmost tree path. This is accounted for below in
+ * the appending code.
+ */
+ ret = ocfs2_find_path(fs, path, insert_rec->e_cpos);
+ if (ret)
+ goto out;
+
+ el = path_leaf_el(path);
+
+ /*
+ * Now that we have the path, there are two things we want to determine:
+ * 1) Contiguousness (also set contig_index if this is so)
+ *
+ * 2) Are we doing an append? We can trivially break this up
+ * into two types of appends: simple record append, or a
+ * rotate inside the tail leaf.
+ */
+ ocfs2_figure_contig_type(fs, insert, el, insert_rec);
+
+ /*
+ * Ok, so we can simply compare against last_eb to figure out
+ * whether the path doesn't exist. This will only happen in
+ * the case that we're doing a tail append, so maybe we can
+ * take advantage of that information somehow.
+ */
+ if (di->i_last_eb_blk == path_leaf_blkno(path)) {
+ /*
+ * Ok, ocfs2_find_path() returned us the rightmost
+ * tree path. This might be an appending insert. There are
+ * two cases:
+ * 1) We're doing a true append at the tail:
+ * -This might even be off the end of the leaf
+ * 2) We're "appending" by rotating in the tail
+ */
+ ocfs2_figure_appending_type(insert, el, insert_rec);
+ }
+
+out:
+ ocfs2_free_path(path);
+
+ if (ret == 0)
+ *last_eb_buf = buf;
+ else if (buf)
+ ocfs2_free(&buf);
+ return ret;
+}
+
+/*
+ * Do the final bits of extent record insertion at the target leaf
+ * list. If this leaf is part of an allocation tree, it is assumed
+ * that the tree above has been prepared.
+ */
+static void ocfs2_insert_at_leaf(struct ocfs2_extent_rec *insert_rec,
+ struct ocfs2_extent_list *el,
+ struct ocfs2_insert_type *insert)
+{
+ int i = insert->ins_contig_index;
+ unsigned int range;
+ struct ocfs2_extent_rec *rec;
+
+ assert(!el->l_tree_depth);
+
+ /*
+ * Contiguous insert - either left or right.
+ */
+ if (insert->ins_contig != CONTIG_NONE) {
+ rec = &el->l_recs[i];
+ if (insert->ins_contig == CONTIG_LEFT) {
+ rec->e_blkno = insert_rec->e_blkno;
+ rec->e_cpos = insert_rec->e_cpos;
+ }
+ rec->e_clusters += insert_rec->e_clusters;
+ return;
+ }
+
+ /*
+ * Handle insert into an empty leaf.
+ */
+ if (el->l_next_free_rec == 0 ||
+ (el->l_next_free_rec == 1 &&
+ ocfs2_is_empty_extent(&el->l_recs[0]))) {
+ el->l_recs[0] = *insert_rec;
+ el->l_next_free_rec = 1;
+ return;
+ }
+
+ /*
+ * Appending insert.
+ */
+ if (insert->ins_appending == APPEND_TAIL) {
+ i = el->l_next_free_rec - 1;
+ rec = &el->l_recs[i];
+ range = rec->e_cpos + rec->e_clusters;
+ assert(insert_rec->e_cpos >= range);
+
+ i++;
+ el->l_recs[i] = *insert_rec;
+ el->l_next_free_rec += 1;
+ return;
+ }
+
+ /*
+ * Ok, we have to rotate.
+ *
+ * At this point, it is safe to assume that inserting into an
+ * empty leaf and appending to a leaf have both been handled
+ * above.
+ *
+ * This leaf needs to have space, either by the empty 1st
+ * extent record, or by virtue of an l_next_free_rec < l_count.
+ */
+ ocfs2_rotate_leaf(el, insert_rec);
+}
+
+static int ocfs2_append_rec_to_path(struct ocfs2_path *path,
+ struct ocfs2_extent_rec *insert_rec)
+{
+ int ret, i, next_free;
+ struct ocfs2_extent_list *el;
+
+ el = path_root_el(path);
+ i = 0;
+ while (1) {
+ next_free = el->l_next_free_rec;
+ if (next_free == 0) {
+ ret = OCFS2_ET_CORRUPT_EXTENT_BLOCK;
+ goto out;
+ }
+
+ el->l_recs[next_free - 1].e_clusters = insert_rec->e_cpos;
+ el->l_recs[next_free - 1].e_clusters += insert_rec->e_clusters;
+ el->l_recs[next_free - 1].e_clusters -=
+ el->l_recs[next_free - 1].e_cpos;
+
+ if (++i >= path->p_tree_depth)
+ break;
+
+ el = path->p_node[i].el;
+ }
+
+ ret = 0;
+out:
+ return ret;
+}
+
+/*
+ * This function only does inserts on an allocation b-tree. For dinode
+ * lists, ocfs2_insert_at_leaf() is called directly.
+ *
+ * right_path is the path we want to do the actual insert
+ * in. left_path should only be passed in if we need to update that
+ * portion of the tree after a rotation.
+ */
+static int ocfs2_insert_path(struct insert_ctxt* ctxt,
+ struct ocfs2_path *left_path,
+ struct ocfs2_path *right_path,
+ struct ocfs2_extent_rec *insert_rec,
+ struct ocfs2_insert_type *insert)
+{
+ int ret, subtree_index;
+ struct ocfs2_extent_list *el;
+
+ /*
+ * At this point, only appends that are not left contig
+ * require an update of the full tree path.
+ */
+ if (insert->ins_appending == APPEND_NONE
+ || insert->ins_contig == CONTIG_LEFT)
+ goto out_update_leaf;
+
+ /*
+ * We should only get a left path for some types of rotations.
+ */
+ assert(!left_path);
+
+ ret = ocfs2_append_rec_to_path(right_path, insert_rec);
+ if (ret)
+ goto out;
+
+out_update_leaf:
+ el = path_leaf_el(right_path);
+
+ ocfs2_insert_at_leaf(insert_rec, el, insert);
+
+ if (left_path) {
+ /*
+ * The rotate code has indicated that we need to fix
+ * up portions of the tree after the insert.
+ */
+ subtree_index = ocfs2_find_subtree_root(left_path, right_path);
+ ocfs2_complete_rotation(ctxt->fs, left_path,
+ right_path, subtree_index);
+ } else
+ subtree_index = 0;
+
+ ret = ocfs2_sync_path_to_disk(ctxt->fs, left_path,
+ right_path, subtree_index);
+ if (ret)
+ goto out;
+
+ ret = 0;
+out:
+ return ret;
+}
+
+static int ocfs2_do_insert_extent(struct insert_ctxt* ctxt,
+ struct ocfs2_insert_type *type)
+{
+ int ret, rotate = 0;
+ uint32_t cpos;
+ struct ocfs2_path *right_path = NULL;
+ struct ocfs2_path *left_path = NULL;
+ struct ocfs2_extent_rec *insert_rec = &ctxt->rec;
+ ocfs2_filesys *fs = ctxt->fs;
+ struct ocfs2_dinode *di = ctxt->di;
+ struct ocfs2_extent_list *el = &di->id2.i_list;
+
+ if (el->l_tree_depth == 0) {
+ ocfs2_insert_at_leaf(insert_rec, el, type);
+ goto out_update_clusters;
+ }
+
+ right_path = ocfs2_new_inode_path(fs, di);
+ if (!right_path) {
+ ret = OCFS2_ET_NO_MEMORY;
+ goto out;
+ }
+
+ /*
+ * Determine the path to start with. Rotations need the
+ * rightmost path, everything else can go directly to the
+ * target leaf.
+ */
+ cpos = insert_rec->e_cpos;
+ if (type->ins_appending == APPEND_NONE &&
+ type->ins_contig == CONTIG_NONE) {
+ rotate = 1;
+ cpos = UINT_MAX;
+ }
+
+ ret = ocfs2_find_path(fs, right_path, cpos);
+ if (ret)
+ goto out;
+
+ if (!rotate)
+ goto out_insert;
+
+ ret = ocfs2_rotate_tree_right(fs, insert_rec->e_cpos,
+ &left_path, right_path);
+ if (ret)
+ goto out;
+
+out_insert:
+ ret = ocfs2_insert_path(ctxt, left_path, right_path, insert_rec, type);
+ if (ret)
+ goto out;
+
+out_update_clusters:
+ di->i_clusters += insert_rec->e_clusters;
+ ret = 0;
+
+out:
+ ocfs2_free_path(left_path);
+ ocfs2_free_path(right_path);
+
+ return ret;
+}
+
+/*
+ * Insert an extent into an inode btree.
+ */
+errcode_t ocfs2_insert_extent(ocfs2_filesys *fs, uint64_t ino, uint32_t cpos,
uint64_t c_blkno, uint32_t clusters)
{
errcode_t ret;
+ int shift;
struct insert_ctxt ctxt;
- char *buf;
+ struct ocfs2_insert_type insert = {0, };
+ char *di_buf = NULL, *last_eb = NULL, *eb_buf = NULL;
- ret = ocfs2_malloc_block(fs->fs_io, &buf);
+ ret = ocfs2_malloc_block(fs->fs_io, &di_buf);
if (ret)
return ret;
ctxt.fs = fs;
- ctxt.di = (struct ocfs2_dinode *)buf;
+ ctxt.di = (struct ocfs2_dinode *)di_buf;
- ret = ocfs2_read_inode(fs, ino, buf);
+ ret = ocfs2_read_inode(fs, ino, di_buf);
if (ret)
- goto out_free_buf;
+ goto bail;
- ctxt.rec.e_cpos = ctxt.di->i_clusters;
+ ctxt.rec.e_cpos = cpos;
ctxt.rec.e_blkno = c_blkno;
ctxt.rec.e_clusters = clusters;
- ret = insert_extent_el(&ctxt, &ctxt.di->id2.i_list);
- if (ret == OCFS2_ET_NO_SPACE) {
- ret = shift_tree_depth(&ctxt);
- if (!ret)
- ret = insert_extent_el(&ctxt,
- &ctxt.di->id2.i_list);
+
+ ret = ocfs2_figure_insert_type(&ctxt,&last_eb, &insert);
+ if (ret)
+ goto bail;
+
+ /*
+ * Avoid growing the tree unless we're out of records and the
+ * insert type requires one.
+ */
+ if (insert.ins_contig != CONTIG_NONE || insert.ins_free_records)
+ goto out_add;
+
+ shift = ocfs2_find_branch_target(fs, ctxt.di, &eb_buf);
+ if (shift < 0) {
+ ret = shift;
+ goto bail;
}
- if (!ret) {
- ctxt.di->i_clusters += clusters;
- ret = ocfs2_write_inode(fs, ino, buf);
+
+ /* We traveled all the way to the bottom of the allocation tree
+ * and didn't find room for any more extents - we need to add
+ * another tree level */
+ if (shift) {
+
+ /* shift_tree_depth will return us a buffer with
+ * the new extent block (so we can pass that to
+ * ocfs2_add_branch). */
+ ret = shift_tree_depth(&ctxt, &eb_buf);
+ if (ret)
+ goto bail;
+
+ insert.ins_tree_depth++;
+
+ if (insert.ins_tree_depth == 1)
+ goto out_add;
}
-out_free_buf:
- ocfs2_free(&buf);
+ /* call ocfs2_add_branch to add the final part of the tree with
+ * the new data. */
+ ret = ocfs2_add_branch(ctxt.fs, ctxt.di, eb_buf, last_eb);
+ if (ret)
+ goto bail;
+out_add:
+ /* Finally, we can add clusters. This might rotate the tree for us. */
+ ret = ocfs2_do_insert_extent(&ctxt, &insert);
+ if (ret)
+ goto bail;
+
+ ret = ocfs2_write_inode(fs, ino, di_buf);
+
+bail:
+ if (eb_buf)
+ ocfs2_free(&eb_buf);
+ if (last_eb)
+ ocfs2_free(&last_eb);
+ if (di_buf)
+ ocfs2_free(&di_buf);
+
return ret;
}
@@ -350,34 +1798,84 @@
uint32_t new_clusters)
{
errcode_t ret = 0;
- uint32_t n_clusters = 0;
- uint64_t blkno;
+ uint32_t n_clusters = 0, cpos;
+ uint64_t blkno, file_size;
+ char *buf = NULL;
+ struct ocfs2_dinode* di = NULL;
if (!(fs->fs_flags & OCFS2_FLAG_RW))
return OCFS2_ET_RO_FILESYS;
+ ret = ocfs2_malloc_block(fs->fs_io, &buf);
+ if (ret)
+ goto out_free_buf;
+
+ ret = ocfs2_read_inode(fs, ino, buf);
+ if (ret)
+ goto out_free_buf;
+
+ di = (struct ocfs2_dinode *)buf;
+
+ file_size = di->i_size;
+ cpos = (file_size + fs->fs_clustersize - 1) / fs->fs_clustersize;
while (new_clusters) {
n_clusters = 1;
ret = ocfs2_new_clusters(fs, 1, new_clusters, &blkno,
- &n_clusters);
+ &n_clusters);
if (ret)
break;
- ret = ocfs2_insert_extent(fs, ino, blkno, n_clusters);
+ ret = ocfs2_insert_extent(fs, ino, cpos, blkno, n_clusters);
if (ret) {
/* XXX: We don't wan't to overwrite the error
* from insert_extent(). But we probably need
* to BE LOUDLY UPSET. */
ocfs2_free_clusters(fs, n_clusters, blkno);
- break;
+ goto out_free_buf;
}
new_clusters -= n_clusters;
+ cpos += n_clusters;
}
+out_free_buf:
+ if (buf)
+ ocfs2_free(&buf);
return ret;
}
+errcode_t ocfs2_extend_file(ocfs2_filesys *fs, uint64_t ino, uint64_t new_size)
+{
+ errcode_t ret = 0;
+ char *buf = NULL;
+ struct ocfs2_dinode* di = NULL;
+
+ if (!(fs->fs_flags & OCFS2_FLAG_RW))
+ return OCFS2_ET_RO_FILESYS;
+ ret = ocfs2_malloc_block(fs->fs_io, &buf);
+ if (ret)
+ return ret;
+
+ ret = ocfs2_read_inode(fs, ino, buf);
+ if (ret)
+ goto out_free_buf;
+
+ di = (struct ocfs2_dinode *)buf;
+ if (di->i_size >= new_size) {
+ ret = EINVAL;
+ goto out_free_buf;
+ }
+
+ di->i_size = new_size;
+
+ ret = ocfs2_write_inode(fs, ino, buf);
+
+out_free_buf:
+ if (buf)
+ ocfs2_free(&buf);
+ return ret;
+}
+
#ifdef DEBUG_EXE
#include <stdio.h>
#include <stdlib.h>
Modified: branches/sparse-files/libocfs2/extent_map.c
===================================================================
--- branches/sparse-files/libocfs2/extent_map.c 2007-03-16 23:57:45 UTC (rev 1318)
+++ branches/sparse-files/libocfs2/extent_map.c 2007-03-19 05:47:26 UTC (rev 1319)
@@ -79,7 +79,9 @@
static errcode_t ocfs2_extent_map_find_leaf(ocfs2_cached_inode *cinode,
uint32_t cpos,
uint32_t clusters,
- struct ocfs2_extent_list *el)
+ struct ocfs2_extent_list *el,
+ uint32_t parent_begin,
+ uint32_t parent_len)
{
errcode_t ret;
int i;
@@ -87,6 +89,7 @@
uint64_t blkno;
struct ocfs2_extent_block *eb;
struct ocfs2_extent_rec *rec;
+ uint32_t leaf_begin = UINT32_MAX, leaf_end = 0;
if (el->l_tree_depth) {
ret = ocfs2_malloc_block(cinode->ci_fs->fs_io, &eb_buf);
@@ -100,9 +103,17 @@
for (i = 0; i < el->l_next_free_rec; i++) {
rec = &el->l_recs[i];
+ /* In an unsuccessful insertion, we may shift a tree,
+ * add a new branch for it and do no insertion. So we
+ * may encounter an extent block which has
+ * rec->e_clusters == 0; this should only happen
+ * in the last extent rec. */
+ if (!rec->e_clusters && i == el->l_next_free_rec - 1)
+ break;
+
ret = OCFS2_ET_CORRUPT_EXTENT_BLOCK;
if (rec->e_cpos >=
- cinode->ci_inode->i_clusters)
+ cinode->ci_map->em_clusters)
goto out_free;
if ((rec->e_cpos + rec->e_clusters) <= cpos) {
@@ -138,6 +149,12 @@
if (blkno)
goto out_free;
blkno = rec->e_blkno;
+ /* now the parent is changed, so we need
+ * to change parent_begin and parent_len
+ * accordingly.
+ */
+ parent_begin = rec->e_cpos;
+ parent_len = rec->e_clusters;
}
/*
@@ -162,12 +179,65 @@
for (i = 0; i < el->l_next_free_rec; i++) {
rec = &el->l_recs[i];
+ /* For a sparse file, the leftmost record may be
+ * empty. Just skip it.
+ */
+ if (!i && !rec->e_clusters)
+ continue;
+ if (leaf_begin > rec->e_cpos)
+ leaf_begin = rec->e_cpos;
ret = ocfs2_extent_map_insert(cinode, rec,
el->l_tree_depth);
if (ret)
goto out_free;
+
+ /* insert the hole before this extent rec, if one exists. */
+ if (leaf_end) {
+ if (leaf_end != rec->e_cpos) {
+ struct ocfs2_extent_rec hole = {
+ .e_cpos = leaf_end,
+ .e_clusters = rec->e_cpos -
+ leaf_end,
+ .e_blkno = 0,
+ };
+
+ ret = ocfs2_extent_map_insert(cinode, &hole,
+ el->l_tree_depth);
+ if (ret)
+ goto out_free;
+ }
+ }
+ leaf_end = rec->e_cpos + rec->e_clusters;
}
+ /* insert the hole before the first rec. */
+ if (leaf_begin != UINT32_MAX && leaf_begin > parent_begin) {
+ struct ocfs2_extent_rec hole = {
+ .e_cpos = parent_begin,
+ .e_clusters = leaf_begin - parent_begin,
+ .e_blkno = 0,
+ };
+
+ ret = ocfs2_extent_map_insert(cinode, &hole,
+ el->l_tree_depth);
+ if (ret)
+ goto out_free;
+ }
+
+ /* insert the hole after the last rec. */
+ if (parent_len && leaf_end < parent_begin + parent_len) {
+ struct ocfs2_extent_rec hole = {
+ .e_cpos = leaf_end,
+ .e_clusters = parent_begin + parent_len - leaf_end,
+ .e_blkno = 0,
+ };
+
+ ret = ocfs2_extent_map_insert(cinode, &hole,
+ el->l_tree_depth);
+ if (ret)
+ goto out_free;
+ }
+
ret = 0;
out_free:
@@ -193,6 +263,9 @@
ocfs2_extent_map *em = cinode->ci_map;
struct ocfs2_extent_block *eb;
struct ocfs2_extent_list *el;
+ uint32_t parent_begin, parent_len, last_cluster;
+ uint16_t next_free_rec, tree_depth;
+ struct ocfs2_extent_rec *rec;
ent = ocfs2_extent_map_lookup(em, cpos, clusters, NULL, NULL);
if (ent) {
@@ -216,12 +289,53 @@
eb = (struct ocfs2_extent_block *)eb_buf;
el = &eb->h_list;
- } else
+ parent_begin = ent->e_rec.e_cpos;
+ parent_len = ent->e_rec.e_clusters;
+ } else {
el = &cinode->ci_inode->id2.i_list;
+ parent_begin = 0;
+ parent_len = cinode->ci_map->em_clusters;
+ }
- ret = ocfs2_extent_map_find_leaf(cinode, cpos, clusters, el);
+ ret = ocfs2_extent_map_find_leaf(cinode, cpos, clusters, el,
+ parent_begin, parent_len);
if (eb_buf)
ocfs2_free(&eb_buf);
+
+ /* ocfs2_extent_map_find_leaf handles the insertion of extent map.
+ * Both normal allocated clusters and holes will be inserted during the
+ * extent list iteration, and ocfs2_extent_map_lookup will search the
+ * extent map for the specified entry again.
+ *
+ * But there is one situation that ocfs2_extent_map_find_leaf can't
+ * handle: when a file has tree depth > 1 and we want to get the
+ * clusters which exceed the scope of the last extent block but are within
+ * the file's size. In this case, ocfs2_extent_map_find_leaf can't
+ * find a suitable extent block and returns an error, but this is actually
+ * fine for a sparse file, so we have to handle it here by adding a
+ * hole entry manually.
+ */
+ next_free_rec = cinode->ci_inode->id2.i_list.l_next_free_rec;
+ tree_depth = cinode->ci_inode->id2.i_list.l_tree_depth;
+ if (next_free_rec > 0 && tree_depth > 0) {
+ rec = &cinode->ci_inode->id2.i_list.l_recs[next_free_rec - 1];
+ if (rec->e_cpos + rec->e_clusters <= cpos) {
+ last_cluster = rec->e_cpos + rec->e_clusters;
+ struct ocfs2_extent_rec hole = {
+ .e_cpos = last_cluster,
+ .e_clusters = em->em_clusters - last_cluster,
+ .e_blkno = 0,
+ };
+ /* Since holes should only exist in a leaf, the hole
+ * entry uses 0 as the tree depth.
+ * We also replace "ret" with the new value, since
+ * ocfs2_extent_map_find_leaf will return an error in
+ * this scenario.
+ */
+ ret = ocfs2_extent_map_insert(cinode, &hole, 0);
+ }
+ }
+
if (ret)
return ret;
@@ -385,7 +499,7 @@
if (!em)
return OCFS2_ET_INVALID_ARGUMENT;
- if (cpos >= cinode->ci_inode->i_clusters)
+ if (cpos >= em->em_clusters)
return OCFS2_ET_INVALID_EXTENT_LOOKUP;
ent = ocfs2_extent_map_lookup(em, cpos, 1, NULL, NULL);
@@ -428,7 +542,8 @@
return OCFS2_ET_INVALID_EXTENT_LOOKUP;
coff = v_cpos - ent->e_rec.e_cpos;
- *p_cpos = ocfs2_blocks_to_clusters(fs,
+ if (ent->e_rec.e_blkno)
+ *p_cpos = ocfs2_blocks_to_clusters(fs,
ent->e_rec.e_blkno) +
coff;
@@ -481,7 +596,8 @@
boff = ocfs2_clusters_to_blocks(fs, cpos - rec->e_cpos);
boff += (v_blkno % bpc);
- *p_blkno = rec->e_blkno + boff;
+ if (rec->e_blkno)
+ *p_blkno = rec->e_blkno + boff;
if (ret_count) {
*ret_count = ocfs2_clusters_to_blocks(fs,
@@ -498,13 +614,29 @@
ocfs2_cached_inode *cinode)
{
errcode_t ret;
+ uint64_t blocks;
+ uint32_t clusters;
ret = ocfs2_malloc0(sizeof(struct _ocfs2_extent_map),
&cinode->ci_map);
if (ret)
return ret;
- cinode->ci_map->em_clusters = cinode->ci_inode->i_clusters;
+ /* We may meet two different situations here:
+ * 1. The user calls ocfs2_extend_allocation but doesn't increase the size.
+ * 2. The user calls ocfs2_extend_file but doesn't increase the clusters.
+ * So we have to use the larger one to indicate the cluster range for
+ * an inode.
+ *
+ * Actually, there is still a situation that we can't handle here.
+ * If a user first calls ocfs2_extend_file to increase the size and then
+ * calls ocfs2_extend_allocation to append clusters to the end of the file,
+ * there is no way for us to determine the file's real size.
+ */
+ blocks = ocfs2_blocks_in_bytes(fs, cinode->ci_inode->i_size);
+ clusters = ocfs2_max(cinode->ci_inode->i_clusters,
+ ocfs2_clusters_in_blocks(fs, blocks));
+ cinode->ci_map->em_clusters = clusters;
cinode->ci_map->em_extents = RB_ROOT;
return 0;
@@ -532,7 +664,7 @@
int iret = 0;
struct extent_map_context *ctxt = priv_data;
- if (rec->e_cpos >= ctxt->cinode->ci_inode->i_clusters) {
+ if (rec->e_cpos >= ctxt->cinode->ci_map->em_clusters) {
ctxt->errcode = OCFS2_ET_CORRUPT_EXTENT_BLOCK;
iret |= OCFS2_EXTENT_ABORT;
} else {
@@ -973,7 +1105,7 @@
&blkno,
&contig);
if (ret) {
- com_err(argv[0], ret,
+ com_err(argv[0], ret,
"looking up block range %"PRIu64":%d", blkoff, count);
goto out_free;
}
@@ -988,6 +1120,18 @@
&coff,
&contig);
if (ret) {
+ com_err(argv[0], ret,
+ "looking up cluster range %"PRIu32":%d", cpos, count);
+ goto out_free;
+ }
+ fprintf(stdout, "Lookup of cluster range %"PRIu32":%d returned %"PRIu32":%d\n",
+ cpos, count, coff, contig);
+ ret = ocfs2_extent_map_get_clusters(cinode,
+ 11,
+ count,
+ &coff,
+ &contig);
+ if (ret) {
com_err(argv[0], ret,
"looking up cluster range %"PRIu32":%d", cpos, count);
goto out_free;
Modified: branches/sparse-files/libocfs2/extents.c
===================================================================
--- branches/sparse-files/libocfs2/extents.c 2007-03-16 23:57:45 UTC (rev 1318)
+++ branches/sparse-files/libocfs2/extents.c 2007-03-19 05:47:26 UTC (rev 1319)
@@ -261,6 +261,11 @@
}
} else {
+ /* For a sparse file, the leftmost record may be
+ * empty. Just skip it.
+ */
+ if (!i && !el->l_recs[i].e_clusters)
+ continue;
iret |= (*ctxt->func)(ctxt->fs, &el->l_recs[i],
el->l_tree_depth,
ctxt->ccount, ref_blkno,
@@ -508,7 +513,7 @@
uint64_t blkno, bcount, bend;
int iret = 0;
- bcount = ocfs2_clusters_to_blocks(fs, ccount);
+ bcount = ocfs2_clusters_to_blocks(fs, rec->e_cpos);
bend = bcount + ocfs2_clusters_to_blocks(fs, rec->e_clusters);
for (blkno = rec->e_blkno; bcount < bend; blkno++, bcount++) {
Modified: branches/sparse-files/libocfs2/fileio.c
===================================================================
--- branches/sparse-files/libocfs2/fileio.c 2007-03-16 23:57:45 UTC (rev 1318)
+++ branches/sparse-files/libocfs2/fileio.c 2007-03-19 05:47:26 UTC (rev 1319)
@@ -128,7 +128,7 @@
errcode_t ret = 0;
char *ptr = (char *) buf;
uint32_t wanted_blocks;
- uint32_t contig_blocks;
+ int contig_blocks;
uint64_t v_blkno;
uint64_t p_blkno;
uint32_t tmp;
@@ -162,9 +162,15 @@
if (contig_blocks > wanted_blocks)
contig_blocks = wanted_blocks;
- ret = io_read_block(fs->fs_io, p_blkno, contig_blocks, ptr);
- if (ret)
- return ret;
+ if (!p_blkno) {
+ /* We hit a hole; just zero-fill the buffer. */
+ memset(ptr, 0, contig_blocks * fs->fs_blocksize);
+ } else {
+ ret = io_read_block(fs->fs_io, p_blkno,
+ contig_blocks, ptr);
+ if (ret)
+ return ret;
+ }
*got += (contig_blocks <<
OCFS2_RAW_SB(fs->fs_super)->s_blocksize_bits);
@@ -191,12 +197,14 @@
errcode_t ret = 0;
char *ptr = (char *) buf;
uint32_t wanted_blocks;
- uint32_t contig_blocks;
+ int contig_blocks;
uint64_t v_blkno;
uint64_t p_blkno;
uint32_t tmp;
uint64_t num_blocks;
int bs_bits = OCFS2_RAW_SB(fs->fs_super)->s_blocksize_bits;
+ uint64_t ino = ci->ci_blkno;
+ uint32_t n_clusters, cluster_begin, cluster_end;
/* o_direct requires aligned io */
tmp = fs->fs_blocksize - 1;
@@ -225,6 +233,41 @@
if (contig_blocks > wanted_blocks)
contig_blocks = wanted_blocks;
+ if (!p_blkno) {
+ cluster_begin = ocfs2_blocks_to_clusters(fs, v_blkno);
+ cluster_end = ocfs2_blocks_to_clusters(fs,
+ v_blkno + contig_blocks -1);
+ n_clusters = cluster_end - cluster_begin + 1;
+ ret = ocfs2_new_clusters(fs, 1, n_clusters, &p_blkno,
+ &n_clusters);
+ if (ret)
+ return ret;
+
+ ret = ocfs2_insert_extent(fs, ci->ci_blkno,
+ ocfs2_blocks_to_clusters(fs,v_blkno),
+ p_blkno, n_clusters);
+ if (ret) {
+ /* XXX: We don't want to overwrite the error
+ * from insert_extent(). But we probably need
+ * to BE LOUDLY UPSET. */
+ ocfs2_free_clusters(fs, n_clusters, p_blkno);
+ return ret;
+ }
+
+ /* since the extent information has been changed, we
+ * may need to reinitialize it. */
+ ocfs2_extent_map_free(ci);
+ ocfs2_free_cached_inode(fs, ci);
+ ret = ocfs2_read_cached_inode(fs,ino, &ci);
+ ocfs2_extent_map_init(fs,ci);
+ ret = ocfs2_extent_map_get_blocks(ci, v_blkno, 1,
+ &p_blkno, &contig_blocks);
+ if (ret)
+ return ret;
+ if (contig_blocks > wanted_blocks)
+ contig_blocks = wanted_blocks;
+ }
+
ret = io_write_block(fs->fs_io, p_blkno, contig_blocks, ptr);
if (ret)
return ret;
@@ -432,4 +475,3 @@
return 0;
}
#endif /* DEBUG_EXE */
-
Modified: branches/sparse-files/libocfs2/include/ocfs2.h
===================================================================
--- branches/sparse-files/libocfs2/include/ocfs2.h 2007-03-16 23:57:45 UTC (rev 1318)
+++ branches/sparse-files/libocfs2/include/ocfs2.h 2007-03-19 05:47:26 UTC (rev 1319)
@@ -538,7 +538,7 @@
errcode_t ocfs2_new_dir_block(ocfs2_filesys *fs, uint64_t dir_ino,
uint64_t parent_ino, char **block);
-errcode_t ocfs2_insert_extent(ocfs2_filesys *fs, uint64_t ino,
+errcode_t ocfs2_insert_extent(ocfs2_filesys *fs, uint64_t ino, uint32_t cpos,
uint64_t c_blkno, uint32_t clusters);
errcode_t ocfs2_extent_allocation(ocfs2_filesys *fs, uint64_t ino,
uint32_t new_clusters);
@@ -548,8 +548,14 @@
errcode_t ocfs2_delete_inode(ocfs2_filesys *fs, uint64_t ino);
errcode_t ocfs2_new_extent_block(ocfs2_filesys *fs, uint64_t *blkno);
errcode_t ocfs2_delete_extent_block(ocfs2_filesys *fs, uint64_t blkno);
+/*
+ * Allocate the blocks and insert them into the file.
+ * Only i_clusters of the dinode will be updated accordingly; i_size is not changed.
+ */
errcode_t ocfs2_extend_allocation(ocfs2_filesys *fs, uint64_t ino,
uint32_t new_clusters);
+/* Extend the file to the new size. No clusters will be allocated. */
+errcode_t ocfs2_extend_file(ocfs2_filesys *fs, uint64_t ino, uint64_t new_size);
errcode_t ocfs2_truncate(ocfs2_filesys *fs, uint64_t ino, uint64_t new_i_size);
errcode_t ocfs2_new_clusters(ocfs2_filesys *fs,
uint32_t min,
Modified: branches/sparse-files/libocfs2/include/ocfs2_fs.h
===================================================================
--- branches/sparse-files/libocfs2/include/ocfs2_fs.h 2007-03-16 23:57:45 UTC (rev 1318)
+++ branches/sparse-files/libocfs2/include/ocfs2_fs.h 2007-03-19 05:47:26 UTC (rev 1319)
@@ -86,7 +86,8 @@
OCFS2_SB(sb)->s_feature_incompat &= ~(mask)
#define OCFS2_FEATURE_COMPAT_SUPP OCFS2_FEATURE_COMPAT_BACKUP_SB
-#define OCFS2_FEATURE_INCOMPAT_SUPP OCFS2_FEATURE_INCOMPAT_LOCAL_MOUNT
+#define OCFS2_FEATURE_INCOMPAT_SUPP (OCFS2_FEATURE_INCOMPAT_SPARSE_ALLOC \
+ | OCFS2_FEATURE_INCOMPAT_LOCAL_MOUNT)
#define OCFS2_FEATURE_RO_COMPAT_SUPP 0
/*
@@ -311,7 +312,10 @@
/*00*/ __le16 l_tree_depth; /* Extent tree depth from this
point. 0 means data extents
hang directly off this
- header (a leaf) */
+ header (a leaf)
+ NOTE: The high 8 bits cannot be
+ used - tree_depth is never that big.
+ */
__le16 l_count; /* Number of extent records */
__le16 l_next_free_rec; /* Next unused extent slot */
__le16 l_reserved1;
Modified: branches/sparse-files/libocfs2/truncate.c
===================================================================
--- branches/sparse-files/libocfs2/truncate.c 2007-03-16 23:57:45 UTC (rev 1318)
+++ branches/sparse-files/libocfs2/truncate.c 2007-03-19 05:47:26 UTC (rev 1319)
@@ -94,8 +94,8 @@
errcode_t ret;
char *buf;
struct ocfs2_dinode *di;
- uint32_t new_i_clusters;
- uint64_t new_i_blocks;
+ uint32_t new_i_clusters, old_i_clusters;
+ uint64_t new_i_blocks, old_i_blocks;
ret = ocfs2_malloc_block(fs->fs_io, &buf);
if (ret)
@@ -111,10 +111,11 @@
new_i_blocks = ocfs2_blocks_in_bytes(fs, new_i_size);
new_i_clusters = ocfs2_clusters_in_blocks(fs, new_i_blocks);
+ old_i_blocks = ocfs2_blocks_in_bytes(fs, di->i_size);
+ old_i_clusters = ocfs2_clusters_in_blocks(fs, old_i_blocks);
- if (di->i_clusters < new_i_clusters) {
- ret = ocfs2_extend_allocation(fs, ino,
- new_i_clusters - di->i_clusters);
+ if (old_i_clusters < new_i_clusters) {
+ ret = ocfs2_extend_file(fs, ino, new_i_size);
if (ret)
goto out;
Modified: branches/sparse-files/mkfs.ocfs2/mkfs.c
===================================================================
--- branches/sparse-files/mkfs.ocfs2/mkfs.c 2007-03-16 23:57:45 UTC (rev 1318)
+++ branches/sparse-files/mkfs.ocfs2/mkfs.c 2007-03-19 05:47:26 UTC (rev 1319)
@@ -1750,7 +1750,7 @@
di->id2.i_super.s_max_slots = s->initial_slots;
di->id2.i_super.s_first_cluster_group = s->first_cluster_group_blkno;
- incompat = 0;
+ incompat = OCFS2_FEATURE_INCOMPAT_SPARSE_ALLOC;
if (s->hb_dev)
incompat |= OCFS2_FEATURE_INCOMPAT_HEARTBEAT_DEV;
More information about the Ocfs2-tools-commits
mailing list