[Ocfs2-tools-devel] [RFC 2/8] dx_dirs v4: indexed dirs core code in libocfs2
Tao Ma
tao.ma at oracle.com
Fri Jan 22 00:21:45 PST 2010
Coly Li wrote:
> This patch contains the core part of current indexed dirs support in libocfs2, includes,
> - Indexed tree truncate.
> - Build indexed tree.
> - Insert dx record into indexed tree.
> - Expand inlined dx_root to an extent tree.
> - Rebalance indexed tree.
>
> I worry about whether I use the extent tree code correctly, though I tried my best to understand the code, IMHO it's
> still a gray area to me so far. Any review/comment is helpful !
>
> Signed-off-by: Coly Li <coly.li at suse.de>
> Cc: Tao Ma <tao.ma at oracle.com>
> ---
> Makefile | 3
> dir_indexed.c | 1117 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
> dir_indexed.h | 25 +
> inode.c | 4
> 4 files changed, 1148 insertions(+), 1 deletion(-)
>
> diff --git a/libocfs2/Makefile b/libocfs2/Makefile
> index 8e94cc3..6d3b563 100644
> --- a/libocfs2/Makefile
> +++ b/libocfs2/Makefile
> @@ -76,7 +76,8 @@ CFILES = \
> quota.c \
> image.c \
> xattr.c \
> - extent_tree.c
> + extent_tree.c \
> + dir_indexed.c
>
> HFILES = \
> bitmap.h \
> diff --git a/libocfs2/inode.c b/libocfs2/inode.c
> index c1a8646..b41b286 100644
> --- a/libocfs2/inode.c
> +++ b/libocfs2/inode.c
> @@ -139,6 +139,10 @@ static void ocfs2_swap_inode_second(struct ocfs2_dinode *di)
> sb->s_uuid_hash = bswap_32(sb->s_uuid_hash);
> sb->s_first_cluster_group = bswap_64(sb->s_first_cluster_group);
> sb->s_xattr_inline_size = bswap_16(sb->s_xattr_inline_size);
> + sb->s_dx_seed[0] = bswap_32(sb->s_dx_seed[0]);
> + sb->s_dx_seed[1] = bswap_32(sb->s_dx_seed[1]);
> + sb->s_dx_seed[2] = bswap_32(sb->s_dx_seed[2]);
> + sb->s_dx_seed[3] = bswap_32(sb->s_dx_seed[3]);
I just noticed that in ocfs2_super_block we define
__le32 s_dx_seed[3];
So swap s_dx_seed[3] is wrong?
>
> } else if (di->i_flags & OCFS2_LOCAL_ALLOC_FL) {
> struct ocfs2_local_alloc *la = &di->id2.i_lab;
>
> diff --git a/libocfs2/dir_indexed.c b/libocfs2/dir_indexed.c
> new file mode 100644
> index 0000000..2d1d602
> --- /dev/null
> +++ b/libocfs2/dir_indexed.c
> @@ -0,0 +1,1117 @@
> +/* -*- mode: c; c-basic-offset: 8; -*-
> + * vim: noexpandtab sw=8 ts=8 sts=0:
> + *
> + * Copyright (C) 2009 Novell. All rights reserved.
> + *
> + * This program is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU General Public
> + * License, version 2, as published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + * General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public
> + * License along with this program; if not, write to the
> + * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
> + * Boston, MA 021110-1307, USA.
Please use the right information.
See http://oss.oracle.com/pipermail/ocfs2-devel/2010-January/005711.html.
> + */
> +#include <assert.h>
> +#include <ocfs2/ocfs2.h>
> +#include <ocfs2/bitops.h>
> +#include <ocfs2/kernel-rbtree.h>
> +#include "extent_tree.h"
> +#include "dir_indexed.h"
> +
> +
> +int ocfs2_dx_dir_truncate(ocfs2_filesys *fs,
> + uint64_t dir)
> +{
> + struct ocfs2_dx_root_block *dx_root;
> + char *dx_root_buf = NULL, *di_buf = NULL;
> + struct ocfs2_dinode *di;
> + int ret = 0;
> +
> + ret = ocfs2_malloc_block(fs->fs_io, &di_buf);
> + if (ret)
> + goto out;
> + ret = ocfs2_read_inode(fs, dir, di_buf);
> + if (ret)
> + goto out;
> + di = (struct ocfs2_dinode *)di_buf;
> +
> + /* we have to trust i_dyn_features */
> + if ((!ocfs2_dir_indexed(di)) ||
> + (di->i_dyn_features & OCFS2_INLINE_DATA_FL))
> + goto out;
Do we need to check S_ISDIR or only dir inode can call this function?
> +
> + ret = ocfs2_malloc_block(fs->fs_io, &dx_root_buf);
> + if (ret)
> + goto out;
> + ret = ocfs2_read_dx_root(fs, (uint64_t)di->i_dx_root, dx_root_buf);
> + if (ret)
> + goto out;
> + dx_root = (struct ocfs2_dx_root_block *)dx_root_buf;
> +
> + if (dx_root->dr_flags & OCFS2_DX_FLAG_INLINE)
> + goto remove_index;
> +
> + ret = ocfs2_dir_indexed_tree_truncate(fs, dx_root);
> +
> +remove_index:
> + ret = ocfs2_delete_dx_root(fs, dx_root->dr_blkno);
> +
> + di->i_dyn_features &= ~OCFS2_INDEXED_DIR_FL;
> + di->i_dx_root = 0;
> +
> + ret = ocfs2_write_inode(fs, di->i_blkno, (char *)di);
> +out:
> + if (di_buf)
> + ocfs2_free(&di_buf);
> + if (dx_root_buf)
> + ocfs2_free(&dx_root_buf);
> + return ret;
> +}
> +
> +static unsigned int ocfs2_figure_dirent_hole(struct ocfs2_dir_entry *de)
> +{
> + unsigned int hole;
> +
> + if (de->inode == 0)
> + hole = de->rec_len;
> + else
> + hole = de->rec_len - OCFS2_DIR_REC_LEN(de->name_len);
> +
> + return hole;
> +}
> +
> +static int ocfs2_find_max_rec_len(ocfs2_filesys *fs,
> + char *buf)
> +{
> + int size, this_hole, largest_hole = 0;
> + char *de_buf, *limit, *start;
> + struct ocfs2_dir_block_trailer *trailer;
> + struct ocfs2_dir_entry *de;
> +
> + start = buf;
> + trailer = ocfs2_dir_trailer_from_block(fs, buf);
> + size = ocfs2_dir_trailer_blk_off(fs);
> + limit = start + size;
> + de_buf = start;
> + de = (struct ocfs2_dir_entry *)de_buf;
> + do {
> + if (de_buf != (char *)trailer) {
does this check redundant? if de_buf== trailer, we should already exit
since de_buf = limit? btw, "start" is set to "buf" and never changed, I
guess we can remove it.
> + this_hole = ocfs2_figure_dirent_hole(de);
> + if (this_hole > largest_hole)
> + largest_hole = this_hole;
> + }
> +
> + de_buf += de->rec_len;
> + de = (struct ocfs2_dir_entry *)de_buf;
> + } while (de_buf < limit);
> +
> + if (largest_hole >= OCFS2_DIR_MIN_REC_LEN)
> + return largest_hole;
> + return 0;
> +}
> +
> +struct trailer_ctxt {
> + struct ocfs2_dx_root_block *dx_root;
> + struct ocfs2_dinode *di;
> +};
> +
> +
> +static int dir_trailer_func(ocfs2_filesys *fs,
> + uint64_t blkno,
> + uint64_t bcount,
> + uint16_t ext_flags,
> + void *priv_data)
> +{
> + struct trailer_ctxt *ctxt = (struct trailer_ctxt *)priv_data;
> + struct ocfs2_dinode *di = ctxt->di;
> + struct ocfs2_dx_root_block *dx_root = ctxt->dx_root;
> + struct ocfs2_dir_block_trailer *trailer;
> + int max_rec_len = 0;
> + errcode_t ret = 0;
> + char *blk = NULL;
> +
> + ret = ocfs2_malloc_block(fs->fs_io, &blk);
> + if (ret)
> + goto out;
> +
> + /* here we don't trust trailer, cannot use
> + * ocfs2_read_dir_block() */
> + ret = ocfs2_read_blocks(fs, blkno, 1, blk);
> + if (ret)
> + goto out;
> +
> + max_rec_len = ocfs2_find_max_rec_len(fs, blk);
> + trailer = ocfs2_dir_trailer_from_block(fs, blk);
> + trailer->db_free_rec_len = max_rec_len;
> + ocfs2_init_dir_trailer(fs, di, blkno, blk);
sorry for a silly qs. How could you make sure the dir_block has enough
space for adding a trailer? What if the block is already full?
> +
> + if (max_rec_len) {
> + trailer->db_free_next = dx_root->dr_free_blk;
> + dx_root->dr_free_blk = blkno;
> + }
> +
> + /* comput trailer->db_check here, after writes out,
> + * trailer is trustable */
> + ret = ocfs2_write_dir_block(fs, di, blkno, blk);
> +out:
> + if (blk)
> + ocfs2_free(&blk);
> + return ret;
> +}
> +
> +static errcode_t ocfs2_init_dir_trailers(ocfs2_filesys *fs,
> + struct ocfs2_dinode *di,
> + struct ocfs2_dx_root_block *dx_root)
> +{
> + errcode_t ret = 0;
> + struct trailer_ctxt ctxt;
> +
> + if (di->i_dyn_features & OCFS2_INLINE_DATA_FL) {
> + ret = OCFS2_ET_INODE_NOT_VALID;
> + goto out;
why not return 0 here since actually there is no error?
> + }
> +
> + ctxt.di = di;
> + ctxt.dx_root = dx_root;
> +
> + ret = ocfs2_block_iterate_inode(fs, di,
> + 0, dir_trailer_func, &ctxt);
> +out:
> + return ret;
> +}
> +
> +static void ocfs2_dx_entry_list_insert(struct ocfs2_dx_entry_list *entry_list,
> + struct ocfs2_dx_hinfo *hinfo,
> + uint64_t dirent_blk)
> +{
> + int i;
> + struct ocfs2_dx_entry *dx_entry;
> +
> + i = entry_list->de_num_used;
> + dx_entry = &entry_list->de_entries[i];
> +
> + memset(dx_entry, 0, sizeof(struct ocfs2_dx_entry));
> + dx_entry->dx_major_hash = hinfo->major_hash;
> + dx_entry->dx_minor_hash = hinfo->minor_hash;
> + dx_entry->dx_dirent_blk = dirent_blk;
> +
> + entry_list->de_num_used += 1;
> +}
> +
> +struct dx_insert_ctxt {
> + uint64_t dir_blkno;
> + uint64_t dx_root_blkno;
> + ocfs2_filesys *fs;
> +};
> +
> +
> +inline static int ocfs2_inline_dx_has_space(struct ocfs2_dx_root_block *dx_root)
> +{
> + struct ocfs2_dx_entry_list *entry_list;
> +
> + entry_list = &dx_root->dr_entries;
> +
> + if (entry_list->de_num_used >= entry_list->de_count)
> + return OCFS2_ET_DIR_NO_SPACE;
hey, the below you check ocfs2_inline_dx_has_space with 0, here neither
of the 2 returns have the value 0.
> +
> + return 1;
> +}
> +
> +static struct ocfs2_dx_leaf **ocfs2_dx_dir_alloc_leaves(ocfs2_filesys *fs,
> + int *ret_num_leaves)
> +{
> + int ret, num_dx_leaves = ocfs2_clusters_to_blocks(fs, 1);
> + char *dx_leaves_buf = NULL;
> +
> +
> + ret = ocfs2_malloc0(num_dx_leaves * sizeof(void *), &dx_leaves_buf);
> + if (dx_leaves_buf && ret_num_leaves)
> + *ret_num_leaves = num_dx_leaves;
> +
> + return (struct ocfs2_dx_leaf **)dx_leaves_buf;
&dx_leaves_buf?
I just wonder whether you have written a test case for your patch? These
errors in the patch should be pretty easy to find out.
Regards,
Tao
More information about the Ocfs2-tools-devel
mailing list