[Ocfs2-tools-devel] [PATCH 07/15] dx_dirs v11: more library support for directory indexing

Coly Li coly.li at suse.de
Sun Apr 11 01:10:04 PDT 2010


This patch adds more library support for indexed dirs,
- dx_root alloc/delete
- dx_leaf alloc/delete
- dx_root read/write
- dx_leaf read/write
- indexed tree insert/truncate
- dx_root extent tree operations
- indexed dir support in ocfs2_lookup_system_inode(),
  ocfs2_init_dir(), ocfs2_link(), ocfs2_unlink(),
  ocfs2_lookup().
With this patch, indexed dirs support in fsck.ocfs2 is possible.

Signed-off-by: Coly Li <coly.li at suse.de>
Cc: Mark Fasheh <mfasheh at suse.com>
---
 include/ocfs2-kernel/ocfs2_fs.h |   29 +
 include/ocfs2/ocfs2.h           |   55 ++-
 libocfs2/Makefile               |    3 +-
 libocfs2/alloc.c                |   96 +++
 libocfs2/dir_indexed.c          | 1461 +++++++++++++++++++++++++++++++++++++++
 libocfs2/dir_iterate.c          |   16 +-
 libocfs2/dir_iterate.h          |    1 +
 libocfs2/dirblock.c             |  195 +++++-
 libocfs2/expanddir.c            |   18 +-
 libocfs2/extent_tree.c          |   53 ++
 libocfs2/extent_tree.h          |    5 +
 libocfs2/inode.c                |    3 +
 libocfs2/link.c                 |    9 +
 libocfs2/lookup.c               |   71 ++-
 libocfs2/ocfs2_err.et           |    6 +
 libocfs2/truncate.c             |   16 +
 libocfs2/unlink.c               |  187 +++++-
 sizetest/sizetest.c             |    2 +-
 18 files changed, 2165 insertions(+), 61 deletions(-)
 create mode 100644 libocfs2/dir_indexed.c

diff --git a/include/ocfs2-kernel/ocfs2_fs.h b/include/ocfs2-kernel/ocfs2_fs.h
index be266a7..0e6513a 100644
--- a/include/ocfs2-kernel/ocfs2_fs.h
+++ b/include/ocfs2-kernel/ocfs2_fs.h
@@ -1463,6 +1463,35 @@ static inline int ocfs2_extent_recs_per_eb(int blocksize)
 	return size / sizeof(struct ocfs2_extent_rec);
 }
 
+static inline int ocfs2_dx_entries_per_leaf(int blocksize)
+{
+	int size;
+	size = blocksize -
+		offsetof(struct ocfs2_dx_leaf, dl_list.de_entries);
+
+		return size / sizeof(struct ocfs2_dx_entry);
+}
+
+static inline int ocfs2_dx_entries_per_root(int blocksize)
+{
+	int size;
+
+	size = blocksize -
+		offsetof(struct ocfs2_dx_root_block, dr_entries.de_entries);
+
+	return size / sizeof(struct ocfs2_dx_entry);
+}
+
+static inline int ocfs2_extent_recs_per_dx_root(int blocksize)
+{
+	int size;
+
+	size = blocksize -
+		offsetof(struct ocfs2_dx_root_block, dr_list.l_recs);
+
+	return size / sizeof(struct ocfs2_extent_rec);
+}
+
 static inline int ocfs2_local_alloc_size(int blocksize)
 {
 	int size;
diff --git a/include/ocfs2/ocfs2.h b/include/ocfs2/ocfs2.h
index edd48a8..07e0805 100644
--- a/include/ocfs2/ocfs2.h
+++ b/include/ocfs2/ocfs2.h
@@ -253,6 +253,22 @@ struct _ocfs2_quota_hash {
 	ocfs2_cached_dquot **hash;
 };
 
+struct ocfs2_dx_hinfo {
+	uint32_t major_hash;
+	uint32_t minor_hash;
+};
+
+struct ocfs2_dir_lookup_result {
+	struct ocfs2_dx_hinfo       dl_hinfo;         /* name hash results */
+	char *                      dl_leaf;          /* unindexed block buffer */
+	uint64_t                    dl_leaf_blkno;    /* blk number of dl_leaf */
+	struct ocfs2_dir_entry *    dl_entry;         /* dirent pointed into dl_leaf */
+	struct ocfs2_dx_leaf *      dl_dx_leaf;       /* indexed block buffer */
+	uint64_t                    dl_dx_leaf_blkno; /* blk number of dl_dx_leaf */
+	struct ocfs2_dx_entry *     dl_dx_entry;      /* indexed entry pointed to dl_dx_leaf */
+	int                         dl_dx_entry_idx;  /* index of dl_dx_entry in entries list */
+};
+
 typedef struct _ocfs2_quota_hash ocfs2_quota_hash;
 
 errcode_t ocfs2_malloc(unsigned long size, void *ptr);
@@ -331,7 +347,8 @@ errcode_t ocfs2_read_inode(ocfs2_filesys *fs, uint64_t blkno,
 errcode_t ocfs2_write_inode(ocfs2_filesys *fs, uint64_t blkno,
 			    char *inode_buf);
 errcode_t ocfs2_check_directory(ocfs2_filesys *fs, uint64_t dir);
-
+int ocfs2_check_dir_entry(ocfs2_filesys *fs, struct ocfs2_dir_entry *de,
+				char *dir_buf, unsigned int offset);
 errcode_t ocfs2_read_cached_inode(ocfs2_filesys *fs, uint64_t blkno,
 				  ocfs2_cached_inode **ret_ci);
 errcode_t ocfs2_write_cached_inode(ocfs2_filesys *fs,
@@ -477,7 +494,7 @@ errcode_t ocfs2_read_dx_root(ocfs2_filesys *fs, uint64_t block,
 errcode_t ocfs2_read_dx_leaf(ocfs2_filesys *fs, uint64_t block,
 			     void *buf);
 int ocfs2_dir_indexed(struct ocfs2_dinode *di);
-
+errcode_t ocfs2_dx_dir_truncate(ocfs2_filesys *fs, uint64_t dir);
 errcode_t ocfs2_dir_iterate2(ocfs2_filesys *fs,
 			     uint64_t dir,
 			     int flags,
@@ -485,6 +502,7 @@ errcode_t ocfs2_dir_iterate2(ocfs2_filesys *fs,
 			     int (*func)(uint64_t	dir,
 					 int		entry,
 					 struct ocfs2_dir_entry *dirent,
+					 uint64_t blocknr,
 					 int	offset,
 					 int	blocksize,
 					 char	*buf,
@@ -495,6 +513,7 @@ extern errcode_t ocfs2_dir_iterate(ocfs2_filesys *fs,
 				   int flags,
 				   char *block_buf,
 				   int (*func)(struct ocfs2_dir_entry *dirent,
+					       uint64_t blocknr,
 					       int	offset,
 					       int	blocksize,
 					       char	*buf,
@@ -674,7 +693,10 @@ errcode_t ocfs2_new_inode(ocfs2_filesys *fs, uint64_t *ino, int mode);
 errcode_t ocfs2_new_system_inode(ocfs2_filesys *fs, uint64_t *ino, int mode, int flags);
 errcode_t ocfs2_delete_inode(ocfs2_filesys *fs, uint64_t ino);
 errcode_t ocfs2_new_extent_block(ocfs2_filesys *fs, uint64_t *blkno);
+errcode_t ocfs2_new_dx_root(ocfs2_filesys *fs, struct ocfs2_dinode *di, uint64_t *dr_blkno);
 errcode_t ocfs2_delete_extent_block(ocfs2_filesys *fs, uint64_t blkno);
+errcode_t ocfs2_delete_dx_root(ocfs2_filesys *fs, uint64_t dr_blkno);
+
 /*
  * Allocate the blocks and insert them to the file.
  * only i_clusters of dinode will be updated accordingly, i_size not changed.
@@ -1346,6 +1368,10 @@ static inline int ocfs2_refcount_tree(struct ocfs2_super_block *osb)
 #define OCFS2_BLOCK_ABORT	0x02
 #define OCFS2_BLOCK_ERROR	0x04
 
+
+#define OCFS2_IS_VALID_DX_ROOT(ptr)					\
+		(!strcmp((char *)(ptr)->dr_signature, OCFS2_DX_ROOT_SIGNATURE))
+
 /*
  * Block iterate flags
  *
@@ -1486,5 +1512,30 @@ errcode_t ocfs2_extent_iterate_xattr(ocfs2_filesys *fs,
 				     void *priv_data,
 				     int *changed);
 errcode_t ocfs2_delete_xattr_block(ocfs2_filesys *fs, uint64_t blkno);
+errcode_t ocfs2_dir_indexed_tree_truncate(ocfs2_filesys *fs,
+					struct ocfs2_dx_root_block *dx_root);
+errcode_t ocfs2_write_dx_root(ocfs2_filesys *fs, uint64_t block, char *buf);
+errcode_t ocfs2_write_dx_leaf(ocfs2_filesys *fs, uint64_t block, void *buf);
+errcode_t ocfs2_dx_dir_build(ocfs2_filesys *fs, uint64_t dir);
+errcode_t ocfs2_dx_dir_insert_entry(ocfs2_filesys *fs, uint64_t dir, const char *name,
+					uint64_t ino, uint64_t blkno);
+int ocfs2_search_dirblock(ocfs2_filesys *fs, char *dir_buf,
+	       		const char *name, int namelen, unsigned int bytes,
+			struct ocfs2_dir_entry **res_dir);
+void ocfs2_dx_dir_name_hash(ocfs2_filesys *fs, const char *name,
+			int len, struct ocfs2_dx_hinfo *hinfo);
+errcode_t ocfs2_dx_dir_lookup(ocfs2_filesys *fs, struct ocfs2_dx_root_block *dx_root,
+			struct ocfs2_extent_list *el, struct ocfs2_dx_hinfo *hinfo,
+			uint32_t *ret_cpos, uint64_t *ret_phys_blkno);
+errcode_t ocfs2_dx_dir_search(ocfs2_filesys *fs, const char *name,
+			int namelen, struct ocfs2_dx_root_block *dx_root,
+			struct ocfs2_dir_lookup_result *res);
+void release_lookup_res(struct ocfs2_dir_lookup_result *res);
+int ocfs2_find_max_rec_len(ocfs2_filesys *fs, char *buf);
+void ocfs2_dx_list_remove_entry(struct ocfs2_dx_entry_list *entry_list, int index);
+int ocfs2_is_dir_trailer(ocfs2_filesys *fs, struct ocfs2_dinode *di, unsigned long de_off);
+
+
+
 
 #endif  /* _FILESYS_H */
diff --git a/libocfs2/Makefile b/libocfs2/Makefile
index 622b89e..cff5061 100644
--- a/libocfs2/Makefile
+++ b/libocfs2/Makefile
@@ -77,7 +77,8 @@ CFILES = 		\
 	image.c		\
 	xattr.c		\
 	extent_tree.c	\
-	refcount.c
+	refcount.c	\
+	dir_indexed.c
 
 HFILES =		\
 	bitmap.h	\
diff --git a/libocfs2/alloc.c b/libocfs2/alloc.c
index 842ebdb..18eda16 100644
--- a/libocfs2/alloc.c
+++ b/libocfs2/alloc.c
@@ -622,6 +622,102 @@ out:
 	return ret;
 }
 
+/* Only initializes part of the dx_root:
+ *   dr_suballoc_slot
+ *   dr_suballoc_bit
+ *   dr_fs_generation
+ *   dr_blkno
+ *   dr_flags
+ */
+static void init_dx_root(ocfs2_filesys *fs,
+			struct ocfs2_dx_root_block *dx_root,
+			int slot, uint64_t gd_blkno, uint64_t dr_blkno)
+{
+
+	memset(dx_root, 0, fs->fs_blocksize);
+	strcpy((char *)dx_root->dr_signature, OCFS2_DX_ROOT_SIGNATURE);
+	dx_root->dr_suballoc_slot = slot;
+	dx_root->dr_suballoc_bit = (uint16_t)(dr_blkno - gd_blkno);
+	dx_root->dr_fs_generation = fs->fs_super->i_fs_generation;
+	dx_root->dr_blkno = dr_blkno;
+	dx_root->dr_flags |= OCFS2_DX_FLAG_INLINE;
+}
+
+errcode_t ocfs2_new_dx_root(ocfs2_filesys *fs,
+				struct ocfs2_dinode *di,
+				uint64_t *dr_blkno)
+{
+	errcode_t ret;
+	char *buf = NULL;
+	uint64_t gd_blkno;
+	struct ocfs2_dx_root_block *dx_root;
+	int slot;
+
+	ret = ocfs2_malloc_block(fs->fs_io, &buf);
+	if (ret)
+		goto out;
+
+	slot = di->i_suballoc_slot;
+	ret = ocfs2_load_allocator(fs, EXTENT_ALLOC_SYSTEM_INODE,
+				slot, &fs->fs_eb_allocs[slot]);
+	if (ret)
+		goto out;
+
+	ret = ocfs2_chain_alloc_with_io(fs, fs->fs_eb_allocs[slot],
+	    				&gd_blkno, dr_blkno);
+	if (ret == OCFS2_ET_BIT_NOT_FOUND) {
+		ret = ocfs2_chain_add_group(fs, fs->fs_eb_allocs[slot]);
+		if (ret)
+			goto out;
+		ret = ocfs2_chain_alloc_with_io(fs, fs->fs_eb_allocs[slot],
+						&gd_blkno, dr_blkno);
+		if (ret)
+			goto out;
+	} else if (ret)
+		goto out;
+
+	dx_root = (struct ocfs2_dx_root_block *)buf;
+	init_dx_root(fs, dx_root, slot, gd_blkno, *dr_blkno);
+
+	ret = ocfs2_write_dx_root(fs, *dr_blkno, (char *)dx_root);
+out:
+	if (buf)
+		ocfs2_free(&buf);
+	return ret;
+}
+
+errcode_t ocfs2_delete_dx_root(ocfs2_filesys *fs, uint64_t dr_blkno)
+{
+	errcode_t ret;
+	char *buf = NULL;
+	struct ocfs2_dx_root_block *dx_root;
+	int slot;
+
+	ret = ocfs2_malloc_block(fs->fs_io, &buf);
+	if (ret)
+		goto out;
+
+	ret = ocfs2_read_dx_root(fs, dr_blkno, buf);
+	if (ret)
+		goto out;
+
+	dx_root = (struct ocfs2_dx_root_block *)buf;
+	slot = dx_root->dr_suballoc_slot;
+
+	ret = ocfs2_load_allocator(fs, EXTENT_ALLOC_SYSTEM_INODE, slot,
+		       	&fs->fs_eb_allocs[slot]);
+	if (ret)
+		goto out;
+
+	ret = ocfs2_chain_free_with_io(fs, fs->fs_eb_allocs[slot], dr_blkno);
+
+out:
+	if (buf)
+		ocfs2_free(&buf);
+
+	return ret;
+}
+
 /* XXX what to do about local allocs?
  * XXX Well, we shouldn't use local allocs to allocate, as we are
  *     userspace and we have the entire bitmap in memory.  However, this
diff --git a/libocfs2/dir_indexed.c b/libocfs2/dir_indexed.c
new file mode 100644
index 0000000..abb1695
--- /dev/null
+++ b/libocfs2/dir_indexed.c
@@ -0,0 +1,1461 @@
+/* -*- mode: c; c-basic-offset: 8; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
+ * Copyright (C) 2009, 2010 Novell.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License, version 2,  as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ */
+#include <assert.h>
+#include <ocfs2/ocfs2.h>
+#include <ocfs2/bitops.h>
+#include <ocfs2/kernel-rbtree.h>
+#include "ocfs2_err.h"
+#include "extent_tree.h"
+
+
+errcode_t ocfs2_dx_dir_truncate(ocfs2_filesys *fs,
+			uint64_t dir)
+{
+	struct ocfs2_dx_root_block *dx_root;
+	char *dx_root_buf = NULL, *di_buf = NULL;
+	struct ocfs2_dinode *di;
+	uint64_t dx_root_blk;
+	errcode_t ret = 0;
+
+	ret = ocfs2_malloc_block(fs->fs_io, &di_buf);
+	if (ret)
+		goto out;
+	ret = ocfs2_read_inode(fs, dir, di_buf);
+	if (ret)
+		goto out;
+	di = (struct ocfs2_dinode *)di_buf;
+
+	/* we have to trust i_dyn_features */
+	if (!S_ISDIR(di->i_mode) ||
+	    !ocfs2_dir_indexed(di) ||
+	    di->i_dyn_features & OCFS2_INLINE_DATA_FL)
+		goto out;
+
+	dx_root_blk = di->i_dx_root;
+
+	di->i_dyn_features &= ~OCFS2_INDEXED_DIR_FL;
+	di->i_dx_root = 0;
+
+	/* update inode firstly */
+	ret = ocfs2_write_inode(fs, di->i_blkno, (char *)di);
+	if (ret)
+		goto out;
+
+	/* the inode is updated; the remaining errors are not fatal */
+	ret = ocfs2_malloc_block(fs->fs_io, &dx_root_buf);
+	if (ret)
+		goto out;
+	ret = ocfs2_read_dx_root(fs, dx_root_blk, dx_root_buf);
+	if (ret)
+		goto out;
+	dx_root = (struct ocfs2_dx_root_block *)dx_root_buf;
+
+	if (dx_root->dr_flags & OCFS2_DX_FLAG_INLINE)
+		goto remove_index;
+
+	ret = ocfs2_dir_indexed_tree_truncate(fs, dx_root);
+
+	/*
+	 * Even if ocfs2_dir_indexed_tree_truncate() failed,
+	 * we still want to call ocfs2_delete_dx_root().
+	 */
+
+remove_index:
+	ret = ocfs2_delete_dx_root(fs, dx_root->dr_blkno);
+out:
+	if (di_buf)
+		ocfs2_free(&di_buf);
+	if (dx_root_buf)
+		ocfs2_free(&dx_root_buf);
+	return ret;
+}
+
+static unsigned int ocfs2_figure_dirent_hole(struct ocfs2_dir_entry *de)
+{
+	unsigned int hole;
+
+	if (de->inode == 0)
+		hole = de->rec_len;
+	else
+		hole = de->rec_len - OCFS2_DIR_REC_LEN(de->name_len);
+
+	return hole;
+}
+
+int ocfs2_find_max_rec_len(ocfs2_filesys *fs, char *buf)
+{
+	int size, this_hole, largest_hole = 0;
+	char *de_buf, *limit;
+	struct ocfs2_dir_entry *de;
+	/* Largest hole among the dirents before the trailer offset, or 0 if below OCFS2_DIR_MIN_REC_LEN. */
+	size = ocfs2_dir_trailer_blk_off(fs);
+	limit = buf + size;
+	de_buf = buf;
+	de = (struct ocfs2_dir_entry *)de_buf;
+	do {
+		if (de->rec_len == 0)
+			break;	/* corrupt entry: the walk would never advance */
+		this_hole = ocfs2_figure_dirent_hole(de);
+		if (this_hole > largest_hole)
+			largest_hole = this_hole;
+		de_buf += de->rec_len;
+		de = (struct ocfs2_dir_entry *)de_buf;
+	} while (de_buf < limit);
+
+	if (largest_hole >= OCFS2_DIR_MIN_REC_LEN)
+		return largest_hole;
+	return 0;
+}
+
+struct trailer_ctxt {
+	struct ocfs2_dx_root_block *dx_root;
+	struct ocfs2_dinode *di;
+};
+
+/* make sure the space for trailer is reserved */
+static errcode_t ocfs2_check_dir_trailer_space(ocfs2_filesys *fs,
+					struct ocfs2_dinode *di,
+					uint64_t blkno,
+					char *blk)
+{
+	errcode_t ret = 0;
+	struct ocfs2_dir_entry *dirent;
+	unsigned int offset = 0;
+	unsigned int toff = ocfs2_dir_trailer_blk_off(fs);
+	unsigned int real_rec_len = 0;
+
+	while(offset < fs->fs_blocksize) {
+		dirent = (struct ocfs2_dir_entry *)(blk + offset);
+		if (!ocfs2_check_dir_entry(fs, dirent, blk, offset)) {
+			ret = OCFS2_ET_DIR_CORRUPTED;
+			break;
+		}
+
+		real_rec_len = dirent->inode ?
+			OCFS2_DIR_REC_LEN(dirent->name_len) :
+			OCFS2_DIR_REC_LEN(1);
+		if ((offset + real_rec_len) <= toff)
+			goto next;
+
+		if (dirent->inode) {
+			ret = OCFS2_ET_DIR_NO_SPACE;
+			break;
+		}
+next:
+		offset += dirent->rec_len;
+	}
+
+out:
+	return ret;
+}
+
+static int dir_trailer_func(ocfs2_filesys *fs,
+				uint64_t blkno,
+				uint64_t bcount,
+				uint16_t ext_flags,
+				void *priv_data)
+{
+	struct trailer_ctxt *ctxt = (struct trailer_ctxt *)priv_data;
+	struct ocfs2_dinode *di = ctxt->di;
+	struct ocfs2_dx_root_block *dx_root = ctxt->dx_root;
+	struct ocfs2_dir_block_trailer *trailer;
+	int max_rec_len = 0;
+	errcode_t ret = 0;
+	char *blk = NULL;
+
+	ret = ocfs2_malloc_block(fs->fs_io, &blk);
+	if (ret)
+		goto out;
+
+	/* here we don't trust trailer, cannot use
+	 * ocfs2_read_dir_block() */
+	ret = ocfs2_read_blocks(fs, blkno, 1, blk);
+	if (ret)
+		goto out;
+	ret = ocfs2_check_dir_trailer_space(fs, di, blkno, blk);
+	if (ret)
+		goto out;
+	ocfs2_init_dir_trailer(fs, di, blkno, blk);
+	max_rec_len = ocfs2_find_max_rec_len(fs, blk);
+	trailer = ocfs2_dir_trailer_from_block(fs, blk);
+	trailer->db_free_rec_len = max_rec_len;
+
+	if (max_rec_len) {
+		trailer->db_free_next = dx_root->dr_free_blk;
+		dx_root->dr_free_blk = blkno;
+	}
+
+	/* compute trailer->db_check here; once the block is written out,
+	 * the trailer can be trusted */
+	ret = ocfs2_write_dir_block(fs, di, blkno, blk);
+out:
+	if (blk)
+		ocfs2_free(&blk);
+	return ret;
+}
+
+static errcode_t ocfs2_init_dir_trailers(ocfs2_filesys *fs,
+				struct ocfs2_dinode *di,
+				struct ocfs2_dx_root_block *dx_root)
+{
+	errcode_t ret = 0;
+	struct trailer_ctxt ctxt;
+
+	if (di->i_dyn_features & OCFS2_INLINE_DATA_FL) {
+		ret = OCFS2_ET_INODE_NOT_VALID;
+		goto out;
+	}
+
+	ctxt.di = di;
+	ctxt.dx_root = dx_root;
+
+	ret = ocfs2_block_iterate_inode(fs, di,
+			0, dir_trailer_func, &ctxt);
+out:
+	return ret;
+}
+
+static void ocfs2_dx_entry_list_insert(struct ocfs2_dx_entry_list *entry_list,
+					struct ocfs2_dx_hinfo *hinfo,
+					uint64_t dirent_blk)
+{
+	int i;
+	struct ocfs2_dx_entry *dx_entry;
+
+	i = entry_list->de_num_used;
+	dx_entry = &entry_list->de_entries[i];
+
+	memset(dx_entry, 0, sizeof(struct ocfs2_dx_entry));
+	dx_entry->dx_major_hash = hinfo->major_hash;
+	dx_entry->dx_minor_hash = hinfo->minor_hash;
+	dx_entry->dx_dirent_blk = dirent_blk;
+
+	entry_list->de_num_used += 1;
+}
+
+struct dx_insert_ctxt {
+	uint64_t dir_blkno;
+	uint64_t dx_root_blkno;
+	ocfs2_filesys *fs;
+};
+
+
+inline static int ocfs2_inline_dx_has_space(struct ocfs2_dx_root_block *dx_root)
+{
+	struct ocfs2_dx_entry_list *entry_list;
+
+	entry_list = &dx_root->dr_entries;
+
+	if (entry_list->de_num_used >= entry_list->de_count)
+		return 0;
+
+	return 1;
+}
+
+static struct ocfs2_dx_leaf **ocfs2_dx_dir_alloc_leaves(ocfs2_filesys *fs,
+					int *ret_num_leaves)
+{
+	int num_dx_leaves = ocfs2_clusters_to_blocks(fs, 1);
+	struct ocfs2_dx_leaf **dx_leaves;
+	/* one zeroed (NULL) slot per block of a single cluster */
+	dx_leaves = calloc(num_dx_leaves, sizeof(*dx_leaves));
+	if (dx_leaves && ret_num_leaves)
+		*ret_num_leaves = num_dx_leaves;
+	/* NULL when calloc() fails; *ret_num_leaves is then left untouched */
+	return dx_leaves;
+}
+
+static errcode_t ocfs2_dx_dir_format_cluster(ocfs2_filesys *fs,
+				struct ocfs2_dx_leaf  **dx_leaves,
+				int num_dx_leaves,
+				uint64_t start_blk)
+{
+	errcode_t ret;
+	int i;
+	struct ocfs2_dx_leaf *dx_leaf;
+	char *blk;
+
+	for (i = 0; i < num_dx_leaves; i++) {
+		ret = ocfs2_malloc_block(fs->fs_io, &blk);
+		if (ret)
+			goto out;
+
+		dx_leaves[i] = (struct ocfs2_dx_leaf *)blk;
+		dx_leaf = (struct ocfs2_dx_leaf *)blk;
+
+		memset(dx_leaf, 0, fs->fs_blocksize);
+		strcpy((char *)dx_leaf->dl_signature, OCFS2_DX_LEAF_SIGNATURE);
+		dx_leaf->dl_fs_generation = fs->fs_super->i_fs_generation;
+		dx_leaf->dl_blkno = start_blk + i;
+		dx_leaf->dl_list.de_count = ocfs2_dx_entries_per_leaf(fs->fs_blocksize);
+
+		ret = ocfs2_write_dx_leaf(fs, dx_leaf->dl_blkno, dx_leaf);
+		if (ret)
+			goto out;
+	}
+	ret = 0;
+out:
+	return ret;
+}
+
+static inline unsigned int __ocfs2_dx_dir_hash_idx(ocfs2_filesys *fs,
+						uint32_t minor_hash)
+{
+	unsigned int cbits, bbits, dx_mask;
+
+	cbits = OCFS2_RAW_SB(fs->fs_super)->s_clustersize_bits;
+	bbits = OCFS2_RAW_SB(fs->fs_super)->s_blocksize_bits;
+	dx_mask = (1 << (cbits - bbits)) -1;
+
+	return (minor_hash & dx_mask);
+}
+
+static inline unsigned int ocfs2_dx_dir_hash_idx(ocfs2_filesys *fs,
+					struct ocfs2_dx_hinfo *hinfo)
+{
+	return __ocfs2_dx_dir_hash_idx(fs, hinfo->minor_hash);
+}
+
+static void ocfs2_dx_dir_leaf_insert_tail(struct ocfs2_dx_leaf *dx_leaf,
+				struct ocfs2_dx_entry *dx_new_entry)
+{
+	int i;
+
+	i = dx_leaf->dl_list.de_num_used;
+	dx_leaf->dl_list.de_entries[i] = *dx_new_entry;
+
+	dx_leaf->dl_list.de_num_used += 1;
+}
+
+static errcode_t ocfs2_expand_inline_dx_root(ocfs2_filesys *fs,
+					struct ocfs2_dx_root_block *dx_root)
+{
+	errcode_t ret;
+	int num_dx_leaves = 0, i, j;
+	uint64_t start_blkno = 0;
+	uint32_t clusters_found = 0;
+	struct ocfs2_dx_leaf **dx_leaves = NULL;
+	struct ocfs2_dx_leaf *target_leaf;
+	struct ocfs2_dx_entry_list *entry_list;
+	struct ocfs2_extent_tree et;
+	struct ocfs2_dx_entry *dx_entry;
+	/* Turn an inline dx_root into an extent-list root backed by one new cluster of leaves. */
+	dx_leaves = ocfs2_dx_dir_alloc_leaves(fs, &num_dx_leaves);
+	if (!dx_leaves) {
+		ret = OCFS2_ET_NO_MEMORY;
+		goto out;
+	}
+
+	ret = ocfs2_new_clusters(fs, 1, 1, &start_blkno, &clusters_found);
+	if (ret)
+		goto out;
+	assert(clusters_found == 1);
+	ret = ocfs2_dx_dir_format_cluster(fs, dx_leaves,
+				num_dx_leaves, start_blkno);
+	if (ret)
+		goto out;
+
+	/*
+	 * Transfer the entries from inline dx_root into the appropriate
+	 * block
+	 */
+	entry_list = &dx_root->dr_entries;
+
+	for (i = 0; i < entry_list->de_num_used; i++) {
+		dx_entry = &entry_list->de_entries[i];
+		j = __ocfs2_dx_dir_hash_idx(fs, dx_entry->dx_minor_hash);
+		target_leaf = (struct ocfs2_dx_leaf *)dx_leaves[j];
+		ocfs2_dx_dir_leaf_insert_tail(target_leaf, dx_entry);
+	}
+
+	/*
+	 * Write out all leaves. If ocfs2_write_dx_leaf() fails here, dx_root
+	 * has not been cleared and the leaves are not in the indexed tree
+	 * yet, so the cluster will be recognized as an orphan by the block
+	 * scan of fsck.ocfs2.
+	 */
+	for (i = 0; i < num_dx_leaves; i ++) {
+		target_leaf = (struct ocfs2_dx_leaf *)dx_leaves[i];
+		ret = ocfs2_write_dx_leaf(fs, target_leaf->dl_blkno,
+					  target_leaf);
+		if (ret)
+			goto out;
+	}
+
+	dx_root->dr_flags &= ~OCFS2_DX_FLAG_INLINE;
+	memset(&dx_root->dr_list, 0, fs->fs_blocksize -
+		offsetof(struct ocfs2_dx_root_block, dr_list));
+	dx_root->dr_list.l_count =
+		ocfs2_extent_recs_per_dx_root(fs->fs_blocksize);
+
+	/* This should never fail considering we start with an empty dx_root */
+	ocfs2_init_dx_root_extent_tree(&et, fs, (char *)dx_root, dx_root->dr_blkno);
+	ret = ocfs2_tree_insert_extent(fs, &et, 0, start_blkno, 1, 0);
+out:
+	/* the leaf buffers never leave this function: release them on every path */
+	for (i = 0; dx_leaves && i < num_dx_leaves; i++)
+		if (dx_leaves[i])
+			ocfs2_free(&dx_leaves[i]);
+	free(dx_leaves);
+	return ret;
+}
+
+static errcode_t ocfs2_dx_dir_lookup_rec(ocfs2_filesys *fs,
+		struct ocfs2_dx_root_block *dx_root,
+		struct ocfs2_extent_list *el,
+		uint32_t major_hash,
+		uint32_t *ret_cpos,
+		uint64_t *ret_phys_blkno,
+		unsigned int *ret_clen)
+{
+	errcode_t ret = 0;
+	int i, found;
+	struct ocfs2_extent_block *eb;
+	struct ocfs2_extent_rec *rec = NULL;
+	char *eb_buf = NULL;
+
+	if (el->l_tree_depth) {
+		ret = ocfs2_tree_find_leaf(fs,
+					&dx_root->dr_list,
+					dx_root->dr_blkno,
+					(char *)dx_root,
+					major_hash, &eb_buf);
+		if (ret)
+			goto out;
+
+		eb = (struct ocfs2_extent_block *)eb_buf;
+		el = &eb->h_list;
+
+		if (el->l_tree_depth) {
+			ret = OCFS2_ET_CORRUPT_EXTENT_BLOCK;
+			goto out;
+		}
+	}
+
+	found = 0;
+	for (i = el->l_next_free_rec - 1; i >= 0; i--) {
+		rec = &el->l_recs[i];
+		if (rec->e_cpos <= major_hash) {
+			found = 1;
+			break;
+		}
+	}
+	if (!found) {
+		ret = OCFS2_ET_CORRUPT_EXTENT_BLOCK;
+		goto out;
+	}
+
+	if (ret_phys_blkno)
+		*ret_phys_blkno = rec->e_blkno;
+	if (ret_cpos)
+		*ret_cpos = rec->e_cpos;
+	if (ret_clen)
+		*ret_clen = rec->e_leaf_clusters;
+
+out:
+	if (eb_buf)
+		ocfs2_free(&eb_buf);
+	return ret;
+}
+
+errcode_t ocfs2_dx_dir_lookup(ocfs2_filesys *fs,
+			struct ocfs2_dx_root_block *dx_root,
+			struct ocfs2_extent_list *el,
+			struct ocfs2_dx_hinfo *hinfo,
+			uint32_t *ret_cpos,
+			uint64_t *ret_phys_blkno)
+{
+	errcode_t ret = 0;
+	unsigned int cend = 0, clen = 0;
+	uint32_t cpos = 0;
+	uint64_t blkno = 0;
+	uint32_t name_hash = hinfo->major_hash;
+
+	ret = ocfs2_dx_dir_lookup_rec(fs, dx_root, el,
+			name_hash, &cpos, &blkno, &clen);
+	if (ret)
+		goto out;
+	cend = cpos + clen;
+	if (name_hash >= cend) {
+		blkno += ocfs2_clusters_to_blocks(fs, clen - 1);
+		cpos += clen - 1;
+	} else {
+		blkno += ocfs2_clusters_to_blocks(fs, name_hash - cpos);
+		cpos = name_hash;
+	}
+
+	blkno += ocfs2_dx_dir_hash_idx(fs, hinfo);
+
+	if (ret_phys_blkno)
+		*ret_phys_blkno = blkno;
+	if (ret_cpos)
+		*ret_cpos = cpos;
+
+out:
+	return ret;
+}
+
+static int dx_leaf_sort_cmp(const void *a, const void *b)
+{
+	const struct ocfs2_dx_entry *e1 = a;
+	const struct ocfs2_dx_entry *e2 = b;
+	uint32_t major_hash1 = e1->dx_major_hash;
+	uint32_t major_hash2 = e2->dx_major_hash;
+	uint32_t minor_hash1 = e1->dx_minor_hash;
+	uint32_t minor_hash2 = e2->dx_minor_hash;
+
+	if (major_hash1 > major_hash2)
+		return 1;
+	if (major_hash1 < major_hash2)
+		return -1;
+
+	/* it is not strictly necessary to sort by minor */
+	if (minor_hash1 > minor_hash2)
+		return 1;
+	if (minor_hash1 < minor_hash2)
+		return -1;
+	return 0;
+}
+
+static void dx_leaf_sort_swap(void *a, void *b, int size)
+{
+	struct ocfs2_dx_entry *e1 = a;
+	struct ocfs2_dx_entry *e2 = b;
+	struct ocfs2_dx_entry tmp;
+
+	assert(size == sizeof (struct ocfs2_dx_entry));
+
+	tmp = *e1;
+	*e1 = *e2;
+	*e2 = tmp;
+}
+
+static int ocfs2_dx_leaf_same_major(struct ocfs2_dx_leaf *dx_leaf)
+{
+	struct ocfs2_dx_entry_list *dl_list = &dx_leaf->dl_list;
+	int i, num = dl_list->de_num_used;
+
+	for (i = 0; i < (num - 1); i++) {
+		if (dl_list->de_entries[i].dx_major_hash !=
+		    dl_list->de_entries[i + 1].dx_major_hash)
+			return 0;
+	}
+	return 1;
+}
+
+/*
+ * Find the optimal value to split this leaf on. This expects the leaf
+ * entries to be in sorted order.
+ *
+ * leaf_cpos is the cpos of the leaf we're splitting. insert_hash is
+ * the hash we want to insert.
+ *
+ * This function is only concerned with the major hash - that which
+ * determines which cluster an item belongs to.
+ */
+static int ocfs2_dx_dir_find_leaf_split(struct ocfs2_dx_leaf *dx_leaf,
+					uint32_t leaf_cpos,
+					uint32_t insert_hash,
+					uint32_t *split_hash)
+{
+	struct ocfs2_dx_entry_list *dl_list = &dx_leaf->dl_list;
+	int i, num_used = dl_list->de_num_used;
+	int allsame;
+
+	/*
+	 * There are a couple of rare, but nasty corner cases we have to
+	 * check for here. All of them involve a leaf where all values
+	 * have the same hash, which is what we look for first.
+	 *
+	 * Most of the time, all of the above is false, and we simply
+	 * pick the median value for a split.
+	 */
+	allsame = ocfs2_dx_leaf_same_major(dx_leaf);
+	if (allsame) {
+		uint32_t val = dl_list->de_entries[0].dx_major_hash;
+		if (val == insert_hash) {
+			/*
+			 * No matter where we would choose to split,
+			 * the new entry would want to occupy the same
+			 * block as these. Since there's no space left
+			 * in their existing block, we know there
+			 * won't be space after the split.
+			 */
+			return OCFS2_ET_DIR_NO_SPACE;
+		}
+
+		if (val == leaf_cpos) {
+			/*
+			 * Because val is the same as leaf_cpos (which
+			 * is the smallest value this leaf can have),
+			 * yet is not equal to insert_hash, then we
+			 * know that insert_hash *must* be larger than
+			 * val (and leaf_cpos). At least cpos+1 in value.
+			 *
+			 * We also know then, that there cannot be an
+			 * adjacent extent (otherwise we'd be looking
+			 * at it). Choosing this value gives us a
+			 * chance to get some contiguousness.
+			 */
+			*split_hash = leaf_cpos + 1;
+			return 0;
+		}
+
+		if (val > insert_hash) {
+			/*
+			 * val can not be the same as insert_hash, and
+			 * also must be larger than leaf_cpos. Also,
+			 * we know that there can't be a leaf between
+			 * cpos and val, otherwise the entries with
+			 * hash 'val' would be there.
+			 */
+			*split_hash = val;
+			return 0;
+		}
+
+		*split_hash = insert_hash;
+		return 0;
+	}
+
+	/*
+	 * Since the records are sorted and the checks above
+	 * guaranteed that not all records in this block are the same,
+	 * we simply travel forward, from the median, and pick the 1st
+	 * record whose value is larger than leaf_cpos.
+	 */
+	for (i = (num_used /2); i < num_used; i++) {
+		if (dl_list->de_entries[i].dx_major_hash > leaf_cpos)
+			break;
+	}
+
+	assert(i < num_used); /* Should be impossible */
+	*split_hash = dl_list->de_entries[i].dx_major_hash;
+	return 0;
+}
+
+static errcode_t ocfs2_read_dx_leaves(ocfs2_filesys *fs,
+				uint64_t start,
+				int num,
+				struct ocfs2_dx_leaf **dx_leaves)
+{
+	/* Read num consecutive dx_leaf blocks from block start into new buffers. */
+	errcode_t ret = 0;
+	int i;
+	struct ocfs2_dx_leaf *dx_leaf;
+	for (i = 0; i < num; i++) {
+		assert(!dx_leaves[i]);
+		ret = ocfs2_malloc_block(fs->fs_io, (char **)&dx_leaf);
+		if (ret)
+			goto bail;
+		/* store the buffer before reading so bail frees it if the read fails */
+		dx_leaves[i] = dx_leaf;
+		ret = ocfs2_read_dx_leaf(fs, start + i, (char *)dx_leaf);
+		if (ret)
+			goto bail;
+	}
+	return 0;
+bail:
+	for (; i >= 0; i--) {
+		if (dx_leaves[i])
+			ocfs2_free(&dx_leaves[i]);
+	}
+	return ret;
+}
+
+static errcode_t __ocfs2_dx_dir_new_cluster(ocfs2_filesys *fs,
+					uint32_t cpos,
+					struct ocfs2_dx_leaf **dx_leaves,
+					int num_dx_leaves,
+					uint64_t *ret_phys_blkno)
+{
+	errcode_t ret;
+	uint32_t num;
+	uint64_t phys;
+
+	ret = ocfs2_new_clusters(fs, 1, 1, &phys, &num);
+	if (ret)
+		goto out;
+	assert(num == 1);
+	ret = ocfs2_dx_dir_format_cluster(fs, dx_leaves,
+				num_dx_leaves, phys);
+	if (ret)
+		goto out;
+
+	*ret_phys_blkno = phys;
+
+out:
+	return ret;
+}
+
+static errcode_t ocfs2_dx_dir_new_cluster(ocfs2_filesys *fs,
+				struct ocfs2_extent_tree *et,
+				uint32_t cpos,
+				uint64_t *phys_blocknr,
+				struct ocfs2_dx_leaf **dx_leaves,
+				int num_dx_leaves)
+{
+	errcode_t ret;
+	uint64_t blkno;
+	ret = __ocfs2_dx_dir_new_cluster(fs, cpos, dx_leaves,
+					num_dx_leaves, &blkno);
+	 if (ret)
+		 goto out;
+
+	 *phys_blocknr = blkno;
+	 ret = ocfs2_tree_insert_extent(fs, et, cpos, blkno, 1, 0);
+
+out:
+	 return ret;
+}
+
+
+static errcode_t ocfs2_dx_dir_transfer_leaf(ocfs2_filesys *fs,
+				uint32_t split_hash,
+				struct ocfs2_dx_leaf *tmp_dx_leaf,
+				struct ocfs2_dx_leaf **orig_dx_leaves,
+				uint64_t orig_dx_leaves_blkno,
+				struct ocfs2_dx_leaf **new_dx_leaves,
+				uint64_t new_dx_leaves_blkno,
+				int num_dx_leaves)
+{
+	errcode_t ret = 0;	/* stays 0 when num_dx_leaves is 0 */
+	int i, j, num_used;
+	uint32_t major_hash;
+	struct ocfs2_dx_leaf *orig_dx_leaf, *new_dx_leaf;
+	struct ocfs2_dx_entry_list *orig_list, *new_list, *tmp_list;
+	struct ocfs2_dx_entry *dx_entry;
+	/* Per leaf pair: entries with major hash >= split_hash move to the new leaf; both are written. */
+	tmp_list = &tmp_dx_leaf->dl_list;
+
+	for (i = 0; i < num_dx_leaves; i++) {
+		orig_dx_leaf = orig_dx_leaves[i];
+		orig_list = &orig_dx_leaf->dl_list;
+		new_dx_leaf = new_dx_leaves[i];
+		new_list = &new_dx_leaf->dl_list;
+
+		num_used = orig_list->de_num_used;
+		/* tmp_dx_leaf is scratch space: the entries that stay are rebuilt in it */
+		memcpy(tmp_dx_leaf, orig_dx_leaf, fs->fs_blocksize);
+		tmp_list->de_num_used = 0;
+		memset(&tmp_list->de_entries, 0,
+				sizeof(struct ocfs2_dx_entry) * num_used);
+
+		for (j = 0; j < num_used; j++) {
+			dx_entry = &orig_list->de_entries[j];
+			major_hash = dx_entry->dx_major_hash;
+			if (major_hash >= split_hash)
+				ocfs2_dx_dir_leaf_insert_tail(new_dx_leaf,
+								dx_entry);
+			else
+				ocfs2_dx_dir_leaf_insert_tail(tmp_dx_leaf,
+								dx_entry);
+		}
+		memcpy(orig_dx_leaf, tmp_dx_leaf, fs->fs_blocksize);
+
+		ret = ocfs2_write_dx_leaf(fs, orig_dx_leaves_blkno + i,
+						(char *)orig_dx_leaf);
+		if (ret)
+			goto out;
+		ret = ocfs2_write_dx_leaf(fs, new_dx_leaves_blkno + i,
+						(char *)new_dx_leaf);
+		if (ret)
+			goto out;
+	}
+
+out:
+	return ret;
+}
+
+static int ocfs2_dx_dir_free_leaves(ocfs2_filesys *fs,
+				struct ocfs2_dx_leaf **dx_leaves)
+{
+	int i, num;
+
+	num = ocfs2_clusters_to_blocks(fs, 1);
+	for (i = 0; i < num; i++) {
+		if (dx_leaves[i])
+			ocfs2_free(&dx_leaves[i]);
+	}
+	free(dx_leaves);
+	return 0;
+}
+
+/* from Linux kernel lib/sort.c */
+static void ocfs2_sort(void *base, size_t num, size_t size,
+			int (*cmp_func)(const void *, const void *),
+			void (*swap_func)(void *, void *, int size))
+{
+	/* pre-scale counters for performance */
+	int i = (num/2 - 1) * size, n = num * size, c, r;
+
+	/* heapify */
+	for (; i >= 0; i -= size) {
+		for (r = i; r * 2 + size < n; r = c) {
+			c = r * 2 + size;
+			if (c < n - size &&
+			    cmp_func(base + c, base + c + size) < 0)
+				c += size;
+			if (cmp_func(base + r, base + c) >= 0)
+				break;
+			swap_func(base + r, base + c, size);
+		}
+	}
+
+	/* sort */
+	for (i = n - size; i > 0; i -= size) {
+		swap_func(base, base + i, size);
+		for (r = 0; r * 2 + size < i; r = c) {
+			c = r * 2 + size;
+			if (c < i - size &&
+			    cmp_func(base + c, base + c + size) < 0)
+				c += size;
+			if (cmp_func(base + r, base + c) >= 0)
+				break;
+			swap_func(base + r, base + c, size);
+		}
+	}
+
+}
+
+static errcode_t ocfs2_dx_dir_rebalance(ocfs2_filesys *fs,
+			struct ocfs2_dx_root_block *dx_root,
+			struct ocfs2_dx_leaf *dx_leaf,
+			struct ocfs2_dx_hinfo *hinfo,
+			uint32_t leaf_cpos,
+			uint64_t leaf_blkno)
+{
+	struct ocfs2_extent_tree et;
+	struct ocfs2_dx_leaf **orig_dx_leaves = NULL;
+	struct ocfs2_dx_leaf **new_dx_leaves = NULL;
+	struct ocfs2_dx_leaf *tmp_dx_leaf = NULL;
+	uint32_t insert_hash = hinfo->major_hash;
+	uint32_t split_hash, cpos;
+	uint64_t orig_leaves_start, new_leaves_start;
+	errcode_t ret;
+	int num_used, num_dx_leaves;
+	/* Split a full leaf's cluster: entries hashing >= split_hash move to a new cluster. */
+	ocfs2_init_dx_root_extent_tree(&et, fs, (char *)dx_root, dx_root->dr_blkno);
+
+	if (dx_root->dr_clusters == UINT_MAX) {
+		ret = OCFS2_ET_DIR_NO_SPACE;
+		goto out;
+	}
+	/* refuse to split unless dx_leaf is completely full */
+	num_used = dx_leaf->dl_list.de_num_used;
+	if (num_used < dx_leaf->dl_list.de_count) {
+		ret = OCFS2_ET_DX_BALANCE_EMPTY_LEAF;
+		goto out;
+	}
+
+	orig_dx_leaves = ocfs2_dx_dir_alloc_leaves(fs, &num_dx_leaves);
+	if (!orig_dx_leaves) {
+		ret = OCFS2_ET_NO_MEMORY;
+		goto out;
+	}
+
+	new_dx_leaves = ocfs2_dx_dir_alloc_leaves(fs, NULL);
+	if (!new_dx_leaves) {
+		ret = OCFS2_ET_NO_MEMORY;
+		goto out;
+	}
+	/* ocfs2_dx_dir_find_leaf_split() expects the entries in sorted order */
+	ocfs2_sort(dx_leaf->dl_list.de_entries, num_used,
+		sizeof(struct ocfs2_dx_entry), dx_leaf_sort_cmp,
+		dx_leaf_sort_swap);
+
+	ret = ocfs2_dx_dir_find_leaf_split(dx_leaf, leaf_cpos,
+				insert_hash, &split_hash);
+	if (ret)
+		goto out;
+
+	ret = ocfs2_malloc_block(fs->fs_io, (char **)(&tmp_dx_leaf));
+	if (ret)
+		goto out;
+	/* leaves are addressed by block: use the first block of leaf_blkno's cluster (assumes blocks_to_clusters rounds down - confirm) */
+	orig_leaves_start = ocfs2_clusters_to_blocks(fs,
+				ocfs2_blocks_to_clusters(fs, leaf_blkno));
+	ret = ocfs2_read_dx_leaves(fs, orig_leaves_start, num_dx_leaves,
+					orig_dx_leaves);
+	if (ret)
+		goto out;
+	cpos = split_hash;
+	ret = ocfs2_dx_dir_new_cluster(fs, &et, cpos, &new_leaves_start,
+					new_dx_leaves, num_dx_leaves);
+	if (ret)
+		goto out;
+	ret = ocfs2_dx_dir_transfer_leaf(fs, split_hash, tmp_dx_leaf,
+				orig_dx_leaves, orig_leaves_start,
+				new_dx_leaves, new_leaves_start,
+				num_dx_leaves);
+
+out:
+	if (tmp_dx_leaf)
+		ocfs2_free((char **)(&tmp_dx_leaf));
+	if (orig_dx_leaves)
+		ocfs2_dx_dir_free_leaves(fs, orig_dx_leaves);
+	if (new_dx_leaves)
+		ocfs2_dx_dir_free_leaves(fs, new_dx_leaves);
+	return ret;
+}
+
+/*
+ * Locate a dx_leaf with a free slot for the hash in lookup->dl_hinfo.
+ *
+ * The leaf selected by ocfs2_dx_dir_lookup() is read; if it is full the
+ * tree is rebalanced once and the lookup is repeated.  A leaf that is
+ * still full after rebalancing yields OCFS2_ET_DIR_NO_SPACE.  On
+ * success the chosen leaf's block number is stored in
+ * lookup->dl_dx_leaf_blkno.  @name and @namelen are not used here.
+ */
+static errcode_t ocfs2_find_dir_space_dx(ocfs2_filesys *fs,
+			struct ocfs2_dx_root_block *dx_root,
+			const char *name, int namelen,
+			struct ocfs2_dir_lookup_result *lookup)
+{
+	errcode_t ret;
+	int did_rebalance = 0;
+	struct ocfs2_dx_leaf *leaf;
+	char *leaf_buf = NULL;
+	uint64_t leaf_blkno;
+	uint32_t cpos;
+
+	ret = ocfs2_malloc_block(fs->fs_io, &leaf_buf);
+	if (ret)
+		goto out;
+
+	for (;;) {
+		ret = ocfs2_dx_dir_lookup(fs, dx_root, &dx_root->dr_list,
+					&lookup->dl_hinfo, &cpos, &leaf_blkno);
+		if (ret)
+			goto out;
+
+		ret = ocfs2_read_dx_leaf(fs, leaf_blkno, leaf_buf);
+		if (ret)
+			goto out;
+
+		leaf = (struct ocfs2_dx_leaf *)leaf_buf;
+		if (leaf->dl_list.de_num_used < leaf->dl_list.de_count)
+			break;		/* room available */
+
+		if (did_rebalance) {
+			/*
+			 * Rebalancing should have provided us with
+			 * space in an appropriate leaf.
+			 */
+			ret = OCFS2_ET_DIR_NO_SPACE;
+			goto out;
+		}
+
+		ret = ocfs2_dx_dir_rebalance(fs, dx_root, leaf,
+					&lookup->dl_hinfo, cpos, leaf_blkno);
+		if (ret)
+			goto out;
+		did_rebalance = 1;
+	}
+
+	lookup->dl_dx_leaf_blkno = leaf_blkno;
+
+out:
+	if (leaf_buf)
+		ocfs2_free(&leaf_buf);
+	return ret;
+}
+
+/*
+ * Hashing code adapted from ext3
+ */
+#define DELTA 0x9E3779B9
+
+/*
+ * Run 16 TEA rounds that mix the four key words in[0..3] into the
+ * running state buf[0], buf[1].  The mixed values are added onto the
+ * previous state; buf[2] and buf[3] are neither read nor written.
+ */
+static void TEA_transform(uint32_t buf[4], uint32_t const in[])
+{
+	uint32_t x = buf[0], y = buf[1];
+	uint32_t k0 = in[0], k1 = in[1], k2 = in[2], k3 = in[3];
+	uint32_t sum = 0;
+	int round;
+
+	for (round = 0; round < 16; round++) {
+		sum += DELTA;
+		x += ((y << 4) + k0) ^ (y + sum) ^ ((y >> 5) + k1);
+		y += ((x << 4) + k2) ^ (x + sum) ^ ((x >> 5) + k3);
+	}
+
+	buf[0] += x;
+	buf[1] += y;
+}
+
+/*
+ * Pack up to num * 4 bytes of @msg into @num 32-bit words in @buf.
+ *
+ * Each word starts from a padding pattern built from @len and has the
+ * message bytes shifted in one at a time.  A trailing partial word is
+ * stored as is, and any remaining words are set to the padding pattern.
+ * Message bytes are added as plain "char", so their sign follows the
+ * platform's char type.
+ */
+static void str2hashbuf(const char *msg, int len, uint32_t *buf, int num)
+{
+	uint32_t fill, word;
+	int pos;
+
+	fill = (uint32_t)len | ((uint32_t)len << 8);
+	fill |= fill << 16;
+
+	if (len > (num * 4))
+		len = num * 4;
+
+	word = fill;
+	for (pos = 0; pos < len; pos++) {
+		if ((pos & 3) == 0)
+			word = fill;
+		word = msg[pos] + (word << 8);
+		if ((pos & 3) != 3)
+			continue;
+		/* four bytes collected: emit the word */
+		*buf++ = word;
+		word = fill;
+		num--;
+	}
+
+	/* partial (or pure padding) word, then fill up with padding */
+	num--;
+	if (num >= 0)
+		*buf++ = word;
+	for (num--; num >= 0; num--)
+		*buf++ = fill;
+}
+
+/*
+ * Compute the index hash of a directory entry name.
+ *
+ * @fs:    filesystem whose superblock supplies the hash seed
+ * @name:  entry name (need not be NUL terminated)
+ * @len:   length of @name in bytes
+ * @hinfo: receives the major and minor hash
+ *
+ * "." and ".." always hash to 0/0.  Every other name is consumed in
+ * 16-byte chunks, each packed by str2hashbuf() and mixed into the
+ * seeded state by TEA_transform().
+ */
+void ocfs2_dx_dir_name_hash(ocfs2_filesys *fs,
+				const char *name,
+				int len,
+				struct ocfs2_dx_hinfo *hinfo)
+{
+	const char *p;
+	uint32_t in[8], buf[4];
+
+	/*
+	 * XXX: Is this really necessary, if the index is never looked
+	 * at by readdir? Is a hash value of '0' a bad idea ?
+	 */
+	if ((len == 1 && !strncmp(".", name, 1)) ||
+	    (len == 2 && !strncmp("..", name, 2))) {
+		buf[0] = buf[1] = 0;
+		goto out;
+	}
+
+	/*
+	 * The superblock seed is handled as three words elsewhere, so
+	 * copy exactly those instead of reading sizeof(buf) bytes past
+	 * its end.  Only buf[0] and buf[1] feed the hash, so the result
+	 * is unchanged.
+	 */
+	buf[0] = OCFS2_RAW_SB(fs->fs_super)->s_dx_seed[0];
+	buf[1] = OCFS2_RAW_SB(fs->fs_super)->s_dx_seed[1];
+	buf[2] = OCFS2_RAW_SB(fs->fs_super)->s_dx_seed[2];
+	buf[3] = 0;
+
+	p = name;
+	while(len > 0) {
+		str2hashbuf(p, len, in, 4);
+		TEA_transform(buf, in);
+		len -= 16;
+		p += 16;
+	}
+
+out:
+	hinfo->major_hash = buf[0];
+	hinfo->minor_hash = buf[1];
+}
+
+/*
+ * Directory-iteration callback that adds one dirent to the index.
+ *
+ * @dentry:    directory entry whose name is hashed and indexed
+ * @blocknr:   block number recorded in the new index entry
+ * @priv_data: struct dx_insert_ctxt supplying the filesystem and the
+ *             dx_root block number
+ * @offset, @blocksize and @buf are not used.
+ *
+ * The dx_root is read from disk, the entry is inserted either into the
+ * inline entry list of the root or into a dx_leaf with free space, and
+ * the modified leaf (if any) and root are written back.
+ *
+ * NOTE(review): the errcode_t result is returned through the int
+ * return value of an iteration callback - confirm how the iterator
+ * interprets a non-zero error code.
+ */
+static int ocfs2_dx_dir_insert(struct ocfs2_dir_entry *dentry,
+				uint64_t blocknr,
+				int offset,
+				int blocksize,
+				char *buf,
+				void *priv_data)
+{
+	errcode_t ret = 0;
+	char *dx_buf = NULL;
+	char *dx_leaf_buf = NULL;
+	struct ocfs2_dx_root_block *dx_root = NULL;
+	struct ocfs2_dx_leaf *dx_leaf = NULL;
+	struct ocfs2_dir_lookup_result lookup;
+	struct ocfs2_dx_entry_list *entry_list;
+	struct dx_insert_ctxt *ctxt = (struct dx_insert_ctxt *)priv_data;
+	ocfs2_filesys *fs = ctxt->fs;
+	uint64_t dx_root_blkno = ctxt->dx_root_blkno;
+	int write_dx_leaf = 0;
+
+	ret = ocfs2_malloc_block(fs->fs_io, &dx_buf);
+	if (ret)
+		goto out;
+
+	ret = ocfs2_malloc_block(fs->fs_io, &dx_leaf_buf);
+	if (ret)
+		goto out;
+
+	ret = ocfs2_read_dx_root(fs, dx_root_blkno, dx_buf);
+	if (ret)
+		goto out;
+
+	dx_root = (struct ocfs2_dx_root_block *)dx_buf;
+	memset(&lookup, 0, sizeof(struct ocfs2_dir_lookup_result));
+	ocfs2_dx_dir_name_hash(fs, dentry->name,
+				dentry->name_len, &lookup.dl_hinfo);
+
+	if (dx_root->dr_flags & OCFS2_DX_FLAG_INLINE) {
+		if (ocfs2_inline_dx_has_space(dx_root)) {
+			/* entries still fit inside the root block itself */
+			entry_list = &dx_root->dr_entries;
+			goto insert_into_entries;
+		} else {
+			/* root block is full, expand it to an extent */
+			ret = ocfs2_expand_inline_dx_root(fs, dx_root);
+			if (ret)
+				goto out;
+		}
+	}
+
+	/* non-inline index: find (or make) a leaf with a free slot */
+	ret = ocfs2_find_dir_space_dx(fs, dx_root,
+				dentry->name, dentry->name_len, &lookup);
+	if (ret)
+		goto out;
+	ret = ocfs2_read_dx_leaf(fs, lookup.dl_dx_leaf_blkno, dx_leaf_buf);
+	if (ret)
+		goto out;
+	dx_leaf = (struct ocfs2_dx_leaf *)dx_leaf_buf;
+	entry_list = &dx_leaf->dl_list;
+	write_dx_leaf = 1;
+
+insert_into_entries:
+	ocfs2_dx_entry_list_insert(entry_list, &lookup.dl_hinfo, blocknr);
+	if (write_dx_leaf) {
+		ret = ocfs2_write_dx_leaf(fs, dx_leaf->dl_blkno, dx_leaf);
+		if (ret)
+			goto out;
+	}
+	/* the root keeps the total entry count in both layouts */
+	dx_root->dr_num_entries += 1;
+	ret = ocfs2_write_dx_root(fs, dx_root_blkno, dx_buf);
+out:
+	if (dx_leaf_buf)
+		ocfs2_free(&dx_leaf_buf);
+	if (dx_buf)
+		ocfs2_free(&dx_buf);
+	return ret;
+}
+
+/*
+ * Add an index entry for @name to the indexed directory @dir.
+ *
+ * @fs:    filesystem
+ * @dir:   block number of the directory inode
+ * @name:  NUL-terminated entry name; must not be NULL
+ * @ino:   inode the entry refers to (not used by the index)
+ * @blkno: directory block that holds the dirent
+ *
+ * Does nothing and returns 0 when the filesystem does not support
+ * indexed directories or when @dir itself is not indexed.  Returns
+ * OCFS2_ET_INVALID_ARGUMENT when @name does not fit into a directory
+ * entry, otherwise the result of the insertion.
+ */
+errcode_t ocfs2_dx_dir_insert_entry(ocfs2_filesys *fs, uint64_t dir, const char *name,
+				uint64_t ino, uint64_t blkno)
+{
+	struct ocfs2_dir_entry dummy_de;
+	struct dx_insert_ctxt dummy_ctxt;
+	char *di_buf = NULL;
+	struct ocfs2_dinode *di;
+	size_t namelen;
+	errcode_t ret = 0;
+
+	if (!ocfs2_supports_indexed_dirs(OCFS2_RAW_SB(fs->fs_super)))
+		goto out;
+
+	assert(name);
+
+	/* never copy more than the dirent's name field can hold */
+	namelen = strlen(name);
+	if (namelen > sizeof(dummy_de.name)) {
+		ret = OCFS2_ET_INVALID_ARGUMENT;
+		goto out;
+	}
+
+	memset(&dummy_de, 0, sizeof(struct ocfs2_dir_entry));
+	memcpy(dummy_de.name, name, namelen);
+	dummy_de.name_len = namelen;
+
+	ret = ocfs2_malloc_block(fs->fs_io, &di_buf);
+	if (ret)
+		goto out;
+	ret = ocfs2_read_inode(fs, dir, di_buf);
+	if (ret)
+		goto out;
+	di = (struct ocfs2_dinode *)di_buf;
+
+	if (!(di->i_dyn_features & OCFS2_INDEXED_DIR_FL))
+		goto out;
+
+	memset(&dummy_ctxt, 0, sizeof(struct dx_insert_ctxt));
+	dummy_ctxt.dir_blkno = dir;
+	dummy_ctxt.fs = fs;
+	dummy_ctxt.dx_root_blkno = di->i_dx_root;
+
+	ret = ocfs2_dx_dir_insert(&dummy_de, blkno, 0,
+			fs->fs_blocksize, NULL, &dummy_ctxt);
+out:
+	if (di_buf)
+		ocfs2_free(&di_buf);
+	return ret;
+}
+
+
+/*
+ * Build the index of directory @dir from scratch and set the indexed
+ * dir attribute on its inode.  The caller must make sure that any
+ * previous index tree of the directory has been truncated.
+ *
+ * Directories that are already indexed or that store their data inline
+ * are left untouched and 0 is returned.
+ *
+ * Currently tunefs.ocfs2 is the only user; before calling this
+ * function it makes sure there is space for the directory trailers,
+ * so no directory entries have to be moved here.
+ *
+ * The clusters used by the new index are charged to the quota of the
+ * directory's owner; when that fails the index is truncated again.
+ */
+errcode_t ocfs2_dx_dir_build(ocfs2_filesys *fs,
+			uint64_t dir)
+{
+	errcode_t ret = 0, err;
+	uint64_t dr_blkno;
+	char *dx_buf = NULL, *di_buf = NULL;
+	struct ocfs2_dinode *di;
+	struct ocfs2_dx_root_block *dx_root;
+	struct dx_insert_ctxt ctxt;
+	ocfs2_quota_hash *usrhash = NULL, *grphash = NULL;
+	uint32_t uid, gid;
+	long long change;
+
+	ret = ocfs2_load_fs_quota_info(fs);
+	if (ret)
+		goto out;
+
+	ret = ocfs2_init_quota_change(fs, &usrhash, &grphash);
+	if (ret)
+		goto out;
+
+	ret = ocfs2_malloc_block(fs->fs_io, &di_buf);
+	if (ret)
+		goto out;
+
+	ret = ocfs2_read_inode(fs, dir, di_buf);
+	if (ret)
+		goto out;
+	di = (struct ocfs2_dinode *)di_buf;
+
+	if ((ocfs2_dir_indexed(di)) ||
+	    (di->i_dyn_features & OCFS2_INLINE_DATA_FL))
+		goto out;
+
+	ret = ocfs2_new_dx_root(fs, di, &dr_blkno);
+	if (ret)
+		goto out;
+
+	ret = ocfs2_malloc_block(fs->fs_io, &dx_buf);
+	if (ret)
+		goto out;
+
+	ret = ocfs2_read_dx_root(fs, dr_blkno, dx_buf);
+	if (ret)
+		goto out;
+	dx_root = (struct ocfs2_dx_root_block *)dx_buf;
+
+	ret = ocfs2_init_dir_trailers(fs, di, dx_root);
+	if (ret)
+		goto out;
+
+	dx_root->dr_dir_blkno = di->i_blkno;
+	dx_root->dr_num_entries = 0;
+	dx_root->dr_entries.de_count = ocfs2_dx_entries_per_root(fs->fs_blocksize);
+
+	di->i_dx_root = dr_blkno;
+	di->i_dyn_features |= OCFS2_INDEXED_DIR_FL;
+
+	/* root and inode must be on disk before entries are inserted,
+	 * the insert callback re-reads the root for every entry */
+	ret = ocfs2_write_dx_root(fs, dr_blkno, dx_buf);
+	if (ret)
+		goto out;
+	ret = ocfs2_write_inode(fs, dir, di_buf);
+	if (ret)
+		goto out;
+
+	memset(&ctxt, 0, sizeof(struct dx_insert_ctxt));
+	ctxt.dir_blkno = dir;
+	ctxt.dx_root_blkno = dr_blkno;
+	ctxt.fs = fs;
+	ret = ocfs2_dir_iterate(fs, dir, 0, NULL,
+				ocfs2_dx_dir_insert,  &ctxt);
+	/* do not let the re-reads below hide a failed iteration */
+	if (ret)
+		goto out;
+
+	/* check quota for dx_leaf */
+	ret = ocfs2_read_dx_root(fs, dr_blkno, dx_buf);
+	if (ret)
+		goto out;
+	ret = ocfs2_read_inode(fs, dir, di_buf);
+	if (ret)
+		goto out;
+
+	change = ocfs2_clusters_to_bytes(fs,
+				dx_root->dr_clusters);
+	uid = di->i_uid;
+	gid = di->i_gid;
+
+	ret = ocfs2_apply_quota_change(fs, usrhash, grphash,
+					uid, gid, change, 0);
+	if (ret) {
+		/* exceed quota, truncate the indexed tree */
+		ret = ocfs2_dx_dir_truncate(fs, dir);
+	}
+
+out:
+	err = ocfs2_finish_quota_change(fs, usrhash, grphash);
+	if (!ret)
+		ret = err;
+
+	if (di_buf)
+		ocfs2_free(&di_buf);
+	if (dx_buf)
+		ocfs2_free(&dx_buf);
+
+	return ret;
+}
+
+/*
+ * Remove the entry at @index from @entry_list.
+ *
+ * Entries behind @index are shifted down by one, the slot that becomes
+ * free at the end is zeroed and de_num_used is decremented.
+ */
+void ocfs2_dx_list_remove_entry(struct ocfs2_dx_entry_list *entry_list,
+				int index)
+{
+	int last = entry_list->de_num_used - 1;
+
+	/* nothing to shift when the list has one entry or the last
+	 * entry is the one being removed */
+	if (last != 0 && index != last)
+		memmove(&entry_list->de_entries[index],
+			&entry_list->de_entries[index + 1],
+			(last - index)*sizeof(struct ocfs2_dx_entry));
+
+	memset(&entry_list->de_entries[last], 0,
+		sizeof(struct ocfs2_dx_entry));
+	entry_list->de_num_used = last;
+}
+
+/*
+ * Return 1 when @de is an in-use entry (non-zero inode) whose name is
+ * exactly the @len bytes at @name, 0 otherwise.
+ */
+static int ocfs2_match(int len,
+			const char *name,
+			struct ocfs2_dir_entry *de)
+{
+	if (len == de->name_len && de->inode)
+		return memcmp((char *)name, de->name, len) == 0;
+
+	return 0;
+}
+
+/*
+ * Sanity-check one directory entry.
+ *
+ * @de:      entry to check
+ * @dir_buf: start of the directory block that contains @de
+ * @offset:  not used
+ *
+ * Returns 1 when the entry looks valid and 0 when its rec_len is
+ * smaller than the minimal record, is not a multiple of 4, is too
+ * small for the name, or makes the entry run past the end of the block.
+ */
+int ocfs2_check_dir_entry(ocfs2_filesys *fs,
+			struct ocfs2_dir_entry *de,
+			char *dir_buf,
+			unsigned int offset)
+{
+	int rlen = de->rec_len;
+	int ret = 1;
+
+	/*
+	 * The last test has to include rec_len: an entry that starts
+	 * inside the block but extends beyond it is just as invalid as
+	 * one that starts outside of it.
+	 */
+	if ((rlen < OCFS2_DIR_REC_LEN(1)) ||
+	    (rlen % 4 != 0) ||
+	    (rlen < OCFS2_DIR_REC_LEN(de->name_len)) ||
+	    (((char *)de - dir_buf) + rlen > fs->fs_blocksize))
+		ret = 0;
+
+	return ret;
+}
+
+/*
+ * Scan the first @bytes bytes of @dir_buf for an entry named @name.
+ *
+ * Returns 1 and stores the entry in *@res_dir (when @res_dir is not
+ * NULL) if a matching, valid entry is found, 0 if the name is not in
+ * the block, and -1 if the matching entry fails
+ * ocfs2_check_dir_entry() or an entry with a zero rec_len is hit.
+ */
+int ocfs2_search_dirblock(ocfs2_filesys *fs,
+				char *dir_buf,
+				const char *name,
+				int namelen,
+				unsigned int bytes,
+				struct ocfs2_dir_entry **res_dir)
+{
+	char *cursor = (char *)dir_buf;
+	char *limit = cursor + bytes;
+	int pos = 0;
+
+	while(cursor < limit) {
+		struct ocfs2_dir_entry *de = (struct ocfs2_dir_entry *)cursor;
+		int step;
+
+		if ((cursor + namelen <= limit) &&
+		     ocfs2_match(namelen, name, de)) {
+			/* only the matching entry is fully validated */
+			if (!ocfs2_check_dir_entry(fs, de, dir_buf, pos))
+				return -1;
+			if (res_dir)
+				*res_dir = de;
+			return 1;
+		}
+
+		/* a non-positive rec_len would never advance the scan */
+		step = de->rec_len;
+		if (step <= 0)
+			return -1;
+
+		cursor += step;
+		pos += step;
+	}
+
+	return 0;
+}
+
+/*
+ * Look up @name in an indexed directory.
+ *
+ * @fs:      filesystem
+ * @name:    name to find, @namelen bytes long
+ * @dx_root: index root of the directory
+ * @lookup:  carries the name hash in dl_hinfo on entry; receives the
+ *           search result on success
+ *
+ * The entry list to search is the inline list of @dx_root or the list
+ * of the dx_leaf selected by ocfs2_dx_dir_lookup().  For every index
+ * entry with a matching hash the referenced directory block is read
+ * and scanned for the name.
+ *
+ * Returns 0 when the name is found.  In that case @lookup owns the
+ * directory block buffer (dl_leaf) and, for a non-inline index, the
+ * leaf buffer (dl_dx_leaf); release_lookup_res() frees them.  Returns
+ * OCFS2_ET_DIRENT_NOT_FOUND when the name is not present (including an
+ * empty entry list), OCFS2_ET_DIR_CORRUPTED when the entry list counters
+ * or a directory block are inconsistent, or an I/O or allocation error.
+ *
+ * NOTE(review): directory blocks are read raw with ocfs2_read_blocks();
+ * confirm whether the handling done by ocfs2_read_dir_block() is needed
+ * here.
+ */
+errcode_t ocfs2_dx_dir_search(ocfs2_filesys *fs,
+			const char *name,
+			int namelen,
+			struct ocfs2_dx_root_block *dx_root,
+			struct ocfs2_dir_lookup_result *lookup)
+{
+	errcode_t ret;
+	char *dir_buf = NULL, *dx_leaf_buf = NULL;
+	struct ocfs2_dx_entry_list *entry_list;
+	struct ocfs2_dx_leaf *dx_leaf;
+	struct ocfs2_dx_entry *dx_entry = NULL;
+	struct ocfs2_dir_entry *dir_ent = NULL;
+	uint32_t leaf_cpos;
+	uint64_t blkno = 0;
+	int i, found;
+
+	if (dx_root->dr_flags & OCFS2_DX_FLAG_INLINE)
+		entry_list = &dx_root->dr_entries;
+	else {
+		ret = ocfs2_dx_dir_lookup(fs, dx_root, &dx_root->dr_list,
+				&lookup->dl_hinfo, &leaf_cpos, &blkno);
+		if (ret)
+			goto out;
+
+		ret = ocfs2_malloc_block(fs->fs_io, &dx_leaf_buf);
+		if (ret)
+			goto out;
+
+		ret = ocfs2_read_dx_leaf(fs, blkno, dx_leaf_buf);
+		if (ret)
+			goto out;
+		dx_leaf = (struct ocfs2_dx_leaf *)dx_leaf_buf;
+		entry_list = &dx_leaf->dl_list;
+	}
+
+	/*
+	 * These counters come from disk.  Report inconsistent values as
+	 * corruption instead of aborting, and never walk past the
+	 * declared capacity of the list.  An empty list simply falls
+	 * through to "not found" below.
+	 */
+	if (entry_list->de_count == 0 ||
+	    entry_list->de_num_used > entry_list->de_count) {
+		ret = OCFS2_ET_DIR_CORRUPTED;
+		goto out;
+	}
+
+	ret = ocfs2_malloc_block(fs->fs_io, &dir_buf);
+	if (ret)
+		goto out;
+
+	found = 0;
+	for (i = 0; i < entry_list->de_num_used; i++) {
+		dx_entry = &entry_list->de_entries[i];
+		if ((lookup->dl_hinfo.major_hash != dx_entry->dx_major_hash) ||
+		    (lookup->dl_hinfo.minor_hash != dx_entry->dx_minor_hash))
+			continue;
+
+		/* hash matches: confirm by name in the dirent block */
+		ret = ocfs2_read_blocks(fs, dx_entry->dx_dirent_blk, 1, dir_buf);
+		if (ret)
+			goto out;
+
+		found = ocfs2_search_dirblock(fs, dir_buf, name, namelen,
+						fs->fs_blocksize, &dir_ent);
+		if (found == 1)
+			break;
+
+		if (found == -1) {
+			ret = OCFS2_ET_DIR_CORRUPTED;
+			goto out;
+		}
+	}
+
+	if (found <= 0) {
+		ret = OCFS2_ET_DIRENT_NOT_FOUND;
+		goto out;
+	}
+
+	lookup->dl_leaf = dir_buf;
+	lookup->dl_leaf_blkno = dx_entry->dx_dirent_blk;
+	lookup->dl_entry = dir_ent;
+	lookup->dl_dx_entry = dx_entry;
+	lookup->dl_dx_entry_idx = i;
+	if (!(dx_root->dr_flags & OCFS2_DX_FLAG_INLINE)) {
+		lookup->dl_dx_leaf = (struct ocfs2_dx_leaf *)dx_leaf_buf;
+		lookup->dl_dx_leaf_blkno = blkno;
+	}
+	ret = 0;
+out:
+	/* on success the buffers are handed over to @lookup */
+	if (ret) {
+		if (dir_buf)
+			ocfs2_free(&dir_buf);
+		if (dx_leaf_buf)
+			ocfs2_free(&dx_leaf_buf);
+	}
+	return ret;
+}
+
+/*
+ * Free the buffers a lookup result may hold: the dx_leaf buffer
+ * (dl_dx_leaf) and the directory block buffer (dl_leaf).  Members that
+ * are NULL are skipped.
+ */
+void release_lookup_res(struct ocfs2_dir_lookup_result *res)
+{
+	if (res->dl_dx_leaf)
+		ocfs2_free(&res->dl_dx_leaf);
+
+	if (res->dl_leaf)
+		ocfs2_free(&res->dl_leaf);
+}
+
+
diff --git a/libocfs2/dir_iterate.c b/libocfs2/dir_iterate.c
index 1064d9f..8a3f5a9 100644
--- a/libocfs2/dir_iterate.c
+++ b/libocfs2/dir_iterate.c
@@ -68,6 +68,7 @@ errcode_t ocfs2_dir_iterate2(ocfs2_filesys *fs,
 			     int (*func)(uint64_t	dir,
 					 int		entry,
 					 struct ocfs2_dir_entry *dirent,
+					 uint64_t blocknr,
 					 int	offset,
 					 int	blocksize,
 					 char	*buf,
@@ -131,6 +132,7 @@ out:
 
 struct xlate {
 	int (*func)(struct ocfs2_dir_entry *dirent,
+		    uint64_t	blocknr,
 		    int		offset,
 		    int		blocksize,
 		    char	*buf,
@@ -140,12 +142,12 @@ struct xlate {
 
 static int xlate_func(uint64_t dir,
 		      int entry,
-		      struct ocfs2_dir_entry *dirent, int offset,
-		      int blocksize, char *buf, void *priv_data)
+		      struct ocfs2_dir_entry *dirent, uint64_t blocknr,
+		      int offset, int blocksize, char *buf, void *priv_data)
 {
 	struct xlate *xl = (struct xlate *) priv_data;
 
-	return (*xl->func)(dirent, offset, blocksize, buf, xl->real_private);
+	return (*xl->func)(dirent, blocknr, offset, blocksize, buf, xl->real_private);
 }
 
 extern errcode_t ocfs2_dir_iterate(ocfs2_filesys *fs, 
@@ -153,6 +155,7 @@ extern errcode_t ocfs2_dir_iterate(ocfs2_filesys *fs,
 				   int flags,
 				   char *block_buf,
 				   int (*func)(struct ocfs2_dir_entry *dirent,
+					       uint64_t blocknr,
 					       int	offset,
 					       int	blocksize,
 					       char	*buf,
@@ -169,6 +172,7 @@ extern errcode_t ocfs2_dir_iterate(ocfs2_filesys *fs,
 }
 
 static int ocfs2_process_dir_entry(ocfs2_filesys *fs,
+				   uint64_t blocknr,
 				   unsigned int offset,
 				   int entry,
 				   int *changed,
@@ -203,7 +207,7 @@ static int ocfs2_process_dir_entry(ocfs2_filesys *fs,
 		ret = (ctx->func)(ctx->dir,
 				  (next_real_entry > offset) ?
 				  OCFS2_DIRENT_DELETED_FILE : entry,
-				  dirent, offset,
+				  dirent, blocknr, offset,
 				  fs->fs_blocksize, ctx->buf,
 				  ctx->priv_data);
 		if (entry < OCFS2_DIRENT_OTHER_FILE)
@@ -250,7 +254,7 @@ static int ocfs2_inline_dir_iterate(ocfs2_filesys *fs,
 
 	entry = OCFS2_DIRENT_DOT_FILE;
 
-	ret = ocfs2_process_dir_entry(fs, offset, entry, &changed,
+	ret = ocfs2_process_dir_entry(fs, di->i_blkno, offset, entry, &changed,
 				      &do_abort, ctx);
 	if (ret)
 		return ret;
@@ -291,7 +295,7 @@ int ocfs2_process_dir_block(ocfs2_filesys *fs,
 	if (ctx->errcode)
 		return OCFS2_BLOCK_ABORT;
 
-	ret = ocfs2_process_dir_entry(fs, offset, entry, &changed,
+	ret = ocfs2_process_dir_entry(fs, blocknr, offset, entry, &changed,
 				      &do_abort, ctx);
 	if (ret)
 		return ret;
diff --git a/libocfs2/dir_iterate.h b/libocfs2/dir_iterate.h
index 2d9587b..7f44702 100644
--- a/libocfs2/dir_iterate.h
+++ b/libocfs2/dir_iterate.h
@@ -35,6 +35,7 @@ struct dir_context {
 	int (*func)(uint64_t dir,
 		    int entry,
 		    struct ocfs2_dir_entry *dirent,
+		    uint64_t blocknr,
 		    int offset,
 		    int blocksize,
 		    char *buf,
diff --git a/libocfs2/dirblock.c b/libocfs2/dirblock.c
index 06a1b64..c22d843 100644
--- a/libocfs2/dirblock.c
+++ b/libocfs2/dirblock.c
@@ -33,7 +33,6 @@
 #include "ocfs2/byteorder.h"
 #include "ocfs2/ocfs2.h"
 
-
 unsigned int ocfs2_dir_trailer_blk_off(ocfs2_filesys *fs)
 {
 	return fs->fs_blocksize - sizeof(struct ocfs2_dir_block_trailer);
@@ -79,6 +78,15 @@ int ocfs2_skip_dir_trailer(ocfs2_filesys *fs, struct ocfs2_dinode *di,
 	return 1;
 }
 
+/*
+ * We are sure there is prepared space for the trailer, so no directory
+ * entry will overlap with the trailer:
+ * - if we rebuild the indexed tree for a directory, no dir entry
+ *   will overwrite the trailer's space.
+ * - if we build the indexed tree with tunefs.ocfs2, it enables the
+ *   meta ecc feature before enabling the indexed dirs feature, which
+ *   means the space for each trailer is already prepared.
+ */
 void ocfs2_init_dir_trailer(ocfs2_filesys *fs, struct ocfs2_dinode *di,
 			    uint64_t blkno, void *buf)
 {
@@ -105,9 +113,9 @@ static void ocfs2_swap_dir_entry(struct ocfs2_dir_entry *dirent)
 static errcode_t ocfs2_swap_dir_entries_direction(void *buf, uint64_t bytes,
 						  int to_cpu)
 {
-	char		*p, *end;
+	char *p, *end;
 	struct ocfs2_dir_entry *dirent;
-	unsigned int	name_len, rec_len;
+	unsigned int name_len, rec_len;
 	errcode_t retval = 0;
 
 	p = (char *) buf;
@@ -160,8 +168,8 @@ void ocfs2_swap_dir_trailer(struct ocfs2_dir_block_trailer *trailer)
 errcode_t ocfs2_read_dir_block(ocfs2_filesys *fs, struct ocfs2_dinode *di,
 			       uint64_t block, void *buf)
 {
-	errcode_t	retval;
-	int		end = fs->fs_blocksize;
+	errcode_t retval;
+	int end = fs->fs_blocksize;
 	struct ocfs2_dir_block_trailer *trailer = NULL;
 
 	retval = ocfs2_read_blocks(fs, block, 1, buf);
@@ -197,9 +205,9 @@ out:
 errcode_t ocfs2_write_dir_block(ocfs2_filesys *fs, struct ocfs2_dinode *di,
 				uint64_t block, void *inbuf)
 {
-	errcode_t	retval;
-	char		*buf = NULL;
-	int		end = fs->fs_blocksize;
+	errcode_t retval;
+	char *buf = NULL;
+	int end = fs->fs_blocksize;
 	struct ocfs2_dir_block_trailer *trailer = NULL;
 
 	retval = ocfs2_malloc_block(fs->fs_io, &buf);
@@ -230,28 +238,36 @@ out:
 	return retval;
 }
 
-static void ocfs2_swap_dx_entry_to_cpu(struct ocfs2_dx_entry *dx_entry)
+static void ocfs2_swap_dx_entry(struct ocfs2_dx_entry *dx_entry)
 {
-	if (cpu_is_little_endian)
-		return;
-
 	dx_entry->dx_major_hash		= bswap_32(dx_entry->dx_major_hash);
 	dx_entry->dx_minor_hash		= bswap_32(dx_entry->dx_minor_hash);
 	dx_entry->dx_dirent_blk		= bswap_64(dx_entry->dx_dirent_blk);
 }
 
-static void ocfs2_swap_dx_entry_list_to_cpu(struct ocfs2_dx_entry_list *dl_list)
+static void ocfs2_swap_dx_entry_list(struct ocfs2_dx_entry_list *dl_list)
 {
 	int i;
 
-	if (cpu_is_little_endian)
-		return;
-
 	dl_list->de_count	= bswap_16(dl_list->de_count);
 	dl_list->de_num_used	= bswap_16(dl_list->de_num_used);
 
 	for (i = 0; i < dl_list->de_count; i++)
-		ocfs2_swap_dx_entry_to_cpu(&dl_list->de_entries[i]);
+		ocfs2_swap_dx_entry(&dl_list->de_entries[i]);
+}
+
+/* Byte-swap a dx entry list read from disk; no-op on little-endian CPUs. */
+static void ocfs2_swap_dx_entry_list_to_cpu(struct ocfs2_dx_entry_list *dl_list)
+{
+	if (!cpu_is_little_endian)
+		ocfs2_swap_dx_entry_list(dl_list);
+}
+
+/*
+ * Convert a dx entry list from CPU to on-disk byte order; no-op on
+ * little-endian CPUs.
+ *
+ * The entries have to be swapped while de_count is still in CPU order.
+ * ocfs2_swap_dx_entry_list() swaps the counters first, which is only
+ * right for the disk-to-CPU direction, so it cannot be used here.
+ */
+static void ocfs2_swap_dx_entry_list_from_cpu(struct ocfs2_dx_entry_list *dl_list)
+{
+	int i;
+
+	if (cpu_is_little_endian)
+		return;
+
+	for (i = 0; i < dl_list->de_count; i++)
+		ocfs2_swap_dx_entry(&dl_list->de_entries[i]);
+
+	dl_list->de_count	= bswap_16(dl_list->de_count);
+	dl_list->de_num_used	= bswap_16(dl_list->de_num_used);
 }
 
 static void ocfs2_swap_dx_root_to_cpu(ocfs2_filesys *fs,
@@ -276,45 +292,124 @@ static void ocfs2_swap_dx_root_to_cpu(ocfs2_filesys *fs,
 		ocfs2_swap_extent_list_to_cpu(fs, dx_root, &dx_root->dr_list);
 }
 
+/*
+ * Convert a dx_root block from CPU to on-disk byte order; no-op on
+ * little-endian CPUs.  The payload is an inline entry list or an
+ * extent list, selected by OCFS2_DX_FLAG_INLINE in dr_flags.
+ */
+static void ocfs2_swap_dx_root_from_cpu(ocfs2_filesys *fs,
+				struct ocfs2_dx_root_block *dx_root)
+{
+	if (cpu_is_little_endian)
+		return;
+
+	/* 16-bit fields */
+	dx_root->dr_suballoc_slot	= bswap_16(dx_root->dr_suballoc_slot);
+	dx_root->dr_suballoc_bit	= bswap_16(dx_root->dr_suballoc_bit);
+
+	/* 32-bit fields */
+	dx_root->dr_fs_generation	= bswap_32(dx_root->dr_fs_generation);
+	dx_root->dr_clusters		= bswap_32(dx_root->dr_clusters);
+	dx_root->dr_num_entries		= bswap_32(dx_root->dr_num_entries);
+
+	/* 64-bit fields */
+	dx_root->dr_blkno		= bswap_64(dx_root->dr_blkno);
+	dx_root->dr_last_eb_blk		= bswap_64(dx_root->dr_last_eb_blk);
+	dx_root->dr_dir_blkno		= bswap_64(dx_root->dr_dir_blkno);
+	dx_root->dr_free_blk		= bswap_64(dx_root->dr_free_blk);
+
+	if (!(dx_root->dr_flags & OCFS2_DX_FLAG_INLINE))
+		ocfs2_swap_extent_list_from_cpu(fs, dx_root, &dx_root->dr_list);
+	else
+		ocfs2_swap_dx_entry_list_from_cpu(&dx_root->dr_entries);
+}
+
+/* XXX: should use the errcode_t return value */
 errcode_t ocfs2_read_dx_root(ocfs2_filesys *fs, uint64_t block,
 			     void *buf)
 {
-	errcode_t	ret;
+	errcode_t ret;
 	struct ocfs2_dx_root_block *dx_root;
+	char *dx_root_buf = NULL;
 
-	ret = ocfs2_read_blocks(fs, block, 1, buf);
+	ret = ocfs2_malloc_block(fs->fs_io, &dx_root_buf);
 	if (ret)
-		return ret;
+		goto out;
 
-	dx_root = (struct ocfs2_dx_root_block *)buf;
-	ret = ocfs2_validate_meta_ecc(fs, buf, &dx_root->dr_check);
+	ret = ocfs2_read_blocks(fs, block, 1, dx_root_buf);
 	if (ret)
-		return ret;
+		goto out;
+
+	dx_root = (struct ocfs2_dx_root_block *)dx_root_buf;
+	ret = ocfs2_validate_meta_ecc(fs, dx_root_buf, &dx_root->dr_check);
+	if (ret)
+		goto out;
 
 	if (memcmp(dx_root->dr_signature, OCFS2_DX_ROOT_SIGNATURE,
-		   strlen(OCFS2_DX_ROOT_SIGNATURE)))
-		return OCFS2_ET_DIR_CORRUPTED;
+		   strlen(OCFS2_DX_ROOT_SIGNATURE))) {
+		ret = OCFS2_ET_DIR_CORRUPTED;
+		goto out;
+	}
 
 	ocfs2_swap_dx_root_to_cpu(fs, dx_root);
+	memcpy(buf, dx_root_buf, fs->fs_blocksize);
+	ret = 0;
+out:
+	if (dx_root_buf)
+		ocfs2_free(&dx_root_buf);
+	return ret;
+}
 
-	return 0;
+/*
+ * Write a dx_root block to disk.
+ *
+ * @block: destination block number
+ * @buf:   dx_root in CPU byte order; it is not modified
+ *
+ * The block is copied into a scratch buffer, converted to on-disk byte
+ * order and given a fresh meta ecc before being written.  Returns
+ * OCFS2_ET_RO_FILESYS when the filesystem is not opened read-write,
+ * OCFS2_ET_BAD_BLKNO when @block is out of range, or the allocation or
+ * I/O error.  OCFS2_FLAG_CHANGED is set after a successful write.
+ */
+errcode_t ocfs2_write_dx_root(ocfs2_filesys *fs, uint64_t block,
+				char *buf)
+{
+	struct ocfs2_dx_root_block *disk_root;
+	char *scratch = NULL;
+	errcode_t ret;
+
+	if (!(fs->fs_flags & OCFS2_FLAG_RW))
+		return OCFS2_ET_RO_FILESYS;
+
+	if (block < OCFS2_SUPER_BLOCK_BLKNO || block > fs->fs_blocks)
+		return OCFS2_ET_BAD_BLKNO;
+
+	ret = ocfs2_malloc_block(fs->fs_io, &scratch);
+	if (ret)
+		goto out;
+
+	/* work on a copy so the caller's buffer stays in CPU order */
+	memcpy(scratch, buf, fs->fs_blocksize);
+	disk_root = (struct ocfs2_dx_root_block *)scratch;
+	ocfs2_swap_dx_root_from_cpu(fs, disk_root);
+
+	ocfs2_compute_meta_ecc(fs, scratch, &disk_root->dr_check);
+	ret = io_write_block(fs->fs_io, block, 1, scratch);
+	if (ret)
+		goto out;
+
+	fs->fs_flags |= OCFS2_FLAG_CHANGED;
+
+out:
+	if (scratch)
+		ocfs2_free(&scratch);
+	return ret;
+}
+
+/*
+ * Byte-swap the header fields and the entry list of a dx_leaf.
+ * dl_fs_generation is a 32-bit on-disk field (like dr_fs_generation in
+ * the dx_root), so it must be swapped as 32 bits; a 64-bit swap stores
+ * the wrong half.
+ */
+static void ocfs2_swap_dx_leaf(struct ocfs2_dx_leaf *dx_leaf)
+{
+	dx_leaf->dl_blkno = bswap_64(dx_leaf->dl_blkno);
+	dx_leaf->dl_fs_generation = bswap_32(dx_leaf->dl_fs_generation);
+
+	ocfs2_swap_dx_entry_list(&dx_leaf->dl_list);
 }
 
 static void ocfs2_swap_dx_leaf_to_cpu(struct ocfs2_dx_leaf *dx_leaf)
 {
 	if (cpu_is_little_endian)
 		return;
+	ocfs2_swap_dx_leaf(dx_leaf);
+}
 
-	dx_leaf->dl_blkno = bswap_64(dx_leaf->dl_blkno);
-	dx_leaf->dl_fs_generation = bswap_64(dx_leaf->dl_fs_generation);
-
-	ocfs2_swap_dx_entry_list_to_cpu(&dx_leaf->dl_list);
+/*
+ * Convert a dx_leaf from CPU to on-disk byte order; no-op on
+ * little-endian CPUs.
+ *
+ * The entries are swapped while de_count is still in CPU order and the
+ * counters are swapped last; swapping the counters first would make the
+ * loop run over a byte-swapped count.  dl_fs_generation is a 32-bit
+ * field.
+ */
+static void ocfs2_swap_dx_leaf_from_cpu(struct ocfs2_dx_leaf *dx_leaf)
+{
+	struct ocfs2_dx_entry_list *dl_list = &dx_leaf->dl_list;
+	int i;
+
+	if (cpu_is_little_endian)
+		return;
+
+	dx_leaf->dl_blkno = bswap_64(dx_leaf->dl_blkno);
+	dx_leaf->dl_fs_generation = bswap_32(dx_leaf->dl_fs_generation);
+
+	for (i = 0; i < dl_list->de_count; i++)
+		ocfs2_swap_dx_entry(&dl_list->de_entries[i]);
+
+	dl_list->de_count	= bswap_16(dl_list->de_count);
+	dl_list->de_num_used	= bswap_16(dl_list->de_num_used);
 }
 
 errcode_t ocfs2_read_dx_leaf(ocfs2_filesys *fs, uint64_t block,
 			     void *buf)
 {
-	errcode_t	ret;
+	errcode_t ret;
 	struct ocfs2_dx_leaf *dx_leaf;
 
 	ret = ocfs2_read_blocks(fs, block, 1, buf);
@@ -335,6 +430,42 @@ errcode_t ocfs2_read_dx_leaf(ocfs2_filesys *fs, uint64_t block,
 	return 0;
 }
 
+/*
+ * Write a dx_leaf block to disk.
+ *
+ * @block: destination block number
+ * @buf:   dx_leaf in CPU byte order; it is not modified
+ *
+ * The leaf is copied into a scratch buffer, converted to on-disk byte
+ * order and given a fresh meta ecc before being written.  Returns
+ * OCFS2_ET_RO_FILESYS when the filesystem is not opened read-write,
+ * OCFS2_ET_BAD_BLKNO when @block is out of range, or the allocation or
+ * I/O error.  OCFS2_FLAG_CHANGED is set after a successful write.
+ */
+errcode_t ocfs2_write_dx_leaf(ocfs2_filesys *fs, uint64_t block,
+				void *buf)
+{
+	struct ocfs2_dx_leaf *disk_leaf;
+	char *scratch = NULL;
+	errcode_t ret;
+
+	if (!(fs->fs_flags & OCFS2_FLAG_RW))
+		return OCFS2_ET_RO_FILESYS;
+
+	if (block < OCFS2_SUPER_BLOCK_BLKNO || block > fs->fs_blocks)
+		return OCFS2_ET_BAD_BLKNO;
+
+	ret = ocfs2_malloc_block(fs->fs_io, &scratch);
+	if (ret)
+		goto out;
+
+	/* work on a copy so the caller's buffer stays in CPU order */
+	memcpy(scratch, buf, fs->fs_blocksize);
+	disk_leaf = (struct ocfs2_dx_leaf *)scratch;
+	ocfs2_swap_dx_leaf_from_cpu(disk_leaf);
+
+	ocfs2_compute_meta_ecc(fs, scratch, &disk_leaf->dl_check);
+	ret = io_write_block(fs->fs_io, block, 1, scratch);
+	if (!ret)
+		fs->fs_flags |= OCFS2_FLAG_CHANGED;
+
+out:
+	if (scratch)
+		ocfs2_free(&scratch);
+	return ret;
+}
+
 int ocfs2_dir_indexed(struct ocfs2_dinode *di)
 {
 	if (di->i_dyn_features & OCFS2_INDEXED_DIR_FL)
@@ -345,7 +476,7 @@ int ocfs2_dir_indexed(struct ocfs2_dinode *di)
 /*
  * Only use this when we already know the directory is indexed.
  */
-int __ocfs2_is_dir_trailer(ocfs2_filesys *fs, unsigned long de_off)
+static int __ocfs2_is_dir_trailer(ocfs2_filesys *fs, unsigned long de_off)
 {
 	if (de_off == ocfs2_dir_trailer_blk_off(fs))
 		return 1;
diff --git a/libocfs2/expanddir.c b/libocfs2/expanddir.c
index 6e9a192..ec05b74 100644
--- a/libocfs2/expanddir.c
+++ b/libocfs2/expanddir.c
@@ -69,6 +69,10 @@ errcode_t ocfs2_expand_dir(ocfs2_filesys *fs,
 	if (ocfs2_support_inline_data(OCFS2_RAW_SB(fs->fs_super)) &&
 	    cinode->ci_inode->i_dyn_features & OCFS2_INLINE_DATA_FL) {
 		ret = ocfs2_convert_inline_data_to_extents(cinode);
+		if ((ret == 0) &&
+		     ocfs2_supports_indexed_dirs(OCFS2_RAW_SB(fs->fs_super))) {
+			ret = ocfs2_dx_dir_build(fs, dir);
+		}
 		goto bail;
 	}
 
@@ -228,6 +232,18 @@ errcode_t ocfs2_init_dir(ocfs2_filesys *fs,
 			goto bail;
 	}
 
+	/*
+	 * Only build the indexed tree if the directory is created as
+	 * non-inline.  Otherwise the indexed tree is built when the inline
+	 * directory is converted to extents in ocfs2_expand_dir().
+	 * The negation must cover the whole flag test: "!x & FLAG" negates
+	 * x first and then masks the 0/1 result.
+	 */
+	if (ocfs2_supports_indexed_dirs(OCFS2_RAW_SB(fs->fs_super)) &&
+	    !(cinode->ci_inode->i_dyn_features & OCFS2_INLINE_DATA_FL)) {
+		ret = ocfs2_dx_dir_build(fs, dir);
+		if (ret)
+			goto bail;
+	}
+
 	/* set link count of the parent */
 	ret = ocfs2_read_inode(fs, parent_dir, buf);
 	if (ret)
@@ -243,8 +259,6 @@ errcode_t ocfs2_init_dir(ocfs2_filesys *fs,
 
 	/* update the inode */
 	ret = ocfs2_write_cached_inode(fs, cinode);
-	if (ret)
-		goto bail;
 
 bail:
 	if (buf)
diff --git a/libocfs2/extent_tree.c b/libocfs2/extent_tree.c
index fe1be7a..3cbdcf8 100644
--- a/libocfs2/extent_tree.c
+++ b/libocfs2/extent_tree.c
@@ -153,6 +153,49 @@ static struct ocfs2_extent_tree_operations ocfs2_xattr_value_et_ops = {
 	.eo_fill_root_el	= ocfs2_xattr_value_fill_root_el,
 };
 
+/*
+ * Extent tree callbacks for a tree rooted in a dx_root block.
+ * et->et_object points at the struct ocfs2_dx_root_block.
+ */
+
+/* Record @blkno as the last extent block of the dx_root. */
+static void ocfs2_dx_root_set_last_eb_blk(struct ocfs2_extent_tree *et,
+					  uint64_t blkno)
+{
+	struct ocfs2_dx_root_block *root = et->et_object;
+
+	root->dr_last_eb_blk = blkno;
+}
+
+/* Return the last extent block recorded in the dx_root. */
+static uint64_t ocfs2_dx_root_get_last_eb_blk(struct ocfs2_extent_tree *et)
+{
+	struct ocfs2_dx_root_block *root = et->et_object;
+
+	return root->dr_last_eb_blk;
+}
+
+/* Add @clusters to the cluster count of the dx_root. */
+static void ocfs2_dx_root_update_clusters(struct ocfs2_extent_tree *et,
+					  uint32_t clusters)
+{
+	struct ocfs2_dx_root_block *root = et->et_object;
+
+	root->dr_clusters += clusters;
+}
+
+/* Assert that the tree's object is a valid dx_root; returns 0. */
+static int ocfs2_dx_root_sanity_check(struct ocfs2_extent_tree *et)
+{
+	struct ocfs2_dx_root_block *root =
+		(struct ocfs2_dx_root_block *)et->et_object;
+
+	assert(OCFS2_IS_VALID_DX_ROOT(root));
+	return 0;
+}
+
+/* Point the tree's root extent list at dr_list of the dx_root. */
+static void ocfs2_dx_root_fill_root_el(struct ocfs2_extent_tree *et)
+{
+	struct ocfs2_dx_root_block *root = et->et_object;
+
+	et->et_root_el = &root->dr_list;
+}
+
+static struct ocfs2_extent_tree_operations ocfs2_dx_root_et_ops = {
+	.eo_set_last_eb_blk	= ocfs2_dx_root_set_last_eb_blk,
+	.eo_get_last_eb_blk	= ocfs2_dx_root_get_last_eb_blk,
+	.eo_update_clusters	= ocfs2_dx_root_update_clusters,
+	.eo_sanity_check	= ocfs2_dx_root_sanity_check,
+	.eo_fill_root_el	= ocfs2_dx_root_fill_root_el,
+};
+
 static void __ocfs2_init_extent_tree(struct ocfs2_extent_tree *et,
 				     ocfs2_filesys *fs,
 				     char *buf,
@@ -202,6 +245,15 @@ void ocfs2_init_xattr_value_extent_tree(struct ocfs2_extent_tree *et,
 				 xv, &ocfs2_xattr_value_et_ops);
 }
 
+/*
+ * Initialise @et as the extent tree rooted in the dx_root block held
+ * in @buf (block number @blkno).  The root is written back with
+ * ocfs2_write_dx_root() and @buf itself serves as the tree's object.
+ */
+void ocfs2_init_dx_root_extent_tree(struct ocfs2_extent_tree *et,
+				    ocfs2_filesys *fs,
+				    char *buf, uint64_t blkno)
+{
+	__ocfs2_init_extent_tree(et, fs, buf, blkno, ocfs2_write_dx_root,
+				 buf, &ocfs2_dx_root_et_ops);
+}
+
 static inline void ocfs2_et_set_last_eb_blk(struct ocfs2_extent_tree *et,
 					    uint64_t new_last_eb_blk)
 {
@@ -4182,3 +4234,4 @@ out:
 	ocfs2_free_path(path);
 	return ret;
 }
+
diff --git a/libocfs2/extent_tree.h b/libocfs2/extent_tree.h
index b7513dd..3f9b0dc 100644
--- a/libocfs2/extent_tree.h
+++ b/libocfs2/extent_tree.h
@@ -123,6 +123,9 @@ void ocfs2_init_xattr_value_extent_tree(struct ocfs2_extent_tree *et,
 					char *buf, uint64_t blkno,
 					ocfs2_root_write_func write,
 					struct ocfs2_xattr_value_root *xv);
+void ocfs2_init_dx_root_extent_tree(struct ocfs2_extent_tree *et,
+				    ocfs2_filesys *fs,
+				    char *buf, uint64_t blkno);
 errcode_t ocfs2_tree_insert_extent(ocfs2_filesys *fs,
 				   struct ocfs2_extent_tree *et,
 				   uint32_t cpos, uint64_t c_blkno,
@@ -135,6 +138,8 @@ int ocfs2_change_extent_flag(ocfs2_filesys *fs,
 int ocfs2_remove_extent(ocfs2_filesys *fs,
 			struct ocfs2_extent_tree *et,
 			uint32_t cpos, uint32_t len);
+
+
 /*
  * Structures which describe a path through a btree, and functions to
  * manipulate them.
diff --git a/libocfs2/inode.c b/libocfs2/inode.c
index 95419f4..fd88dc8 100644
--- a/libocfs2/inode.c
+++ b/libocfs2/inode.c
@@ -139,6 +139,9 @@ static void ocfs2_swap_inode_second(struct ocfs2_dinode *di)
 		sb->s_uuid_hash           = bswap_32(sb->s_uuid_hash);
 		sb->s_first_cluster_group = bswap_64(sb->s_first_cluster_group);
 		sb->s_xattr_inline_size   = bswap_16(sb->s_xattr_inline_size);
+		sb->s_dx_seed[0]          = bswap_32(sb->s_dx_seed[0]);
+		sb->s_dx_seed[1]          = bswap_32(sb->s_dx_seed[1]);
+		sb->s_dx_seed[2]          = bswap_32(sb->s_dx_seed[2]);
 
 	} else if (di->i_flags & OCFS2_LOCAL_ALLOC_FL) {
 		struct ocfs2_local_alloc *la = &di->id2.i_lab;
diff --git a/libocfs2/link.c b/libocfs2/link.c
index c89471d..310c24e 100644
--- a/libocfs2/link.c
+++ b/libocfs2/link.c
@@ -43,10 +43,12 @@ struct link_struct  {
 					      of the block.  This handles
 					      the directory trailer if it
 					      exists */
+	uint64_t		blkno;	/* block of the new dirent, as passed to link_proc */
 	struct ocfs2_dinode	*sb;
 };	
 
 static int link_proc(struct ocfs2_dir_entry *dirent,
+		     uint64_t	blocknr,
 		     int	offset,
 		     int	blocksize,
 		     char	*buf,
@@ -101,6 +103,7 @@ static int link_proc(struct ocfs2_dir_entry *dirent,
 	strncpy(dirent->name, ls->name, ls->namelen);
 	dirent->file_type = ls->flags;
 
+	ls->blkno = blocknr;
 	ls->done++;
 	return OCFS2_DIRENT_ABORT|OCFS2_DIRENT_CHANGED;
 }
@@ -172,6 +175,12 @@ errcode_t ocfs2_link(ocfs2_filesys *fs, uint64_t dir, const char *name,
 			retval = OCFS2_ET_INTERNAL_FAILURE;
 	}
 
+	if (ls.done) {
+		if (ocfs2_supports_indexed_dirs(OCFS2_RAW_SB(fs->fs_super)) &&
+		    (di->i_dyn_features & OCFS2_INDEXED_DIR_FL))
+			retval = ocfs2_dx_dir_insert_entry(fs, dir, ls.name,
+							ls.inode, ls.blkno);
+	}
 out_free:
 	ocfs2_free(&buf);
 
diff --git a/libocfs2/lookup.c b/libocfs2/lookup.c
index 374113c..dac83c0 100644
--- a/libocfs2/lookup.c
+++ b/libocfs2/lookup.c
@@ -30,6 +30,7 @@
 
 #include <string.h>
 #include <inttypes.h>
+#include <assert.h>
 
 #include "ocfs2/ocfs2.h"
 
@@ -45,6 +46,7 @@ struct lookup_struct  {
  #pragma argsused
 #endif
 static int lookup_proc(struct ocfs2_dir_entry *dirent,
+		       uint64_t	blocknr,
 		       int	offset,
 		       int	blocksize,
 		       char	*buf,
@@ -61,24 +63,81 @@ static int lookup_proc(struct ocfs2_dir_entry *dirent,
 	return OCFS2_DIRENT_ABORT;
 }
 
+/*
+ * Resolve ls->name through the directory's dx_root index; on a hit store the
+ * entry's inode in *ls->inode and bump ls->found.  @buf is unused here.
+ */
+static errcode_t ocfs2_find_entry_dx(ocfs2_filesys *fs,
+				struct ocfs2_dinode *di,
+				char *buf,
+				struct lookup_struct *ls)
+{
+	char *dx_root_buf = NULL;
+	struct ocfs2_dx_root_block *dx_root;
+	struct ocfs2_dir_lookup_result lookup;
+	errcode_t ret;
+
+	/* Zero first: out: hands lookup to release_lookup_res() on any exit. */
+	memset(&lookup, 0, sizeof(struct ocfs2_dir_lookup_result));
+	ret = ocfs2_malloc_block(fs->fs_io, &dx_root_buf);
+	if (ret)
+		goto out;
+	ret = ocfs2_read_dx_root(fs, di->i_dx_root, dx_root_buf);
+	if (ret)
+		goto out;
+	dx_root = (struct ocfs2_dx_root_block *)dx_root_buf;
+
+	ocfs2_dx_dir_name_hash(fs, ls->name, ls->len, &lookup.dl_hinfo);
+	ret = ocfs2_dx_dir_search(fs, ls->name, ls->len, dx_root, &lookup);
+	if (ret)
+		goto out;
+
+	*ls->inode = lookup.dl_entry->inode;
+	ls->found++;
+out:
+	release_lookup_res(&lookup);
+	if (dx_root_buf)
+		ocfs2_free(&dx_root_buf);
+	return ret;
+}
 
 errcode_t ocfs2_lookup(ocfs2_filesys *fs, uint64_t dir,
                        const char *name, int namelen, char *buf,
                        uint64_t *inode)
 {
-	errcode_t	retval;
+	errcode_t ret;
 	struct lookup_struct ls;
+	char *di_buf = NULL;	/* block buffer for the inode of @dir */
+	struct ocfs2_dinode *di;
 
 	ls.name = name;
 	ls.len = namelen;
 	ls.inode = inode;
 	ls.found = 0;
 
-	retval = ocfs2_dir_iterate(fs, dir, 0, buf, lookup_proc, &ls);
-	if (retval)
-		return retval;
+	ret = ocfs2_malloc_block(fs->fs_io, &di_buf);
+	if (ret)
+		goto out;
+	ret = ocfs2_read_inode(fs, dir, di_buf);
+	if (ret)
+		goto out;
+	di = (struct ocfs2_dinode *)di_buf;
+	/* Indexed dirs are resolved via the dx tree, others by iterating. */
+	if (ocfs2_supports_indexed_dirs(OCFS2_RAW_SB(fs->fs_super)) &&
+	    ocfs2_dir_indexed(di)) {
+		ret = ocfs2_find_entry_dx(fs, di, buf, &ls);
+	} else {
+		ret = ocfs2_dir_iterate(fs, dir, 0, buf, lookup_proc, &ls);
+	}
+	if (ret)
+		goto out;
 
-	return (ls.found) ? 0 : OCFS2_ET_FILE_NOT_FOUND;
+	ret = (ls.found) ? 0 : OCFS2_ET_FILE_NOT_FOUND;
+	/* NOTE(review): a dx miss exits above with the search's own error */
+out:
+	if(di_buf)
+		ocfs2_free(&di_buf);
+	return ret;
 }
 
 
diff --git a/libocfs2/ocfs2_err.et b/libocfs2/ocfs2_err.et
index 13b03c4..1226681 100644
--- a/libocfs2/ocfs2_err.et
+++ b/libocfs2/ocfs2_err.et
@@ -102,6 +102,9 @@ ec	OCFS2_ET_DIR_CORRUPTED,
 ec	OCFS2_ET_NO_DIRECTORY,
 	"OCFS2 inode is not a directory"
 
+ec	OCFS2_ET_DIRENT_NOT_FOUND,
+	"Directory entry not found"
+
 ec	OCFS2_ET_FILE_NOT_FOUND,
 	"File not found by ocfs2_lookup"
 
@@ -189,6 +192,9 @@ ec	OCFS2_ET_CORRUPT_QUOTA_FILE,
 ec	OCFS2_ET_CANNOT_DETERMINE_SECTOR_SIZE,
 	"Cannot determine sector size"
 
+ec	OCFS2_ET_DX_BALANCE_EMPTY_LEAF,
+	"Trying to rebalance empty leaf for indexed dir"
+
 ec	OCFS2_ET_NONEMTY_QUOTA_HASH,
 	"Freeing non-empty quota hash"
 
diff --git a/libocfs2/truncate.c b/libocfs2/truncate.c
index 1902366..bd3f811 100644
--- a/libocfs2/truncate.c
+++ b/libocfs2/truncate.c
@@ -436,6 +436,22 @@ errcode_t ocfs2_xattr_tree_truncate(ocfs2_filesys *fs,
 					&ctxt, &changed);
 }
 
+/* Run truncate_iterate over @dx_root's extents with a target of 0 clusters. */
+errcode_t ocfs2_dir_indexed_tree_truncate(ocfs2_filesys *fs,
+					struct ocfs2_dx_root_block *dx_root)
+{
+	struct truncate_ctxt tc;
+
+	memset(&tc, 0, sizeof(tc));
+	tc.new_size_in_clusters = 0;
+	tc.new_i_clusters = dx_root->dr_clusters;
+
+	return ocfs2_extent_iterate_dx_root(fs, dx_root,
+					OCFS2_EXTENT_FLAG_DEPTH_TRAVERSE,
+					NULL, truncate_iterate, &tc);
+}
+
+
 #ifdef DEBUG_EXE
 #include <stdlib.h>
 #include <getopt.h>
diff --git a/libocfs2/unlink.c b/libocfs2/unlink.c
index 286ef59..9ebba38 100644
--- a/libocfs2/unlink.c
+++ b/libocfs2/unlink.c
@@ -30,6 +30,7 @@
 #define _LARGEFILE64_SOURCE
 
 #include <string.h>
+#include <assert.h>
 
 #include "ocfs2/ocfs2.h"
 
@@ -46,6 +47,7 @@ struct link_struct  {
  #pragma argsused
 #endif
 static int unlink_proc(struct ocfs2_dir_entry *dirent,
+		     uint64_t	blocknr,
 		     int	offset,
 		     int	blocksize,
 		     char	*buf,
@@ -66,6 +68,161 @@ static int unlink_proc(struct ocfs2_dir_entry *dirent,
 	return OCFS2_DIRENT_ABORT|OCFS2_DIRENT_CHANGED;
 }
 
+/*
+ * Unlink from a non-indexed directory: run unlink_proc over the entries of
+ * @dir with @name/@ino as the target.  @flags is accepted but not used.
+ */
+static errcode_t ocfs2_unlink_el(ocfs2_filesys *fs, uint64_t dir,
+				const char *name, uint64_t ino, int flags)
+{
+	struct link_struct target;
+	errcode_t err;
+
+	target.done = 0;
+	target.flags = 0;
+	target.inode = ino;
+	target.name = name;
+	target.namelen = name ? strlen(name) : 0;
+
+	err = ocfs2_dir_iterate(fs, dir, 0, 0, unlink_proc, &target);
+	if (err)
+		return err;
+
+	/* done stays 0 unless unlink_proc set it (presumably on removing the
+	 * entry); that case is reported as OCFS2_ET_DIR_NO_SPACE. */
+	return target.done ? 0 : OCFS2_ET_DIR_NO_SPACE;
+}
+
+/*
+ * Remove @de_del from the directory block in @dir_buf: fold its rec_len into
+ * the preceding entry, or clear its inode when it is the first entry.
+ * Returns OCFS2_ET_DIRENT_NOT_FOUND if @de_del is not an entry of the block.
+ */
+static errcode_t __ocfs2_delete_entry(ocfs2_filesys *fs,
+				struct ocfs2_dir_entry *de_del,
+				char *dir_buf)
+{
+	struct ocfs2_dir_entry *de = (struct ocfs2_dir_entry *)dir_buf;
+	struct ocfs2_dir_entry *pde = NULL;
+	int offset = 0;
+
+	while (offset < fs->fs_blocksize) {
+		if (!ocfs2_check_dir_entry(fs, de, dir_buf, offset))
+			return OCFS2_ET_DIR_CORRUPTED;
+
+		if (de == de_del) {
+			if (pde)
+				pde->rec_len += de->rec_len;
+			else
+				de->inode = 0;
+			return 0;
+		}
+
+		/* a zero rec_len would never advance the walk */
+		if (de->rec_len <= 0)
+			return OCFS2_ET_DIR_CORRUPTED;
+
+		pde = de;
+		offset += de->rec_len;
+		de = (struct ocfs2_dir_entry *)((char *)de + de->rec_len);
+	}
+
+	/* walked the whole block without meeting de_del: nothing was removed */
+	return OCFS2_ET_DIRENT_NOT_FOUND;
+}
+
+/*
+ * Unlink @name from the indexed directory @dir: delete its dirent from the
+ * leaf block found through the dx tree, drop the matching dx entry, then
+ * write back the leaf, the dx_root and the inode.  @ino and @flags are not
+ * consulted on this path.
+ */
+static errcode_t ocfs2_unlink_dx(ocfs2_filesys *fs, uint64_t dir,
+				const char *name, uint64_t ino, int flags)
+{
+	char *di_buf = NULL, *dx_root_buf = NULL;
+	struct ocfs2_dinode *di;
+	struct ocfs2_dx_root_block *dx_root;
+	struct ocfs2_dx_entry_list *entry_list;
+	struct ocfs2_dir_block_trailer *trailer;
+	int write_dx_leaf = 0;
+	int add_to_free_list = 0;
+	struct ocfs2_dir_lookup_result lookup;
+	errcode_t ret;
+
+	assert(name);
+	/* Zero first: out: hands lookup to release_lookup_res() on any exit. */
+	memset(&lookup, 0, sizeof(struct ocfs2_dir_lookup_result));
+
+	ret = ocfs2_malloc_block(fs->fs_io, &di_buf);
+	if (ret)
+		goto out;
+	ret = ocfs2_read_inode(fs, dir, di_buf);
+	if (ret)
+		goto out;
+	di = (struct ocfs2_dinode *)di_buf;
+
+	ret = ocfs2_malloc_block(fs->fs_io, &dx_root_buf);
+	if (ret)
+		goto out;
+	ret = ocfs2_read_dx_root(fs, di->i_dx_root, dx_root_buf);
+	if (ret)
+		goto out;
+	dx_root = (struct ocfs2_dx_root_block *)dx_root_buf;
+
+	ret = ocfs2_dx_dir_search(fs, name, strlen(name), dx_root, &lookup);
+	if (ret)
+		goto out;
+
+	trailer = ocfs2_dir_trailer_from_block(fs, lookup.dl_leaf);
+	if (trailer->db_free_rec_len == 0)
+		add_to_free_list = 1;
+
+	ret = __ocfs2_delete_entry(fs, lookup.dl_entry, lookup.dl_leaf);
+	if (ret)
+		goto out;
+
+	trailer->db_free_rec_len = ocfs2_find_max_rec_len(fs, lookup.dl_leaf);
+	if (add_to_free_list) {
+		trailer->db_free_next = dx_root->dr_free_blk;
+		dx_root->dr_free_blk = lookup.dl_leaf_blkno;
+	}
+
+	ret = ocfs2_write_dir_block(fs, di,
+			lookup.dl_leaf_blkno, lookup.dl_leaf);
+	if (ret)
+		goto out;
+
+	if (dx_root->dr_flags & OCFS2_DX_FLAG_INLINE)
+		entry_list = &dx_root->dr_entries;
+	else {
+		entry_list = &(lookup.dl_dx_leaf->dl_list);
+		write_dx_leaf = 1;
+	}
+	ocfs2_dx_list_remove_entry(entry_list, lookup.dl_dx_entry_idx);
+
+	if (write_dx_leaf) {
+		ret = ocfs2_write_dx_leaf(fs, lookup.dl_dx_leaf_blkno,
+					lookup.dl_dx_leaf);
+		if (ret)
+			goto out;
+	}
+
+	dx_root->dr_num_entries--;
+	ret = ocfs2_write_dx_root(fs, di->i_dx_root, dx_root_buf);
+	if (ret)
+		goto out;
+	ret = ocfs2_write_inode(fs, di->i_blkno, di_buf);
+
+out:
+	release_lookup_res(&lookup);
+	if (dx_root_buf)
+		ocfs2_free(&dx_root_buf);
+	if (di_buf)
+		ocfs2_free(&di_buf);
+	return ret;
+}
+
 #ifdef __TURBOC__
  #pragma argsused
 #endif
@@ -73,22 +230,30 @@ errcode_t ocfs2_unlink(ocfs2_filesys *fs, uint64_t dir,
 			const char *name, uint64_t ino,
 			int flags)
 {
-	errcode_t	retval;
-	struct link_struct ls;
+	errcode_t ret;
+	char *di_buf = NULL;
+	struct ocfs2_dinode *di;
 
 	if (!(fs->fs_flags & OCFS2_FLAG_RW))
 		return OCFS2_ET_RO_FILESYS;
 
-	ls.name = name;
-	ls.namelen = name ? strlen(name) : 0;
-	ls.inode = ino;
-	ls.flags = 0;
-	ls.done = 0;
+	ret = ocfs2_malloc_block(fs->fs_io, &di_buf);
+	if (ret)
+		goto out;
+	ret = ocfs2_read_inode(fs, dir, di_buf);
+	if (ret)
+		goto out;
+	di = (struct ocfs2_dinode *)di_buf;
 
-	retval = ocfs2_dir_iterate(fs, dir, 0, 0, unlink_proc, &ls);
-	if (retval)
-		return retval;
+	if (ocfs2_supports_indexed_dirs(OCFS2_RAW_SB(fs->fs_super)) &&
+	    (ocfs2_dir_indexed(di)))
+		ret = ocfs2_unlink_dx(fs, dir, name, ino, flags);
+	else
+		ret = ocfs2_unlink_el(fs, dir, name, ino, flags);
 
-	return (ls.done) ? 0 : OCFS2_ET_DIR_NO_SPACE;
+out:
+	if (di_buf)
+		ocfs2_free(&di_buf);
+	return ret;
 }
 
diff --git a/sizetest/sizetest.c b/sizetest/sizetest.c
index e01e800..a68e6c3 100644
--- a/sizetest/sizetest.c
+++ b/sizetest/sizetest.c
@@ -144,7 +144,7 @@ static void print_ocfs2_super_block(void)
 	SHOW_OFFSET(struct ocfs2_super_block, s_cluster_info);
 	SHOW_OFFSET(struct ocfs2_super_block, s_xattr_inline_size);
 	SHOW_OFFSET(struct ocfs2_super_block, s_reserved0);
-	SHOW_OFFSET(struct ocfs2_super_block, s_reserved1);
+	SHOW_OFFSET(struct ocfs2_super_block, s_dx_seed);
 	SHOW_OFFSET(struct ocfs2_super_block, s_reserved2);
 	
         END_TYPE(struct ocfs2_super_block);
-- 
1.7.0.2




More information about the Ocfs2-tools-devel mailing list