[Ocfs2-tools-devel] [PATCH 01/12] dx_dirs v9: Add library support for directory indexing

Coly Li coly.li at suse.de
Mon Mar 8 00:28:14 PST 2010


This patch updates ocfs2_fs.h with the relevant directory indexing
structures. Additionally, libocfs2/ is given swap
functions for the new disk structures. The library also gets three new
iteration functions:

ocfs2_dx_entries_iterate() - to iterate all index entries in an inline,
or external index.

ocfs2_dx_frees_iterate() - an iterator for the dirblock free list.

ocfs2_extent_iterate_dx_root() - iterate the extent blocks of an index
tree.

Caveats:

Right now, this is all read-only. A major 'TODO' item is adding the
appropriate APIs to enable creation, deletion and
various manipulation of the dx tree, as well as individual items.

None of the other library code really knows about the directory index.
This means that things like ocfs2_lookup() are
still using the old-style lookup via the unindexed dirent tree.

We need to add support for the newly increased links_count maximum. This
should probably be a separate patch though.

Signed-off-by: Mark Fasheh <mfasheh at suse.com>
Signed-off-by: Coly Li <coly.li at suse.de>
---
 include/ocfs2-kernel/ocfs2_fs.h |  129 +++++++++++++++++++++++++++++++-
 include/ocfs2/ocfs2.h           |   46 ++++++++++++
 libocfs2/dir_iterate.c          |  155 +++++++++++++++++++++++++++++++++++++++
 libocfs2/dirblock.c             |  140 +++++++++++++++++++++++++++++++++++-
 libocfs2/extents.c              |   89 ++++++++++++++++++++++
 libocfs2/feature_string.c       |   12 +++-
 libocfs2/inode.c                |    2 +
 sizetest/sizes.txt              |    2 +-
 sizetest/sizetest.c             |    3 +-
 9 files changed, 570 insertions(+), 8 deletions(-)

diff --git a/include/ocfs2-kernel/ocfs2_fs.h b/include/ocfs2-kernel/ocfs2_fs.h
index c0638fc..b62a017 100644
--- a/include/ocfs2-kernel/ocfs2_fs.h
+++ b/include/ocfs2-kernel/ocfs2_fs.h
@@ -67,6 +67,8 @@
 #define OCFS2_XATTR_BLOCK_SIGNATURE	"XATTR01"
 #define OCFS2_DIR_TRAILER_SIGNATURE	"DIRTRL1"
 #define OCFS2_REFCOUNT_BLOCK_SIGNATURE	"REFCNT1"
+#define OCFS2_DX_ROOT_SIGNATURE		"DXDIR01"
+#define OCFS2_DX_LEAF_SIGNATURE		"DXLEAF1"

 /* Compatibility flags */
 #define OCFS2_HAS_COMPAT_FEATURE(sb,mask)			\
@@ -97,7 +99,8 @@
 					 | OCFS2_FEATURE_INCOMPAT_USERSPACE_STACK \
 					 | OCFS2_FEATURE_INCOMPAT_META_ECC \
 					 | OCFS2_FEATURE_INCOMPAT_XATTR \
-					 | OCFS2_FEATURE_INCOMPAT_REFCOUNT_TREE)
+					 | OCFS2_FEATURE_INCOMPAT_REFCOUNT_TREE \
+					 | OCFS2_FEATURE_INCOMPAT_INDEXED_DIRS)
 #define OCFS2_FEATURE_RO_COMPAT_SUPP	(OCFS2_FEATURE_RO_COMPAT_UNWRITTEN \
 					 | OCFS2_FEATURE_RO_COMPAT_USRQUOTA \
 					 | OCFS2_FEATURE_RO_COMPAT_GRPQUOTA)
@@ -153,6 +156,9 @@
 /* Support for extended attributes */
 #define OCFS2_FEATURE_INCOMPAT_XATTR		0x0200

+/* Support for indexed directories */
+#define OCFS2_FEATURE_INCOMPAT_INDEXED_DIRS	0x0400
+
 /* Metadata checksum and error correction */
 #define OCFS2_FEATURE_INCOMPAT_META_ECC		0x0800

@@ -432,8 +438,11 @@ static struct ocfs2_system_inode_info ocfs2_system_inodes[NUM_SYSTEM_INODES] = {
 #define OCFS2_DIR_REC_LEN(name_len)	(((name_len) + OCFS2_DIR_MEMBER_LEN + \
                                           OCFS2_DIR_ROUND) & \
 					 ~OCFS2_DIR_ROUND)
+#define OCFS2_DIR_MIN_REC_LEN	OCFS2_DIR_REC_LEN(1)

 #define OCFS2_LINK_MAX		32000
+#define	OCFS2_DX_LINK_MAX	((1U << 31) - 1U)
+#define	OCFS2_LINKS_HI_SHIFT	16

 #define S_SHIFT			12
 static unsigned char ocfs2_type_by_mode[S_IFMT >> S_SHIFT] = {
@@ -649,8 +658,9 @@ struct ocfs2_super_block {
 /*B8*/	__le16 s_xattr_inline_size;	/* extended attribute inline size
 					   for this fs*/
 	__le16 s_reserved0;
-	__le32 s_reserved1;
-/*C0*/  __le64 s_reserved2[16];		/* Fill out superblock */
+	__le32 s_dx_seed[3];		/* seed[0-2] for dx dir hash.
+					 * s_uuid_hash serves as seed[3]. */
+/*C0*/  __le64 s_reserved2[15];		/* Fill out superblock */
 /*140*/

 	/*
@@ -700,7 +710,7 @@ struct ocfs2_dinode {
 					   belongs to */
 	__le16 i_suballoc_bit;		/* Bit offset in suballocator
 					   block group */
-/*10*/	__le16 i_reserved0;
+/*10*/	__le16 i_links_count_hi;	/* High 16 bits of links count */
 	__le16 i_xattr_inline_size;
 	__le32 i_clusters;		/* Cluster count */
 	__le32 i_uid;			/* Owner UID */
@@ -804,6 +814,87 @@ struct ocfs2_dir_block_trailer {
 /*40*/
 };

+ /*
+ * A directory entry in the indexed tree. We don't store the full name here,
+ * but instead provide a pointer to the full dirent in the unindexed tree.
+ *
+ * We also store name_len here so as to reduce the number of leaf blocks we
+ * need to search in case of collisions.
+ */
+struct ocfs2_dx_entry {
+	__le32		dx_major_hash;	/* Used to find logical
+					 * cluster in index */
+	__le32		dx_minor_hash;	/* Lower bits used to find
+					 * block in cluster */
+	__le64		dx_dirent_blk;	/* Physical block in unindexed
+					 * tree holding this dirent. */
+};
+
+struct ocfs2_dx_entry_list {
+	__le32		de_reserved;
+	__le16		de_count;	/* Maximum number of entries
+					 * possible in de_entries */
+	__le16		de_num_used;	/* Current number of
+					 * de_entries entries */
+	struct	ocfs2_dx_entry		de_entries[0];	/* Indexed dir entries
+							 * in a packed array of
+							 * length de_num_used */
+};
+
+#define OCFS2_DX_FLAG_INLINE	0x01
+
+/*
+ * A directory indexing block. Each indexed directory has one of these,
+ * pointed to by ocfs2_dinode.
+ *
+ * This block stores an indexed btree root, and a set of free space
+ * start-of-list pointers.
+ */
+struct ocfs2_dx_root_block {
+	__u8		dr_signature[8];	/* Signature for verification */
+	struct ocfs2_block_check dr_check;	/* Error checking */
+	__le16		dr_suballoc_slot;	/* Slot suballocator this
+						 * block belongs to. */
+	__le16		dr_suballoc_bit;	/* Bit offset in suballocator
+						 * block group */
+	__le32		dr_fs_generation;	/* Must match super block */
+	__le64		dr_blkno;		/* Offset on disk, in blocks */
+	__le64		dr_last_eb_blk;		/* Pointer to last
+						 * extent block */
+	__le32		dr_clusters;		/* Clusters allocated
+						 * to the indexed tree. */
+	__u8		dr_flags;		/* OCFS2_DX_FLAG_* flags */
+	__u8		dr_reserved0;
+	__le16		dr_reserved1;
+	__le64		dr_dir_blkno;		/* Pointer to parent inode */
+	__le64		dr_reserved2;
+	__le64		dr_free_blk;		/* Pointer to head of free
+						 * unindexed block list. */
+	__le64		dr_reserved3[15];
+	union {
+		struct ocfs2_extent_list dr_list; /* Keep this aligned to 128
+						   * bits for maximum space
+						   * efficiency. */
+		struct ocfs2_dx_entry_list dr_entries; /* In-root-block list of
+							* entries. We grow out
+							* to extents if this
+							* gets too big. */
+	};
+};
+
+/*
+ * The header of a leaf block in the indexed tree.
+ */
+struct ocfs2_dx_leaf {
+	__u8		dl_signature[8];/* Signature for verification */
+	struct ocfs2_block_check dl_check;	/* Error checking */
+	__le64		dl_blkno;	/* Offset on disk, in blocks */
+	__le32		dl_fs_generation;/* Must match super block */
+	__le32		dl_reserved0;
+	__le64		dl_reserved1;
+	struct ocfs2_dx_entry_list	dl_list;
+};
+
 /*
  * On disk allocator group structure for OCFS2
  */
@@ -1190,6 +1281,16 @@ static inline int ocfs2_extent_recs_per_inode_with_xattr(
 	return size / sizeof(struct ocfs2_extent_rec);
 }

+static inline int ocfs2_extent_recs_per_dx_root(struct super_block *sb)
+{
+	int size;
+
+	size = sb->s_blocksize -
+		offsetof(struct ocfs2_dx_root_block, dr_list.l_recs);
+
+	return size / sizeof(struct ocfs2_extent_rec);
+}
+
 static inline int ocfs2_chain_recs_per_inode(struct super_block *sb)
 {
 	int size;
@@ -1210,6 +1311,26 @@ static inline u16 ocfs2_extent_recs_per_eb(struct super_block *sb)
 	return size / sizeof(struct ocfs2_extent_rec);
 }

+static inline int ocfs2_dx_entries_per_leaf(struct super_block *sb)
+{
+	int size;
+
+	size = sb->s_blocksize -
+		offsetof(struct ocfs2_dx_leaf, dl_list.de_entries);
+
+	return size / sizeof(struct ocfs2_dx_entry);
+}
+
+static inline int ocfs2_dx_entries_per_root(struct super_block *sb)
+{
+	int size;
+
+	size = sb->s_blocksize -
+		offsetof(struct ocfs2_dx_root_block, dr_entries.de_entries);
+
+	return size / sizeof(struct ocfs2_dx_entry);
+}
+
 static inline u16 ocfs2_local_alloc_size(struct super_block *sb)
 {
 	u16 size;
diff --git a/include/ocfs2/ocfs2.h b/include/ocfs2/ocfs2.h
index b9da66c..f5c2638 100644
--- a/include/ocfs2/ocfs2.h
+++ b/include/ocfs2/ocfs2.h
@@ -471,6 +471,11 @@ int ocfs2_skip_dir_trailer(ocfs2_filesys *fs, struct ocfs2_dinode *di,
 			   struct ocfs2_dir_entry *de, unsigned long offset);
 void ocfs2_init_dir_trailer(ocfs2_filesys *fs, struct ocfs2_dinode *di,
 			    uint64_t blkno, void *buf);
+errcode_t ocfs2_read_dx_root(ocfs2_filesys *fs, uint64_t block,
+			     void *buf);
+errcode_t ocfs2_read_dx_leaf(ocfs2_filesys *fs, uint64_t block,
+			     void *buf);
+int ocfs2_dir_indexed(struct ocfs2_dinode *di);

 errcode_t ocfs2_dir_iterate2(ocfs2_filesys *fs,
 			     uint64_t dir,
@@ -495,6 +500,27 @@ extern errcode_t ocfs2_dir_iterate(ocfs2_filesys *fs,
 					       void	*priv_data),
 				   void *priv_data);

+extern errcode_t ocfs2_dx_entries_iterate(ocfs2_filesys *fs,
+			struct ocfs2_dinode *dir,
+			int flags,
+			int (*func)(ocfs2_filesys *fs,
+				    struct ocfs2_dx_entry_list *entry_list,
+				    struct ocfs2_dx_root_block *dx_root,
+				    struct ocfs2_dx_leaf *dx_leaf,
+				    void *priv_data),
+			void *priv_data);
+
+extern errcode_t ocfs2_dx_frees_iterate(ocfs2_filesys *fs,
+			struct ocfs2_dinode *dir,
+			struct ocfs2_dx_root_block *dx_root,
+			int flags,
+			int (*func)(ocfs2_filesys *fs,
+				    uint64_t blkno,
+				    struct ocfs2_dir_block_trailer *trailer,
+				    char *dirblock,
+				    void *priv_data),
+			void *priv_data);
+
 errcode_t ocfs2_lookup(ocfs2_filesys *fs, uint64_t dir,
 		       const char *name, int namelen, char *buf,
 		       uint64_t *inode);
@@ -1216,6 +1242,13 @@ static inline int ocfs2_support_xattr(struct ocfs2_super_block *osb)
 	return 0;
 }

+/* Returns 1 when the superblock advertises indexed directories, else 0. */
+static inline int ocfs2_supports_indexed_dirs(struct ocfs2_super_block *osb)
+{
+	return (osb->s_feature_incompat &
+		OCFS2_FEATURE_INCOMPAT_INDEXED_DIRS) ? 1 : 0;
+}
+
 /*
  * When we're swapping some of our disk structures, a garbage count
  * can send us past the edge of a block buffer.  This function guards
@@ -1344,6 +1377,19 @@ errcode_t ocfs2_extent_iterate_inode(ocfs2_filesys *fs,
 					         int ref_recno,
 					         void *priv_data),
 					         void *priv_data);
+errcode_t ocfs2_extent_iterate_dx_root(ocfs2_filesys *fs,
+				       struct ocfs2_dx_root_block *dx_root,
+				       int flags,
+				       char *block_buf,
+				       int (*func)(ocfs2_filesys *fs,
+						   struct ocfs2_extent_rec *rec,
+						   int tree_depth,
+						   uint32_t ccount,
+						   uint64_t ref_blkno,
+						   int ref_recno,
+						   void *priv_data),
+				       void *priv_data);
+
 errcode_t ocfs2_block_iterate(ocfs2_filesys *fs,
 			      uint64_t blkno,
 			      int flags,
diff --git a/libocfs2/dir_iterate.c b/libocfs2/dir_iterate.c
index 00c8d16..1064d9f 100644
--- a/libocfs2/dir_iterate.c
+++ b/libocfs2/dir_iterate.c
@@ -307,6 +307,161 @@ int ocfs2_process_dir_block(ocfs2_filesys *fs,
 	return 0;
 }

+struct dx_iterator_data {	/* state handed to dx_iterator() as priv_data */
+	int (*dx_func)(ocfs2_filesys *fs,
+		       struct ocfs2_dx_entry_list *entry_list,
+		       struct ocfs2_dx_root_block *dx_root,
+		       struct ocfs2_dx_leaf *dx_leaf,
+		       void *priv_data);
+	void *dx_priv_data;	/* opaque cookie passed through to dx_func */
+	char *leaf_buf;		/* scratch block each dx leaf is read into */
+	struct ocfs2_dx_root_block *dx_root;	/* root passed to dx_func */
+};
+/* Extent callback: read every dx leaf block of rec and pass it to dx_func. */
+static int dx_iterator(ocfs2_filesys *fs,
+		       struct ocfs2_extent_rec *rec,
+		       int tree_depth,
+		       uint32_t ccount,
+		       uint64_t ref_blkno,
+		       int ref_recno,
+		       void *priv_data)
+{
+	int ret, i;
+	struct ocfs2_dx_leaf *dx_leaf;
+	struct dx_iterator_data *iter = priv_data;
+	uint64_t blkno, count;
+
+	count = ocfs2_clusters_to_blocks(fs, rec->e_leaf_clusters);
+	/* Blocks are visited consecutively, starting at the extent's e_blkno. */
+	blkno = rec->e_blkno;
+	for (i = 0; i < count; i++) {
+		ret = ocfs2_read_dx_leaf(fs, blkno, iter->leaf_buf);
+		if (ret)
+			return ret;	/* NOTE(review): raw errcode via int - confirm */
+		/* NOTE(review): dx_func's return value is ignored - intended? */
+		dx_leaf = (struct ocfs2_dx_leaf *)iter->leaf_buf;
+		iter->dx_func(fs, &dx_leaf->dl_list, iter->dx_root, dx_leaf,
+			      iter->dx_priv_data);
+
+		blkno++;
+	}
+
+	return 0;
+}
+/* Run func over every dx entry list of dir: inline root or each leaf. */
+extern errcode_t ocfs2_dx_entries_iterate(ocfs2_filesys *fs,
+			struct ocfs2_dinode *dir,
+			int flags,
+			int (*func)(ocfs2_filesys *fs,
+				    struct ocfs2_dx_entry_list *entry_list,
+				    struct ocfs2_dx_root_block *dx_root,
+				    struct ocfs2_dx_leaf *dx_leaf,
+				    void *priv_data),
+			void *priv_data)
+{
+	errcode_t ret = 0;
+	struct ocfs2_dx_root_block *dx_root;
+	uint64_t dx_blkno;
+	char *buf = NULL, *eb_buf = NULL, *leaf_buf = NULL;
+	struct dx_iterator_data data;
+
+	if (!S_ISDIR(dir->i_mode) || !ocfs2_dir_indexed(dir)) {
+		/* Not a directory, or not indexed: succeed without reading. */
+		goto out;
+	}
+
+	ret = ocfs2_malloc_block(fs->fs_io, &buf);
+	if (ret)
+		goto out;
+
+	dx_blkno = (uint64_t) dir->i_dx_root;
+
+	ret = ocfs2_read_dx_root(fs, dx_blkno, buf);
+	if (ret)
+		goto out;
+
+	dx_root = (struct ocfs2_dx_root_block *)buf;
+
+	if (dx_root->dr_flags & OCFS2_DX_FLAG_INLINE) {
+		/* Inline index: the only entry list is inside the root block. */
+		func(fs, &dx_root->dr_entries, dx_root, NULL, priv_data);
+		goto out;
+	}
+	/* NOTE(review): eb_buf holds one block; verify for tree depth > 1. */
+	ret = ocfs2_malloc_block(fs->fs_io, &eb_buf);
+	if (ret)
+		goto out;
+
+	ret = ocfs2_malloc_block(fs->fs_io, &leaf_buf);
+	if (ret)
+		goto out;
+
+	data.dx_func = func;
+	data.dx_priv_data = priv_data;
+	data.leaf_buf = leaf_buf;
+	data.dx_root = dx_root;
+	ret = ocfs2_extent_iterate_dx_root(fs, dx_root,
+					   OCFS2_EXTENT_FLAG_DATA_ONLY, eb_buf,
+					   dx_iterator, &data);
+
+out:
+	if (buf)
+		ocfs2_free(&buf);
+	if (eb_buf)
+		ocfs2_free(&eb_buf);
+	if (leaf_buf)
+		ocfs2_free(&leaf_buf);
+	return ret;
+}
+/* Walk the dirblock free list rooted at dx_root, calling func per block. */
+extern errcode_t ocfs2_dx_frees_iterate(ocfs2_filesys *fs,
+			struct ocfs2_dinode *dir,
+			struct ocfs2_dx_root_block *dx_root,
+			int flags,
+			int (*func)(ocfs2_filesys *fs,
+				    uint64_t blkno,
+				    struct ocfs2_dir_block_trailer *trailer,
+				    char *dirblock,
+				    void *priv_data),
+			void *priv_data)
+{
+	errcode_t err;
+	uint64_t cur;
+	char *block = NULL;
+	struct ocfs2_dir_block_trailer *tail;
+
+	/* Non-directories and unindexed directories are a successful no-op. */
+	if (!S_ISDIR(dir->i_mode))
+		return 0;
+	if (!ocfs2_dir_indexed(dir))
+		return 0;
+
+	/* An inline index is skipped: no list is followed in that case. */
+	if (dx_root->dr_flags & OCFS2_DX_FLAG_INLINE)
+		return 0;
+
+	err = ocfs2_malloc_block(fs->fs_io, &block);
+	if (err)
+		goto done;
+
+	/* db_free_next chains the blocks; a zero block number ends it. */
+	for (cur = dx_root->dr_free_blk; cur; cur = tail->db_free_next) {
+		err = ocfs2_read_dir_block(fs, dir, cur, block);
+		if (err)
+			break;
+
+		tail = ocfs2_dir_trailer_from_block(fs, block);
+
+		/* The callback's return value is not consulted. */
+		func(fs, cur, tail, block, priv_data);
+	}
+
+done:
+	if (block)
+		ocfs2_free(&block);
+
+	return err;
+}

 #ifdef DEBUG_EXE
 #include <stdlib.h>
diff --git a/libocfs2/dirblock.c b/libocfs2/dirblock.c
index fec8ffc..d68e5c0 100644
--- a/libocfs2/dirblock.c
+++ b/libocfs2/dirblock.c
@@ -54,12 +54,17 @@ int ocfs2_dir_has_trailer(ocfs2_filesys *fs, struct ocfs2_dinode *di)
 	    (di->i_dyn_features & OCFS2_INLINE_DATA_FL))
 		return 0;

+	if (ocfs2_supports_indexed_dirs(OCFS2_RAW_SB(fs->fs_super)) &&
+	    di->i_dyn_features & OCFS2_INDEXED_DIR_FL)
+		return 1;
+
 	return ocfs2_meta_ecc(OCFS2_RAW_SB(fs->fs_super));
 }

 int ocfs2_supports_dir_trailer(ocfs2_filesys *fs)
 {
-	return ocfs2_meta_ecc(OCFS2_RAW_SB(fs->fs_super));
+	return ocfs2_meta_ecc(OCFS2_RAW_SB(fs->fs_super)) ||
+		ocfs2_supports_indexed_dirs(OCFS2_RAW_SB(fs->fs_super));
 }

 int ocfs2_skip_dir_trailer(ocfs2_filesys *fs, struct ocfs2_dinode *di,
@@ -148,6 +153,8 @@ void ocfs2_swap_dir_trailer(struct ocfs2_dir_block_trailer *trailer)
 	bswap_64(trailer->db_compat_rec_len);
 	bswap_64(trailer->db_blkno);
 	bswap_64(trailer->db_parent_dinode);
+	bswap_16(trailer->db_free_rec_len);
+	bswap_64(trailer->db_free_next);
 }

 errcode_t ocfs2_read_dir_block(ocfs2_filesys *fs, struct ocfs2_dinode *di,
@@ -222,3 +229,134 @@ out:
 	ocfs2_free(&buf);
 	return retval;
 }
+/* Byte-swap one dx entry in place; no-op when cpu_is_little_endian. */
+static void ocfs2_swap_dx_entry_to_cpu(struct ocfs2_dx_entry *dx_entry)
+{
+	if (cpu_is_little_endian)
+		return;
+
+	dx_entry->dx_major_hash		= bswap_32(dx_entry->dx_major_hash);
+	dx_entry->dx_minor_hash		= bswap_32(dx_entry->dx_minor_hash);
+	dx_entry->dx_dirent_blk		= bswap_64(dx_entry->dx_dirent_blk);
+}
+/* Swap an entry list's counters, then de_count entry slots, in place. */
+static void ocfs2_swap_dx_entry_list_to_cpu(struct ocfs2_dx_entry_list *dl_list)
+{
+	int i;
+
+	if (cpu_is_little_endian)
+		return;
+
+	dl_list->de_count	= bswap_16(dl_list->de_count);
+	dl_list->de_num_used	= bswap_16(dl_list->de_num_used);
+	/* NOTE(review): de_count comes from disk and is not bounds-checked. */
+	for (i = 0; i < dl_list->de_count; i++)
+		ocfs2_swap_dx_entry_to_cpu(&dl_list->de_entries[i]);
+}
+/* Swap a dx root's header fields, then whichever union member is in use. */
+static void ocfs2_swap_dx_root_to_cpu(struct ocfs2_dx_root_block *dx_root)
+{
+	if (cpu_is_little_endian)
+		return;
+
+	dx_root->dr_suballoc_slot	= bswap_16(dx_root->dr_suballoc_slot);
+	dx_root->dr_suballoc_bit	= bswap_16(dx_root->dr_suballoc_bit);
+	dx_root->dr_fs_generation	= bswap_32(dx_root->dr_fs_generation);
+	dx_root->dr_blkno		= bswap_64(dx_root->dr_blkno);
+	dx_root->dr_last_eb_blk		= bswap_64(dx_root->dr_last_eb_blk);
+	dx_root->dr_clusters		= bswap_32(dx_root->dr_clusters);
+	dx_root->dr_dir_blkno		= bswap_64(dx_root->dr_dir_blkno);
+	dx_root->dr_free_blk		= bswap_64(dx_root->dr_free_blk);
+	/* dr_flags is a single byte, so it is read without any swapping. */
+	if (dx_root->dr_flags & OCFS2_DX_FLAG_INLINE)
+		ocfs2_swap_dx_entry_list_to_cpu(&dx_root->dr_entries);
+	else
+		ocfs2_swap_extent_list_to_cpu(&dx_root->dr_list);
+}
+/* Read one block as a dx root: check ECC and signature, then swap to CPU. */
+errcode_t ocfs2_read_dx_root(ocfs2_filesys *fs, uint64_t block,
+			     void *buf)
+{
+	struct ocfs2_dx_root_block *root = buf;
+	errcode_t err = ocfs2_read_blocks(fs, block, 1, buf);
+
+	/* Validation runs on the raw image, before any byte swapping. */
+	if (!err)
+		err = ocfs2_validate_meta_ecc(fs, buf, &root->dr_check);
+	if (err)
+		return err;
+
+	/* Compare only the signature text, without its trailing NUL. */
+	if (memcmp(root->dr_signature, OCFS2_DX_ROOT_SIGNATURE,
+		   strlen(OCFS2_DX_ROOT_SIGNATURE)) != 0)
+		return OCFS2_ET_DIR_CORRUPTED;
+
+	/*
+	 * Swap last, once the block is known to be a dx root.
+	 */
+	ocfs2_swap_dx_root_to_cpu(root);
+	return 0;
+}
+/* Convert a dx leaf header and its entry list from disk to CPU order. */
+static void ocfs2_swap_dx_leaf_to_cpu(struct ocfs2_dx_leaf *dx_leaf)
+{
+	if (cpu_is_little_endian)
+		return;
+
+	dx_leaf->dl_blkno = bswap_64(dx_leaf->dl_blkno);
+	/* dl_fs_generation is __le32: a 64-bit swap would truncate it to 0. */
+	dx_leaf->dl_fs_generation = bswap_32(dx_leaf->dl_fs_generation);
+	ocfs2_swap_dx_entry_list_to_cpu(&dx_leaf->dl_list);
+}
+/* Read one block as a dx leaf: check ECC and signature, then swap to CPU. */
+errcode_t ocfs2_read_dx_leaf(ocfs2_filesys *fs, uint64_t block,
+			     void *buf)
+{
+	struct ocfs2_dx_leaf *leaf = buf;
+	errcode_t err = ocfs2_read_blocks(fs, block, 1, buf);
+
+	/* Validation runs on the raw image, before any byte swapping. */
+	if (!err)
+		err = ocfs2_validate_meta_ecc(fs, buf, &leaf->dl_check);
+	if (err)
+		return err;
+
+	/* Compare only the signature text, without its trailing NUL. */
+	if (memcmp(leaf->dl_signature, OCFS2_DX_LEAF_SIGNATURE,
+		   strlen(OCFS2_DX_LEAF_SIGNATURE)) != 0)
+		return OCFS2_ET_DIR_CORRUPTED;
+
+	/*
+	 * Swap last, once the block is known to be a dx leaf.
+	 */
+	ocfs2_swap_dx_leaf_to_cpu(leaf);
+	return 0;
+}
+/* Report whether the inode has OCFS2_INDEXED_DIR_FL set (1) or not (0). */
+int ocfs2_dir_indexed(struct ocfs2_dinode *di)
+{
+	int flagged = (di->i_dyn_features & OCFS2_INDEXED_DIR_FL) != 0;
+
+	return flagged;
+}
+
+/*
+ * Only use this when we already know the directory is indexed.
+ * Reports whether de_off is the offset ocfs2_dir_trailer_blk_off() gives.
+ */
+int __ocfs2_is_dir_trailer(ocfs2_filesys *fs, unsigned long de_off)
+{
+	int at_trailer = (de_off == ocfs2_dir_trailer_blk_off(fs));
+
+	return at_trailer;
+}
+/* Trailer test that first checks whether di's blocks have a trailer. */
+int ocfs2_is_dir_trailer(ocfs2_filesys *fs, struct ocfs2_dinode *di,
+			 unsigned long de_off)
+{
+	/* Without a trailer, no offset can match. */
+	if (!ocfs2_dir_has_trailer(fs, di))
+		return 0;
+
+	return __ocfs2_is_dir_trailer(fs, de_off);
+}
diff --git a/libocfs2/extents.c b/libocfs2/extents.c
index ee7ef93..8c322b1 100644
--- a/libocfs2/extents.c
+++ b/libocfs2/extents.c
@@ -550,6 +550,95 @@ out:
 	return ret;
 }

+errcode_t ocfs2_extent_iterate_dx_root(ocfs2_filesys *fs,
+				       struct ocfs2_dx_root_block *dx_root,
+				       int flags,
+				       char *block_buf,
+				       int (*func)(ocfs2_filesys *fs,
+						   struct ocfs2_extent_rec *rec,
+						   int tree_depth,
+						   uint32_t ccount,
+						   uint64_t ref_blkno,
+						   int ref_recno,
+						   void *priv_data),
+				       void *priv_data)
+{
+	int i;
+	int iret = 0;
+	struct ocfs2_extent_list *el;
+	errcode_t ret;
+	struct extent_context ctxt;
+
+	if (dx_root->dr_flags & OCFS2_DX_FLAG_INLINE)
+		return OCFS2_ET_INODE_CANNOT_BE_ITERATED;
+	/* Non-inline root: walk dr_list, with one buffer slot per tree level. */
+	el = &dx_root->dr_list;
+	if (el->l_tree_depth) {
+		ret = ocfs2_malloc0(sizeof(char *) * el->l_tree_depth,
+				    &ctxt.eb_bufs);
+		if (ret)
+			goto out;
+		/* NOTE(review): a given block_buf is sliced into depth blocks. */
+		if (block_buf) {
+			ctxt.eb_bufs[0] = block_buf;
+		} else {
+			ret = ocfs2_malloc0(fs->fs_blocksize *
+					    el->l_tree_depth,
+					    &ctxt.eb_bufs[0]);
+			if (ret)
+				goto out_eb_bufs;
+		}
+
+		for (i = 1; i < el->l_tree_depth; i++) {
+			ctxt.eb_bufs[i] = ctxt.eb_bufs[0] +
+				i * fs->fs_blocksize;
+		}
+	}
+	else
+		ctxt.eb_bufs = NULL;
+
+	ctxt.fs = fs;
+	ctxt.func = func;
+	ctxt.priv_data = priv_data;
+	ctxt.flags = flags;
+	ctxt.ccount = 0;
+	ctxt.last_eb_blkno = 0;
+	ctxt.last_eb_cpos = 0;
+
+	ret = 0;
+	iret |= extent_iterate_el(el, 0, &ctxt);
+	if (iret & OCFS2_EXTENT_ERROR)
+		ret = ctxt.errcode;
+	/* An abort skips the dr_last_eb_blk fix-up below. */
+	if (iret & OCFS2_EXTENT_ABORT)
+		goto out_abort;
+
+	/* we can only trust ctxt.last_eb_blkno if we walked the whole tree */
+	if (dx_root->dr_last_eb_blk != ctxt.last_eb_blkno) {
+		dx_root->dr_last_eb_blk = ctxt.last_eb_blkno;
+		iret |= OCFS2_EXTENT_CHANGED;
+	}
+
+out_abort:
+#if 0
+	/*
+	 * This block needs to be fixed up for write support.
+	 */
+	if (!ret && (iret & OCFS2_EXTENT_CHANGED))
+		ret = ocfs2_write_inode(fs, inode->i_blkno, (char *)inode);
+#endif
+
+out_eb_bufs:
+	if (ctxt.eb_bufs) {
+		if (!block_buf && ctxt.eb_bufs[0])
+			ocfs2_free(&ctxt.eb_bufs[0]);
+		ocfs2_free(&ctxt.eb_bufs);
+	}
+
+out:
+	return ret;
+}
+
 errcode_t ocfs2_extent_iterate(ocfs2_filesys *fs,
 			       uint64_t blkno,
 			       int flags,
diff --git a/libocfs2/feature_string.c b/libocfs2/feature_string.c
index 79d5712..e90f864 100644
--- a/libocfs2/feature_string.c
+++ b/libocfs2/feature_string.c
@@ -88,7 +88,8 @@ static ocfs2_fs_options feature_level_defaults[] = {
 	 OCFS2_FEATURE_INCOMPAT_INLINE_DATA |
 	 OCFS2_FEATURE_INCOMPAT_META_ECC |
 	 OCFS2_FEATURE_INCOMPAT_XATTR |
-	 OCFS2_FEATURE_INCOMPAT_REFCOUNT_TREE,
+	 OCFS2_FEATURE_INCOMPAT_REFCOUNT_TREE |
+	 OCFS2_FEATURE_INCOMPAT_INDEXED_DIRS,
 	 OCFS2_FEATURE_RO_COMPAT_UNWRITTEN |
 	 OCFS2_FEATURE_RO_COMPAT_USRQUOTA |
 	 OCFS2_FEATURE_RO_COMPAT_GRPQUOTA }, /* OCFS2_FEATURE_LEVEL_MAX_FEATURES */
@@ -161,6 +162,11 @@ static struct fs_feature_flags ocfs2_supported_features[] = {
 		{0, OCFS2_FEATURE_INCOMPAT_XATTR, 0},
 		{0, OCFS2_FEATURE_INCOMPAT_XATTR, 0},
 	},
+ 	{
+		"indexed-dirs",
+		{0, OCFS2_FEATURE_INCOMPAT_INDEXED_DIRS, 0},
+		{0, OCFS2_FEATURE_INCOMPAT_INDEXED_DIRS, 0},
+	},
 	{
 		"usrquota",
 		{0, 0, OCFS2_FEATURE_RO_COMPAT_USRQUOTA},
@@ -243,6 +249,10 @@ static struct feature_name ocfs2_feature_names[] = {
 		.fn_name = "xattr",
 		.fn_flag = {0, OCFS2_FEATURE_INCOMPAT_XATTR, 0},
 	},
+ 	{
+		.fn_name = "IndexedDirs",
+		.fn_flag = {0, OCFS2_FEATURE_INCOMPAT_INDEXED_DIRS, 0},
+	},
 	{
 		.fn_name = "usrquota",
 		.fn_flag = {0, 0, OCFS2_FEATURE_RO_COMPAT_USRQUOTA},
diff --git a/libocfs2/inode.c b/libocfs2/inode.c
index e9ce579..95419f4 100644
--- a/libocfs2/inode.c
+++ b/libocfs2/inode.c
@@ -163,6 +163,8 @@ static void ocfs2_swap_inode_second(struct ocfs2_dinode *di)
 		struct ocfs2_inline_data *id = &di->id2.i_data;

 		id->id_count = bswap_16(id->id_count);
+	} else if (di->i_dyn_features & OCFS2_INDEXED_DIR_FL) {
+		di->i_dx_root = bswap_64(di->i_dx_root);
 	}
 }

diff --git a/sizetest/sizes.txt b/sizetest/sizes.txt
index d03b67c..1fb27a9 100644
--- a/sizetest/sizes.txt
+++ b/sizetest/sizes.txt
@@ -110,7 +110,7 @@
 0x0C0	id2.i_chain         	+0x10
 0x0C0	id2.i_list          	+0x10
 0x0C0	id2.i_symlink       	+0x00
-	Total               	0x200
+	Total               	0x208

 [off]	struct ocfs2_dir_entry	[size]
 0x000	inode               	+0x08
diff --git a/sizetest/sizetest.c b/sizetest/sizetest.c
index bdb06a2..e01e800 100644
--- a/sizetest/sizetest.c
+++ b/sizetest/sizetest.c
@@ -173,7 +173,7 @@ static void print_ocfs2_dinode(void)
 	SHOW_OFFSET(struct ocfs2_dinode, i_generation);
 	SHOW_OFFSET(struct ocfs2_dinode, i_suballoc_slot);
 	SHOW_OFFSET(struct ocfs2_dinode, i_suballoc_bit);
-	SHOW_OFFSET(struct ocfs2_dinode, i_reserved0);
+	SHOW_OFFSET(struct ocfs2_dinode, i_links_count_hi);
 	SHOW_OFFSET(struct ocfs2_dinode, i_xattr_inline_size);
 	SHOW_OFFSET(struct ocfs2_dinode, i_clusters);
 	SHOW_OFFSET(struct ocfs2_dinode, i_uid);
@@ -197,6 +197,7 @@ static void print_ocfs2_dinode(void)
 	SHOW_OFFSET(struct ocfs2_dinode, i_dyn_features);
 	SHOW_OFFSET(struct ocfs2_dinode, i_xattr_loc);
 	SHOW_OFFSET(struct ocfs2_dinode, i_check);
+	SHOW_OFFSET(struct ocfs2_dinode, i_dx_root);
 	SHOW_OFFSET(struct ocfs2_dinode, i_reserved2);

 	SHOW_OFFSET(struct ocfs2_dinode, id1.i_pad1);
-- 
Coly Li
SuSE Labs



More information about the Ocfs2-tools-devel mailing list