[Ocfs-tools-commits] khackel commits r59 - in trunk/ocfs2: . format format/inc

Sun Jun 13 20:33:53 CDT 2004

Author: khackel
Date: 2004-06-13 19:33:51 -0500 (Sun, 13 Jun 2004)
New Revision: 59

Added:
   trunk/ocfs2/Makefile
   trunk/ocfs2/format/
   trunk/ocfs2/format/Makefile
   trunk/ocfs2/format/inc/
   trunk/ocfs2/format/inc/jfs_compat.h
   trunk/ocfs2/format/inc/kernel-jbd.h
   trunk/ocfs2/format/inc/kernel-list.h
   trunk/ocfs2/format/inc/ocfs1_fs_compat.h
   trunk/ocfs2/format/inc/ocfs2_fs.h
   trunk/ocfs2/format/mkfs2.c
Log:
well someone will hate all this, but i had to put something in :)

Added: trunk/ocfs2/Makefile
===================================================================

--- trunk/ocfs2/Makefile	2004-06-13 23:54:25 UTC (rev 58)
+++ trunk/ocfs2/Makefile	2004-06-14 00:33:51 UTC (rev 59)
@@ -0,0 +1,7 @@
+TOPDIR = ..
+
+include $(TOPDIR)/Preamble.make
+
+SUBDIRS = libocfs debugocfs format fsck bugfix
+
+include $(TOPDIR)/Postamble.make

Added: trunk/ocfs2/format/Makefile
===================================================================
--- trunk/ocfs2/format/Makefile	2004-06-13 23:54:25 UTC (rev 58)
+++ trunk/ocfs2/format/Makefile	2004-06-14 00:33:51 UTC (rev 59)
@@ -0,0 +1,67 @@
+TOPDIR = ../..
+
+include $(TOPDIR)/Preamble.make
+
+WARNINGS = -Wall -Wstrict-prototypes -Wno-format -Wmissing-prototypes \
+           -Wmissing-declarations
+
+ifdef OCFS_DEBUG
+OPTS = -g
+endif
+
+CFLAGS = $(OPTS) -fno-strict-aliasing $(WARNINGS) 
+
+SBIN_PROGRAMS = mkfs.ocfs2
+
+INCLUDES = -Iinc
+DEFINES = -DLINUX -DUSERSPACE_TOOL -DFORMAT_OCFS
+
+OPTIMIZE = -O2
+
+ifeq ($(OCFS_PROCESSOR),x86_64)
+  CFLAGS += -m64
+endif
+ifeq ($(OCFS_PROCESSOR),ia64)
+endif
+ifeq ($(OCFS_PROCESSOR),i686)
+  DEFINES += -D__ILP32__
+endif
+
+CFLAGS += $(OPTIMIZE)
+
+VERSION_FILES = mkfs2.c inc/jfs_compat.h inc/kernel-jbd.h inc/kernel-list.h
+VERSION_SRC = mkfs2.c
+VERSION_PREFIX = OCFS2
+
+DIST_RULES = dist-incdir
+
+#MANS = mkfs.ocfs2.8
+# install-man-links stays out of INSTALL_RULES while MANS and its rule are disabled.
+INSTALL_RULES = install-sbin-links
+
+##########################
+# WARNING!!!
+# are we still going to do argv[0] tricks?
+##########################
+
+# mkfs.ocfs2 is the SBIN_PROGRAMS binary itself: never rm or relink it here.
+install-sbin-links: install-sbin-programs
+	cd $(DESTDIR)$(sbindir) \
+	&& rm -f resizeocfs \
+	&& $(LN_S) tuneocfs resizeocfs
+
+#install-man-links: install-mans
+#	cd $(DESTDIR)$(mandir)/man8 \
+#	&& rm -f mkfs.ocfs.8 resizeocfs.8 \
+#	&& $(LN_S) mkfs.ocfs.8 mkfs.ocfs2.8 \
+#	&& $(LN_S) tuneocfs.8 resizeocfs.8
+
+DIST_FILES = $(VERSION_FILES) $(VERSION_SRC) #mkfs.ocfs2.8.in
+
+mkfs.ocfs2: mkfs2.o
+	$(LINK) 
+
+dist-incdir:
+	$(TOPDIR)/mkinstalldirs $(DIST_DIR)/inc
+
+include $(TOPDIR)/Postamble.make

Added: trunk/ocfs2/format/inc/jfs_compat.h
===================================================================
--- trunk/ocfs2/format/inc/jfs_compat.h	2004-06-13 23:54:25 UTC (rev 58)
+++ trunk/ocfs2/format/inc/jfs_compat.h	2004-06-14 00:33:51 UTC (rev 59)
@@ -0,0 +1,69 @@
+
+#ifndef _JFS_COMPAT_H
+#define _JFS_COMPAT_H
+
+#include "kernel-list.h"
+#include <errno.h>
+#ifdef HAVE_NETINET_IN_H
+#include <netinet/in.h>
+#endif
+
+// libocfs.h has these
+//#define printk printf
+//#define KERN_ERR ""
+#define KERN_DEBUG ""
+#define KERN_EMERG ""
+
+#define READ 0
+#define WRITE 1
+
+#define cpu_to_be32(n) htonl(n)
+#define be32_to_cpu(n) ntohl(n)
+
+typedef unsigned int tid_t;
+typedef struct journal_s journal_t;
+
+struct buffer_head;
+struct inode;
+
+struct journal_s
+{
+	unsigned long		j_flags;
+	int			j_errno;
+	struct buffer_head *	j_sb_buffer;
+	struct journal_superblock_s *j_superblock;
+	int			j_format_version;
+	unsigned long		j_head;
+	unsigned long		j_tail;
+	unsigned long		j_free;
+	unsigned long		j_first, j_last;
+	kdev_t			j_dev;
+	kdev_t			j_fs_dev;
+	int			j_blocksize;
+	unsigned int		j_blk_offset;
+	unsigned int		j_maxlen;
+	struct inode *		j_inode;
+	tid_t			j_tail_sequence;
+	tid_t			j_transaction_sequence;
+	__u8			j_uuid[16];
+	struct jbd_revoke_table_s *j_revoke;
+};
+
+#define J_ASSERT(assert)						\
+	do { if (!(assert)) {						\
+		printf ("Assertion failure in %s() at %s line %d: "	\
+			"\"%s\"\n",					\
+			__FUNCTION__, __FILE__, __LINE__, # assert);	\
+		fatal_error(e2fsck_global_ctx, 0);			\
+	} } while (0)
+
+#define is_journal_abort(x) 0
+
+#define BUFFER_TRACE(bh, info)	do {} while (0)
+
+/* Need this so we can compile with configure --enable-gcc-wall */
+#ifdef NO_INLINE_FUNCS
+#define inline
+#endif
+
+#endif /* _JFS_COMPAT_H */

Added: trunk/ocfs2/format/inc/kernel-jbd.h
===================================================================
--- trunk/ocfs2/format/inc/kernel-jbd.h	2004-06-13 23:54:25 UTC (rev 58)
+++ trunk/ocfs2/format/inc/kernel-jbd.h	2004-06-14 00:33:51 UTC (rev 59)
@@ -0,0 +1,910 @@
+/*
+ * linux/include/linux/jbd.h
+ * 
+ * Written by Stephen C. Tweedie <sct at redhat.com>
+ *
+ * Copyright 1998-2000 Red Hat, Inc --- All Rights Reserved
+ *
+ * This file is part of the Linux kernel and is made available under
+ * the terms of the GNU General Public License, version 2, or at your
+ * option, any later version, incorporated herein by reference.
+ *
+ * Definitions for transaction data structures for the buffer cache
+ * filesystem journaling support.
+ */
+
+#ifndef _LINUX_JBD_H
+#define _LINUX_JBD_H
+
+#if defined(CONFIG_JBD) || defined(CONFIG_JBD_MODULE) || !defined(__KERNEL__)
+
+/* Allow this file to be included directly into e2fsprogs */
+#ifndef __KERNEL__
+#include "jfs_compat.h"
+#define JFS_DEBUG
+#define jfs_debug jbd_debug
+#else
+
+#include <linux/journal-head.h>
+#include <linux/stddef.h>
+#include <asm/semaphore.h>
+#endif
+
+#ifndef __GNUC__
+#define __FUNCTION__ ""
+#endif
+
+#define journal_oom_retry 1
+
+#ifdef __STDC__
+#ifdef CONFIG_JBD_DEBUG
+/*
+ * Define JBD_EXPENSIVE_CHECKING to enable more expensive internal
+ * consistency checks.  By default we don't do this unless
+ * CONFIG_JBD_DEBUG is on.
+ */
+#define JBD_EXPENSIVE_CHECKING
+extern int journal_enable_debug;
+
+#define jbd_debug(n, f, a...)						\
+	do {								\
+		if ((n) <= journal_enable_debug) {			\
+			printk (KERN_DEBUG "(%s, %d): %s: ",		\
+				__FILE__, __LINE__, __FUNCTION__);	\
+		  	printk (f, ## a);				\
+		}							\
+	} while (0)
+#else
+#ifdef __GNUC__
+#define jbd_debug(f, a...)	/**/
+#else
+#define jbd_debug(f, ...)	/**/
+#endif	
+#endif
+#else
+#define jbd_debug(x)		/* AIX doesn't do STDC */
+#endif
+
+extern void * __jbd_kmalloc (char *where, size_t size, int flags, int retry);
+#define jbd_kmalloc(size, flags) \
+	__jbd_kmalloc(__FUNCTION__, (size), (flags), journal_oom_retry)
+#define jbd_rep_kmalloc(size, flags) \
+	__jbd_kmalloc(__FUNCTION__, (size), (flags), 1)
+
+#define JFS_MIN_JOURNAL_BLOCKS 1024
+
+#ifdef __KERNEL__
+typedef struct handle_s		handle_t;	/* Atomic operation type */
+typedef struct journal_s	journal_t;	/* Journal control structure */
+#endif
+
+/*
+ * Internal structures used by the logging mechanism:
+ */
+
+#define JFS_MAGIC_NUMBER 0xc03b3998U /* The first 4 bytes of /dev/random! */
+
+/*
+ * On-disk structures
+ */
+
+/* 
+ * Descriptor block types:
+ */
+
+#define JFS_DESCRIPTOR_BLOCK	1
+#define JFS_COMMIT_BLOCK	2
+#define JFS_SUPERBLOCK_V1	3
+#define JFS_SUPERBLOCK_V2	4
+#define JFS_REVOKE_BLOCK	5
+
+/*
+ * Standard header for all descriptor blocks:
+ */
+typedef struct journal_header_s
+{
+	__u32		h_magic;
+	__u32		h_blocktype;
+	__u32		h_sequence;
+} journal_header_t;
+
+
+/* 
+ * The block tag: used to describe a single buffer in the journal 
+ */
+typedef struct journal_block_tag_s
+{
+	__u32		t_blocknr;	/* The on-disk block number */
+	__u32		t_flags;	/* See below */
+} journal_block_tag_t;
+
+/* 
+ * The revoke descriptor: used on disk to describe a series of blocks to
+ * be revoked from the log 
+ */
+typedef struct journal_revoke_header_s
+{
+	journal_header_t r_header;
+	int		 r_count;	/* Count of bytes used in the block */
+} journal_revoke_header_t;
+
+
+/* Definitions for the journal tag flags word: */
+#define JFS_FLAG_ESCAPE		1	/* on-disk block is escaped */
+#define JFS_FLAG_SAME_UUID	2	/* block has same uuid as previous */
+#define JFS_FLAG_DELETED	4	/* block deleted by this transaction */
+#define JFS_FLAG_LAST_TAG	8	/* last tag in this descriptor block */
+
+
+/*
+ * The journal superblock.  All fields are in big-endian byte order.
+ */
+typedef struct journal_superblock_s
+{
+/* 0x0000 */
+	journal_header_t s_header;
+
+/* 0x000C */
+	/* Static information describing the journal */
+	__u32	s_blocksize;		/* journal device blocksize */
+	__u32	s_maxlen;		/* total blocks in journal file */
+	__u32	s_first;		/* first block of log information */
+	
+/* 0x0018 */
+	/* Dynamic information describing the current state of the log */
+	__u32	s_sequence;		/* first commit ID expected in log */
+	__u32	s_start;		/* blocknr of start of log */
+
+/* 0x0020 */
+	/* Error value, as set by journal_abort(). */
+	__s32	s_errno;
+
+/* 0x0024 */
+	/* Remaining fields are only valid in a version-2 superblock */
+	__u32	s_feature_compat; 	/* compatible feature set */
+	__u32	s_feature_incompat; 	/* incompatible feature set */
+	__u32	s_feature_ro_compat; 	/* readonly-compatible feature set */
+/* 0x0030 */
+	__u8	s_uuid[16];		/* 128-bit uuid for journal */
+
+/* 0x0040 */
+	__u32	s_nr_users;		/* Nr of filesystems sharing log */
+	
+	__u32	s_dynsuper;		/* Blocknr of dynamic superblock copy*/
+	
+/* 0x0048 */
+	__u32	s_max_transaction;	/* Limit of journal blocks per trans.*/
+	__u32	s_max_trans_data;	/* Limit of data blocks per trans. */
+
+/* 0x0050 */
+	__u32	s_padding[44];
+
+/* 0x0100 */
+	__u8	s_users[16*48];		/* ids of all fs'es sharing the log */
+/* 0x0400 */
+} journal_superblock_t;
+
+#define JFS_HAS_COMPAT_FEATURE(j,mask)					\
+	((j)->j_format_version >= 2 &&					\
+	 ((j)->j_superblock->s_feature_compat & cpu_to_be32((mask))))
+#define JFS_HAS_RO_COMPAT_FEATURE(j,mask)				\
+	((j)->j_format_version >= 2 &&					\
+	 ((j)->j_superblock->s_feature_ro_compat & cpu_to_be32((mask))))
+#define JFS_HAS_INCOMPAT_FEATURE(j,mask)				\
+	((j)->j_format_version >= 2 &&					\
+	 ((j)->j_superblock->s_feature_incompat & cpu_to_be32((mask))))
+
+#define JFS_FEATURE_INCOMPAT_REVOKE	0x00000001
+
+/* Features known to this kernel version: */
+#define JFS_KNOWN_COMPAT_FEATURES	0
+#define JFS_KNOWN_ROCOMPAT_FEATURES	0
+#define JFS_KNOWN_INCOMPAT_FEATURES	JFS_FEATURE_INCOMPAT_REVOKE
+
+#ifdef __KERNEL__
+
+#include <linux/fs.h>
+#include <linux/sched.h>
+
+#define JBD_ASSERTIONS
+#ifdef JBD_ASSERTIONS
+#define J_ASSERT(assert)						\
+do {									\
+	if (!(assert)) {						\
+		printk (KERN_EMERG					\
+			"Assertion failure in %s() at %s:%d: \"%s\"\n",	\
+			__FUNCTION__, __FILE__, __LINE__, # assert);	\
+		BUG();							\
+	}								\
+} while (0)
+
+#if defined(CONFIG_BUFFER_DEBUG)
+void buffer_assertion_failure(struct buffer_head *bh);
+#define J_ASSERT_BH(bh, expr)						\
+	do {								\
+		if (!(expr))						\
+			buffer_assertion_failure(bh);			\
+		J_ASSERT(expr);						\
+	} while (0)
+#define J_ASSERT_JH(jh, expr)	J_ASSERT_BH(jh2bh(jh), expr)
+#else
+#define J_ASSERT_BH(bh, expr)	J_ASSERT(expr)
+#define J_ASSERT_JH(jh, expr)	J_ASSERT(expr)
+#endif
+
+#else
+#define J_ASSERT(assert)
+#endif		/* JBD_ASSERTIONS */
+
+enum jbd_state_bits {
+	BH_JWrite
+	  = BH_PrivateStart,	/* 1 if being written to log (@@@ DEBUGGING) */
+	BH_Freed,		/* 1 if buffer has been freed (truncated) */
+	BH_Revoked,		/* 1 if buffer has been revoked from the log */
+	BH_RevokeValid,		/* 1 if buffer revoked flag is valid */
+	BH_JBDDirty,		/* 1 if buffer is dirty but journaled */
+};
+
+/* Return true if the buffer is one which JBD is managing */
+static inline int buffer_jbd(struct buffer_head *bh)
+{
+	return __buffer_state(bh, JBD);
+}
+
+static inline struct buffer_head *jh2bh(struct journal_head *jh)
+{
+	return jh->b_bh;
+}
+
+static inline struct journal_head *bh2jh(struct buffer_head *bh)
+{
+	return bh->b_private;
+}
+
+struct jbd_revoke_table_s;
+
+/* The handle_t type represents a single atomic update being performed
+ * by some process.  All filesystem modifications made by the process go
+ * through this handle.  Recursive operations (such as quota operations)
+ * are gathered into a single update.
+ *
+ * The buffer credits field is used to account for journaled buffers
+ * being modified by the running process.  To ensure that there is
+ * enough log space for all outstanding operations, we need to limit the
+ * number of outstanding buffers possible at any time.  When the
+ * operation completes, any buffer credits not used are credited back to
+ * the transaction, so that at all times we know how many buffers the
+ * outstanding updates on a transaction might possibly touch. */
+
+struct handle_s 
+{
+	/* Which compound transaction is this update a part of? */
+	transaction_t	      * h_transaction;
+
+	/* Number of remaining buffers we are allowed to dirty: */
+	int			h_buffer_credits;
+
+	/* Reference count on this handle */
+	int			h_ref;
+
+	/* Field for caller's use to track errors through large fs
+	   operations */
+	int			h_err;
+
+	/* Flags */
+	unsigned int	h_sync:		1;	/* sync-on-close */
+	unsigned int	h_jdata:	1;	/* force data journaling */
+	unsigned int	h_aborted:	1;	/* fatal error on handle */
+};
+
+
+/* The transaction_t type is the guts of the journaling mechanism.  It
+ * tracks a compound transaction through its various states:
+ *
+ * RUNNING:	accepting new updates
+ * LOCKED:	Updates still running but we don't accept new ones
+ * RUNDOWN:	Updates are tidying up but have finished requesting
+ *		new buffers to modify (state not used for now)
+ * FLUSH:       All updates complete, but we are still writing to disk
+ * COMMIT:      All data on disk, writing commit record
+ * FINISHED:	We still have to keep the transaction for checkpointing.
+ *
+ * The transaction keeps track of all of the buffers modified by a
+ * running transaction, and all of the buffers committed but not yet
+ * flushed to home for finished transactions.
+ */
+
+struct transaction_s 
+{
+	/* Pointer to the journal for this transaction. */
+	journal_t *		t_journal;
+	
+	/* Sequence number for this transaction */
+	tid_t			t_tid;
+	
+	/* Transaction's current state */
+	enum {
+		T_RUNNING,
+		T_LOCKED,
+		T_RUNDOWN,
+		T_FLUSH,
+		T_COMMIT,
+		T_FINISHED 
+	}			t_state;
+
+	/* Where in the log does this transaction's commit start? */
+	unsigned long		t_log_start;
+	
+	/* Doubly-linked circular list of all inodes owned by this
+           transaction */	/* AKPM: unused */
+	struct inode *		t_ilist;
+	
+	/* Number of buffers on the t_buffers list */
+	int			t_nr_buffers;
+	
+	/* Doubly-linked circular list of all buffers reserved but not
+           yet modified by this transaction */
+	struct journal_head *	t_reserved_list;
+	
+	/* Doubly-linked circular list of all metadata buffers owned by this
+           transaction */
+	struct journal_head *	t_buffers;
+	
+	/*
+	 * Doubly-linked circular list of all data buffers still to be
+	 * flushed before this transaction can be committed.
+	 * Protected by journal_datalist_lock.
+	 */
+	struct journal_head *	t_sync_datalist;
+	
+	/*
+	 * Doubly-linked circular list of all writepage data buffers
+	 * still to be written before this transaction can be committed.
+	 * Protected by journal_datalist_lock.
+	 */
+	struct journal_head *	t_async_datalist;
+	
+	/* Doubly-linked circular list of all forget buffers (superseded
+           buffers which we can un-checkpoint once this transaction
+           commits) */
+	struct journal_head *	t_forget;
+	
+	/*
+	 * Doubly-linked circular list of all buffers still to be
+	 * flushed before this transaction can be checkpointed.
+	 */
+	/* Protected by journal_datalist_lock */
+	struct journal_head *	t_checkpoint_list;
+	
+	/* Doubly-linked circular list of temporary buffers currently
+           undergoing IO in the log */
+	struct journal_head *	t_iobuf_list;
+	
+	/* Doubly-linked circular list of metadata buffers being
+           shadowed by log IO.  The IO buffers on the iobuf list and the
+           shadow buffers on this list match each other one for one at
+           all times. */
+	struct journal_head *	t_shadow_list;
+	
+	/* Doubly-linked circular list of control buffers being written
+           to the log. */
+	struct journal_head *	t_log_list;
+	
+	/* Number of outstanding updates running on this transaction */
+	int			t_updates;
+
+	/* Number of buffers reserved for use by all handles in this
+	 * transaction handle but not yet modified. */
+	int			t_outstanding_credits;
+	
+	/*
+	 * Forward and backward links for the circular list of all
+	 * transactions awaiting checkpoint.
+	 */
+	/* Protected by journal_datalist_lock */
+	transaction_t		*t_cpnext, *t_cpprev;
+
+	/* When will the transaction expire (become due for commit), in
+	 * jiffies ? */
+	unsigned long		t_expires;
+
+	/* How many handles used this transaction? */
+	int t_handle_count;
+};
+
+
+/* The journal_t maintains all of the journaling state information for a
+ * single filesystem.  It is linked to from the fs superblock structure.
+ * 
+ * We use the journal_t to keep track of all outstanding transaction
+ * activity on the filesystem, and to manage the state of the log
+ * writing process. */
+
+struct journal_s
+{
+	/* General journaling state flags */
+	unsigned long		j_flags;
+
+	/* Is there an outstanding uncleared error on the journal (from
+	 * a prior abort)? */
+	int			j_errno;
+	
+	/* The superblock buffer */
+	struct buffer_head *	j_sb_buffer;
+	journal_superblock_t *	j_superblock;
+
+	/* Version of the superblock format */
+	int			j_format_version;
+
+	/* Number of processes waiting to create a barrier lock */
+	int			j_barrier_count;
+	
+	/* The barrier lock itself */
+	struct semaphore	j_barrier;
+	
+	/* Transactions: The current running transaction... */
+	transaction_t *		j_running_transaction;
+	
+	/* ... the transaction we are pushing to disk ... */
+	transaction_t *		j_committing_transaction;
+	
+	/* ... and a linked circular list of all transactions waiting
+	 * for checkpointing. */
+	/* Protected by journal_datalist_lock */
+	transaction_t *		j_checkpoint_transactions;
+
+	/* Wait queue for waiting for a locked transaction to start
+           committing, or for a barrier lock to be released */
+	wait_queue_head_t	j_wait_transaction_locked;
+	
+	/* Wait queue for waiting for checkpointing to complete */
+	wait_queue_head_t	j_wait_logspace;
+	
+	/* Wait queue for waiting for commit to complete */
+	wait_queue_head_t	j_wait_done_commit;
+	
+	/* Wait queue to trigger checkpointing */
+	wait_queue_head_t	j_wait_checkpoint;
+	
+	/* Wait queue to trigger commit */
+	wait_queue_head_t	j_wait_commit;
+	
+	/* Wait queue to wait for updates to complete */
+	wait_queue_head_t	j_wait_updates;
+
+	/* Semaphore for locking against concurrent checkpoints */
+	struct semaphore 	j_checkpoint_sem;
+
+	/* The main journal lock, used by lock_journal() */
+	struct semaphore	j_sem;
+		
+	/* Journal head: identifies the first unused block in the journal. */
+	unsigned long		j_head;
+	
+	/* Journal tail: identifies the oldest still-used block in the
+	 * journal. */
+	unsigned long		j_tail;
+
+	/* Journal free: how many free blocks are there in the journal? */
+	unsigned long		j_free;
+
+	/* Journal start and end: the block numbers of the first usable
+	 * block and one beyond the last usable block in the journal. */
+	unsigned long		j_first, j_last;
+
+	/* Device, blocksize and starting block offset for the location
+	 * where we store the journal. */
+	kdev_t			j_dev;
+	int			j_blocksize;
+	unsigned int		j_blk_offset;
+
+	/* Device which holds the client fs.  For internal journal this
+	 * will be equal to j_dev. */
+	kdev_t			j_fs_dev;
+
+	/* Total maximum capacity of the journal region on disk. */
+	unsigned int		j_maxlen;
+
+	/* Optional inode where we store the journal.  If present, all
+	 * journal block numbers are mapped into this inode via
+	 * bmap(). */
+	struct inode *		j_inode;
+
+	/* Sequence number of the oldest transaction in the log */
+	tid_t			j_tail_sequence;
+	/* Sequence number of the next transaction to grant */
+	tid_t			j_transaction_sequence;
+	/* Sequence number of the most recently committed transaction */
+	tid_t			j_commit_sequence;
+	/* Sequence number of the most recent transaction wanting commit */
+	tid_t			j_commit_request;
+
+	/* Journal uuid: identifies the object (filesystem, LVM volume
+	 * etc) backed by this journal.  This will eventually be
+	 * replaced by an array of uuids, allowing us to index multiple
+	 * devices within a single journal and to perform atomic updates
+	 * across them.  */
+
+	__u8			j_uuid[16];
+
+	/* Pointer to the current commit thread for this journal */
+	struct task_struct *	j_task;
+
+	/* Maximum number of metadata buffers to allow in a single
+	 * compound commit transaction */
+	int			j_max_transaction_buffers;
+
+	/* What is the maximum transaction lifetime before we begin a
+	 * commit? */
+	unsigned long		j_commit_interval;
+
+	/* The timer used to wakeup the commit thread: */
+	struct timer_list *	j_commit_timer;
+	int			j_commit_timer_active;
+
+	/* Link all journals together - system-wide */
+	struct list_head	j_all_journals;
+
+	/* The revoke table: maintains the list of revoked blocks in the
+           current transaction. */
+	struct jbd_revoke_table_s *j_revoke;
+};
+
+/* 
+ * Journal flag definitions 
+ */
+#define JFS_UNMOUNT	0x001	/* Journal thread is being destroyed */
+#define JFS_ABORT	0x002	/* Journaling has been aborted for errors. */
+#define JFS_ACK_ERR	0x004	/* The errno in the sb has been acked */
+#define JFS_FLUSHED	0x008	/* The journal superblock has been flushed */
+#define JFS_LOADED	0x010	/* The journal superblock has been loaded */
+
+/* 
+ * Function declarations for the journaling transaction and buffer
+ * management
+ */
+
+/* Filing buffers */
+extern void __journal_unfile_buffer(struct journal_head *);
+extern void journal_unfile_buffer(struct journal_head *);
+extern void __journal_refile_buffer(struct journal_head *);
+extern void journal_refile_buffer(struct journal_head *);
+extern void __journal_file_buffer(struct journal_head *, transaction_t *, int);
+extern void __journal_free_buffer(struct journal_head *bh);
+extern void journal_file_buffer(struct journal_head *, transaction_t *, int);
+extern void __journal_clean_data_list(transaction_t *transaction);
+
+/* Log buffer allocation */
+extern struct journal_head * journal_get_descriptor_buffer(journal_t *);
+extern unsigned long journal_next_log_block(journal_t *);
+
+/* Commit management */
+extern void journal_commit_transaction(journal_t *);
+
+/* Checkpoint list management */
+int __journal_clean_checkpoint_list(journal_t *journal);
+extern void journal_remove_checkpoint(struct journal_head *);
+extern void __journal_remove_checkpoint(struct journal_head *);
+extern void journal_insert_checkpoint(struct journal_head *, transaction_t *);
+extern void __journal_insert_checkpoint(struct journal_head *,transaction_t *);
+
+/* Buffer IO */
+extern int 
+journal_write_metadata_buffer(transaction_t	  *transaction,
+			      struct journal_head  *jh_in,
+			      struct journal_head **jh_out,
+			      int		   blocknr);
+
+/* Transaction locking */
+extern void		__wait_on_journal (journal_t *);
+
+/*
+ * Journal locking.
+ *
+ * We need to lock the journal during transaction state changes so that
+ * nobody ever tries to take a handle on the running transaction while
+ * we are in the middle of moving it to the commit phase.  
+ *
+ * Note that the locking is completely interrupt unsafe.  We never touch
+ * journal structures from interrupts.
+ *
+ * In 2.2, the BKL was required for lock_journal.  This is no longer
+ * the case.
+ */
+
+static inline void lock_journal(journal_t *journal)
+{
+	down(&journal->j_sem);
+}
+
+/* This returns zero if we acquired the semaphore */
+static inline int try_lock_journal(journal_t * journal)
+{
+	return down_trylock(&journal->j_sem);
+}
+
+static inline void unlock_journal(journal_t * journal)
+{
+	up(&journal->j_sem);
+}
+
+
+static inline handle_t *journal_current_handle(void)
+{
+	return current->journal_info;
+}
+
+/* The journaling code user interface:
+ *
+ * Create and destroy handles
+ * Register buffer modifications against the current transaction. 
+ */
+
+extern handle_t *journal_start(journal_t *, int nblocks);
+extern handle_t *journal_try_start(journal_t *, int nblocks);
+extern int	 journal_restart (handle_t *, int nblocks);
+extern int	 journal_extend (handle_t *, int nblocks);
+extern int	 journal_get_write_access (handle_t *, struct buffer_head *);
+extern int	 journal_get_create_access (handle_t *, struct buffer_head *);
+extern int	 journal_get_undo_access (handle_t *, struct buffer_head *);
+extern int	 journal_dirty_data (handle_t *,
+				struct buffer_head *, int async);
+extern int	 journal_dirty_metadata (handle_t *, struct buffer_head *);
+extern void	 journal_release_buffer (handle_t *, struct buffer_head *);
+extern void	 journal_forget (handle_t *, struct buffer_head *);
+extern void	 journal_sync_buffer (struct buffer_head *);
+extern int	 journal_flushpage(journal_t *, struct page *, unsigned long);
+extern int	 journal_try_to_free_buffers(journal_t *, struct page *, int);
+extern int	 journal_stop(handle_t *);
+extern int	 journal_flush (journal_t *);
+
+extern void	 journal_lock_updates (journal_t *);
+extern void	 journal_unlock_updates (journal_t *);
+
+extern journal_t * journal_init_dev(kdev_t dev, kdev_t fs_dev,
+				int start, int len, int bsize);
+extern journal_t * journal_init_inode (struct inode *);
+extern int	   journal_update_format (journal_t *);
+extern int	   journal_check_used_features 
+		   (journal_t *, unsigned long, unsigned long, unsigned long);
+extern int	   journal_check_available_features 
+		   (journal_t *, unsigned long, unsigned long, unsigned long);
+extern int	   journal_set_features 
+		   (journal_t *, unsigned long, unsigned long, unsigned long);
+extern int	   journal_create     (journal_t *);
+extern int	   journal_load       (journal_t *journal);
+extern void	   journal_destroy    (journal_t *);
+extern int	   journal_recover    (journal_t *journal);
+extern int	   journal_wipe       (journal_t *, int);
+extern int	   journal_skip_recovery (journal_t *);
+extern void	   journal_update_superblock (journal_t *, int);
+extern void	   __journal_abort      (journal_t *);
+extern void	   journal_abort      (journal_t *, int);
+extern int	   journal_errno      (journal_t *);
+extern void	   journal_ack_err    (journal_t *);
+extern int	   journal_clear_err  (journal_t *);
+extern unsigned long journal_bmap(journal_t *journal, unsigned long blocknr);
+extern int	    journal_force_commit(journal_t *journal);
+
+/*
+ * journal_head management
+ */
+extern struct journal_head
+		*journal_add_journal_head(struct buffer_head *bh);
+extern void	journal_remove_journal_head(struct buffer_head *bh);
+extern void	__journal_remove_journal_head(struct buffer_head *bh);
+extern void	journal_unlock_journal_head(struct journal_head *jh);
+
+/* Primary revoke support */
+#define JOURNAL_REVOKE_DEFAULT_HASH 256
+extern int	   journal_init_revoke(journal_t *, int);
+extern void	   journal_destroy_revoke_caches(void);
+extern int	   journal_init_revoke_caches(void);
+
+extern void	   journal_destroy_revoke(journal_t *);
+extern int	   journal_revoke (handle_t *,
+				unsigned long, struct buffer_head *);
+extern int	   journal_cancel_revoke(handle_t *, struct journal_head *);
+extern void	   journal_write_revoke_records(journal_t *, transaction_t *);
+
+/* Recovery revoke support */
+extern int	   journal_set_revoke(journal_t *, unsigned long, tid_t);
+extern int	   journal_test_revoke(journal_t *, unsigned long, tid_t);
+extern void	   journal_clear_revoke(journal_t *);
+extern void	   journal_brelse_array(struct buffer_head *b[], int n);
+
+/* The log thread user interface:
+ *
+ * Request space in the current transaction, and force transaction commit
+ * transitions on demand.
+ */
+
+extern int	log_space_left (journal_t *); /* Called with journal locked */
+extern tid_t	log_start_commit (journal_t *, transaction_t *);
+extern void	log_wait_commit (journal_t *, tid_t);
+extern int	log_do_checkpoint (journal_t *, int);
+
+extern void	log_wait_for_space(journal_t *, int nblocks);
+extern void	__journal_drop_transaction(journal_t *, transaction_t *);
+extern int	cleanup_journal_tail(journal_t *);
+
+/* Reduce journal memory usage by flushing */
+extern void shrink_journal_memory(void);
+
+/* Debugging code only: */
+
+#define jbd_ENOSYS() \
+do {								      \
+	printk (KERN_ERR "JBD unimplemented function " __FUNCTION__); \
+	current->state = TASK_UNINTERRUPTIBLE;			      \
+	schedule();						      \
+} while (1)
+
+/*
+ * is_journal_aborted
+ *
+ * Simple test wrapper function to test the JFS_ABORT state flag.  This
+ * bit, when set, indicates that we have had a fatal error somewhere,
+ * either inside the journaling layer or indicated to us by the client
+ * (eg. ext3), and that we should not commit any further
+ * transactions.
+ */
+
+static inline int is_journal_aborted(journal_t *journal)
+{
+	return journal->j_flags & JFS_ABORT;
+}
+
+static inline int is_handle_aborted(handle_t *handle)
+{
+	if (handle->h_aborted)
+		return 1;
+	return is_journal_aborted(handle->h_transaction->t_journal);
+}
+
+static inline void journal_abort_handle(handle_t *handle)
+{
+	handle->h_aborted = 1;
+}
+
+/* Not all architectures define BUG() */
+#ifndef BUG
+#define BUG() do { \
+        printk("kernel BUG at %s:%d!\n", __FILE__, __LINE__); \
+	* ((char *) 0) = 0; \
+ } while (0)
+#endif /* BUG */
+
+#else
+
+extern int	   journal_recover    (journal_t *journal);
+extern int	   journal_skip_recovery (journal_t *);
+
+/* Primary revoke support */
+extern int	   journal_init_revoke(journal_t *, int);
+extern void	   journal_destroy_revoke_caches(void);
+extern int	   journal_init_revoke_caches(void);
+
+/* Recovery revoke support */
+extern int	   journal_set_revoke(journal_t *, unsigned long, tid_t);
+extern int	   journal_test_revoke(journal_t *, unsigned long, tid_t);
+extern void	   journal_clear_revoke(journal_t *);
+extern void	   journal_brelse_array(struct buffer_head *b[], int n);
+
+extern void	   journal_destroy_revoke(journal_t *);
+#endif /* __KERNEL__   */
+
+/* Comparison functions for transaction IDs: perform comparisons using
+ * modulo arithmetic so that they work over sequence number wraps. */
+
+static inline int tid_gt(tid_t x, tid_t y)
+{
+	int difference = (x - y);	/* tid difference converted to a signed int */
+	return (difference > 0);
+}
+
+static inline int tid_geq(tid_t x, tid_t y)
+{
+	int difference = (x - y);	/* tid difference converted to a signed int */
+	return (difference >= 0);
+}
+
+extern int journal_blocks_per_page(struct inode *inode);
+
+/*
+ * Definitions which augment the buffer_head layer
+ */
+
+/* journaling buffer types */
+#define BJ_None		0	/* Not journaled */
+#define BJ_SyncData	1	/* Normal data: flush before commit */
+#define BJ_AsyncData	2	/* writepage data: wait on it before commit */
+#define BJ_Metadata	3	/* Normal journaled metadata */
+#define BJ_Forget	4	/* Buffer superseded by this transaction */
+#define BJ_IO		5	/* Buffer is for temporary IO use */
+#define BJ_Shadow	6	/* Buffer contents being shadowed to the log */
+#define BJ_LogCtl	7	/* Buffer contains log descriptors */
+#define BJ_Reserved	8	/* Buffer is reserved for access by journal */
+#define BJ_Types	9	/* One past the highest BJ_* value above */
+ 
+extern int jbd_blocks_per_page(struct inode *inode);
+
+#ifdef __KERNEL__
+
+extern spinlock_t jh_splice_lock;
+/*
+ * Once `expr1' has been found true, take jh_splice_lock
+ * and then reevaluate everything.
+ */
+#define SPLICE_LOCK(expr1, expr2)				\
+	({							\
+		int ret = (expr1);				\
+		if (ret) {					\
+			spin_lock(&jh_splice_lock);		\
+			ret = (expr1) && (expr2);		\
+			spin_unlock(&jh_splice_lock);		\
+		}						\
+		ret;						\
+	})
+
+/*
+ * A number of buffer state predicates.  They test for
+ * buffer_jbd() because they are used in core kernel code.
+ *
+ * These will be racy on SMP unless we're *sure* that the
+ * buffer won't be detached from the journalling system
+ * in parallel.
+ */
+
+/* Return true if the buffer is on journal list `list' */
+static inline int buffer_jlist_eq(struct buffer_head *bh, int list)
+{
+	return SPLICE_LOCK(buffer_jbd(bh), bh2jh(bh)->b_jlist == list);
+}
+
+/* Return true if this buffer is dirty wrt the journal */
+static inline int buffer_jdirty(struct buffer_head *bh)
+{
+	return buffer_jbd(bh) && __buffer_state(bh, JBDDirty);
+}
+
+/* Return true if it's a data buffer which journalling is managing */
+static inline int buffer_jbd_data(struct buffer_head *bh)
+{
+	return SPLICE_LOCK(buffer_jbd(bh),
+			bh2jh(bh)->b_jlist == BJ_SyncData ||
+			bh2jh(bh)->b_jlist == BJ_AsyncData);
+}
+
+#ifdef CONFIG_SMP
+#define assert_spin_locked(lock)	J_ASSERT(spin_is_locked(lock))
+#else
+#define assert_spin_locked(lock)	do {} while(0)
+#endif
+
+#define buffer_trace_init(bh)	do {} while (0)
+#define print_buffer_fields(bh)	do {} while (0)
+#define print_buffer_trace(bh)	do {} while (0)
+#define BUFFER_TRACE(bh, info)	do {} while (0)
+#define BUFFER_TRACE2(bh, bh2, info)	do {} while (0)
+#define JBUFFER_TRACE(jh, info)	do {} while (0)
+
+#endif	/* __KERNEL__ */
+
+#endif	/* CONFIG_JBD || CONFIG_JBD_MODULE || !__KERNEL__ */
+
+/*
+ * Compatibility no-ops which allow the kernel to compile without CONFIG_JBD
+ * go here.
+ */
+
+#if defined(__KERNEL__) && !(defined(CONFIG_JBD) || defined(CONFIG_JBD_MODULE))
+
+#define J_ASSERT(expr)			do {} while (0)
+#define J_ASSERT_BH(bh, expr)		do {} while (0)
+#define buffer_jbd(bh)			0
+#define buffer_jlist_eq(bh, val)	0
+#define journal_buffer_journal_lru(bh)	0
+
+#endif	/* defined(__KERNEL__) && !defined(CONFIG_JBD) */
+#endif	/* _LINUX_JBD_H */

Added: trunk/ocfs2/format/inc/kernel-list.h
===================================================================
--- trunk/ocfs2/format/inc/kernel-list.h	2004-06-13 23:54:25 UTC (rev 58)
+++ trunk/ocfs2/format/inc/kernel-list.h	2004-06-14 00:33:51 UTC (rev 59)
@@ -0,0 +1,112 @@
+#ifndef _LINUX_LIST_H
+#define _LINUX_LIST_H
+
+/*
+ * Simple doubly linked list implementation.
+ *
+ * Some of the internal functions ("__xxx") are useful when
+ * manipulating whole lists rather than single entries, as
+ * sometimes we already know the next/prev entries and we can
+ * generate better code by using them directly rather than
+ * using the generic single-entry routines.
+ */
+/* Link pair of a list entry; an empty head points at itself (INIT_LIST_HEAD). */
+struct list_head {
+	struct list_head *next, *prev;
+};
+
+/* Static initializer for a list head: next and prev both point at itself. */
+#define LIST_HEAD_INIT(name) { &(name), &(name) }
+
+/* Define a list head variable called `name', initialized as empty. */
+#define LIST_HEAD(name) \
+	struct list_head name = LIST_HEAD_INIT(name)
+
+/* Run-time counterpart: make the head that `ptr' points to empty. */
+#define INIT_LIST_HEAD(ptr) do { \
+	(ptr)->next = (ptr); \
+	(ptr)->prev = (ptr); \
+} while (0)
+/* Neither GCC nor Watcom: define __inline__ away so the functions below are plain static. */
+#if (!defined(__GNUC__) && !defined(__WATCOMC__))
+#define __inline__
+#endif
+
+/*
+ * Link `new' in between `prev' and `next', which the caller guarantees
+ * to be adjacent entries.
+ *
+ * Internal helper: only for list manipulation where both neighbours
+ * are already known.
+ */
+static __inline__ void __list_add(struct list_head * new,
+	struct list_head * prev,
+	struct list_head * next)
+{
+	/* hook the new entry up to its neighbours ... */
+	new->prev = prev;
+	new->next = next;
+
+	/* ... then make both neighbours point at it */
+	prev->next = new;
+	next->prev = new;
+}
+
+/*
+ * Insert `new' directly after `head', i.e. as the first list entry.
+ */
+static __inline__ void list_add(struct list_head *new, struct list_head *head)
+{
+	struct list_head *old_first = head->next;
+
+	__list_add(new, head, old_first);
+}
+
+/*
+ * Insert `new' directly before `head', i.e. as the last list entry.
+ */
+static __inline__ void list_add_tail(struct list_head *new, struct list_head *head)
+{
+	struct list_head *old_last = head->prev;
+
+	__list_add(new, old_last, head);
+}
+
+/*
+ * Unlink whatever sits between `prev' and `next' by making the two
+ * point to each other.
+ *
+ * Internal helper: only for list manipulation where both neighbours
+ * are already known.
+ */
+static __inline__ void __list_del(struct list_head * prev,
+				  struct list_head * next)
+{
+	prev->next = next;
+	next->prev = prev;
+}
+
+/*
+ * Remove `entry' from the list it is on.  The entry's own next/prev
+ * pointers are left untouched.
+ */
+static __inline__ void list_del(struct list_head *entry)
+{
+	struct list_head *before = entry->prev;
+	struct list_head *after = entry->next;
+
+	__list_del(before, after);
+}
+
+/* Return 1 if the list at `head' has no entries, 0 otherwise. */
+static __inline__ int list_empty(struct list_head *head)
+{
+	if (head->next != head)
+		return 0;
+
+	return 1;
+}
+
+/*
+ * Splice the entries of "list" into "head", right after the head
+ * itself.  "list" is not re-initialized.
+ */
+static __inline__ void list_splice(struct list_head *list, struct list_head *head)
+{
+	struct list_head *src_first = list->next;
+	struct list_head *src_last, *dst_first;
+
+	if (src_first == list)
+		return;		/* nothing to move */
+
+	src_last = list->prev;
+	dst_first = head->next;
+
+	/* head <-> first spliced entry */
+	head->next = src_first;
+	src_first->prev = head;
+
+	/* last spliced entry <-> former first entry of head */
+	dst_first->prev = src_last;
+	src_last->next = dst_first;
+}
+
+/*
+ * Given `ptr', a pointer to the `member' list_head embedded in a
+ * structure of type `type', yield a pointer to that structure.
+ */
+#define list_entry(ptr, type, member) \
+	((type *)((char *)(ptr)-(unsigned long)(&((type *)0)->member)))
+
+/*
+ * Walk the list at `head' from first to last entry, using `pos' as
+ * the cursor.  Both arguments are parenthesized in the expansion so
+ * that `pos' may be any lvalue expression (e.g. *pp), not just a
+ * plain identifier.
+ */
+#define list_for_each(pos, head) \
+        for ((pos) = (head)->next; (pos) != (head); (pos) = (pos)->next)
+
+#endif

Added: trunk/ocfs2/format/inc/ocfs1_fs_compat.h
===================================================================
--- trunk/ocfs2/format/inc/ocfs1_fs_compat.h	2004-06-13 23:54:25 UTC (rev 58)
+++ trunk/ocfs2/format/inc/ocfs1_fs_compat.h	2004-06-14 00:33:51 UTC (rev 59)
@@ -0,0 +1,112 @@
+/* -*- mode: c; c-basic-offset: 9; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
+ * ocfs1_fs_compat.h
+ *
+ * OCFS1 volume header definitions.  OCFS2 creates valid but unmountable
+ * OCFS1 volume headers on the first two sectors of an OCFS2 volume.
+ * This allows an OCFS1 volume to see the partition and cleanly fail to
+ * mount it.
+ *
+ * Copyright (C) 2002, 2004 Oracle.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License, version 2,  as published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ *
+ * Authors: Kurt Hackel, Mark Fasheh, Sunil Mushran, Wim Coekaerts,
+ *	    Manish Singh, Neeraj Goyal, Suchit Kaura, Joel Becker
+ */
+
+#ifndef _OCFS1_FS_COMPAT_H
+#define _OCFS1_FS_COMPAT_H
+/* Array sizes, in bytes, of the OCFS1 header/label fields declared below. */
+#define MAX_VOL_SIGNATURE_LEN_V1          128
+#define MAX_MOUNT_POINT_LEN_V1            128
+#define MAX_VOL_ID_LENGTH_V1               16
+#define MAX_VOL_LABEL_LEN_V1               64
+#define MAX_CLUSTER_NAME_LEN_V1            64
+
+#define OCFS1_MAJOR_VERSION              (2)
+#define OCFS1_MINOR_VERSION              (0)
+#define OCFS1_VOLUME_SIGNATURE		 "OracleCFS"
+
+/*
+ * OCFS1 superblock.  Lives at sector 0.
+ *
+ * The markers in the left margin are byte offsets in hexadecimal;
+ * the fields end at offset 0x1B0.
+ */
+typedef struct _ocfs1_vol_disk_hdr
+{
+/*00*/	__u32 minor_version;
+	__u32 major_version;
+/*08*/	__u8 signature[MAX_VOL_SIGNATURE_LEN_V1];
+/*88*/	__u8 mount_point[MAX_MOUNT_POINT_LEN_V1];
+/*108*/	__u64 serial_num;
+/*110*/	__u64 device_size;
+	__u64 start_off;
+/*120*/	__u64 bitmap_off;
+	__u64 publ_off;
+/*130*/	__u64 vote_off;
+	__u64 root_bitmap_off;
+/*140*/	__u64 data_start_off;
+	__u64 root_bitmap_size;
+/*150*/	__u64 root_off;
+	__u64 root_size;
+/*160*/	__u64 cluster_size;
+	__u64 num_nodes;
+/*170*/	__u64 num_clusters;
+	__u64 dir_node_size;
+/*180*/	__u64 file_node_size;
+	__u64 internal_off;
+/*190*/	__u64 node_cfg_off;
+	__u64 node_cfg_size;
+/*1A0*/	__u64 new_cfg_off;
+	__u32 prot_bits;
+	__s32 excl_mount;
+/*1B0*/
+} ocfs1_vol_disk_hdr;
+
+/* OCFS1 disk lock; it is the first member of ocfs1_vol_label.  0x30 bytes. */
+typedef struct _ocfs1_disk_lock
+{
+/*00*/	__u32 curr_master;
+	__u8 file_lock;
+	__u8 compat_pad[3];  /* Not in original definition.  Used to
+				make the already existing alignment
+				explicit */
+	__u64 last_write_time;
+/*10*/	__u64 last_read_time;
+	__u32 writer_node_num;
+	__u32 reader_node_num;
+/*20*/	__u64 oin_node_map;
+	__u64 dlock_seq_num;
+/*30*/
+} ocfs1_disk_lock;
+
+/*
+ * OCFS1 volume label.  Lives at sector 1.
+ *
+ * Margin markers are hexadecimal byte offsets: the 64-byte
+ * cluster_name starting at 0x84 ends at 0xC4.
+ */
+typedef struct _ocfs1_vol_label
+{
+/*00*/	ocfs1_disk_lock disk_lock;
+/*30*/	__u8 label[MAX_VOL_LABEL_LEN_V1];
+/*70*/	__u16 label_len;
+/*72*/	__u8 vol_id[MAX_VOL_ID_LENGTH_V1];
+/*82*/	__u16 vol_id_len;
+/*84*/	__u8 cluster_name[MAX_CLUSTER_NAME_LEN_V1];
+/*C4*/	__u16 cluster_name_len;
+/*C6*/
+} ocfs1_vol_label;
+
+
+#endif /* _OCFS1_FS_COMPAT_H */
+

Added: trunk/ocfs2/format/inc/ocfs2_fs.h
===================================================================
--- trunk/ocfs2/format/inc/ocfs2_fs.h	2004-06-13 23:54:25 UTC (rev 58)
+++ trunk/ocfs2/format/inc/ocfs2_fs.h	2004-06-14 00:33:51 UTC (rev 59)
@@ -0,0 +1,447 @@
+/* -*- mode: c; c-basic-offset: 9; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
+ * ocfs2_fs.h
+ *
+ * On-disk structures for OCFS2.
+ *
+ * Copyright (C) 2002, 2004 Oracle.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License, version 2,  as published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ *
+ * Authors: Kurt Hackel, Mark Fasheh, Sunil Mushran, Wim Coekaerts,
+ *	    Manish Singh, Neeraj Goyal, Suchit Kaura, Joel Becker
+ */
+
+#ifndef _OCFS2_FS_H
+#define _OCFS2_FS_H
+
+/* Revision of the on-disk format described by this header */
+#define OCFS2_MAJOR_REV_LEVEL		2
+#define OCFS2_MINOR_REV_LEVEL          	0
+
+/*
+ * An OCFS2 volume starts this way:
+ * Sector 0: Valid ocfs1_vol_disk_hdr that cleanly fails to mount v1.
+ * Sector 1: Valid ocfs1_vol_label that cleanly fails to mount v1.
+ * Block OCFS2_SUPER_BLOCK_BLKNO: OCFS2 superblock.
+ *
+ * All other structures are found from the superblock information.
+ *
+ * OCFS2_SUPER_BLOCK_BLKNO is in blocks, not sectors.  E.g., for a
+ * blocksize of 2K, it is 4096 bytes into the disk.
+ */
+#define OCFS2_SUPER_BLOCK_BLKNO		2
+
+/*
+ * Object signatures.  "INODE01" and "EXBLK01" are 7 characters, so
+ * with their terminating NUL they are as long as the 8-byte
+ * i_signature / h_signature fields (presumably where they are stored).
+ */
+#define OCFS2_SUPER_BLOCK_SIGNATURE	"OCFSV2"
+#define OCFS2_FILE_ENTRY_SIGNATURE	"INODE01"
+#define OCFS2_EXTENT_BLOCK_SIGNATURE	"EXBLK01"
+
+/*
+ * Flags on ocfs2_dinode.i_flags
+ */
+#define OCFS2_VALID_FL		(0x01)
+#define OCFS2_UNUSED2_FL	(0x02)
+#define OCFS2_ORPHANED_FL	(0x04)
+#define OCFS2_UNUSED3_FL	(0x08)
+#define OCFS2_SYSTEM_FL		(0x10)
+#define OCFS2_SUPER_BLOCK_FL	(0x20)
+#define OCFS2_LOCAL_ALLOC_FL	(0x40)
+#define OCFS2_BITMAP_FL		(0x80)
+	
+
+/* Limit of space in ocfs2_dir_entry */
+#define OCFS2_MAX_FILENAME_LENGTH       255
+
+/* Limit of node map bits in ocfs2_disk_lock (dl_node_map is 8 x 32 bits) */
+#define OCFS2_MAX_NODES			256
+
+#define MAX_VOL_ID_LENGTH               16
+#define MAX_VOL_LABEL_LEN               64
+#define MAX_CLUSTER_NAME_LEN            64
+
+
+#define ONE_MEGA_BYTE           	(1 * 1024 * 1024)   /* in bytes */
+#define OCFS2_DEFAULT_JOURNAL_SIZE	(8 * ONE_MEGA_BYTE)
+
+
+/*
+ * System file index.  NUM_SYSTEM_INODES is the number of indices.
+ * mkfs2.c's system_files[] table lists its entries in this same order.
+ */
+enum {
+	GLOBAL_BITMAP_SYSTEM_INODE = 0,
+	GLOBAL_INODE_ALLOC_SYSTEM_INODE,
+	GLOBAL_INODE_ALLOC_BITMAP_SYSTEM_INODE,
+	AUTOCONFIG_SYSTEM_INODE,
+	PUBLISH_SYSTEM_INODE,
+	VOTE_SYSTEM_INODE,
+	ORPHAN_DIR_SYSTEM_INODE,
+	EXTENT_ALLOC_SYSTEM_INODE,
+	EXTENT_ALLOC_BITMAP_SYSTEM_INODE,
+	INODE_ALLOC_SYSTEM_INODE,
+	INODE_ALLOC_BITMAP_SYSTEM_INODE,
+	JOURNAL_SYSTEM_INODE,
+	LOCAL_ALLOC_SYSTEM_INODE,
+	NUM_SYSTEM_INODES
+};
+
+/*
+ * The last system inode that has only one global copy.  Every system
+ * inode after it in the system inode enum has a node-specific copy.
+ */
+#define OCFS_LAST_GLOBAL_SYSTEM_INODE ORPHAN_DIR_SYSTEM_INODE
+
+
+/* Default size for the local alloc bitmap */
+#define OCFS2_LOCAL_BITMAP_DEFAULT_SIZE		256
+
+/*
+ * OCFS2 directory file types.  Only the low 3 bits are used.  The
+ * other bits are reserved for now.
+ */
+#define OCFS2_FT_UNKNOWN	0
+#define OCFS2_FT_REG_FILE	1
+#define OCFS2_FT_DIR		2
+#define OCFS2_FT_CHRDEV		3
+#define OCFS2_FT_BLKDEV		4
+#define OCFS2_FT_FIFO		5
+#define OCFS2_FT_SOCK		6
+#define OCFS2_FT_SYMLINK	7
+
+#define OCFS2_FT_MAX		8
+
+/*
+ * OCFS2_DIR_PAD defines the directory entries boundaries
+ *
+ * NOTE: It must be a multiple of 4
+ *
+ * OCFS2_DIR_REC_LEN(name_len) is the space an entry with a name of
+ * name_len bytes needs: the 12 bytes that precede name[] in struct
+ * ocfs2_dir_entry plus the name, rounded up to a multiple of
+ * OCFS2_DIR_PAD.
+ */
+#define OCFS2_DIR_PAD			4
+#define OCFS2_DIR_ROUND			(OCFS2_DIR_PAD - 1)
+#define OCFS2_DIR_REC_LEN(name_len)	(((name_len) + 12 + \
+                                          OCFS2_DIR_ROUND) & \
+					 ~OCFS2_DIR_ROUND)
+#define OCFS2_LINK_MAX		32000
+
+#define S_SHIFT			12
+/*
+ * Maps the file-type bits of a mode, (mode & S_IFMT) >> S_SHIFT, to
+ * the matching OCFS2_FT_* directory file type.  Slots without an
+ * initializer are 0, i.e. OCFS2_FT_UNKNOWN.
+ *
+ * Uses standard C99 designated initializers ("[index] = value")
+ * rather than the obsolete GNU form without the '='.
+ */
+static unsigned char ocfs_type_by_mode[S_IFMT >> S_SHIFT] = {
+	[S_IFREG >> S_SHIFT]  = OCFS2_FT_REG_FILE,
+	[S_IFDIR >> S_SHIFT]  = OCFS2_FT_DIR,
+	[S_IFCHR >> S_SHIFT]  = OCFS2_FT_CHRDEV,
+	[S_IFBLK >> S_SHIFT]  = OCFS2_FT_BLKDEV,
+	[S_IFIFO >> S_SHIFT]  = OCFS2_FT_FIFO,
+	[S_IFSOCK >> S_SHIFT] = OCFS2_FT_SOCK,
+	[S_IFLNK >> S_SHIFT]  = OCFS2_FT_SYMLINK,
+};
+
+
+/*
+ * Convenience accessors: each yields a pointer to a member of the
+ * ocfs2_dinode that `dinode' points to.
+ */
+#define OCFS2_RAW_SB(dinode)	(&((dinode)->id2.i_super))
+#define DISK_LOCK(dinode)	(&((dinode)->i_disk_lock))
+#define LOCAL_ALLOC(dinode)	(&((dinode)->id2.i_lab))
+
+/* TODO: change these?  */
+#define OCFS2_NODE_CONFIG_HDR_SIGN	"NODECFG"
+#define OCFS2_NODE_CONFIG_SIGN_LEN	8	/* "NODECFG" plus its NUL */
+#define OCFS2_NODE_CONFIG_VER		2
+#define OCFS2_NODE_MIN_SUPPORTED_VER	2
+
+#define MAX_IP_ADDR_LEN		32
+#define HOSTID_LEN              20
+#define MACID_LEN		12
+#define GUID_LEN		(HOSTID_LEN+MACID_LEN)	/* host_id then mac_id, see ocfs_guid */
+#define MAX_NODE_NAME_LENGTH	32
+
+
+
+
+/*
+ * On disk extent record for OCFS2
+ * It describes a range of clusters on disk.
+ * Margin markers are hexadecimal byte offsets; a record is 0x10 bytes.
+ */
+typedef struct _ocfs2_extent_rec {
+/*00*/	__u32 e_cpos;		/* Offset into the file, in clusters */
+	__u32 e_clusters;	/* Clusters covered by this extent */
+	__u64 e_blkno;		/* Physical disk offset, in blocks */
+/*10*/
+} ocfs2_extent_rec;	
+
+/*
+ * On disk extent list for OCFS2 (node in the tree).  Note that this
+ * is contained inside ocfs2_dinode or ocfs2_extent_block, so the
+ * offsets are relative to ocfs2_dinode.id2.i_list or
+ * ocfs2_extent_block.h_list, respectively.
+ *
+ * l_recs[] is declared with zero length: the records take up the
+ * rest of the containing block (see ocfs2_extent_recs_per_inode()
+ * and ocfs2_extent_recs_per_eb()).
+ */
+typedef struct _ocfs2_extent_list {
+/*00*/	__s16 l_tree_depth;		/* Extent tree depth from this
+					   point.  -1 means data extents
+					   hang directly off this
+					   header (a leaf) */
+	__u16 l_count;			/* Number of extent records */
+	__u16 l_next_free_rec;		/* Next unused extent slot */
+	__u16 l_reserved1;
+	__u64 l_reserved2;		/* Pad to
+					   sizeof(ocfs2_extent_rec) */
+/*10*/	ocfs2_extent_rec l_recs[0];	/* Extent records */
+} ocfs2_extent_list;
+
+/*
+ * On disk extent block (indirect block) for OCFS2
+ * A 0x30-byte header followed by the extent list, which fills the
+ * rest of the block.
+ */
+typedef struct _ocfs2_extent_block
+{
+/*00*/	__u8 h_signature[8];		/* Signature for verification */
+	__u64 h_suballoc_blkno;		/* Node suballocator offset,
+					   in blocks */
+/*10*/	__u16 h_suballoc_node;		/* Node suballocator this
+					   extent_header belongs to */
+	__u16 h_reserved1;
+	__u32 h_reserved2;
+	__u64 h_blkno;			/* Offset on disk, in blocks */
+/*20*/	__u64 h_parent_blk;		/* Offset on disk, in blocks,
+					   of this block's parent in the
+					   tree */
+	__u64 h_next_leaf_blk;		/* Offset on disk, in blocks,
+					   of next leaf header pointing
+					   to data */
+/*30*/	ocfs2_extent_list h_list;	/* Extent record list */
+/* Actual on-disk size is one block */
+} ocfs2_extent_block;
+
+/*
+ * On disk lock structure for OCFS2
+ * 0x30 bytes; embedded in ocfs2_dinode and in the node config
+ * structures.
+ */
+typedef struct _ocfs2_disk_lock
+{
+/*00*/	__u32 dl_master;	/* Node number of current master */
+	__u8 dl_level;		/* Lock level */
+	__u8 dl_reserved1[3];	/* Pad to u64 */
+	__u64 dl_seq_num;	/* Lock transaction seqnum */
+/*10*/	__u32 dl_node_map[8];	/* Bitmap of interested nodes,
+				   was __u32; 8 x 32 bits, one per
+				   node up to OCFS2_MAX_NODES */ 
+/*30*/
+} ocfs2_disk_lock;
+
+/*
+ * On disk superblock for OCFS2
+ * Note that it is contained inside an ocfs2_dinode, so all offsets
+ * are relative to the start of ocfs2_dinode.id2.
+ * Margin markers are hexadecimal; the fields end at offset 0xA0.
+ */
+typedef struct _ocfs2_super_block {
+/*00*/	__u16 s_major_rev_level;
+	__u16 s_minor_rev_level;
+	__u16 s_mnt_count;
+	__s16 s_max_mnt_count;
+	__u16 s_state;			/* File system state */
+	__u16 s_errors;			/* Behaviour when detecting errors */
+	__u32 s_checkinterval;		/* Max time between checks */
+/*10*/	__u64 s_lastcheck;		/* Time of last check */
+	__u32 s_creator_os;		/* OS */
+	__u32 s_feature_compat;		/* Compatible feature set */
+/*20*/	__u32 s_feature_incompat;	/* Incompatible feature set */
+	__u32 s_feature_ro_compat;	/* Readonly-compatible feature set */
+	__u64 s_root_blkno;		/* Offset, in blocks, of root directory
+					   dinode */
+/*30*/	__u64 s_system_dir_blkno;	/* Offset, in blocks, of system
+					   directory dinode */
+	__u32 s_blocksize_bits;		/* Blocksize for this fs */
+	__u32 s_clustersize_bits;	/* Clustersize for this fs */
+/*40*/	__u32 s_max_nodes;		/* Max nodes in this cluster before
+					   tunefs required */
+	__u32 s_reserved1;
+	__u64 s_reserved2;
+/*50*/	__u8  s_label[64];		/* Label for mounting, etc. */
+/*90*/	__u8  s_uuid[16];		/* Was vol_id */
+/*A0*/
+} ocfs2_super_block;
+
+/*
+ * Local allocation bitmap for OCFS2 nodes
+ * Note that it exists inside an ocfs2_dinode, so all offsets are
+ * relative to the start of ocfs2_dinode.id2.
+ * la_bitmap[] is declared with zero length and follows the 0x10-byte
+ * header.
+ */
+typedef struct _ocfs2_local_alloc
+{
+/*00*/	__u32 la_bm_off;	/* Starting bit offset in main bitmap */
+	/* Do we want to use id1.bitmap1? */
+	__u16 la_bm_bits;	/* Number of bits from main bitmap */
+	__u16 la_bits_set;	/* Number of set bits */
+	__u16 la_size;		/* Size of included bitmap, in bytes */
+	__u16 la_reserved1;
+	__u32 la_reserved2;
+/*10*/	__u8 la_bitmap[0];
+} ocfs2_local_alloc;
+
+/*
+ * On disk inode for OCFS2
+ * Margin markers are hexadecimal byte offsets.  The fixed fields end
+ * at 0xC0, where the type-dependent id2 union starts.
+ */
+typedef struct _ocfs2_dinode {
+/*00*/	__u8 i_signature[8];		/* Signature for validation */
+	__u32 i_generation;		/* Generation number */
+	__u16 i_reserved1;
+	__u16 i_suballoc_node;		/* Node suballocator this inode
+					   belongs to */
+/*10*/	__u64 i_suballoc_blkno;		/* Node suballocator offset,
+       					   in blocks */
+/*18*/	ocfs2_disk_lock i_disk_lock;	/* Lock structure */
+/*48*/	__u32 i_uid;			/* Owner UID */
+	__u32 i_gid;			/* Owning GID */
+/*50*/	__u64 i_size;			/* Size in bytes */
+	__u16 i_mode;			/* File mode */
+	__u16 i_links_count;		/* Links count */
+	__u32 i_flags;			/* File flags */
+/*60*/	__u64 i_atime;			/* Access time */
+	__u64 i_ctime;			/* Creation time */
+/*70*/	__u64 i_mtime;			/* Modification time */
+	__u64 i_dtime;			/* Deletion time */
+/*80*/	__u64 i_blkno;			/* Offset on disk, in blocks */
+	__u32 i_clusters;		/* Cluster count */
+	__u32 i_reserved2;
+/*90*/	__u64 i_last_eb_blk;		/* Pointer to last extent
+					   block */
+	__u64 i_reserved3;
+/*A0*/	__u64 i_reserved4;
+	__u64 i_reserved5;
+/*B0*/	__u64 i_reserved6;
+	union {
+		__u64 i_pad1;		/* Generic way to refer to this 64bit
+					   union */
+		struct {
+			__u64 i_rdev;	/* Device number */
+		} dev1;
+		struct {		/* Info for bitmap system inodes */
+			__u32 i_used;	/* Bits (ie, clusters) used  */
+			__u32 i_total;	/* Total bits (clusters) available */
+		} bitmap1;
+	} id1;				/* Inode type dependent 1 */
+/*C0*/	union {
+		ocfs2_super_block i_super;
+                ocfs2_local_alloc i_lab;
+		ocfs2_extent_list i_list;
+	} id2;
+/* Actual on-disk size is one block */
+} ocfs2_dinode;
+
+/*
+ * On-disk directory entry structure for OCFS2
+ * name[] starts at byte offset 0x0C; that is the 12 added to the
+ * name length in OCFS2_DIR_REC_LEN().
+ */
+struct ocfs2_dir_entry {
+/*00*/	__u64   inode;                  /* Inode number */
+	__u16   rec_len;                /* Directory entry length */
+	__u8    name_len;               /* Name length */
+	__u8    file_type;              /* One of the OCFS2_FT_* values */
+/*0C*/	char    name[OCFS2_MAX_FILENAME_LENGTH];    /* File name */
+/* Actual on-disk length specified by rec_len */
+};
+/* IPC settings of a node; embedded in ocfs_node_config_info as ipc_config. */
+typedef struct _ocfs_ipc_config_info			// CLASS
+{
+	__u8 type;					// NUMBER RANGE(0, 255)
+	__u8 ip_addr[MAX_IP_ADDR_LEN+1];		// CHAR[MAX_IP_ADDR_LEN+1]
+	__u32 ip_port;					// NUMBER RANGE(0,ULONG_MAX)
+	__u8 ip_mask[MAX_IP_ADDR_LEN+1];		// CHAR[MAX_IP_ADDR_LEN+1]
+}
+ocfs_ipc_config_info;	// END CLASS
+/*
+ * TODO this structure will break in 64-bit.... need to pack
+ *
+ * Node GUID: GUID_LEN bytes that can be read either as a whole
+ * (guid) or as a host id followed by a MAC id (id).
+ */
+typedef union _ocfs_guid				// CLASS
+{
+	struct
+	{
+		char host_id[HOSTID_LEN];
+		char mac_id[MACID_LEN];
+	} id;
+	__u8 guid[GUID_LEN];				// CHAR[GUID_LEN]
+}
+ocfs_guid;						// END CLASS
+/* Configuration of one node: disk lock, name, GUID and IPC settings. */
+typedef struct _ocfs_node_config_info			// CLASS
+{
+	ocfs2_disk_lock disk_lock;			// DISKLOCK
+	__u8 node_name[MAX_NODE_NAME_LENGTH+1];		// CHAR[MAX_NODE_NAME_LENGTH+1]
+	ocfs_guid guid;					// GUID
+	ocfs_ipc_config_info ipc_config;		// IPCONFIG
+}
+ocfs_node_config_info;					// END CLASS
+/* Node configuration header; signature[] is OCFS2_NODE_CONFIG_SIGN_LEN bytes. */
+typedef struct _ocfs_node_config_hdr			// CLASS
+{
+	ocfs2_disk_lock disk_lock;			// DISKLOCK
+	__u8 signature[OCFS2_NODE_CONFIG_SIGN_LEN];	// CHAR[NODE_CONFIG_SIGN_LEN]
+	__u32 version;					// NUMBER RANGE(0,ULONG_MAX)
+	__u32 num_nodes;				// NUMBER RANGE(0,32)
+	__u32 last_node;				// NUMBER RANGE(0,32)
+	__u32 onch_pad;                                 // UNUSED
+	__u64 cfg_seq_num;				// NUMBER RANGE(0,ULONG_LONG_MAX)
+}
+ocfs_node_config_hdr;					// END CLASS
+
+
+#ifdef __KERNEL__
+/*
+ * Number of extent records that fit in an inode block: the block
+ * size less everything that precedes id2.i_list.l_recs, divided by
+ * the size of one record.
+ */
+static inline int ocfs2_extent_recs_per_inode(struct super_block *sb)
+{
+	int avail = sb->s_blocksize -
+		offsetof(struct _ocfs2_dinode, id2.i_list.l_recs);
+
+	return avail / sizeof(struct _ocfs2_extent_rec);
+}
+
+/*
+ * Number of extent records that fit in an extent block: the block
+ * size less everything that precedes h_list.l_recs, divided by the
+ * size of one record.
+ */
+static inline int ocfs2_extent_recs_per_eb(struct super_block *sb)
+{
+	int avail = sb->s_blocksize -
+		offsetof(struct _ocfs2_extent_block, h_list.l_recs);
+
+	return avail / sizeof(struct _ocfs2_extent_rec);
+}
+
+/* Size of the local alloc bitmap; currently always the default. */
+static inline int ocfs2_local_alloc_size(struct super_block *sb)
+{
+	/*
+	 * sb is not looked at yet.  Perhaps one day the size will be
+	 * dynamic, based on sb->s_blocksize.
+	 */
+	const int bitmap_size = OCFS2_LOCAL_BITMAP_DEFAULT_SIZE;
+
+	return bitmap_size;
+}
+#else
+/*
+ * Number of extent records that fit in an inode block of `blocksize'
+ * bytes: what is left after everything that precedes
+ * id2.i_list.l_recs, divided by the size of one record.
+ */
+static inline int ocfs2_extent_recs_per_inode(int blocksize)
+{
+	int avail = blocksize -
+		offsetof(struct _ocfs2_dinode, id2.i_list.l_recs);
+
+	return avail / sizeof(struct _ocfs2_extent_rec);
+}
+
+/*
+ * Number of extent records that fit in an extent block of
+ * `blocksize' bytes: what is left after everything that precedes
+ * h_list.l_recs, divided by the size of one record.
+ */
+static inline int ocfs2_extent_recs_per_eb(int blocksize)
+{
+	int avail = blocksize -
+		offsetof(struct _ocfs2_extent_block, h_list.l_recs);
+
+	return avail / sizeof(struct _ocfs2_extent_rec);
+}
+
+/* Size of the local alloc bitmap; `blocksize' is not looked at yet. */
+static inline int ocfs2_local_alloc_size(int blocksize)
+{
+	const int bitmap_size = OCFS2_LOCAL_BITMAP_DEFAULT_SIZE;
+
+	return bitmap_size;
+}
+#endif  /* __KERNEL__ */
+
+#endif  /* _OCFS2_FS_H */

Added: trunk/ocfs2/format/mkfs2.c
===================================================================
--- trunk/ocfs2/format/mkfs2.c	2004-06-13 23:54:25 UTC (rev 58)
+++ trunk/ocfs2/format/mkfs2.c	2004-06-14 00:33:51 UTC (rev 59)
@@ -0,0 +1,1217 @@
+/*
+ *
+ * this is a temporary version of mkfs.ocfs2 to get us through for now
+ *
+ */
+
+
+#define _LARGEFILE64_SOURCE
+#define __USE_ISOC99
+
+
+#include <errno.h>
+#include <stdio.h>
+#include <asm/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <fcntl.h>
+
+#include <asm/bitops.h>
+#include <sys/mman.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include <sys/ioctl.h>
+#include <linux/fs.h>
+#define _GNU_SOURCE
+#include <getopt.h>
+#include <ocfs2_fs.h>
+#include <ocfs1_fs_compat.h>
+
+typedef unsigned short kdev_t;
+
+#include "kernel-list.h"
+
+#include <signal.h>
+#include <libgen.h>
+
+#include <netinet/in.h>
+#include "kernel-jbd.h"
+
+
+extern char *optarg;
+extern int optind, opterr, optopt;
+extern void * memalign (size_t __alignment, size_t __size);
+/*
+ * Byte-order conversion placeholders: every macro yields its argument
+ * unchanged, so no byte swapping happens on any host (hence the
+ * #warning).
+ */
+#warning eeeek need to implement these
+#define cpu_to_le16(x)		(x)
+#define cpu_to_le32(x)		(x)
+#define cpu_to_le64(x)		(x)
+#define le16_to_cpu(x)		(x)
+#define le32_to_cpu(x)		(x)
+#define le64_to_cpu(x)		(x)
+
+/* Larger of a and b; the winning argument is evaluated twice. */
+#ifndef MAX
+#define MAX(a, b) (((a) > (b)) ? (a) : (b))
+#endif
+/*
+ * BITCOUNT(x): number of set bits in a 32-bit value.  BX_ leaves a
+ * bit count in every nibble; BITCOUNT folds pairs of nibbles into
+ * bytes and adds the bytes up with "% 255".
+ */
+#define BITCOUNT(x)     (((BX_(x)+(BX_(x)>>4)) & 0x0F0F0F0F) % 255)
+#define BX_(x)          ((x) - (((x)>>1)&0x77777777) \
+		             - (((x)>>2)&0x33333333) \
+			     - (((x)>>3)&0x11111111))
+/*
+ * MKFS_FATAL prints a message tagged with file and line to stderr
+ * and calls exit(1); MKFS_WARN only prints.  The _STR variants take a
+ * plain string without further arguments.  These use the GCC
+ * statement-expression and named variadic macro extensions.
+ */
+#define MKFS_FATAL(fmt, arg...)		({ fprintf(stderr, "ERROR at %s, %d: " fmt ".  EXITING!!!\n", \
+						   __FILE__, __LINE__, ##arg);  \
+					   exit(1); \
+					 })
+#define MKFS_FATAL_STR(str)	MKFS_FATAL(str, "")
+#define MKFS_WARN(fmt, arg...)		fprintf(stderr, "WARNING at %s, %d: " fmt ".\n", \
+						   __FILE__, __LINE__, ##arg)
+#define MKFS_WARN_STR(str)	MKFS_WARN(str, "")
+
+
+
+#define MIN_RESERVED_TAIL_BLOCKS    8
+
+/* Fixed block counts that blocks_needed() adds up. */
+#define LEADING_SPACE_BLOCKS	2  // we will put special strings in the v1 header blocks
+#define SLOP_BLOCKS		0
+#define FILE_ENTRY_BLOCKS	8
+#define SUPERBLOCK_BLOCKS	1
+/*
+ * Block counts that grow with the node count i but never drop below
+ * min.  The arguments are parenthesized so that expressions can be
+ * passed safely; each argument may be evaluated twice.
+ */
+#define PUBLISH_BLOCKS(i,min)	((i) < (min) ? (min) : (i))  // at least min
+#define VOTE_BLOCKS(i,min)	((i) < (min) ? (min) : (i))  // at least min
+#define AUTOCONF_BLOCKS(i,min)	((2+4) + ((i) < (min) ? (min) : (i)))  // at least min, plus the other 6
+#define NUM_LOCAL_SYSTEM_FILES  6
+/* Same value as OCFS2_SUPER_BLOCK_BLKNO in ocfs2_fs.h. */
+#define MAGIC_SUPERBLOCK_BLOCK_NUMBER  2
+
+#define OCFS2_OS_LINUX           0
+#define OCFS2_OS_HURD            1
+#define OCFS2_OS_MASIX           2
+#define OCFS2_OS_FREEBSD         3
+#define OCFS2_OS_LITES           4
+
+#define OCFS2_DFL_MAX_MNT_COUNT          20      /* Allow 20 mounts */
+#define OCFS2_DFL_CHECKINTERVAL          0       /* Don't use interval check */
+/* Kinds of system file; used for system_file_info.type. */
+enum {
+	sfi_journal,
+	sfi_bitmap,
+	sfi_alloc,
+	sfi_local_alloc,
+	sfi_other
+};	
+/* Static description of one system file; see system_files[]. */
+typedef struct _system_file_info {
+	char *name;	/* file name; node-specific ones contain %04d */
+	int type;	/* one of the sfi_* values */
+	int global;	/* 1 in the table for names without %04d, else 0 */
+	int dir;	/* 1 only for "orphan_dir" in the table */
+
+} system_file_info;
+/*
+ * One entry per system inode, in the same order as the
+ * *_SYSTEM_INODE enum in ocfs2_fs.h.  Columns: name, type, global, dir.
+ */
+system_file_info system_files[] = {
+	{ "global_bitmap", sfi_bitmap, 1, 0 },
+	{ "global_inode_alloc", sfi_alloc, 1, 0 },
+	{ "global_inode_alloc_bitmap", sfi_bitmap, 1, 0 },
+	{ "autoconfig", sfi_other, 1, 0 },
+	{ "publish", sfi_other, 1, 0 },
+	{ "vote", sfi_other, 1, 0 },
+	{ "orphan_dir", sfi_other, 1, 1 },
+	{ "extent_alloc:%04d", sfi_alloc, 0, 0 },
+	{ "extent_alloc_bitmap:%04d", sfi_bitmap, 0, 0 },
+	{ "inode_alloc:%04d", sfi_alloc, 0, 0 },
+	{ "inode_alloc_bitmap:%04d", sfi_bitmap, 0, 0 },
+	{ "journal:%04d", sfi_journal, 0, 0 },
+	{ "local_alloc:%04d", sfi_local_alloc, 0, 0 },
+};
+/* Bit usage counters kept in a system_file_disk_record (member bi). */
+struct bitinfo {
+	__u32 used_bits;
+	__u32 total_bits;
+};
+/* In-memory bookkeeping for one file that mkfs lays out. */
+typedef struct _system_file_disk_record
+{
+	__u64 fe_off;		/* cleared by initialize_bitmap(), "set later" */
+	__u64 extent_off;
+	__u64 extent_len;	/* for a bitmap: size of its bit buffer */
+	__u64 file_size;
+	struct bitinfo bi;	/* used/total bits, for bitmap files */
+	int flags;
+	int links;		/* bumped for each OCFS2_FT_DIR entry added */
+	int dir;
+} system_file_disk_record;
+	
+/* In-memory allocation bitmap; set up by initialize_bitmap(). */
+typedef struct _alloc_bm
+{
+	void *buf;		/* the bits; bm_record->extent_len bytes */
+	__u32 valid_bits;	/* number of usable bits in buf */
+	__u32 unit;		/* bytes covered by one bit: 1 << unit_bits */
+	__u32 unit_bits;
+	char *name;		/* strdup'ed label for error messages */
+	__u64 fe_disk_off;
+	system_file_disk_record *bm_record;	/* record of the bitmap file */
+	system_file_disk_record *alloc_record;	/* record of the space it maps */
+} alloc_bm;
+/* In-memory image of a directory being built by add_entry_to_directory(). */
+typedef struct _funky_dir
+{
+	__u64 disk_off;
+	__u64 disk_len;
+	void *buf;		/* entry data; record->file_size bytes long */
+	int buf_len;
+	int last_off;		/* offset in buf of the entry written last */
+	__u64 fe_disk_off;
+	int link_count;
+	system_file_disk_record *record;
+} funky_dir;
+
+alloc_bm * initialize_bitmap (__u32 bits, __u32 unit_bits, char *name,
+			      system_file_disk_record *bm_rec,
+			      system_file_disk_record *alloc_rec);
+void destroy_bitmap (alloc_bm *bm);
+int find_clear_bits (alloc_bm * bitmap, __u32 numBits, __u32 offset);
+int count_bits (alloc_bm * bitmap);
+int alloc_bytes_from_bitmap (__u64 bytes, alloc_bm *bm, __u64 *start, __u64 *num);
+int alloc_from_bitmap (__u64 numbits, alloc_bm *bm, __u64 *start, __u64 *num);
+__u64 alloc_inode (int numblocks);
+funky_dir * alloc_directory(void);
+void add_entry_to_directory(funky_dir *dir, char *name, __u64 ino, __u8 type);
+void adjust_volume_size(void);
+void map_device(void);
+void sync_device(void);
+void unmap_device(void);
+void init_format_time(void);
+void format_superblock(system_file_disk_record *rec, system_file_disk_record *root_rec, system_file_disk_record *sys_rec);
+void format_file(system_file_disk_record *rec);
+void write_bitmap_data(alloc_bm *bm);
+void write_directory_data(funky_dir *dir);
+void format_leading_space(__u64 start);
+void format_autoconf_publish_vote(__u64 autoconf_off, __u64 publish_off, __u64 vote_off, 
+				  __u64 data_off, __u64 data_len);
+void init_device(void);
+void init_globals(void);
+void usage(void);
+void process_args(int argc, char **argv);
+void generate_uuid(void);
+static inline __u32 blocks_needed(void);
+static inline __u32 system_dir_blocks_needed(void);
+void replacement_journal_create(__u64 journal_off);
+void adjust_autoconfig_publish_vote(system_file_disk_record *autoconfig_rec,
+					system_file_disk_record *publish_rec,
+					system_file_disk_record *vote_rec);
+void write_autoconfig_header(system_file_disk_record *rec);
+void init_record(system_file_disk_record *rec, int type, int dir);
+
+
+/* Global state of the formatter.  record[] has one slot per system inode. */
+system_file_disk_record *record[NUM_SYSTEM_INODES];
+// these 4 do not live in the record[] array
+system_file_disk_record global_alloc_rec;  	// represents whole volume, not written to disk
+system_file_disk_record superblock_rec;  	
+system_file_disk_record root_dir_rec;
+system_file_disk_record system_dir_rec;
+
+
+__u32 pagesize_bits=0;
+__u32 blocksize_bits=0;
+__u32 cluster_size_bits=0;
+__u32 blocksize=0;		/* also the alignment of memalign'ed buffers */
+__u32 cluster_size=0;
+__u64 volume_size_in_bytes=0;
+__u32 volume_size_in_clusters=0;
+__u64 volume_size_in_blocks=0;
+__u64 reserved_tail_size=0;
+__u32 compat_flags = 0;
+int initial_nodes=0;
+int fd=-1;
+void *mapping;			/* base address that bitmap allocations are offsets into */
+char *dev_name = NULL;
+char *vol_label = NULL;
+char *uuid = NULL;
+gid_t default_gid = 0;
+mode_t default_mode = 0;
+uid_t default_uid = 0;
+alloc_bm *global_bm=NULL;
+alloc_bm *system_bm=NULL;	/* alloc_inode() allocates from this one */
+char *progname = NULL;
+time_t format_time;
+
+
+
+/*
+ * Allocate and set up an in-memory allocation bitmap.
+ *
+ *   bits       number of valid bits in the bitmap
+ *   unit_bits  log2 of the number of bytes that one bit stands for
+ *   name       label used in error messages (copied)
+ *   bm_rec     record of the bitmap file; its extent_len gives the
+ *              size of the bit buffer, and its file_size, fe_off and
+ *              bit counters are initialized here
+ *   alloc_rec  record of the space the bitmap describes; its
+ *              file_size becomes bits << unit_bits, fe_off is cleared
+ *
+ * Returns the new bitmap.  Every allocation failure ends the program
+ * through MKFS_FATAL.
+ */
+alloc_bm * initialize_bitmap (__u32 bits, __u32 unit_bits, char *name,
+			      system_file_disk_record *bm_rec,
+			      system_file_disk_record *alloc_rec)
+{
+	alloc_bm *bitmap;
+	__u64 bitmap_len = bm_rec->extent_len;
+	
+	bitmap = malloc(sizeof(alloc_bm));
+	if (bitmap == NULL)
+		MKFS_FATAL("could not allocate memory for %s\n", name);
+	memset(bitmap, 0, sizeof(alloc_bm));
+	
+	bitmap->buf = memalign(blocksize, bitmap_len);
+	if (bitmap->buf == NULL)
+		MKFS_FATAL("could not allocate memory for %s\n", name);
+	memset(bitmap->buf, 0, bitmap_len);
+
+	bitmap->valid_bits = bits;
+	bitmap->unit_bits = unit_bits;
+	bitmap->unit = 1 << unit_bits;
+	bitmap->name = strdup(name);
+	if (bitmap->name == NULL)
+		MKFS_FATAL("could not allocate memory for %s\n", name);
+
+	bm_rec->file_size = bitmap_len;
+	bm_rec->fe_off = 0ULL; // set later
+	bm_rec->bi.used_bits = 0;
+	bm_rec->bi.total_bits = bits;
+
+	/* widen before shifting: the size in bytes may not fit in 32 bits */
+	alloc_rec->file_size = ((__u64)bits) << unit_bits;
+	alloc_rec->fe_off = 0ULL; // set later
+
+	bitmap->bm_record = bm_rec;
+	bitmap->alloc_record = alloc_rec;
+
+	return bitmap;
+}
+
+/*
+ * Release a bitmap made by initialize_bitmap(): its bit buffer, the
+ * copy of its name and the structure itself.  The two records it
+ * points to belong to the caller and are left alone.  A NULL bm is
+ * ignored.
+ */
+void destroy_bitmap (alloc_bm *bm)
+{
+	if (bm == NULL)
+		return;
+
+	free(bm->buf);
+	free(bm->name);		/* strdup'ed in initialize_bitmap() */
+	free(bm);
+}
+
+/*
+ * Look for a run of numBits consecutive clear bits in the bitmap,
+ * starting the search at bit `offset'.
+ *
+ * Returns the index of the first bit of the run, or -1 if there is
+ * no such run below bitmap->valid_bits.  The index is tracked as a
+ * __u32 and converted to int on return.
+ */
+int find_clear_bits (alloc_bm * bitmap, __u32 numBits, __u32 offset)
+{
+	__u32 next_zero, off, count, size, first_zero = -1; 
+	void *buffer;
+
+	buffer = bitmap->buf;
+	size = bitmap->valid_bits;
+	count = 0;	/* length of the run of clear bits found so far */
+	off = offset;
+
+	/* go on while the bits that are left could still complete a run */
+	while ((size - off + count >= numBits) &&
+	       (next_zero = find_next_zero_bit (buffer, size, off)) != size) {
+                if (next_zero >= bitmap->valid_bits)
+                    break;
+
+		if (next_zero != off) {
+			/* set bits were skipped: a new run starts here */
+			first_zero = next_zero;
+			off = next_zero + 1;
+			count = 0;
+		} else {
+			/* the bit at `off' is clear: the run goes on */
+			off++;
+			if (count == 0)
+				first_zero = next_zero;
+		}
+
+		count++;
+
+		if (count == numBits)
+			goto bail;
+	}
+	first_zero = -1;	/* ran out of bits before the run was complete */
+
+      bail:
+	if (first_zero != -1 && first_zero > bitmap->valid_bits) {
+		fprintf(stderr, "um... first_zero>bitmap->valid_bits (%d > %d)",
+			       first_zero, bitmap->valid_bits);
+		first_zero = -1;
+	}
+	return first_zero;
+}
+
+/*
+ * Count the set bits among the first bitmap->valid_bits bits of the
+ * bitmap buffer.
+ *
+ * Whole bytes are counted with BITCOUNT.  When valid_bits is not a
+ * multiple of 8, the low (valid_bits & 7) bits of the following byte
+ * are counted as well, so that bit n is taken to be bit (n & 7) of
+ * byte (n >> 3).
+ */
+int count_bits (alloc_bm * bitmap)
+{
+	__u8 *buffer = bitmap->buf;
+	__u32 whole_bytes = bitmap->valid_bits >> 3;
+	__u32 tail_bits = bitmap->valid_bits & 7;
+	__u32 count = 0, off;
+
+	for (off = 0; off < whole_bytes; off++)
+		count += BITCOUNT (buffer[off]);
+
+	/* valid bits in a trailing, partially used byte */
+	if (tail_bits)
+		count += BITCOUNT (buffer[whole_bytes] & ((1 << tail_bits) - 1));
+
+	return count;
+}
+
+
+/*
+ * Allocate enough units from bm to hold `bytes' bytes.
+ *
+ * The byte count is rounded up to a whole number of bm->unit sized
+ * units and the allocation itself is left to alloc_from_bitmap(), so
+ * both entry points share one implementation and one error message.
+ *
+ * *start and *num are returned in bytes to avoid any confusion.
+ * Returns 0; failure to allocate ends the program through MKFS_FATAL.
+ */
+int alloc_bytes_from_bitmap (__u64 bytes, alloc_bm *bm, __u64 *start, __u64 *num)
+{
+	__u32 numbits = (bytes + bm->unit - 1) >> bm->unit_bits;
+
+	return alloc_from_bitmap (numbits, bm, start, num);
+}
+
+/*
+ * Allocate a run of numbits consecutive bits from bm, zero the
+ * matching area of the device mapping and mark the bits as used.
+ *
+ * *start and *num are returned in bytes (bit counts shifted by
+ * bm->unit_bits) to avoid any confusion.  Returns 0; when no run is
+ * found the program ends through MKFS_FATAL.
+ */
+int alloc_from_bitmap (__u64 numbits, alloc_bm *bm, __u64 *start, __u64 *num)
+{
+	__u32 bit = find_clear_bits (bm, numbits, 0);
+	__u64 left;
+
+	if (bit == (__u32)-1)
+		MKFS_FATAL("could not allocate %llu bits from %s bitmap\n", 
+			   numbits, bm->name);
+
+	*start = ((__u64)bit) << bm->unit_bits;
+	*num = ((__u64)numbits) << bm->unit_bits;
+	bm->bm_record->bi.used_bits += numbits;
+
+	/* the newly allocated area starts out zeroed */
+	memset((char *)mapping + *start, 0, *num);
+
+	for (left = numbits; left != 0; left--)
+		set_bit (bit++, bm->buf);
+
+	return 0;
+}
+
+/*
+ * Allocate numblocks bits from system_bm and return the byte offset
+ * of the allocation as reported by alloc_from_bitmap().
+ */
+__u64 alloc_inode (int numblocks)
+{
+	__u64 byte_off, byte_len;
+
+	alloc_from_bitmap (numblocks, system_bm, &byte_off, &byte_len);
+
+	return byte_off;
+}
+
+/*
+ * Return a new, zero-filled funky_dir.  Running out of memory ends
+ * the program through MKFS_FATAL_STR.
+ */
+funky_dir * alloc_directory(void)
+{
+	funky_dir *fresh = calloc(1, sizeof(funky_dir));
+
+	if (fresh == NULL)
+		MKFS_FATAL_STR("could not allocate memory for directory");
+
+	return fresh;
+}
+	
+/*
+ * Append an entry called `name' for inode `ino' with file type `type'
+ * to the in-memory directory `dir'.
+ *
+ * The entry goes into the free space behind the entry written last
+ * (dir->last_off) when it fits there; otherwise the buffer is
+ * extended by one zeroed block and the entry is put at the start of
+ * that block.  dir->record->file_size tracks the buffer length and
+ * dir->record->links is incremented for every OCFS2_FT_DIR entry.
+ *
+ * The name is copied without relying on room for a terminating NUL:
+ * a record that is exactly OCFS2_DIR_REC_LEN(strlen(name)) bytes long
+ * has no room for it when the length is a multiple of 4, and writing
+ * it would land in the next record or past the end of the buffer.
+ *
+ * NOTE(review): name_len is a __u8, so names are assumed to be at
+ * most OCFS2_MAX_FILENAME_LENGTH bytes; this is not checked here.
+ */
+void add_entry_to_directory(funky_dir *dir, char *name, __u64 ino, __u8 type)
+{
+	struct ocfs2_dir_entry *de, *de1;
+	size_t name_len = strlen(name);
+	int new_rec_len;
+	void *newbuf, *p;
+	int newsize, reclen, reallen;
+	
+	new_rec_len = OCFS2_DIR_REC_LEN(name_len);
+
+	if (dir->buf) {
+		de = (struct ocfs2_dir_entry *)((char *)dir->buf + dir->last_off);
+		reclen = le16_to_cpu(de->rec_len);
+		reallen = OCFS2_DIR_REC_LEN(de->name_len);
+
+		/* find an area with large enough reclen */
+		if ((le64_to_cpu(de->inode) == 0 && reclen >= new_rec_len) ||
+		    (reclen >= reallen + new_rec_len)) {
+			if (le64_to_cpu(de->inode)) {
+				// move ahead just past the last entry
+				de1 = (struct ocfs2_dir_entry *) ((char *) de + reallen);
+				// set the next entry's rec_len to the rest of the block
+				de1->rec_len = cpu_to_le16(le16_to_cpu(de->rec_len) - reallen);
+				// shorten the last entry
+				de->rec_len = cpu_to_le16(reallen);  
+				de = de1;
+			}
+			goto got_it;
+		}
+		/* no space, add more */
+		newsize = dir->record->file_size + blocksize; // add one block
+	} else
+		newsize = blocksize;  // add one block
+	
+	newbuf = memalign(blocksize, newsize);
+	if (newbuf == NULL) 
+		MKFS_FATAL_STR("failed to grow directory");
+
+	if (dir->buf) {
+		memcpy(newbuf, dir->buf, dir->record->file_size);
+		free(dir->buf);
+		p = (char *)newbuf + dir->record->file_size;
+		memset(p, 0, blocksize);
+	} else {
+		p = newbuf;
+		memset(newbuf, 0, newsize);
+	}
+
+	dir->buf = newbuf;
+	dir->record->file_size = newsize;
+
+	/* the new block starts out as one empty entry spanning all of it */
+	de = (struct ocfs2_dir_entry *)p;
+	de->inode = 0;
+	de->rec_len = cpu_to_le16(blocksize);
+
+got_it:
+	de->name_len = name_len;
+	de->inode = cpu_to_le64(ino);
+	de->file_type = type;
+	memcpy(de->name, name, name_len);
+	/* terminate the name only if the NUL still lies inside this record */
+	if (OCFS2_DIR_REC_LEN(0) + name_len < le16_to_cpu(de->rec_len))
+		de->name[name_len] = '\0';
+	dir->last_off = ((char *)de - (char *)dir->buf);
+	if (type == OCFS2_FT_DIR)
+		dir->record->links++;
+}
+
+
+
+#define SYSTEM_FILE_NAME_MAX   40
+
+/*
+ * Estimate how many blocks the system area needs.  The total covers:
+ *
+ *   - leading space
+ *   - superblock
+ *   - the fixed file entries (global bm, system bm, system alloc,
+ *     root inode, system inode, autoconf, publish, vote)
+ *   - autoconf / publish / vote sectors
+ *   - (extent_alloc, extent_alloc_bitmap, inode_alloc,
+ *      inode_alloc_bitmap, journal) x initial_nodes
+ *   - some slop
+ */
+static inline __u32 blocks_needed(void)
+{
+	__u32 lead = LEADING_SPACE_BLOCKS;
+	__u32 super = SUPERBLOCK_BLOCKS;
+	__u32 file_entries = FILE_ENTRY_BLOCKS;
+	__u32 autoconf = AUTOCONF_BLOCKS(initial_nodes, 32);
+	__u32 publish = PUBLISH_BLOCKS(initial_nodes, 32);
+	__u32 vote = VOTE_BLOCKS(initial_nodes, 32);
+	__u32 per_node = (initial_nodes * NUM_LOCAL_SYSTEM_FILES);
+	__u32 slop = SLOP_BLOCKS;
+
+	return lead + super + file_entries + autoconf + publish + vote +
+	       per_node + slop;
+}
+
+/*
+ * Size the system directory's data allocation.
+ *
+ * blocks_needed() is used as a generous upper bound on the number of
+ * directory entries; each entry is assumed to take the space of a
+ * SYSTEM_FILE_NAME_MAX-character name.  The entry count is rounded up to
+ * whole directory blocks, converted to bytes, and then rounded up to
+ * whole clusters.
+ *
+ * Returns the number of CLUSTERS to allocate (despite the name).
+ */
+static inline __u32 system_dir_blocks_needed(void)
+{
+	int each = OCFS2_DIR_REC_LEN(SYSTEM_FILE_NAME_MAX);
+	int entries_per_block = blocksize / each;
+	int dir_blocks;
+	int bytes_needed;
+
+	/*
+	 * Round up to whole blocks.  The parentheses matter: without them
+	 * "1 / entries_per_block" is evaluated first (yielding 0) and no
+	 * division by entries_per_block happens at all.
+	 */
+	dir_blocks = (blocks_needed() + entries_per_block - 1) / entries_per_block;
+	bytes_needed = dir_blocks << blocksize_bits;
+
+	return (bytes_needed + cluster_size - 1) >> cluster_size_bits;
+}
+
+/*
+ * Trim the usable volume size: hold back MIN_RESERVED_TAIL_BLOCKS at the
+ * end, then round down to a multiple of the largest of page size, block
+ * size and cluster size.  Updates the volume_size_in_* globals and
+ * records the bytes left over in reserved_tail_size.
+ */
+void adjust_volume_size()
+{
+	__u32 align_bits;
+	__u64 usable;
+
+	usable = volume_size_in_bytes - 
+		(MIN_RESERVED_TAIL_BLOCKS << blocksize_bits);
+
+	/* coarsest of the three granularities */
+	align_bits = MAX(pagesize_bits, blocksize_bits);
+	align_bits = MAX(align_bits, cluster_size_bits);
+
+	/* round down by shifting the low bits out and back in */
+	usable = (usable >> align_bits) << align_bits;
+
+	volume_size_in_clusters = usable >> cluster_size_bits;
+	volume_size_in_blocks = usable >> blocksize_bits;
+	reserved_tail_size = volume_size_in_bytes - usable;
+	volume_size_in_bytes = usable;
+}
+
+/* prototype must agree with the definition below (size_t, not __u32) */
+static inline size_t mmap_len(void);
+
+/*
+ * Length of the device window to mmap.  This is a total guess: one
+ * default-sized journal per initial node plus 10 MB for everything else.
+ */
+static inline size_t mmap_len(void)
+{
+	size_t ret;
+
+	/* widen before multiplying so the product is computed in size_t */
+	ret = (size_t)initial_nodes * OCFS2_DEFAULT_JOURNAL_SIZE;
+	ret += (10 * ONE_MEGA_BYTE);
+	return ret;
+}
+
+/*
+ * Map the first mmap_len() bytes of the open device (fd) read/write and
+ * shared, storing the address in the global "mapping".
+ */
+void map_device()
+{
+	const int prot = PROT_READ | PROT_WRITE;
+	const int flags = MAP_NORESERVE | MAP_SHARED;
+
+	mapping = mmap(NULL, mmap_len(), prot, flags, fd, 0);
+	if (mapping == MAP_FAILED)
+		MKFS_FATAL("could not mmap the device: %s", strerror(errno));
+}
+/* Synchronously flush the whole device mapping with msync(MS_SYNC). */
+void sync_device()
+{
+	int rc = msync(mapping, mmap_len(), MS_SYNC);
+
+	if (rc != 0)
+		MKFS_FATAL_STR("could not sync the device");
+}
+
+/* Release the device mapping created by map_device(). */
+void unmap_device()
+{
+	int rc = munmap(mapping, mmap_len());
+
+	if (rc != 0)
+		MKFS_FATAL_STR("could not munmap the device");
+}
+
+
+/* Record the current wall-clock time in the global format_time. */
+void init_format_time()
+{
+	time_t now = time(NULL);
+
+	format_time = now;
+}
+
+
+
+/*
+ * Write the superblock inode into the device mapping at rec->fe_off.
+ *
+ * rec       - record whose fe_off locates the superblock
+ * root_rec  - record of the root directory (gives s_root_blkno)
+ * sys_rec   - record of the system directory (gives s_system_dir_blkno)
+ *
+ * Many of the fields are unused for now but are still given sane
+ * values.  A volume label longer than 63 bytes is fatal.
+ */
+void format_superblock(system_file_disk_record *rec, system_file_disk_record *root_rec, system_file_disk_record *sys_rec)
+{
+	const __u64 super_off = rec->fe_off;
+	const __u64 super_blkno = super_off >> blocksize_bits;
+	ocfs2_dinode *di = mapping + super_off;
+
+	memset(di, 0, blocksize);
+
+	/* generic inode header */
+	strcpy (di->i_signature, OCFS2_SUPER_BLOCK_SIGNATURE);
+	di->i_suballoc_node = cpu_to_le16((__u16)-1);
+	di->i_suballoc_blkno = cpu_to_le64(super_blkno);
+	di->i_blkno = cpu_to_le64(super_blkno);
+	di->i_flags = cpu_to_le32(OCFS2_VALID_FL | OCFS2_SYSTEM_FL | OCFS2_SUPER_BLOCK_FL);
+
+	/* timestamps: atime unused, ctime doubles as s_wtime, mtime as s_mtime */
+	di->i_atime = 0;
+	di->i_ctime = cpu_to_le64(format_time);
+	di->i_mtime = cpu_to_le64(format_time);
+
+	/* revision and location of the two top-level directories */
+	di->id2.i_super.s_major_rev_level = cpu_to_le16(OCFS2_MAJOR_REV_LEVEL);
+	di->id2.i_super.s_minor_rev_level = cpu_to_le16(OCFS2_MINOR_REV_LEVEL);
+	di->id2.i_super.s_root_blkno = cpu_to_le64(root_rec->fe_off >> blocksize_bits);
+	di->id2.i_super.s_system_dir_blkno = cpu_to_le64(sys_rec->fe_off >> blocksize_bits);
+
+	/* mount / check bookkeeping */
+	di->id2.i_super.s_mnt_count = 0;
+	di->id2.i_super.s_max_mnt_count = cpu_to_le16(OCFS2_DFL_MAX_MNT_COUNT);
+	di->id2.i_super.s_state = 0;
+	di->id2.i_super.s_errors = 0;
+	di->id2.i_super.s_lastcheck = cpu_to_le64(format_time);
+	di->id2.i_super.s_checkinterval = cpu_to_le32(OCFS2_DFL_CHECKINTERVAL);
+
+	/* geometry */
+	di->id2.i_super.s_creator_os = cpu_to_le32(OCFS2_OS_LINUX);
+	di->id2.i_super.s_blocksize_bits = cpu_to_le32(blocksize_bits);
+	di->id2.i_super.s_clustersize_bits = cpu_to_le32(cluster_size_bits);
+	di->id2.i_super.s_max_nodes = cpu_to_le32(initial_nodes);
+
+	/* identity: label and uuid */
+	if (strlen(vol_label) > 63)
+		MKFS_FATAL_STR("volume label > 63 bytes long");
+	strcpy(di->id2.i_super.s_label, vol_label);
+	memcpy(di->id2.i_super.s_uuid, uuid, 16);
+}
+
+
+/*
+ * Write the file entry (inode) described by rec into the device mapping
+ * at rec->fe_off.  When rec->extent_len is non-zero a single extent
+ * record covering the whole allocation is emitted; records flagged
+ * OCFS2_BITMAP_FL also get their used/total bit counts stored.
+ */
+void format_file(system_file_disk_record *rec)
+{
+	ocfs2_dinode *di = mapping + rec->fe_off;
+	const __u64 blkno = rec->fe_off >> blocksize_bits;
+	const int type_bits = rec->dir ? S_IFDIR : S_IFREG;
+	int mode = default_mode | type_bits;
+	__u32 clusters;
+
+	/* extent length rounded up to whole clusters */
+	clusters = (rec->extent_len + cluster_size - 1) >> cluster_size_bits;
+
+	memset(di, 0, blocksize);
+
+	/* identity */
+	strcpy (di->i_signature, OCFS2_FILE_ENTRY_SIGNATURE);
+	di->i_generation = 0;
+	di->i_suballoc_node = cpu_to_le16(-1);
+	di->i_suballoc_blkno = cpu_to_le64(blkno);
+	di->i_blkno = cpu_to_le64(blkno);
+
+	/* ownership, size and type */
+	di->i_uid = cpu_to_le32(default_uid);
+	di->i_gid = cpu_to_le32(default_gid);
+	di->i_size = cpu_to_le64(rec->file_size);
+	di->i_mode = cpu_to_le16(mode);
+	di->i_links_count = cpu_to_le16(rec->links);
+	di->i_flags = cpu_to_le32(rec->flags);
+
+	/* all three timestamps start at format time */
+	di->i_mtime = cpu_to_le64(format_time);
+	di->i_ctime = di->i_mtime;
+	di->i_atime = di->i_ctime;
+	di->i_dtime = 0;
+
+	/* extent list: empty, or exactly one record */
+	di->i_clusters = cpu_to_le32(clusters);
+	di->id2.i_list.l_tree_depth = cpu_to_le16(-1);
+	if (rec->extent_len) {
+		di->id2.i_list.l_next_free_rec = cpu_to_le16(1);
+		di->id2.i_list.l_recs[0].e_cpos = 0;
+		di->id2.i_list.l_recs[0].e_clusters = cpu_to_le32(clusters);
+		di->id2.i_list.l_recs[0].e_blkno = cpu_to_le64(rec->extent_off >> blocksize_bits);
+	} else {
+		di->id2.i_list.l_next_free_rec = cpu_to_le16(0);
+	}
+
+	if (rec->flags & OCFS2_BITMAP_FL) {
+		di->id1.bitmap1.i_used = cpu_to_le32(rec->bi.used_bits);
+		di->id1.bitmap1.i_total = cpu_to_le32(rec->bi.total_bits);
+	}
+}
+		
+
+/*
+ * Copy a bitmap's in-memory bits into its on-disk extent: the whole
+ * extent is cleared first, then file_size bytes of bm->buf are written.
+ */
+void write_bitmap_data(alloc_bm *bm)
+{
+	system_file_disk_record *rec = bm->bm_record;
+	void *dest = mapping + rec->extent_off;
+
+	memset(dest, 0, rec->extent_len);
+	memcpy(dest, bm->buf, rec->file_size);
+}
+
+/*
+ * Copy a directory's in-memory image into its on-disk extent: the whole
+ * extent is cleared first, then file_size bytes of dir->buf are written.
+ */
+void write_directory_data(funky_dir *dir)
+{
+	system_file_disk_record *rec = dir->record;
+	void *dest = mapping + rec->extent_off;
+
+	memset(dest, 0, rec->extent_len);
+	memcpy(dest, dir->buf, rec->file_size);
+}
+
+/*
+ * Fill the two leading blocks at byte offset "start" with the byte value
+ * 2, then stamp a marker string into the fields where an ocfs1 volume
+ * header (offset 0) and ocfs1 volume label (offset 512) would sit.
+ */
+void format_leading_space(__u64 start)
+{
+	static const char marker[] = "this is an ocfs2 volume";
+	int num_blocks = 2;  // 2 blocks were allocated
+	char *base = mapping + start;
+	ocfs1_vol_disk_hdr *hdr = (ocfs1_vol_disk_hdr *)base;
+	ocfs1_vol_label *lbl = (ocfs1_vol_label *)(base + 512);
+
+	memset(base, 2, num_blocks << blocksize_bits);
+
+	strcpy(hdr->signature, marker);
+	strcpy(hdr->mount_point, marker);
+
+	strcpy(lbl->label, marker);
+	strcpy(lbl->cluster_name, marker);
+}
+
+/*
+ * Create an empty journal at byte offset journal_off: zero
+ * OCFS2_DEFAULT_JOURNAL_SIZE bytes and write a minimal journal
+ * superblock header at the start.  Header fields are stored with htonl().
+ */
+void replacement_journal_create(__u64 journal_off)
+{
+	char *area = mapping + journal_off;
+	journal_superblock_t *jsb = (journal_superblock_t *) area;
+
+	memset(jsb, 0, OCFS2_DEFAULT_JOURNAL_SIZE);
+
+	/* block header */
+	jsb->s_header.h_magic	  = htonl(JFS_MAGIC_NUMBER);
+	jsb->s_header.h_blocktype = htonl(JFS_SUPERBLOCK_V2);
+
+	/* geometry */
+	jsb->s_blocksize = htonl(blocksize);
+	jsb->s_maxlen    = htonl(OCFS2_DEFAULT_JOURNAL_SIZE >> blocksize_bits);
+
+	/* first, start and sequence all begin at 1; no error recorded */
+	jsb->s_first     = htonl(1);
+	jsb->s_start     = htonl(1);
+	jsb->s_sequence  = htonl(1);
+	jsb->s_errno     = htonl(0);
+}
+
+/*
+ * Open dev_name read/write and store the descriptor in the global fd.
+ * Failure is reported through MKFS_FATAL.
+ */
+void init_device(void)
+{
+	fd = open(dev_name, O_RDWR);
+	if (fd == -1)
+		/* dev_name is a string: it must be printed with %s, not %d */
+		MKFS_FATAL("could not open device %s for read/write", dev_name);
+}
+
+
+#define ONE_GB_SHIFT    30
+
+int initial_nodes_for_volume(__u64 size);
+
+/*
+ * Guess a node count from the volume size (in bytes); used only when the
+ * user does not provide a number for initial_nodes.
+ *
+ *   <1gb    ->  2 nodes
+ *   <8gb    ->  4 nodes
+ *   <64gb   ->  8 nodes
+ *   <512gb  -> 16 nodes
+ *   512+gb  -> 32 nodes
+ */
+int initial_nodes_for_volume(__u64 size)
+{
+	int shift = ONE_GB_SHIFT;
+	int nodes = 2;
+
+	/*
+	 * Each threshold is 8x (3 bits) above the previous one.  size is
+	 * tested against each threshold without being modified, so the
+	 * shifts do not accumulate from one step to the next.
+	 */
+	while (nodes < 32 && (size >> shift) != 0) {
+		nodes <<= 1;
+		shift += 3;
+	}
+
+	return nodes;
+}
+
+/*
+ * Derive the global sizing parameters after the device has been opened:
+ *
+ *   - pagesize_bits from getpagesize()
+ *   - blocksize (from the BLKSSZGET ioctl unless set on the command
+ *     line) and blocksize_bits
+ *   - cluster_size_bits from cluster_size
+ *   - volume_size_in_{bytes,clusters,blocks} (device size unless set on
+ *     the command line); these are readjusted later
+ *   - initial_nodes (validated if given, otherwise guessed from the
+ *     volume size)
+ *
+ * Each size must be an exact power of two greater than one; anything
+ * else is fatal.
+ */
+void init_globals(void)
+{
+	size_t pagesize;
+	int i;
+	__u32 tmp;
+	unsigned long long tmp2;
+	
+	/*
+	 * The power-of-two searches below shift 1ULL rather than the int
+	 * constant 1: shifting an int by 31 or 32 bits is undefined.
+	 */
+	pagesize = getpagesize();
+	pagesize_bits = 0;
+	for (i=32; i>=0; i--) {
+		if ((1ULL << i) == pagesize)
+			pagesize_bits = i;
+	}
+	if (!pagesize_bits)
+		MKFS_FATAL("could not get pagesize_bits for pagesize %d", (int)pagesize);
+
+	if (blocksize) {
+		printf("blocksize was set manually: %lu\n", blocksize);
+	} else {
+		if (ioctl(fd, BLKSSZGET, &tmp) == -1)
+			MKFS_FATAL_STR("could not get sector size for device");
+		blocksize = tmp;
+	}
+	blocksize_bits = 0;
+	for (i=32; i>=0; i--) {
+		if ((1ULL << i) == blocksize)
+			blocksize_bits = i;
+	}
+	if (!blocksize_bits)
+		MKFS_FATAL("could not get blocksize_bits for blocksize %lu", blocksize);
+
+	cluster_size_bits = 0;
+	for (i=32; i>=0; i--) {
+		if ((1ULL << i) == cluster_size)
+			cluster_size_bits = i;
+	}
+	if (!cluster_size_bits)
+		MKFS_FATAL("could not get cluster_size_bits for cluster_size %lu", cluster_size);
+
+	/* these will be readjusted later */
+	tmp2 = lseek64(fd, 0, SEEK_END);
+	/* lseek64 reports failure as -1; do not mistake that for a size */
+	if (tmp2 == (unsigned long long)-1)
+		MKFS_FATAL("could not get size of device: %s", strerror(errno));
+	if (volume_size_in_bytes) {
+		printf("volume size was set manually: %llu, real size: %llu\n", 
+		       volume_size_in_bytes, tmp2);
+	} else {
+		volume_size_in_bytes = tmp2;
+	}
+	volume_size_in_clusters = volume_size_in_bytes >> cluster_size_bits;
+	volume_size_in_blocks = (volume_size_in_clusters << cluster_size_bits) >> blocksize_bits;
+	reserved_tail_size = 0;
+
+	if (initial_nodes) {
+		if (initial_nodes < 2 || initial_nodes > OCFS2_MAX_NODES)
+			MKFS_FATAL("initial_nodes given (%lu) out of range", initial_nodes);
+		printf("initial_nodes was set manually: %lu\n", initial_nodes);
+	} else {
+		initial_nodes = initial_nodes_for_volume(volume_size_in_bytes);
+		printf("using %lu for initial_nodes\n", initial_nodes);
+	}
+}
+
+/*
+ * Fill the global uuid with MAX_VOL_ID_LENGTH random bytes read from
+ * /dev/urandom.  Any failure is fatal.
+ *
+ * NOTE(review): uuid is replaced unconditionally, so a value stored
+ * earlier by the --uuid option is dropped -- confirm whether that is
+ * intended.
+ */
+void generate_uuid(void)
+{
+	int randfd = 0;
+	int readlen = 0;
+	int len = 0;
+
+	if ((randfd = open("/dev/urandom", O_RDONLY)) == -1)
+		MKFS_FATAL("error opening /dev/urandom: %s", strerror(errno));
+
+	uuid = malloc(MAX_VOL_ID_LENGTH);
+	if (!uuid)
+		MKFS_FATAL_STR("could not allocate memory");
+
+	while (readlen < MAX_VOL_ID_LENGTH)
+	{
+		len = read(randfd, uuid + readlen, MAX_VOL_ID_LENGTH - readlen);
+		if (len == -1) {
+			/* an interrupted read is not an error; just retry */
+			if (errno == EINTR)
+				continue;
+			MKFS_FATAL("error reading from /dev/urandom: %s", strerror(errno));
+		}
+		/* a zero-length read would otherwise spin here forever */
+		if (len == 0)
+			MKFS_FATAL_STR("unexpected end of file reading /dev/urandom");
+		readlen += len;
+	}
+	
+	close(randfd);
+}
+
+
+/* Print the command-line synopsis to stderr and exit with status 1. */
+void usage(void)
+{
+	// "b:c:v:C:n:g:u:m:d:l:U:"
+	static const char *const text[] = {
+		"usage: mkfs2 [--blocksize=bytes] [--mode=##] [--uuid=id]\n",
+		"             [--volumesize=bytes] [--compatflags=##]\n",
+		"             [--nodes=##] [--gid=##] [--uid=##]\n",
+		"             --clustersize=bytes --device=/dev/name\n",
+		"             --label=\"volume label\"\n",
+		"\n",
+	};
+	unsigned int line;
+
+	for (line = 0; line < sizeof(text) / sizeof(text[0]); line++)
+		fprintf(stderr, "%s", text[line]);
+
+	exit(1);
+}
+
+/*
+ * Parse the command line into the option globals (blocksize,
+ * cluster_size, volume_size_in_bytes, compat_flags, initial_nodes,
+ * default_gid, default_uid, default_mode, dev_name, vol_label, uuid).
+ *
+ * A trailing non-option argument overrides --device.  A volume label, a
+ * device and a cluster size are mandatory; if any is missing a warning
+ * is printed and usage() exits.
+ */
+void process_args(int argc, char **argv)
+{
+	int c;
+
+	while (1) {
+		/* every option takes an argument, --compatflags included */
+		static struct option long_options[] = {
+			{"blocksize", required_argument, 0, 'b'},
+			{"clustersize", required_argument, 0, 'c'},
+			{"volumesize", required_argument, 0, 'v'},
+			{"compatflags", required_argument, 0, 'C'},
+			{"nodes", required_argument, 0, 'n'},
+			{"gid", required_argument, 0, 'g'},
+			{"uid", required_argument, 0, 'u'},
+			{"mode", required_argument, 0, 'm'},
+			{"device", required_argument, 0, 'd'},
+			{"label", required_argument, 0, 'l'},
+			{"uuid", required_argument, 0, 'U'},
+			{0, 0, 0, 0}
+		};
+		/* short options: 'c' is clustersize, 'C' is compatflags */
+		c = getopt_long (argc, argv, "b:c:v:C:n:g:u:m:d:l:U:", long_options, NULL);
+		if (c == -1)
+			break;
+
+		switch (c) {
+			case 'b':
+				blocksize = strtoul(optarg, NULL, 10);
+				break;
+			case 'c':
+				cluster_size = strtoul(optarg, NULL, 10);
+				break;
+			case 'v':
+				volume_size_in_bytes = strtoull(optarg, NULL, 10);
+				break;
+			case 'C':
+				compat_flags = strtoul(optarg, NULL, 10);
+				break;
+			case 'n':
+				initial_nodes = strtoul(optarg, NULL, 10);
+				break;
+			case 'g':
+				default_gid = strtoul(optarg, NULL, 10);
+				break;
+			case 'u':
+				default_uid = strtoul(optarg, NULL, 10);
+				break;
+			case 'm':
+				/* base 0: accepts octal (0755), hex or decimal */
+				default_mode = strtoul(optarg, NULL, 0);
+				break;
+			case 'd':
+				dev_name = strdup(optarg);
+				break;
+			case 'l':
+				vol_label = strdup(optarg);
+				break;
+			case 'U':
+				uuid = strdup(optarg);
+				break;
+			case '?':
+			default:
+				usage();
+				break;
+		}
+	}
+
+	/* a bare trailing argument names the device and wins over --device */
+	if (optind < argc) {
+		if (dev_name)
+			free(dev_name);
+		dev_name = strdup(argv[optind]);
+	}
+	if (!vol_label) {
+		MKFS_WARN_STR("you must give a volume label");
+		usage();
+	}
+	if (!dev_name) {
+		MKFS_WARN_STR("you must give a device");
+		usage();
+	}
+	if (!cluster_size) {
+		MKFS_WARN_STR("you must give a cluster size");
+		usage();
+	}
+}
+
+/*
+ * Split the single extent initially recorded on autoconfig_rec into
+ * three back-to-back pieces: autoconfig, publish, vote.
+ *
+ * Autoconfig and publish get just enough blocks; vote gets all the
+ * rest.  This way we can easily tune up to 32 nodes without having to
+ * move these, and still keep them contiguous all the time.  For every
+ * record file_size is set equal to extent_len.
+ */
+void adjust_autoconfig_publish_vote(system_file_disk_record *autoconfig_rec,
+					system_file_disk_record *publish_rec,
+					system_file_disk_record *vote_rec)
+{
+	/* the whole region currently belongs to autoconfig */
+	const __u64 total_blocks = autoconfig_rec->extent_len >> blocksize_bits;
+	const __u64 ablocks = AUTOCONF_BLOCKS(initial_nodes, 1);
+	const __u64 pblocks = PUBLISH_BLOCKS(initial_nodes, 1);
+	const __u64 vblocks = total_blocks - ablocks - pblocks;
+
+	/* autoconfig keeps its starting offset and shrinks */
+	autoconfig_rec->extent_len = ablocks << blocksize_bits;
+	autoconfig_rec->file_size = autoconfig_rec->extent_len;
+
+	/* publish follows immediately */
+	publish_rec->extent_off = autoconfig_rec->extent_off + autoconfig_rec->extent_len;
+	publish_rec->extent_len = pblocks << blocksize_bits;
+	publish_rec->file_size = publish_rec->extent_len;
+
+	/* vote takes what is left */
+	vote_rec->extent_off = publish_rec->extent_off + publish_rec->extent_len;
+	vote_rec->extent_len = vblocks << blocksize_bits;
+	vote_rec->file_size = vote_rec->extent_len;
+}
+
+/*
+ * Write an empty node-config header into the first block of the
+ * autoconfig extent: signature and version set, zero nodes, and the
+ * disk lock master set to -1.
+ */
+void write_autoconfig_header(system_file_disk_record *rec)
+{
+	ocfs_node_config_hdr *cfg = (mapping + rec->extent_off);
+
+	memset(cfg, 0, blocksize);
+
+	strcpy(cfg->signature, OCFS2_NODE_CONFIG_HDR_SIGN);
+	cfg->version = OCFS2_NODE_CONFIG_VER;
+	cfg->num_nodes = 0;
+	cfg->last_node = 0;
+	cfg->disk_lock.dl_master = -1;
+}
+/*
+ * Reset rec to an empty system-file record.
+ *
+ * rec   - record to initialise (fully zeroed first)
+ * type  - one of the sfi_* kinds; only sfi_bitmap and sfi_local_alloc
+ *         add a flag beyond the common OCFS2_VALID_FL | OCFS2_SYSTEM_FL
+ * dir   - non-zero for a directory; directories start with 0 links,
+ *         everything else with 1
+ */
+void init_record(system_file_disk_record *rec, int type, int dir)
+{
+	memset(rec, 0, sizeof(*rec));
+
+	rec->dir = dir;
+	rec->links = dir ? 0 : 1;
+	rec->bi.total_bits = 0;
+	rec->bi.used_bits = 0;
+	rec->flags = (OCFS2_VALID_FL | OCFS2_SYSTEM_FL);
+
+	/* sfi_journal, sfi_alloc and sfi_other carry no extra flag for now */
+	if (type == sfi_bitmap)
+		rec->flags |= OCFS2_BITMAP_FL;
+	else if (type == sfi_local_alloc)
+		rec->flags |= OCFS2_LOCAL_ALLOC_FL;
+}
+
+
+/*
+ * mkfs entry point.  The phases below run in a fixed order:
+ *
+ *   1. parse arguments, open and mmap the device, derive sizes
+ *   2. initialise the in-memory records for every system file
+ *   3. set up the global (cluster) bitmap and the system (block) bitmap
+ *   4. allocate inodes and directory space, building the directory
+ *      images in memory
+ *   5. format the leading space, superblock and every file entry
+ *      (journal space is allocated during this phase)
+ *   6. write bitmaps, directories and the autoconfig header into the
+ *      mapping, then sync and unmap
+ *
+ * The order of the allocations matters: the leading space and the
+ * superblock are checked to have landed at fixed offsets.
+ */
+int main(int argc, char **argv)
+{
+	__u64 allocated;
+	__u32 need;
+	char fname[SYSTEM_FILE_NAME_MAX];
+	int i, j, num;
+	__u64 leading_space;
+	funky_dir *orphan_dir;
+	funky_dir *root_dir;
+	funky_dir *system_dir;
+	system_file_disk_record *tmprec, *tmprec2, *tmprec3;
+
+	progname = strdup(argv[0]);
+	process_args(argc, argv);
+	init_format_time();
+	init_device();
+	init_globals();
+	adjust_volume_size();
+	map_device();
+	generate_uuid();
+
+	/*
+	 * ALLOCATE STUFF
+	 */
+	// dummy record representing the whole volume
+	init_record(&global_alloc_rec, sfi_alloc, 0);
+	global_alloc_rec.extent_off = 0;
+	global_alloc_rec.extent_len = volume_size_in_bytes;
+
+	init_record(&superblock_rec, sfi_other, 0);
+	init_record(&root_dir_rec, sfi_other, 1);
+	init_record(&system_dir_rec, sfi_other, 1);
+
+	/* one record per global system file, initial_nodes records per local one */
+	for (i=0; i<NUM_SYSTEM_INODES; i++) {
+		num = (system_files[i].global ? 1 : initial_nodes);
+		record[i] = malloc(sizeof(system_file_disk_record) * num);
+		if (record[i] == NULL)
+			MKFS_FATAL_STR("could not allocate memory for system file disk records");
+		for (j=0; j < num; j++)
+			init_record(&record[i][j], system_files[i].type, system_files[i].dir);
+	}
+
+	root_dir = alloc_directory();
+	system_dir = alloc_directory();
+	orphan_dir = alloc_directory();
+
+	/*
+	 * INITIALIZE BITMAPS
+	 */
+	
+	/* create an alloc_bm for the global bitmap and align bytes up to next whole cluster. 
+	   extent_off is not yet known, since it must be allocated from itself.  */
+	need = (volume_size_in_clusters+7) >> 3;  
+	need = ((need + cluster_size - 1) >> cluster_size_bits) << cluster_size_bits;
+	tmprec = &(record[GLOBAL_BITMAP_SYSTEM_INODE][0]);
+	tmprec->extent_off = 0; // need to fill this in later
+	tmprec->extent_len = need;
+
+	global_bm = initialize_bitmap (volume_size_in_clusters, cluster_size_bits,
+				       "global bitmap", tmprec, &global_alloc_rec);
+
+	/* assign some space from global_bm to system_bm for data and bitmap blocks */
+	tmprec = &(record[GLOBAL_INODE_ALLOC_SYSTEM_INODE][0]);
+	tmprec2 = &(record[GLOBAL_INODE_ALLOC_BITMAP_SYSTEM_INODE][0]);
+	need = blocks_needed(); 
+	alloc_bytes_from_bitmap (need << blocksize_bits, global_bm, 
+				 &(tmprec->extent_off), &(tmprec->extent_len));
+
+	/* one bit per block, rounded up to bytes and then to whole clusters */
+	need = ((((need+7) >> 3) + cluster_size - 1) >> cluster_size_bits) << cluster_size_bits;
+	alloc_bytes_from_bitmap (need, global_bm, &(tmprec2->extent_off), 
+				 &(tmprec2->extent_len)); 
+	
+	/* create an alloc_bm for the system inode bitmap */
+	system_bm = initialize_bitmap(tmprec->extent_len >> blocksize_bits, blocksize_bits, 
+				      "system inode bitmap", tmprec2, tmprec);
+
+
+	/*
+	 * ALLOCATE INODES AND DIRECTORIES
+	 */
+	
+	/* leading space */
+	leading_space = alloc_inode(LEADING_SPACE_BLOCKS);
+	if (leading_space != 0ULL)
+		MKFS_FATAL("leading space blocks start at byte %llu, must start at 0\n", leading_space);
+
+
+	/* superblock */
+	superblock_rec.fe_off = alloc_inode(SUPERBLOCK_BLOCKS);
+	if (superblock_rec.fe_off != MAGIC_SUPERBLOCK_BLOCK_NUMBER << blocksize_bits)
+		MKFS_FATAL("superblock starts at byte %llu, must start at %llu\n", 
+			   superblock_rec.fe_off, MAGIC_SUPERBLOCK_BLOCK_NUMBER << blocksize_bits);
+
+
+	/* root directory */	
+	/* data comes from the global bitmap, the inode from the system bitmap */
+	alloc_from_bitmap (1, global_bm, &root_dir_rec.extent_off, &root_dir_rec.extent_len);
+	root_dir_rec.fe_off = alloc_inode(1);
+	root_dir->record = &root_dir_rec;
+	add_entry_to_directory(root_dir, ".", root_dir_rec.extent_off, OCFS2_FT_DIR);
+	add_entry_to_directory(root_dir, "..", root_dir_rec.extent_off, OCFS2_FT_DIR);
+
+
+	/* system directory */	
+	need = system_dir_blocks_needed();
+	alloc_from_bitmap (need, global_bm, &system_dir_rec.extent_off, &system_dir_rec.extent_len);
+	system_dir_rec.fe_off = alloc_inode(1);
+	system_dir->record = &system_dir_rec;
+	add_entry_to_directory(system_dir, ".", system_dir_rec.extent_off, OCFS2_FT_DIR);
+	add_entry_to_directory(system_dir, "..", system_dir_rec.extent_off, OCFS2_FT_DIR);
+	/* alloc and add all local system file inodes to system directory */
+	for (i=0; i<NUM_SYSTEM_INODES; i++) {
+		num = (system_files[i].global) ? 1 : initial_nodes;
+		for (j=0; j < num; j++) {
+			record[i][j].fe_off = alloc_inode(1);
+			/* system_files[i].name is used as a format string taking j */
+			sprintf(fname, system_files[i].name, j);
+			add_entry_to_directory(system_dir, fname, record[i][j].fe_off, 
+				       system_files[i].dir ?  OCFS2_FT_DIR : OCFS2_FT_REG_FILE);
+		}
+	}
+
+	/* autoconfig, publish, vote data */
+	/* XXX: ok this is messy ;-) */
+	/* give everything to autoconfig, then adjust it */
+	tmprec = &(record[AUTOCONFIG_SYSTEM_INODE][0]);
+	tmprec2 = &(record[PUBLISH_SYSTEM_INODE][0]);
+	tmprec3 = &(record[VOTE_SYSTEM_INODE][0]);
+	need = (AUTOCONF_BLOCKS(initial_nodes, 32) +
+		PUBLISH_BLOCKS(initial_nodes, 32) + 
+		VOTE_BLOCKS(initial_nodes, 32));
+	tmprec->extent_off = alloc_inode(need);
+	tmprec->extent_len = need << blocksize_bits;
+	adjust_autoconfig_publish_vote(tmprec, tmprec2, tmprec3);
+
+
+	/* orphan dir */
+	tmprec = &record[ORPHAN_DIR_SYSTEM_INODE][0];
+	orphan_dir->record = tmprec;
+	alloc_from_bitmap (1, global_bm, &tmprec->extent_off, &tmprec->extent_len);
+	add_entry_to_directory(orphan_dir, ".", tmprec->extent_off, OCFS2_FT_DIR);
+	add_entry_to_directory(orphan_dir, "..", tmprec->extent_off, OCFS2_FT_DIR);
+
+
+	/* finally, allocate (extent_off) the space for the global bitmap from itself */	
+	tmprec = global_bm->bm_record;
+	alloc_bytes_from_bitmap (tmprec->extent_len, global_bm, 
+				 &(tmprec->extent_off), &allocated);
+			
+
+	/* 
+	 * FORMAT BLOCKS
+	 */
+	format_leading_space(leading_space);
+	format_superblock(&superblock_rec, &root_dir_rec, &system_dir_rec);
+
+	format_file(&root_dir_rec);
+	format_file(&system_dir_rec);
+	
+	for (i=0; i<NUM_SYSTEM_INODES; i++) {
+		num = (system_files[i].global ? 1 : initial_nodes);
+		for (j=0; j<num; j++) {
+			tmprec = &(record[i][j]);
+			/* journals get their data space only now, one default-sized region each */
+			if (system_files[i].type == sfi_journal) {
+				alloc_bytes_from_bitmap(OCFS2_DEFAULT_JOURNAL_SIZE, global_bm, 
+							&(tmprec->extent_off), &(tmprec->extent_len));
+				replacement_journal_create(tmprec->extent_off);
+				tmprec->file_size = tmprec->extent_len;
+			}
+			format_file(tmprec);
+		}
+	}
+	
+	/*
+	 * WRITE BITMAPS
+	 */
+	write_bitmap_data(global_bm);
+	write_bitmap_data(system_bm);
+
+	/*
+	 * WRITE DIRECTORIES
+	 */
+	write_directory_data(root_dir);
+	write_directory_data(system_dir);
+	write_directory_data(orphan_dir);
+
+	write_autoconfig_header(&record[AUTOCONFIG_SYSTEM_INODE][0]);
+	/*
+	 * SYNC TO DISK
+	 */
+	sync_device();
+	unmap_device();
+	close(fd);
+
+	return 0;
+}
+
+
+/*
+ * Print "<progname> <version> <date> (build <md5>)" on stdout, taking
+ * the last three values from the OCFS2_BUILD_* macros.
+ */
+void version(char *progname)
+{
+	fprintf(stdout, "%s %s %s (build %s)\n",
+		progname, OCFS2_BUILD_VERSION, OCFS2_BUILD_DATE, OCFS2_BUILD_MD5);
+}				/* version */
+
+