[Ocfs-tools-commits]
khackel commits r59 - in trunk/ocfs2: . format format/inc
svn-commits at oss.oracle.com
svn-commits at oss.oracle.com
Sun Jun 13 20:33:53 CDT 2004
Author: khackel
Date: 2004-06-13 19:33:51 -0500 (Sun, 13 Jun 2004)
New Revision: 59
Added:
trunk/ocfs2/Makefile
trunk/ocfs2/format/
trunk/ocfs2/format/Makefile
trunk/ocfs2/format/inc/
trunk/ocfs2/format/inc/jfs_compat.h
trunk/ocfs2/format/inc/kernel-jbd.h
trunk/ocfs2/format/inc/kernel-list.h
trunk/ocfs2/format/inc/ocfs1_fs_compat.h
trunk/ocfs2/format/inc/ocfs2_fs.h
trunk/ocfs2/format/mkfs2.c
Log:
well someone will hate all this, but i had to put something in :)
Added: trunk/ocfs2/Makefile
===================================================================
--- trunk/ocfs2/Makefile 2004-06-13 23:54:25 UTC (rev 58)
+++ trunk/ocfs2/Makefile 2004-06-14 00:33:51 UTC (rev 59)
@@ -0,0 +1,7 @@
+TOPDIR = ..
+
+include $(TOPDIR)/Preamble.make
+
+SUBDIRS = libocfs debugocfs format fsck bugfix
+
+include $(TOPDIR)/Postamble.make
Added: trunk/ocfs2/format/Makefile
===================================================================
--- trunk/ocfs2/format/Makefile 2004-06-13 23:54:25 UTC (rev 58)
+++ trunk/ocfs2/format/Makefile 2004-06-14 00:33:51 UTC (rev 59)
@@ -0,0 +1,67 @@
+TOPDIR = ../..
+
+include $(TOPDIR)/Preamble.make
+
+WARNINGS = -Wall -Wstrict-prototypes -Wno-format -Wmissing-prototypes \
+ -Wmissing-declarations
+
+ifdef OCFS_DEBUG
+OPTS = -g
+endif
+
+CFLAGS = $(OPTS) -fno-strict-aliasing $(WARNINGS)
+
+SBIN_PROGRAMS = mkfs.ocfs2
+
+INCLUDES = -Iinc
+DEFINES = -DLINUX -DUSERSPACE_TOOL -DFORMAT_OCFS
+
+OPTIMIZE = -O2
+
+ifeq ($(OCFS_PROCESSOR),x86_64)
+ CFLAGS += -m64
+endif
+ifeq ($(OCFS_PROCESSOR),ia64)
+endif
+ifeq ($(OCFS_PROCESSOR),i686)
+ DEFINES += -D__ILP32__
+endif
+
+CFLAGS += $(OPTIMIZE)
+
+VERSION_FILES = mkfs2.c inc/jfs_compat.h inc/kernel-jbd.h inc/kernel-list.h
+VERSION_SRC = mkfs2.c
+VERSION_PREFIX = OCFS2
+
+DIST_RULES = dist-incdir
+
+#MANS = mkfs.ocfs2.8
+
+INSTALL_RULES = install-sbin-links
+
+##########################
+# WARNING!!!
+# are we still going to do argv[0] tricks?
+# mkfs.ocfs2 is built here (SBIN_PROGRAMS), so it must not be replaced by a link.
+##########################
+
+install-sbin-links: install-sbin-programs
+ cd $(DESTDIR)$(sbindir) \
+ && rm -f resizeocfs \
+ && $(LN_S) tuneocfs resizeocfs
+
+#install-man-links: install-mans
+# cd $(DESTDIR)$(mandir)/man8 \
+# && rm -f mkfs.ocfs.8 resizeocfs.8 \
+# && $(LN_S) mkfs.ocfs.8 mkfs.ocfs2.8 \
+# && $(LN_S) tuneocfs.8 resizeocfs.8
+
+DIST_FILES = $(VERSION_FILES) $(VERSION_SRC) inc/ocfs1_fs_compat.h inc/ocfs2_fs.h #mkfs.ocfs2.8.in
+
+mkfs.ocfs2: mkfs2.o
+ $(LINK)
+
+dist-incdir:
+ $(TOPDIR)/mkinstalldirs $(DIST_DIR)/inc
+
+include $(TOPDIR)/Postamble.make
Added: trunk/ocfs2/format/inc/jfs_compat.h
===================================================================
--- trunk/ocfs2/format/inc/jfs_compat.h 2004-06-13 23:54:25 UTC (rev 58)
+++ trunk/ocfs2/format/inc/jfs_compat.h 2004-06-14 00:33:51 UTC (rev 59)
@@ -0,0 +1,69 @@
+
+#ifndef _JFS_COMPAT_H
+#define _JFS_COMPAT_H
+
+#include "kernel-list.h"
+#include <errno.h>
+#ifdef HAVE_NETINET_IN_H
+#include <netinet/in.h>
+#endif
+
+// libocfs.h has these
+//#define printk printf
+//#define KERN_ERR ""
+#define KERN_DEBUG ""
+#define KERN_EMERG ""
+
+#define READ 0
+#define WRITE 1
+
+#define cpu_to_be32(n) htonl(n)
+#define be32_to_cpu(n) ntohl(n)
+
+typedef unsigned int tid_t;
+typedef struct journal_s journal_t;
+
+struct buffer_head;
+struct inode;
+
+struct journal_s /* userspace subset of the kernel journal_s in kernel-jbd.h */
+{
+ unsigned long j_flags; /* general journaling state flags */
+ int j_errno; /* outstanding uncleared error from a prior abort */
+ struct buffer_head * j_sb_buffer; /* the superblock buffer */
+ struct journal_superblock_s *j_superblock; /* journal superblock */
+ int j_format_version; /* version of the superblock format */
+ unsigned long j_head; /* first unused block in the journal */
+ unsigned long j_tail; /* oldest still-used block in the journal */
+ unsigned long j_free; /* number of free blocks in the journal */
+ unsigned long j_first, j_last; /* first usable block, one beyond the last usable block */
+ kdev_t j_dev; /* device where the journal is stored */
+ kdev_t j_fs_dev; /* device which holds the client fs */
+ int j_blocksize; /* blocksize of the journal location */
+ unsigned int j_blk_offset; /* starting block offset of the journal location */
+ unsigned int j_maxlen; /* total maximum capacity of the journal region on disk */
+ struct inode * j_inode; /* optional inode where the journal is stored */
+ tid_t j_tail_sequence; /* sequence number of the oldest transaction in the log */
+ tid_t j_transaction_sequence; /* sequence number of the next transaction to grant */
+ __u8 j_uuid[16]; /* journal uuid: identifies the object backed by this journal */
+ struct jbd_revoke_table_s *j_revoke; /* revoke table: revoked blocks in the current transaction */
+};
+
+#define J_ASSERT(assert) \
+ do { if (!(assert)) { \
+ printf ("Assertion failure in %s() at %s line %d: " \
+ "\"%s\"\n", \
+ __FUNCTION__, __FILE__, __LINE__, # assert); \
+ fatal_error(e2fsck_global_ctx, 0); \
+ } } while (0)
+
+#define is_journal_abort(x) 0
+
+#define BUFFER_TRACE(bh, info) do {} while (0)
+
+/* Need this so we can compile with configure --enable-gcc-wall */
+#ifdef NO_INLINE_FUNCS
+#define inline
+#endif
+
+#endif /* _JFS_COMPAT_H */
Added: trunk/ocfs2/format/inc/kernel-jbd.h
===================================================================
--- trunk/ocfs2/format/inc/kernel-jbd.h 2004-06-13 23:54:25 UTC (rev 58)
+++ trunk/ocfs2/format/inc/kernel-jbd.h 2004-06-14 00:33:51 UTC (rev 59)
@@ -0,0 +1,910 @@
+/*
+ * linux/include/linux/jbd.h
+ *
+ * Written by Stephen C. Tweedie <sct at redhat.com>
+ *
+ * Copyright 1998-2000 Red Hat, Inc --- All Rights Reserved
+ *
+ * This file is part of the Linux kernel and is made available under
+ * the terms of the GNU General Public License, version 2, or at your
+ * option, any later version, incorporated herein by reference.
+ *
+ * Definitions for transaction data structures for the buffer cache
+ * filesystem journaling support.
+ */
+
+#ifndef _LINUX_JBD_H
+#define _LINUX_JBD_H
+
+#if defined(CONFIG_JBD) || defined(CONFIG_JBD_MODULE) || !defined(__KERNEL__)
+
+/* Allow this file to be included directly into e2fsprogs */
+#ifndef __KERNEL__
+#include "jfs_compat.h"
+#define JFS_DEBUG
+#define jfs_debug jbd_debug
+#else
+
+#include <linux/journal-head.h>
+#include <linux/stddef.h>
+#include <asm/semaphore.h>
+#endif
+
+#ifndef __GNUC__
+#define __FUNCTION__ ""
+#endif
+
+#define journal_oom_retry 1
+
+#ifdef __STDC__
+#ifdef CONFIG_JBD_DEBUG
+/*
+ * Define JBD_EXPENSIVE_CHECKING to enable more expensive internal
+ * consistency checks. By default we don't do this unless
+ * CONFIG_JBD_DEBUG is on.
+ */
+#define JBD_EXPENSIVE_CHECKING
+extern int journal_enable_debug;
+
+#define jbd_debug(n, f, a...) \
+ do { \
+ if ((n) <= journal_enable_debug) { \
+ printk (KERN_DEBUG "(%s, %d): %s: ", \
+ __FILE__, __LINE__, __FUNCTION__); \
+ printk (f, ## a); \
+ } \
+ } while (0)
+#else
+#ifdef __GNUC__
+#define jbd_debug(f, a...) /**/
+#else
+#define jbd_debug(f, ...) /**/
+#endif
+#endif
+#else
+#define jbd_debug(x) /* AIX doesn't do STDC */
+#endif
+
+extern void * __jbd_kmalloc (char *where, size_t size, int flags, int retry);
+#define jbd_kmalloc(size, flags) \
+ __jbd_kmalloc(__FUNCTION__, (size), (flags), journal_oom_retry)
+#define jbd_rep_kmalloc(size, flags) \
+ __jbd_kmalloc(__FUNCTION__, (size), (flags), 1)
+
+#define JFS_MIN_JOURNAL_BLOCKS 1024
+
+#ifdef __KERNEL__
+typedef struct handle_s handle_t; /* Atomic operation type */
+typedef struct journal_s journal_t; /* Journal control structure */
+#endif
+
+/*
+ * Internal structures used by the logging mechanism:
+ */
+
+#define JFS_MAGIC_NUMBER 0xc03b3998U /* The first 4 bytes of /dev/random! */
+
+/*
+ * On-disk structures
+ */
+
+/*
+ * Descriptor block types:
+ */
+
+#define JFS_DESCRIPTOR_BLOCK 1
+#define JFS_COMMIT_BLOCK 2
+#define JFS_SUPERBLOCK_V1 3
+#define JFS_SUPERBLOCK_V2 4
+#define JFS_REVOKE_BLOCK 5
+
+/*
+ * Standard header for all descriptor blocks:
+ */
+typedef struct journal_header_s
+{
+ __u32 h_magic;
+ __u32 h_blocktype;
+ __u32 h_sequence;
+} journal_header_t;
+
+
+/*
+ * The block tag: used to describe a single buffer in the journal
+ */
+typedef struct journal_block_tag_s
+{
+ __u32 t_blocknr; /* The on-disk block number */
+ __u32 t_flags; /* See below */
+} journal_block_tag_t;
+
+/*
+ * The revoke descriptor: used on disk to describe a series of blocks to
+ * be revoked from the log
+ */
+typedef struct journal_revoke_header_s
+{
+ journal_header_t r_header;
+ int r_count; /* Count of bytes used in the block */
+} journal_revoke_header_t;
+
+
+/* Definitions for the journal tag flags word: */
+#define JFS_FLAG_ESCAPE 1 /* on-disk block is escaped */
+#define JFS_FLAG_SAME_UUID 2 /* block has same uuid as previous */
+#define JFS_FLAG_DELETED 4 /* block deleted by this transaction */
+#define JFS_FLAG_LAST_TAG 8 /* last tag in this descriptor block */
+
+
+/*
+ * The journal superblock. All fields are in big-endian byte order.
+ */
+typedef struct journal_superblock_s
+{
+/* 0x0000 */
+ journal_header_t s_header;
+
+/* 0x000C */
+ /* Static information describing the journal */
+ __u32 s_blocksize; /* journal device blocksize */
+ __u32 s_maxlen; /* total blocks in journal file */
+ __u32 s_first; /* first block of log information */
+
+/* 0x0018 */
+ /* Dynamic information describing the current state of the log */
+ __u32 s_sequence; /* first commit ID expected in log */
+ __u32 s_start; /* blocknr of start of log */
+
+/* 0x0020 */
+ /* Error value, as set by journal_abort(). */
+ __s32 s_errno;
+
+/* 0x0024 */
+ /* Remaining fields are only valid in a version-2 superblock */
+ __u32 s_feature_compat; /* compatible feature set */
+ __u32 s_feature_incompat; /* incompatible feature set */
+ __u32 s_feature_ro_compat; /* readonly-compatible feature set */
+/* 0x0030 */
+ __u8 s_uuid[16]; /* 128-bit uuid for journal */
+
+/* 0x0040 */
+ __u32 s_nr_users; /* Nr of filesystems sharing log */
+
+ __u32 s_dynsuper; /* Blocknr of dynamic superblock copy*/
+
+/* 0x0048 */
+ __u32 s_max_transaction; /* Limit of journal blocks per trans.*/
+ __u32 s_max_trans_data; /* Limit of data blocks per trans. */
+
+/* 0x0050 */
+ __u32 s_padding[44];
+
+/* 0x0100 */
+ __u8 s_users[16*48]; /* ids of all fs'es sharing the log */
+/* 0x0400 */
+} journal_superblock_t;
+
+#define JFS_HAS_COMPAT_FEATURE(j,mask) \
+ ((j)->j_format_version >= 2 && \
+ ((j)->j_superblock->s_feature_compat & cpu_to_be32((mask))))
+#define JFS_HAS_RO_COMPAT_FEATURE(j,mask) \
+ ((j)->j_format_version >= 2 && \
+ ((j)->j_superblock->s_feature_ro_compat & cpu_to_be32((mask))))
+#define JFS_HAS_INCOMPAT_FEATURE(j,mask) \
+ ((j)->j_format_version >= 2 && \
+ ((j)->j_superblock->s_feature_incompat & cpu_to_be32((mask))))
+
+#define JFS_FEATURE_INCOMPAT_REVOKE 0x00000001
+
+/* Features known to this kernel version: */
+#define JFS_KNOWN_COMPAT_FEATURES 0
+#define JFS_KNOWN_ROCOMPAT_FEATURES 0
+#define JFS_KNOWN_INCOMPAT_FEATURES JFS_FEATURE_INCOMPAT_REVOKE
+
+#ifdef __KERNEL__
+
+#include <linux/fs.h>
+#include <linux/sched.h>
+
+#define JBD_ASSERTIONS
+#ifdef JBD_ASSERTIONS
+#define J_ASSERT(assert) \
+do { \
+ if (!(assert)) { \
+ printk (KERN_EMERG \
+ "Assertion failure in %s() at %s:%d: \"%s\"\n", \
+ __FUNCTION__, __FILE__, __LINE__, # assert); \
+ BUG(); \
+ } \
+} while (0)
+
+#if defined(CONFIG_BUFFER_DEBUG)
+void buffer_assertion_failure(struct buffer_head *bh);
+#define J_ASSERT_BH(bh, expr) \
+ do { \
+ if (!(expr)) \
+ buffer_assertion_failure(bh); \
+ J_ASSERT(expr); \
+ } while (0)
+#define J_ASSERT_JH(jh, expr) J_ASSERT_BH(jh2bh(jh), expr)
+#else
+#define J_ASSERT_BH(bh, expr) J_ASSERT(expr)
+#define J_ASSERT_JH(jh, expr) J_ASSERT(expr)
+#endif
+
+#else
+#define J_ASSERT(assert)
+#endif /* JBD_ASSERTIONS */
+
+enum jbd_state_bits {
+ BH_JWrite
+ = BH_PrivateStart, /* 1 if being written to log (@@@ DEBUGGING) */
+ BH_Freed, /* 1 if buffer has been freed (truncated) */
+ BH_Revoked, /* 1 if buffer has been revoked from the log */
+ BH_RevokeValid, /* 1 if buffer revoked flag is valid */
+ BH_JBDDirty, /* 1 if buffer is dirty but journaled */
+};
+
+/* Return true if the buffer is one which JBD is managing */
+static inline int buffer_jbd(struct buffer_head *bh)
+{
+ return __buffer_state(bh, JBD);
+}
+
+static inline struct buffer_head *jh2bh(struct journal_head *jh)
+{
+ return jh->b_bh;
+}
+
+static inline struct journal_head *bh2jh(struct buffer_head *bh)
+{
+ return bh->b_private;
+}
+
+struct jbd_revoke_table_s;
+
+/* The handle_t type represents a single atomic update being performed
+ * by some process. All filesystem modifications made by the process go
+ * through this handle. Recursive operations (such as quota operations)
+ * are gathered into a single update.
+ *
+ * The buffer credits field is used to account for journaled buffers
+ * being modified by the running process. To ensure that there is
+ * enough log space for all outstanding operations, we need to limit the
+ * number of outstanding buffers possible at any time. When the
+ * operation completes, any buffer credits not used are credited back to
+ * the transaction, so that at all times we know how many buffers the
+ * outstanding updates on a transaction might possibly touch. */
+
+struct handle_s
+{
+ /* Which compound transaction is this update a part of? */
+ transaction_t * h_transaction;
+
+ /* Number of remaining buffers we are allowed to dirty: */
+ int h_buffer_credits;
+
+ /* Reference count on this handle */
+ int h_ref;
+
+ /* Field for caller's use to track errors through large fs
+ operations */
+ int h_err;
+
+ /* Flags */
+ unsigned int h_sync: 1; /* sync-on-close */
+ unsigned int h_jdata: 1; /* force data journaling */
+ unsigned int h_aborted: 1; /* fatal error on handle */
+};
+
+
+/* The transaction_t type is the guts of the journaling mechanism. It
+ * tracks a compound transaction through its various states:
+ *
+ * RUNNING: accepting new updates
+ * LOCKED: Updates still running but we don't accept new ones
+ * RUNDOWN: Updates are tidying up but have finished requesting
+ * new buffers to modify (state not used for now)
+ * FLUSH: All updates complete, but we are still writing to disk
+ * COMMIT: All data on disk, writing commit record
+ * FINISHED: We still have to keep the transaction for checkpointing.
+ *
+ * The transaction keeps track of all of the buffers modified by a
+ * running transaction, and all of the buffers committed but not yet
+ * flushed to home for finished transactions.
+ */
+
+struct transaction_s
+{
+ /* Pointer to the journal for this transaction. */
+ journal_t * t_journal;
+
+ /* Sequence number for this transaction */
+ tid_t t_tid;
+
+ /* Transaction's current state */
+ enum {
+ T_RUNNING,
+ T_LOCKED,
+ T_RUNDOWN,
+ T_FLUSH,
+ T_COMMIT,
+ T_FINISHED
+ } t_state;
+
+ /* Where in the log does this transaction's commit start? */
+ unsigned long t_log_start;
+
+ /* Doubly-linked circular list of all inodes owned by this
+ transaction */ /* AKPM: unused */
+ struct inode * t_ilist;
+
+ /* Number of buffers on the t_buffers list */
+ int t_nr_buffers;
+
+ /* Doubly-linked circular list of all buffers reserved but not
+ yet modified by this transaction */
+ struct journal_head * t_reserved_list;
+
+ /* Doubly-linked circular list of all metadata buffers owned by this
+ transaction */
+ struct journal_head * t_buffers;
+
+ /*
+ * Doubly-linked circular list of all data buffers still to be
+ * flushed before this transaction can be committed.
+ * Protected by journal_datalist_lock.
+ */
+ struct journal_head * t_sync_datalist;
+
+ /*
+ * Doubly-linked circular list of all writepage data buffers
+ * still to be written before this transaction can be committed.
+ * Protected by journal_datalist_lock.
+ */
+ struct journal_head * t_async_datalist;
+
+ /* Doubly-linked circular list of all forget buffers (superseded
+ buffers which we can un-checkpoint once this transaction
+ commits) */
+ struct journal_head * t_forget;
+
+ /*
+ * Doubly-linked circular list of all buffers still to be
+ * flushed before this transaction can be checkpointed.
+ */
+ /* Protected by journal_datalist_lock */
+ struct journal_head * t_checkpoint_list;
+
+ /* Doubly-linked circular list of temporary buffers currently
+ undergoing IO in the log */
+ struct journal_head * t_iobuf_list;
+
+ /* Doubly-linked circular list of metadata buffers being
+ shadowed by log IO. The IO buffers on the iobuf list and the
+ shadow buffers on this list match each other one for one at
+ all times. */
+ struct journal_head * t_shadow_list;
+
+ /* Doubly-linked circular list of control buffers being written
+ to the log. */
+ struct journal_head * t_log_list;
+
+ /* Number of outstanding updates running on this transaction */
+ int t_updates;
+
+ /* Number of buffers reserved for use by all handles in this
+ * transaction handle but not yet modified. */
+ int t_outstanding_credits;
+
+ /*
+ * Forward and backward links for the circular list of all
+ * transactions awaiting checkpoint.
+ */
+ /* Protected by journal_datalist_lock */
+ transaction_t *t_cpnext, *t_cpprev;
+
+ /* When will the transaction expire (become due for commit), in
+ * jiffies ? */
+ unsigned long t_expires;
+
+ /* How many handles used this transaction? */
+ int t_handle_count;
+};
+
+
+/* The journal_t maintains all of the journaling state information for a
+ * single filesystem. It is linked to from the fs superblock structure.
+ *
+ * We use the journal_t to keep track of all outstanding transaction
+ * activity on the filesystem, and to manage the state of the log
+ * writing process. */
+
+struct journal_s
+{
+ /* General journaling state flags */
+ unsigned long j_flags;
+
+ /* Is there an outstanding uncleared error on the journal (from
+ * a prior abort)? */
+ int j_errno;
+
+ /* The superblock buffer */
+ struct buffer_head * j_sb_buffer;
+ journal_superblock_t * j_superblock;
+
+ /* Version of the superblock format */
+ int j_format_version;
+
+ /* Number of processes waiting to create a barrier lock */
+ int j_barrier_count;
+
+ /* The barrier lock itself */
+ struct semaphore j_barrier;
+
+ /* Transactions: The current running transaction... */
+ transaction_t * j_running_transaction;
+
+ /* ... the transaction we are pushing to disk ... */
+ transaction_t * j_committing_transaction;
+
+ /* ... and a linked circular list of all transactions waiting
+ * for checkpointing. */
+ /* Protected by journal_datalist_lock */
+ transaction_t * j_checkpoint_transactions;
+
+ /* Wait queue for waiting for a locked transaction to start
+ committing, or for a barrier lock to be released */
+ wait_queue_head_t j_wait_transaction_locked;
+
+ /* Wait queue for waiting for checkpointing to complete */
+ wait_queue_head_t j_wait_logspace;
+
+ /* Wait queue for waiting for commit to complete */
+ wait_queue_head_t j_wait_done_commit;
+
+ /* Wait queue to trigger checkpointing */
+ wait_queue_head_t j_wait_checkpoint;
+
+ /* Wait queue to trigger commit */
+ wait_queue_head_t j_wait_commit;
+
+ /* Wait queue to wait for updates to complete */
+ wait_queue_head_t j_wait_updates;
+
+ /* Semaphore for locking against concurrent checkpoints */
+ struct semaphore j_checkpoint_sem;
+
+ /* The main journal lock, used by lock_journal() */
+ struct semaphore j_sem;
+
+ /* Journal head: identifies the first unused block in the journal. */
+ unsigned long j_head;
+
+ /* Journal tail: identifies the oldest still-used block in the
+ * journal. */
+ unsigned long j_tail;
+
+ /* Journal free: how many free blocks are there in the journal? */
+ unsigned long j_free;
+
+ /* Journal start and end: the block numbers of the first usable
+ * block and one beyond the last usable block in the journal. */
+ unsigned long j_first, j_last;
+
+ /* Device, blocksize and starting block offset for the location
+ * where we store the journal. */
+ kdev_t j_dev;
+ int j_blocksize;
+ unsigned int j_blk_offset;
+
+ /* Device which holds the client fs. For internal journal this
+ * will be equal to j_dev. */
+ kdev_t j_fs_dev;
+
+ /* Total maximum capacity of the journal region on disk. */
+ unsigned int j_maxlen;
+
+ /* Optional inode where we store the journal. If present, all
+ * journal block numbers are mapped into this inode via
+ * bmap(). */
+ struct inode * j_inode;
+
+ /* Sequence number of the oldest transaction in the log */
+ tid_t j_tail_sequence;
+ /* Sequence number of the next transaction to grant */
+ tid_t j_transaction_sequence;
+ /* Sequence number of the most recently committed transaction */
+ tid_t j_commit_sequence;
+ /* Sequence number of the most recent transaction wanting commit */
+ tid_t j_commit_request;
+
+ /* Journal uuid: identifies the object (filesystem, LVM volume
+ * etc) backed by this journal. This will eventually be
+ * replaced by an array of uuids, allowing us to index multiple
+ * devices within a single journal and to perform atomic updates
+ * across them. */
+
+ __u8 j_uuid[16];
+
+ /* Pointer to the current commit thread for this journal */
+ struct task_struct * j_task;
+
+ /* Maximum number of metadata buffers to allow in a single
+ * compound commit transaction */
+ int j_max_transaction_buffers;
+
+ /* What is the maximum transaction lifetime before we begin a
+ * commit? */
+ unsigned long j_commit_interval;
+
+ /* The timer used to wakeup the commit thread: */
+ struct timer_list * j_commit_timer;
+ int j_commit_timer_active;
+
+ /* Link all journals together - system-wide */
+ struct list_head j_all_journals;
+
+ /* The revoke table: maintains the list of revoked blocks in the
+ current transaction. */
+ struct jbd_revoke_table_s *j_revoke;
+};
+
+/*
+ * Journal flag definitions
+ */
+#define JFS_UNMOUNT 0x001 /* Journal thread is being destroyed */
+#define JFS_ABORT 0x002 /* Journaling has been aborted for errors. */
+#define JFS_ACK_ERR 0x004 /* The errno in the sb has been acked */
+#define JFS_FLUSHED 0x008 /* The journal superblock has been flushed */
+#define JFS_LOADED 0x010 /* The journal superblock has been loaded */
+
+/*
+ * Function declarations for the journaling transaction and buffer
+ * management
+ */
+
+/* Filing buffers */
+extern void __journal_unfile_buffer(struct journal_head *);
+extern void journal_unfile_buffer(struct journal_head *);
+extern void __journal_refile_buffer(struct journal_head *);
+extern void journal_refile_buffer(struct journal_head *);
+extern void __journal_file_buffer(struct journal_head *, transaction_t *, int);
+extern void __journal_free_buffer(struct journal_head *bh);
+extern void journal_file_buffer(struct journal_head *, transaction_t *, int);
+extern void __journal_clean_data_list(transaction_t *transaction);
+
+/* Log buffer allocation */
+extern struct journal_head * journal_get_descriptor_buffer(journal_t *);
+extern unsigned long journal_next_log_block(journal_t *);
+
+/* Commit management */
+extern void journal_commit_transaction(journal_t *);
+
+/* Checkpoint list management */
+int __journal_clean_checkpoint_list(journal_t *journal);
+extern void journal_remove_checkpoint(struct journal_head *);
+extern void __journal_remove_checkpoint(struct journal_head *);
+extern void journal_insert_checkpoint(struct journal_head *, transaction_t *);
+extern void __journal_insert_checkpoint(struct journal_head *,transaction_t *);
+
+/* Buffer IO */
+extern int
+journal_write_metadata_buffer(transaction_t *transaction,
+ struct journal_head *jh_in,
+ struct journal_head **jh_out,
+ int blocknr);
+
+/* Transaction locking */
+extern void __wait_on_journal (journal_t *);
+
+/*
+ * Journal locking.
+ *
+ * We need to lock the journal during transaction state changes so that
+ * nobody ever tries to take a handle on the running transaction while
+ * we are in the middle of moving it to the commit phase.
+ *
+ * Note that the locking is completely interrupt unsafe. We never touch
+ * journal structures from interrupts.
+ *
+ * In 2.2, the BKL was required for lock_journal. This is no longer
+ * the case.
+ */
+
+static inline void lock_journal(journal_t *journal)
+{
+ down(&journal->j_sem);
+}
+
+/* This returns zero if we acquired the semaphore */
+static inline int try_lock_journal(journal_t * journal)
+{
+ return down_trylock(&journal->j_sem);
+}
+
+static inline void unlock_journal(journal_t * journal)
+{
+ up(&journal->j_sem);
+}
+
+
+static inline handle_t *journal_current_handle(void)
+{
+ return current->journal_info;
+}
+
+/* The journaling code user interface:
+ *
+ * Create and destroy handles
+ * Register buffer modifications against the current transaction.
+ */
+
+extern handle_t *journal_start(journal_t *, int nblocks);
+extern handle_t *journal_try_start(journal_t *, int nblocks);
+extern int journal_restart (handle_t *, int nblocks);
+extern int journal_extend (handle_t *, int nblocks);
+extern int journal_get_write_access (handle_t *, struct buffer_head *);
+extern int journal_get_create_access (handle_t *, struct buffer_head *);
+extern int journal_get_undo_access (handle_t *, struct buffer_head *);
+extern int journal_dirty_data (handle_t *,
+ struct buffer_head *, int async);
+extern int journal_dirty_metadata (handle_t *, struct buffer_head *);
+extern void journal_release_buffer (handle_t *, struct buffer_head *);
+extern void journal_forget (handle_t *, struct buffer_head *);
+extern void journal_sync_buffer (struct buffer_head *);
+extern int journal_flushpage(journal_t *, struct page *, unsigned long);
+extern int journal_try_to_free_buffers(journal_t *, struct page *, int);
+extern int journal_stop(handle_t *);
+extern int journal_flush (journal_t *);
+
+extern void journal_lock_updates (journal_t *);
+extern void journal_unlock_updates (journal_t *);
+
+extern journal_t * journal_init_dev(kdev_t dev, kdev_t fs_dev,
+ int start, int len, int bsize);
+extern journal_t * journal_init_inode (struct inode *);
+extern int journal_update_format (journal_t *);
+extern int journal_check_used_features
+ (journal_t *, unsigned long, unsigned long, unsigned long);
+extern int journal_check_available_features
+ (journal_t *, unsigned long, unsigned long, unsigned long);
+extern int journal_set_features
+ (journal_t *, unsigned long, unsigned long, unsigned long);
+extern int journal_create (journal_t *);
+extern int journal_load (journal_t *journal);
+extern void journal_destroy (journal_t *);
+extern int journal_recover (journal_t *journal);
+extern int journal_wipe (journal_t *, int);
+extern int journal_skip_recovery (journal_t *);
+extern void journal_update_superblock (journal_t *, int);
+extern void __journal_abort (journal_t *);
+extern void journal_abort (journal_t *, int);
+extern int journal_errno (journal_t *);
+extern void journal_ack_err (journal_t *);
+extern int journal_clear_err (journal_t *);
+extern unsigned long journal_bmap(journal_t *journal, unsigned long blocknr);
+extern int journal_force_commit(journal_t *journal);
+
+/*
+ * journal_head management
+ */
+extern struct journal_head
+ *journal_add_journal_head(struct buffer_head *bh);
+extern void journal_remove_journal_head(struct buffer_head *bh);
+extern void __journal_remove_journal_head(struct buffer_head *bh);
+extern void journal_unlock_journal_head(struct journal_head *jh);
+
+/* Primary revoke support */
+#define JOURNAL_REVOKE_DEFAULT_HASH 256
+extern int journal_init_revoke(journal_t *, int);
+extern void journal_destroy_revoke_caches(void);
+extern int journal_init_revoke_caches(void);
+
+extern void journal_destroy_revoke(journal_t *);
+extern int journal_revoke (handle_t *,
+ unsigned long, struct buffer_head *);
+extern int journal_cancel_revoke(handle_t *, struct journal_head *);
+extern void journal_write_revoke_records(journal_t *, transaction_t *);
+
+/* Recovery revoke support */
+extern int journal_set_revoke(journal_t *, unsigned long, tid_t);
+extern int journal_test_revoke(journal_t *, unsigned long, tid_t);
+extern void journal_clear_revoke(journal_t *);
+extern void journal_brelse_array(struct buffer_head *b[], int n);
+
+/* The log thread user interface:
+ *
+ * Request space in the current transaction, and force transaction commit
+ * transitions on demand.
+ */
+
+extern int log_space_left (journal_t *); /* Called with journal locked */
+extern tid_t log_start_commit (journal_t *, transaction_t *);
+extern void log_wait_commit (journal_t *, tid_t);
+extern int log_do_checkpoint (journal_t *, int);
+
+extern void log_wait_for_space(journal_t *, int nblocks);
+extern void __journal_drop_transaction(journal_t *, transaction_t *);
+extern int cleanup_journal_tail(journal_t *);
+
+/* Reduce journal memory usage by flushing */
+extern void shrink_journal_memory(void);
+
+/* Debugging code only: report the calling function as unimplemented, then
+ * put the task to sleep uninterruptibly in an endless loop (never returns). */
+#define jbd_ENOSYS() \
+do { \
+ printk (KERN_ERR "JBD unimplemented function %s\n", __FUNCTION__); \
+ current->state = TASK_UNINTERRUPTIBLE; \
+ schedule(); \
+} while (1)
+
+/*
+ * is_journal_aborted
+ *
+ * Simple test wrapper function to test the JFS_ABORT state flag. This
+ * bit, when set, indicates that we have had a fatal error somewhere,
+ * either inside the journaling layer or indicated to us by the client
+ * (eg. ext3), and that we should not commit any further
+ * transactions.
+ */
+
+static inline int is_journal_aborted(journal_t *journal)
+{
+ return journal->j_flags & JFS_ABORT; /* nonzero (the flag bit itself, not 1) when set */
+}
+
+static inline int is_handle_aborted(handle_t *handle)
+{
+ if (handle->h_aborted) /* this handle itself was marked aborted */
+ return 1;
+ return is_journal_aborted(handle->h_transaction->t_journal); /* else defer to the journal-wide flag */
+}
+
+static inline void journal_abort_handle(handle_t *handle)
+{
+ handle->h_aborted = 1;
+}
+
+/* Not all architectures define BUG() */
+#ifndef BUG
+#define BUG() do { \
+ printk("kernel BUG at %s:%d!\n", __FILE__, __LINE__); \
+ * ((char *) 0) = 0; \
+ } while (0)
+#endif /* BUG */
+
+#else
+
+extern int journal_recover (journal_t *journal);
+extern int journal_skip_recovery (journal_t *);
+
+/* Primary revoke support */
+extern int journal_init_revoke(journal_t *, int);
+extern void journal_destroy_revoke_caches(void);
+extern int journal_init_revoke_caches(void);
+
+/* Recovery revoke support */
+extern int journal_set_revoke(journal_t *, unsigned long, tid_t);
+extern int journal_test_revoke(journal_t *, unsigned long, tid_t);
+extern void journal_clear_revoke(journal_t *);
+extern void journal_brelse_array(struct buffer_head *b[], int n);
+
+extern void journal_destroy_revoke(journal_t *);
+#endif /* __KERNEL__ */
+
+/* Comparison functions for transaction IDs: perform comparisons using
+ * modulo arithmetic so that they work over sequence number wraps. */
+
+static inline int tid_gt(tid_t x, tid_t y)
+{
+ /* Wrap-safe "x is after y": the unsigned gap, read as signed, is positive. */
+ return (int) (x - y) > 0;
+}
+
+static inline int tid_geq(tid_t x, tid_t y)
+{
+ /* Wrap-safe "x is at or after y": the unsigned gap, read as signed, is non-negative. */
+ return (int) (x - y) >= 0;
+}
+
+extern int journal_blocks_per_page(struct inode *inode);
+
+/*
+ * Definitions which augment the buffer_head layer
+ */
+
+/* journaling buffer types */
+#define BJ_None 0 /* Not journaled */
+#define BJ_SyncData 1 /* Normal data: flush before commit */
+#define BJ_AsyncData 2 /* writepage data: wait on it before commit */
+#define BJ_Metadata 3 /* Normal journaled metadata */
+#define BJ_Forget 4 /* Buffer superseded by this transaction */
+#define BJ_IO 5 /* Buffer is for temporary IO use */
+#define BJ_Shadow 6 /* Buffer contents being shadowed to the log */
+#define BJ_LogCtl 7 /* Buffer contains log descriptors */
+#define BJ_Reserved 8 /* Buffer is reserved for access by journal */
+#define BJ_Types 9 /* one past the highest BJ_* value above */
+
+extern int jbd_blocks_per_page(struct inode *inode);
+
+#ifdef __KERNEL__
+
+extern spinlock_t jh_splice_lock;
+/*
+ * Once `expr1' has been found true, take jh_splice_lock
+ * and then reevaluate everything.
+ */
+#define SPLICE_LOCK(expr1, expr2) \
+ ({ \
+ int ret = (expr1); \
+ if (ret) { \
+ spin_lock(&jh_splice_lock); \
+ ret = (expr1) && (expr2); \
+ spin_unlock(&jh_splice_lock); \
+ } \
+ ret; \
+ })
+
+/*
+ * A number of buffer state predicates. They test for
+ * buffer_jbd() because they are used in core kernel code.
+ *
+ * These will be racy on SMP unless we're *sure* that the
+ * buffer won't be detached from the journalling system
+ * in parallel.
+ */
+
+/* Return true if the buffer is on journal list `list' */
+static inline int buffer_jlist_eq(struct buffer_head *bh, int list)
+{
+ return SPLICE_LOCK(buffer_jbd(bh), bh2jh(bh)->b_jlist == list);
+}
+
+/* Return true if this buffer is dirty wrt the journal */
+static inline int buffer_jdirty(struct buffer_head *bh)
+{
+ return buffer_jbd(bh) && __buffer_state(bh, JBDDirty);
+}
+
+/* Return true if it's a data buffer which journalling is managing */
+static inline int buffer_jbd_data(struct buffer_head *bh)
+{
+ return SPLICE_LOCK(buffer_jbd(bh),
+ bh2jh(bh)->b_jlist == BJ_SyncData ||
+ bh2jh(bh)->b_jlist == BJ_AsyncData);
+}
+
+#ifdef CONFIG_SMP
+#define assert_spin_locked(lock) J_ASSERT(spin_is_locked(lock))
+#else
+#define assert_spin_locked(lock) do {} while(0)
+#endif
+
+#define buffer_trace_init(bh) do {} while (0)
+#define print_buffer_fields(bh) do {} while (0)
+#define print_buffer_trace(bh) do {} while (0)
+#define BUFFER_TRACE(bh, info) do {} while (0)
+#define BUFFER_TRACE2(bh, bh2, info) do {} while (0)
+#define JBUFFER_TRACE(jh, info) do {} while (0)
+
+#endif /* __KERNEL__ */
+
+#endif /* CONFIG_JBD || CONFIG_JBD_MODULE || !__KERNEL__ */
+
+/*
+ * Compatibility no-ops which allow the kernel to compile without CONFIG_JBD
+ * go here.
+ */
+
+#if defined(__KERNEL__) && !(defined(CONFIG_JBD) || defined(CONFIG_JBD_MODULE))
+
+#define J_ASSERT(expr) do {} while (0)
+#define J_ASSERT_BH(bh, expr) do {} while (0)
+#define buffer_jbd(bh) 0
+#define buffer_jlist_eq(bh, val) 0
+#define journal_buffer_journal_lru(bh) 0
+
+#endif /* defined(__KERNEL__) && !defined(CONFIG_JBD) */
+#endif /* _LINUX_JBD_H */
Added: trunk/ocfs2/format/inc/kernel-list.h
===================================================================
--- trunk/ocfs2/format/inc/kernel-list.h 2004-06-13 23:54:25 UTC (rev 58)
+++ trunk/ocfs2/format/inc/kernel-list.h 2004-06-14 00:33:51 UTC (rev 59)
@@ -0,0 +1,112 @@
+#ifndef _LINUX_LIST_H
+#define _LINUX_LIST_H
+
+/*
+ * Simple doubly linked list implementation.
+ *
+ * Some of the internal functions ("__xxx") are useful when
+ * manipulating whole lists rather than single entries, as
+ * sometimes we already know the next/prev entries and we can
+ * generate better code by using them directly rather than
+ * using the generic single-entry routines.
+ */
+
+struct list_head {
+ struct list_head *next, *prev;
+};
+
+#define LIST_HEAD_INIT(name) { &(name), &(name) }
+
+#define LIST_HEAD(name) \
+ struct list_head name = { &name, &name }
+
+#define INIT_LIST_HEAD(ptr) do { \
+ (ptr)->next = (ptr); (ptr)->prev = (ptr); \
+} while (0)
+
+#if (!defined(__GNUC__) && !defined(__WATCOMC__))
+#define __inline__
+#endif
+
+/*
+ * Link @new into a list between @prev and @next.
+ *
+ * Internal helper: the caller must already know that @prev and @next
+ * are consecutive entries.
+ */
+static __inline__ void __list_add(struct list_head * new,
+	struct list_head * prev,
+	struct list_head * next)
+{
+	/* Point the new entry at both neighbours first ... */
+	new->prev = prev;
+	new->next = next;
+	/* ... then make the neighbours point at the new entry. */
+	prev->next = new;
+	next->prev = new;
+}
+
+/*
+ * Insert @new directly after @head, i.e. as the first entry of the list.
+ */
+static __inline__ void list_add(struct list_head *new, struct list_head *head)
+{
+	struct list_head *first = head->next;
+
+	__list_add(new, head, first);
+}
+
+/*
+ * Insert @new directly before @head, i.e. as the last entry of the list.
+ */
+static __inline__ void list_add_tail(struct list_head *new, struct list_head *head)
+{
+	struct list_head *last = head->prev;
+
+	__list_add(new, last, head);
+}
+
+/*
+ * Unlink whatever sits between @prev and @next by making the two
+ * entries point at each other.
+ *
+ * Internal helper: the caller must already know the prev/next entries
+ * of the element being removed.
+ */
+static __inline__ void __list_del(struct list_head * prev,
+	struct list_head * next)
+{
+	prev->next = next;
+	next->prev = prev;
+}
+
+/*
+ * Remove @entry from the list it is on.  The links stored inside @entry
+ * itself are left untouched.
+ */
+static __inline__ void list_del(struct list_head *entry)
+{
+	struct list_head *before = entry->prev;
+	struct list_head *after = entry->next;
+
+	__list_del(before, after);
+}
+
+/*
+ * Return non-zero when the list headed by @head has no entries, i.e.
+ * when the head points back at itself.  The list is only read, so the
+ * head is taken as a pointer to const.
+ */
+static __inline__ int list_empty(const struct list_head *head)
+{
+	return head->next == head;
+}
+
+/*
+ * Splice the entries of "list" into "head", right after the head
+ * element.  "list" itself is not modified; an empty "list" is a no-op.
+ */
+static __inline__ void list_splice(struct list_head *list, struct list_head *head)
+{
+	struct list_head *first = list->next;
+	struct list_head *last;
+	struct list_head *at;
+
+	if (first == list)
+		return;		/* nothing to move */
+
+	last = list->prev;
+	at = head->next;
+
+	/* head <-> first ... */
+	first->prev = head;
+	head->next = first;
+
+	/* ... last <-> old first entry of head */
+	last->next = at;
+	at->prev = last;
+}
+
+#define list_entry(ptr, type, member) \
+ ((type *)((char *)(ptr)-(unsigned long)(&((type *)0)->member)))
+
+#define list_for_each(pos, head) \
+ for (pos = (head)->next; pos != (head); pos = pos->next)
+
+#endif
Added: trunk/ocfs2/format/inc/ocfs1_fs_compat.h
===================================================================
--- trunk/ocfs2/format/inc/ocfs1_fs_compat.h 2004-06-13 23:54:25 UTC (rev 58)
+++ trunk/ocfs2/format/inc/ocfs1_fs_compat.h 2004-06-14 00:33:51 UTC (rev 59)
@@ -0,0 +1,112 @@
+/* -*- mode: c; c-basic-offset: 9; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
+ * ocfs1_fs_compat.h
+ *
+ * OCFS1 volume header definitions. OCFS2 creates valid but unmountable
+ * OCFS1 volume headers on the first two sectors of an OCFS2 volume.
+ * This allows an OCFS1 volume to see the partition and cleanly fail to
+ * mount it.
+ *
+ * Copyright (C) 2002, 2004 Oracle. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License, version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ *
+ * Authors: Kurt Hackel, Mark Fasheh, Sunil Mushran, Wim Coekaerts,
+ * Manish Singh, Neeraj Goyal, Suchit Kaura, Joel Becker
+ */
+
+#ifndef _OCFS1_FS_COMPAT_H
+#define _OCFS1_FS_COMPAT_H
+
+#define MAX_VOL_SIGNATURE_LEN_V1 128
+#define MAX_MOUNT_POINT_LEN_V1 128
+#define MAX_VOL_ID_LENGTH_V1 16
+#define MAX_VOL_LABEL_LEN_V1 64
+#define MAX_CLUSTER_NAME_LEN_V1 64
+
+#define OCFS1_MAJOR_VERSION (2)
+#define OCFS1_MINOR_VERSION (0)
+#define OCFS1_VOLUME_SIGNATURE "OracleCFS"
+
+/*
+ * OCFS1 superblock. Lives at sector 0.
+ */
+typedef struct _ocfs1_vol_disk_hdr
+{
+/*00*/ __u32 minor_version;
+ __u32 major_version;
+/*08*/ __u8 signature[MAX_VOL_SIGNATURE_LEN_V1];
+/*88*/ __u8 mount_point[MAX_MOUNT_POINT_LEN_V1];
+/*108*/ __u64 serial_num;
+/*110*/ __u64 device_size;
+ __u64 start_off;
+/*120*/ __u64 bitmap_off;
+ __u64 publ_off;
+/*130*/ __u64 vote_off;
+ __u64 root_bitmap_off;
+/*140*/ __u64 data_start_off;
+ __u64 root_bitmap_size;
+/*150*/ __u64 root_off;
+ __u64 root_size;
+/*160*/ __u64 cluster_size;
+ __u64 num_nodes;
+/*170*/ __u64 num_clusters;
+ __u64 dir_node_size;
+/*180*/ __u64 file_node_size;
+ __u64 internal_off;
+/*190*/ __u64 node_cfg_off;
+ __u64 node_cfg_size;
+/*1A0*/ __u64 new_cfg_off;
+ __u32 prot_bits;
+ __s32 excl_mount;
+/*1B0*/
+} ocfs1_vol_disk_hdr;
+
+
+typedef struct _ocfs1_disk_lock
+{
+/*00*/ __u32 curr_master;
+ __u8 file_lock;
+ __u8 compat_pad[3]; /* Not in original definition. Used to
+ make the already existing alignment
+ explicit */
+ __u64 last_write_time;
+/*10*/ __u64 last_read_time;
+ __u32 writer_node_num;
+ __u32 reader_node_num;
+/*20*/ __u64 oin_node_map;
+ __u64 dlock_seq_num;
+/*30*/
+} ocfs1_disk_lock;
+
+/*
+ * OCFS1 volume label. Lives at sector 1.
+ *
+ * The hexadecimal markers give each field's byte offset from the start
+ * of the structure (disk_lock is 0x30 bytes; cluster_name starts at
+ * 0x84 and is 64 bytes long, so cluster_name_len sits at 0xC4).
+ */
+typedef struct _ocfs1_vol_label
+{
+/*00*/ ocfs1_disk_lock disk_lock;
+/*30*/ __u8 label[MAX_VOL_LABEL_LEN_V1];
+/*70*/ __u16 label_len;
+/*72*/ __u8 vol_id[MAX_VOL_ID_LENGTH_V1];
+/*82*/ __u16 vol_id_len;
+/*84*/ __u8 cluster_name[MAX_CLUSTER_NAME_LEN_V1];
+/*C4*/ __u16 cluster_name_len;
+/*C6*/
+} ocfs1_vol_label;
+
+
+#endif /* _OCFS1_FS_COMPAT_H */
+
Added: trunk/ocfs2/format/inc/ocfs2_fs.h
===================================================================
--- trunk/ocfs2/format/inc/ocfs2_fs.h 2004-06-13 23:54:25 UTC (rev 58)
+++ trunk/ocfs2/format/inc/ocfs2_fs.h 2004-06-14 00:33:51 UTC (rev 59)
@@ -0,0 +1,447 @@
+/* -*- mode: c; c-basic-offset: 9; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
+ * ocfs2_fs.h
+ *
+ * On-disk structures for OCFS2.
+ *
+ * Copyright (C) 2002, 2004 Oracle. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License, version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ *
+ * Authors: Kurt Hackel, Mark Fasheh, Sunil Mushran, Wim Coekaerts,
+ * Manish Singh, Neeraj Goyal, Suchit Kaura, Joel Becker
+ */
+
+#ifndef _OCFS2_FS_H
+#define _OCFS2_FS_H
+
+/* Version */
+#define OCFS2_MAJOR_REV_LEVEL 2
+#define OCFS2_MINOR_REV_LEVEL 0
+
+/*
+ * An OCFS2 volume starts this way:
+ * Sector 0: Valid ocfs1_vol_disk_hdr that cleanly fails to mount v1.
+ * Sector 1: Valid ocfs1_vol_label that cleanly fails to mount v1.
+ * Block OCFS2_SUPER_BLOCK_BLKNO: OCFS2 superblock.
+ *
+ * All other structures are found from the superblock information.
+ *
+ * OCFS2_SUPER_BLOCK_BLKNO is in blocks, not sectors. eg, for a
+ * blocksize of 2K, it is 4096 bytes into disk.
+ */
+#define OCFS2_SUPER_BLOCK_BLKNO 2
+
+/* Object signatures */
+#define OCFS2_SUPER_BLOCK_SIGNATURE "OCFSV2"
+#define OCFS2_FILE_ENTRY_SIGNATURE "INODE01"
+#define OCFS2_EXTENT_BLOCK_SIGNATURE "EXBLK01"
+
+/*
+ * Flags on ocfs2_dinode.i_flags
+ */
+#define OCFS2_VALID_FL (0x01)
+#define OCFS2_UNUSED2_FL (0x02)
+#define OCFS2_ORPHANED_FL (0x04)
+#define OCFS2_UNUSED3_FL (0x08)
+#define OCFS2_SYSTEM_FL (0x10)
+#define OCFS2_SUPER_BLOCK_FL (0x20)
+#define OCFS2_LOCAL_ALLOC_FL (0x40)
+#define OCFS2_BITMAP_FL (0x80)
+
+
+/* Limit of space in ocfs2_dir_entry */
+#define OCFS2_MAX_FILENAME_LENGTH 255
+
+/* Limit of node map bits in ocfs2_disk_lock */
+#define OCFS2_MAX_NODES 256
+
+#define MAX_VOL_ID_LENGTH 16
+#define MAX_VOL_LABEL_LEN 64
+#define MAX_CLUSTER_NAME_LEN 64
+
+
+#define ONE_MEGA_BYTE (1 * 1024 * 1024) /* in bytes */
+#define OCFS2_DEFAULT_JOURNAL_SIZE (8 * ONE_MEGA_BYTE)
+
+
+/* System file index */
+enum {
+ GLOBAL_BITMAP_SYSTEM_INODE = 0,
+ GLOBAL_INODE_ALLOC_SYSTEM_INODE,
+ GLOBAL_INODE_ALLOC_BITMAP_SYSTEM_INODE,
+ AUTOCONFIG_SYSTEM_INODE,
+ PUBLISH_SYSTEM_INODE,
+ VOTE_SYSTEM_INODE,
+ ORPHAN_DIR_SYSTEM_INODE,
+ EXTENT_ALLOC_SYSTEM_INODE,
+ EXTENT_ALLOC_BITMAP_SYSTEM_INODE,
+ INODE_ALLOC_SYSTEM_INODE,
+ INODE_ALLOC_BITMAP_SYSTEM_INODE,
+ JOURNAL_SYSTEM_INODE,
+ LOCAL_ALLOC_SYSTEM_INODE,
+ NUM_SYSTEM_INODES
+};
+
+/*
+ * The last system inode that has only one global copy. Every system
+ * inode after it in the system inode enum has a node-specific copy.
+ */
+#define OCFS_LAST_GLOBAL_SYSTEM_INODE ORPHAN_DIR_SYSTEM_INODE
+
+
+/* Default size for the local alloc bitmap */
+#define OCFS2_LOCAL_BITMAP_DEFAULT_SIZE 256
+
+/*
+ * OCFS2 directory file types. Only the low 3 bits are used. The
+ * other bits are reserved for now.
+ */
+#define OCFS2_FT_UNKNOWN 0
+#define OCFS2_FT_REG_FILE 1
+#define OCFS2_FT_DIR 2
+#define OCFS2_FT_CHRDEV 3
+#define OCFS2_FT_BLKDEV 4
+#define OCFS2_FT_FIFO 5
+#define OCFS2_FT_SOCK 6
+#define OCFS2_FT_SYMLINK 7
+
+#define OCFS2_FT_MAX 8
+
+/*
+ * OCFS2_DIR_PAD defines the directory entries boundaries
+ *
+ * NOTE: It must be a multiple of 4
+ */
+#define OCFS2_DIR_PAD 4
+#define OCFS2_DIR_ROUND (OCFS2_DIR_PAD - 1)
+#define OCFS2_DIR_REC_LEN(name_len) (((name_len) + 12 + \
+ OCFS2_DIR_ROUND) & \
+ ~OCFS2_DIR_ROUND)
+#define OCFS2_LINK_MAX 32000
+
+#define S_SHIFT 12
+/*
+ * Translation table from the file-type bits of an inode mode (the
+ * S_IFxxx value shifted right by S_SHIFT) to the OCFS2_FT_* directory
+ * entry type.  Slots that are not listed are zero-initialized, which is
+ * OCFS2_FT_UNKNOWN.
+ *
+ * Uses standard C99 designated initializers ("[index] = value") rather
+ * than the obsolete GNU "[index] value" spelling.
+ */
+static unsigned char ocfs_type_by_mode[S_IFMT >> S_SHIFT] = {
+	[S_IFREG >> S_SHIFT]	= OCFS2_FT_REG_FILE,
+	[S_IFDIR >> S_SHIFT]	= OCFS2_FT_DIR,
+	[S_IFCHR >> S_SHIFT]	= OCFS2_FT_CHRDEV,
+	[S_IFBLK >> S_SHIFT]	= OCFS2_FT_BLKDEV,
+	[S_IFIFO >> S_SHIFT]	= OCFS2_FT_FIFO,
+	[S_IFSOCK >> S_SHIFT]	= OCFS2_FT_SOCK,
+	[S_IFLNK >> S_SHIFT]	= OCFS2_FT_SYMLINK,
+};
+
+
+/*
+ * Convenience casts
+ */
+#define OCFS2_RAW_SB(dinode) (&((dinode)->id2.i_super))
+#define DISK_LOCK(dinode) (&((dinode)->i_disk_lock))
+#define LOCAL_ALLOC(dinode) (&((dinode)->id2.i_lab))
+
+/* TODO: change these? */
+#define OCFS2_NODE_CONFIG_HDR_SIGN "NODECFG"
+#define OCFS2_NODE_CONFIG_SIGN_LEN 8
+#define OCFS2_NODE_CONFIG_VER 2
+#define OCFS2_NODE_MIN_SUPPORTED_VER 2
+
+#define MAX_IP_ADDR_LEN 32
+#define HOSTID_LEN 20
+#define MACID_LEN 12
+#define GUID_LEN (HOSTID_LEN+MACID_LEN)
+#define MAX_NODE_NAME_LENGTH 32
+
+
+
+
+/*
+ * On disk extent record for OCFS2
+ * It describes a range of clusters on disk.
+ */
+typedef struct _ocfs2_extent_rec {
+/*00*/ __u32 e_cpos; /* Offset into the file, in clusters */
+ __u32 e_clusters; /* Clusters covered by this extent */
+ __u64 e_blkno; /* Physical disk offset, in blocks */
+/*10*/
+} ocfs2_extent_rec;
+
+/*
+ * On disk extent list for OCFS2 (node in the tree). Note that this
+ * is contained inside ocfs2_dinode or ocfs2_extent_block, so the
+ * offsets are relative to ocfs2_dinode.id2.i_list or
+ * ocfs2_extent_block.h_list, respectively.
+ */
+typedef struct _ocfs2_extent_list {
+/*00*/ __s16 l_tree_depth; /* Extent tree depth from this
+ point. -1 means data extents
+ hang directly off this
+ header (a leaf) */
+ __u16 l_count; /* Number of extent records */
+ __u16 l_next_free_rec; /* Next unused extent slot */
+ __u16 l_reserved1;
+ __u64 l_reserved2; /* Pad to
+ sizeof(ocfs2_extent_rec) */
+/*10*/ ocfs2_extent_rec l_recs[0]; /* Extent records */
+} ocfs2_extent_list;
+
+/*
+ * On disk extent block (indirect block) for OCFS2
+ */
+typedef struct _ocfs2_extent_block
+{
+/*00*/ __u8 h_signature[8]; /* Signature for verification */
+ __u64 h_suballoc_blkno; /* Node suballocator offset,
+ in blocks */
+/*10*/ __u16 h_suballoc_node; /* Node suballocator this
+ extent_header belongs to */
+ __u16 h_reserved1;
+ __u32 h_reserved2;
+ __u64 h_blkno; /* Offset on disk, in blocks */
+/*20*/ __u64 h_parent_blk; /* Offset on disk, in blocks,
+ of this block's parent in the
+ tree */
+ __u64 h_next_leaf_blk; /* Offset on disk, in blocks,
+ of next leaf header pointing
+ to data */
+/*30*/ ocfs2_extent_list h_list; /* Extent record list */
+/* Actual on-disk size is one block */
+} ocfs2_extent_block;
+
+/*
+ * On disk lock structure for OCFS2
+ */
+typedef struct _ocfs2_disk_lock
+{
+/*00*/ __u32 dl_master; /* Node number of current master */
+ __u8 dl_level; /* Lock level */
+ __u8 dl_reserved1[3]; /* Pad to u64 */
+ __u64 dl_seq_num; /* Lock transaction seqnum */
+/*10*/ __u32 dl_node_map[8]; /* Bitmap of interested nodes,
+ was __u32 */
+/*30*/
+} ocfs2_disk_lock;
+
+/*
+ * On disk superblock for OCFS2
+ * Note that it is contained inside an ocfs2_dinode, so all offsets
+ * are relative to the start of ocfs2_dinode.id2.
+ */
+typedef struct _ocfs2_super_block {
+/*00*/ __u16 s_major_rev_level;
+ __u16 s_minor_rev_level;
+ __u16 s_mnt_count;
+ __s16 s_max_mnt_count;
+ __u16 s_state; /* File system state */
+ __u16 s_errors; /* Behaviour when detecting errors */
+ __u32 s_checkinterval; /* Max time between checks */
+/*10*/ __u64 s_lastcheck; /* Time of last check */
+ __u32 s_creator_os; /* OS */
+ __u32 s_feature_compat; /* Compatible feature set */
+/*20*/ __u32 s_feature_incompat; /* Incompatible feature set */
+ __u32 s_feature_ro_compat; /* Readonly-compatible feature set */
+ __u64 s_root_blkno; /* Offset, in blocks, of root directory
+ dinode */
+/*30*/ __u64 s_system_dir_blkno; /* Offset, in blocks, of system
+ directory dinode */
+ __u32 s_blocksize_bits; /* Blocksize for this fs */
+ __u32 s_clustersize_bits; /* Clustersize for this fs */
+/*40*/ __u32 s_max_nodes; /* Max nodes in this cluster before
+ tunefs required */
+ __u32 s_reserved1;
+ __u64 s_reserved2;
+/*50*/ __u8 s_label[64]; /* Label for mounting, etc. */
+/*90*/ __u8 s_uuid[16]; /* Was vol_id */
+/*A0*/
+} ocfs2_super_block;
+
+/*
+ * Local allocation bitmap for OCFS2 nodes
+ * Note that it exists inside an ocfs2_dinode, so all offsets are
+ * relative to the start of ocfs2_dinode.id2.
+ */
+typedef struct _ocfs2_local_alloc
+{
+/*00*/ __u32 la_bm_off; /* Starting bit offset in main bitmap */
+ /* Do we want to use id1.bitmap1? */
+ __u16 la_bm_bits; /* Number of bits from main bitmap */
+ __u16 la_bits_set; /* Number of set bits */
+ __u16 la_size; /* Size of included bitmap, in bytes */
+ __u16 la_reserved1;
+ __u32 la_reserved2;
+/*10*/ __u8 la_bitmap[0];
+} ocfs2_local_alloc;
+
+/*
+ * On disk inode for OCFS2
+ *
+ * The hexadecimal markers give each field's byte offset from the start
+ * of the structure.  id2 is the type-specific tail: a superblock, a
+ * local allocation bitmap or an extent list.
+ */
+typedef struct _ocfs2_dinode {
+/*00*/ __u8 i_signature[8]; /* Signature for validation */
+ __u32 i_generation; /* Generation number */
+ __u16 i_reserved1;
+ __u16 i_suballoc_node; /* Node suballocator this inode
+ belongs to */
+/*10*/ __u64 i_suballoc_blkno; /* Node suballocator offset,
+ in blocks */
+/*18*/ ocfs2_disk_lock i_disk_lock; /* Lock structure */
+/*48*/ __u32 i_uid; /* Owner UID */
+ __u32 i_gid; /* Owning GID */
+/*50*/ __u64 i_size; /* Size in bytes */
+ __u16 i_mode; /* File mode */
+ __u16 i_links_count; /* Links count */
+ __u32 i_flags; /* File flags */
+/*60*/ __u64 i_atime; /* Access time */
+ __u64 i_ctime; /* Creation time */
+/*70*/ __u64 i_mtime; /* Modification time */
+ __u64 i_dtime; /* Deletion time */
+/*80*/ __u64 i_blkno; /* Offset on disk, in blocks */
+ __u32 i_clusters; /* Cluster count */
+ __u32 i_reserved2;
+/*90*/ __u64 i_last_eb_blk; /* Pointer to last extent
+ block */
+ __u64 i_reserved3;
+/*A0*/ __u64 i_reserved4;
+ __u64 i_reserved5;
+/*B0*/ __u64 i_reserved6;
+ union {
+ __u64 i_pad1; /* Generic way to refer to this 64bit
+ union */
+ struct {
+ __u64 i_rdev; /* Device number */
+ } dev1;
+ struct { /* Info for bitmap system inodes */
+ __u32 i_used; /* Bits (ie, clusters) used */
+ __u32 i_total; /* Total bits (clusters) available */
+ } bitmap1;
+ } id1; /* Inode type dependent 1 */
+/*C0*/ union {
+ ocfs2_super_block i_super;
+ ocfs2_local_alloc i_lab;
+ ocfs2_extent_list i_list;
+ } id2;
+/* Actual on-disk size is one block */
+} ocfs2_dinode;
+
+/*
+ * On-disk directory entry structure for OCFS2
+ */
+struct ocfs2_dir_entry {
+/*00*/ __u64 inode; /* Inode number */
+ __u16 rec_len; /* Directory entry length */
+ __u8 name_len; /* Name length */
+ __u8 file_type;
+/*0C*/ char name[OCFS2_MAX_FILENAME_LENGTH]; /* File name */
+/* Actual on-disk length specified by rec_len */
+};
+
+typedef struct _ocfs_ipc_config_info // CLASS
+{
+ __u8 type; // NUMBER RANGE(0, 255)
+ __u8 ip_addr[MAX_IP_ADDR_LEN+1]; // CHAR[MAX_IP_ADDR_LEN+1]
+ __u32 ip_port; // NUMBER RANGE(0,ULONG_MAX)
+ __u8 ip_mask[MAX_IP_ADDR_LEN+1]; // CHAR[MAX_IP_ADDR_LEN+1]
+}
+ocfs_ipc_config_info; // END CLASS
+/* TODO this structure will break in 64-bit.... need to pack */
+typedef union _ocfs_guid // CLASS
+{
+ struct
+ {
+ char host_id[HOSTID_LEN];
+ char mac_id[MACID_LEN];
+ } id;
+ __u8 guid[GUID_LEN]; // CHAR[GUID_LEN]
+}
+ocfs_guid; // END CLASS
+
+typedef struct _ocfs_node_config_info // CLASS
+{
+ ocfs2_disk_lock disk_lock; // DISKLOCK
+ __u8 node_name[MAX_NODE_NAME_LENGTH+1]; // CHAR[MAX_NODE_NAME_LENGTH+1]
+ ocfs_guid guid; // GUID
+ ocfs_ipc_config_info ipc_config; // IPCONFIG
+}
+ocfs_node_config_info; // END CLASS
+
+typedef struct _ocfs_node_config_hdr // CLASS
+{
+ ocfs2_disk_lock disk_lock; // DISKLOCK
+ __u8 signature[OCFS2_NODE_CONFIG_SIGN_LEN]; // CHAR[NODE_CONFIG_SIGN_LEN]
+ __u32 version; // NUMBER RANGE(0,ULONG_MAX)
+ __u32 num_nodes; // NUMBER RANGE(0,32)
+ __u32 last_node; // NUMBER RANGE(0,32)
+ __u32 onch_pad; // UNUSED
+ __u64 cfg_seq_num; // NUMBER RANGE(0,ULONG_LONG_MAX)
+}
+ocfs_node_config_hdr; // END CLASS
+
+
+#ifdef __KERNEL__
+/*
+ * Kernel variants: the block size is taken from the super_block.
+ */
+
+/* Extent records that fit in the extent list embedded in an inode block. */
+static inline int ocfs2_extent_recs_per_inode(struct super_block *sb)
+{
+	int room = sb->s_blocksize -
+		offsetof(struct _ocfs2_dinode, id2.i_list.l_recs);
+
+	return room / sizeof(struct _ocfs2_extent_rec);
+}
+
+/* Extent records that fit in the extent list of an extent block. */
+static inline int ocfs2_extent_recs_per_eb(struct super_block *sb)
+{
+	int room = sb->s_blocksize -
+		offsetof(struct _ocfs2_extent_block, h_list.l_recs);
+
+	return room / sizeof(struct _ocfs2_extent_rec);
+}
+
+/* Size of the local alloc bitmap; currently a fixed default. */
+static inline int ocfs2_local_alloc_size(struct super_block *sb)
+{
+	/*
+	 * Perhaps change one day when we want to be dynamic
+	 * based on sb->s_blocksize.
+	 */
+	return OCFS2_LOCAL_BITMAP_DEFAULT_SIZE;
+}
+#else
+/*
+ * Userspace variants: the block size is passed in directly, in bytes.
+ */
+
+/* Extent records that fit in the extent list embedded in an inode block. */
+static inline int ocfs2_extent_recs_per_inode(int blocksize)
+{
+	int room = blocksize -
+		offsetof(struct _ocfs2_dinode, id2.i_list.l_recs);
+
+	return room / sizeof(struct _ocfs2_extent_rec);
+}
+
+/* Extent records that fit in the extent list of an extent block. */
+static inline int ocfs2_extent_recs_per_eb(int blocksize)
+{
+	int room = blocksize -
+		offsetof(struct _ocfs2_extent_block, h_list.l_recs);
+
+	return room / sizeof(struct _ocfs2_extent_rec);
+}
+
+/* Size of the local alloc bitmap; blocksize is currently ignored. */
+static inline int ocfs2_local_alloc_size(int blocksize)
+{
+	return OCFS2_LOCAL_BITMAP_DEFAULT_SIZE;
+}
+#endif /* __KERNEL__ */
+
+#endif /* _OCFS2_FS_H */
Added: trunk/ocfs2/format/mkfs2.c
===================================================================
--- trunk/ocfs2/format/mkfs2.c 2004-06-13 23:54:25 UTC (rev 58)
+++ trunk/ocfs2/format/mkfs2.c 2004-06-14 00:33:51 UTC (rev 59)
@@ -0,0 +1,1217 @@
+/*
+ *
+ * this is a temporary version of mkfs.ocfs2 to get us through for now
+ *
+ */
+
+
+#define _LARGEFILE64_SOURCE
+#define __USE_ISOC99
+
+
+#include <errno.h>
+#include <stdio.h>
+#include <asm/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <fcntl.h>
+
+#include <asm/bitops.h>
+#include <sys/mman.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include <sys/ioctl.h>
+#include <linux/fs.h>
+#define _GNU_SOURCE
+#include <getopt.h>
+#include <ocfs2_fs.h>
+#include <ocfs1_fs_compat.h>
+
+typedef unsigned short kdev_t;
+
+#include "kernel-list.h"
+
+#include <signal.h>
+#include <libgen.h>
+
+#include <netinet/in.h>
+#include "kernel-jbd.h"
+
+
+extern char *optarg;
+extern int optind, opterr, optopt;
+extern void * memalign (size_t __alignment, size_t __size);
+
+#warning eeeek need to implement these
+#define cpu_to_le16(x) (x)
+#define cpu_to_le32(x) (x)
+#define cpu_to_le64(x) (x)
+#define le16_to_cpu(x) (x)
+#define le32_to_cpu(x) (x)
+#define le64_to_cpu(x) (x)
+
+
+#ifndef MAX
+#define MAX(a, b) (((a) > (b)) ? (a) : (b))
+#endif
+
+#define BITCOUNT(x) (((BX_(x)+(BX_(x)>>4)) & 0x0F0F0F0F) % 255)
+#define BX_(x) ((x) - (((x)>>1)&0x77777777) \
+ - (((x)>>2)&0x33333333) \
+ - (((x)>>3)&0x11111111))
+
+#define MKFS_FATAL(fmt, arg...) ({ fprintf(stderr, "ERROR at %s, %d: " fmt ". EXITING!!!\n", \
+ __FILE__, __LINE__, ##arg); \
+ exit(1); \
+ })
+#define MKFS_FATAL_STR(str) MKFS_FATAL(str, "")
+#define MKFS_WARN(fmt, arg...) fprintf(stderr, "WARNING at %s, %d: " fmt ".\n", \
+ __FILE__, __LINE__, ##arg)
+#define MKFS_WARN_STR(str) MKFS_WARN(str, "")
+
+
+
+#define MIN_RESERVED_TAIL_BLOCKS 8
+
+#define LEADING_SPACE_BLOCKS 2 // we will put special strings in the v1 header blocks
+#define SLOP_BLOCKS 0
+#define FILE_ENTRY_BLOCKS 8
+#define SUPERBLOCK_BLOCKS 1
+#define PUBLISH_BLOCKS(i,min) ((i) < (min) ? (min) : (i)) // at least min
+#define VOTE_BLOCKS(i,min) ((i) < (min) ? (min) : (i)) // at least min
+#define AUTOCONF_BLOCKS(i,min) ((2+4) + ((i) < (min) ? (min) : (i))) // at least min, plus the other 6
+#define NUM_LOCAL_SYSTEM_FILES 6
+
+#define MAGIC_SUPERBLOCK_BLOCK_NUMBER 2
+
+#define OCFS2_OS_LINUX 0
+#define OCFS2_OS_HURD 1
+#define OCFS2_OS_MASIX 2
+#define OCFS2_OS_FREEBSD 3
+#define OCFS2_OS_LITES 4
+
+#define OCFS2_DFL_MAX_MNT_COUNT 20 /* Allow 20 mounts */
+#define OCFS2_DFL_CHECKINTERVAL 0 /* Don't use interval check */
+
+
+enum {
+ sfi_journal,
+ sfi_bitmap,
+ sfi_alloc,
+ sfi_local_alloc,
+ sfi_other
+};
+
+typedef struct _system_file_info {
+ char *name;
+ int type;
+ int global;
+ int dir;
+
+} system_file_info;
+
+system_file_info system_files[] = {
+ { "global_bitmap", sfi_bitmap, 1, 0 },
+ { "global_inode_alloc", sfi_alloc, 1, 0 },
+ { "global_inode_alloc_bitmap", sfi_bitmap, 1, 0 },
+ { "autoconfig", sfi_other, 1, 0 },
+ { "publish", sfi_other, 1, 0 },
+ { "vote", sfi_other, 1, 0 },
+ { "orphan_dir", sfi_other, 1, 1 },
+ { "extent_alloc:%04d", sfi_alloc, 0, 0 },
+ { "extent_alloc_bitmap:%04d", sfi_bitmap, 0, 0 },
+ { "inode_alloc:%04d", sfi_alloc, 0, 0 },
+ { "inode_alloc_bitmap:%04d", sfi_bitmap, 0, 0 },
+ { "journal:%04d", sfi_journal, 0, 0 },
+ { "local_alloc:%04d", sfi_local_alloc, 0, 0 },
+};
+
+struct bitinfo {
+ __u32 used_bits;
+ __u32 total_bits;
+};
+
+typedef struct _system_file_disk_record
+{
+ __u64 fe_off;
+ __u64 extent_off;
+ __u64 extent_len;
+ __u64 file_size;
+ struct bitinfo bi;
+ int flags;
+ int links;
+ int dir;
+} system_file_disk_record;
+
+
+typedef struct _alloc_bm
+{
+ void *buf;
+ __u32 valid_bits;
+ __u32 unit;
+ __u32 unit_bits;
+ char *name;
+ __u64 fe_disk_off;
+ system_file_disk_record *bm_record;
+ system_file_disk_record *alloc_record;
+} alloc_bm;
+
+typedef struct _funky_dir
+{
+ __u64 disk_off;
+ __u64 disk_len;
+ void *buf;
+ int buf_len;
+ int last_off;
+ __u64 fe_disk_off;
+ int link_count;
+ system_file_disk_record *record;
+} funky_dir;
+
+alloc_bm * initialize_bitmap (__u32 bits, __u32 unit_bits, char *name,
+ system_file_disk_record *bm_rec,
+ system_file_disk_record *alloc_rec);
+void destroy_bitmap (alloc_bm *bm);
+int find_clear_bits (alloc_bm * bitmap, __u32 numBits, __u32 offset);
+int count_bits (alloc_bm * bitmap);
+int alloc_bytes_from_bitmap (__u64 bytes, alloc_bm *bm, __u64 *start, __u64 *num);
+int alloc_from_bitmap (__u64 numbits, alloc_bm *bm, __u64 *start, __u64 *num);
+__u64 alloc_inode (int numblocks);
+funky_dir * alloc_directory(void);
+void add_entry_to_directory(funky_dir *dir, char *name, __u64 ino, __u8 type);
+void adjust_volume_size(void);
+void map_device(void);
+void sync_device(void);
+void unmap_device(void);
+void init_format_time(void);
+void format_superblock(system_file_disk_record *rec, system_file_disk_record *root_rec, system_file_disk_record *sys_rec);
+void format_file(system_file_disk_record *rec);
+void write_bitmap_data(alloc_bm *bm);
+void write_directory_data(funky_dir *dir);
+void format_leading_space(__u64 start);
+void format_autoconf_publish_vote(__u64 autoconf_off, __u64 publish_off, __u64 vote_off,
+ __u64 data_off, __u64 data_len);
+void init_device(void);
+void init_globals(void);
+void usage(void);
+void process_args(int argc, char **argv);
+void generate_uuid(void);
+static inline __u32 blocks_needed(void);
+static inline __u32 system_dir_blocks_needed(void);
+void replacement_journal_create(__u64 journal_off);
+void adjust_autoconfig_publish_vote(system_file_disk_record *autoconfig_rec,
+ system_file_disk_record *publish_rec,
+ system_file_disk_record *vote_rec);
+void write_autoconfig_header(system_file_disk_record *rec);
+void init_record(system_file_disk_record *rec, int type, int dir);
+
+
+
+system_file_disk_record *record[NUM_SYSTEM_INODES];
+// these 4 do not live in the record[] array
+system_file_disk_record global_alloc_rec; // represents whole volume, not written to disk
+system_file_disk_record superblock_rec;
+system_file_disk_record root_dir_rec;
+system_file_disk_record system_dir_rec;
+
+
+__u32 pagesize_bits=0;
+__u32 blocksize_bits=0;
+__u32 cluster_size_bits=0;
+__u32 blocksize=0;
+__u32 cluster_size=0;
+__u64 volume_size_in_bytes=0;
+__u32 volume_size_in_clusters=0;
+__u64 volume_size_in_blocks=0;
+__u64 reserved_tail_size=0;
+__u32 compat_flags = 0;
+int initial_nodes=0;
+int fd=-1;
+void *mapping;
+char *dev_name = NULL;
+char *vol_label = NULL;
+char *uuid = NULL;
+gid_t default_gid = 0;
+mode_t default_mode = 0;
+uid_t default_uid = 0;
+alloc_bm *global_bm=NULL;
+alloc_bm *system_bm=NULL;
+char *progname = NULL;
+time_t format_time;
+
+
+
+/*
+ * Create an in-memory allocation bitmap.
+ *
+ * bits      - number of valid bits in the bitmap
+ * unit_bits - log2 of the number of bytes each bit stands for
+ * name      - name used in error messages; a private copy is kept
+ * bm_rec    - record of the bitmap file; its extent_len gives the size
+ *             of the bitmap buffer in bytes
+ * alloc_rec - record of the space the bitmap describes
+ *
+ * The buffer is zeroed (all bits clear).  Both records get their
+ * file_size filled in and fe_off reset to 0 for the caller to set later.
+ * Exits via MKFS_FATAL if memory cannot be allocated.
+ */
+alloc_bm * initialize_bitmap (__u32 bits, __u32 unit_bits, char *name,
+			      system_file_disk_record *bm_rec,
+			      system_file_disk_record *alloc_rec)
+{
+	alloc_bm *bitmap;
+	__u64 bitmap_len = bm_rec->extent_len;
+
+	bitmap = malloc(sizeof(alloc_bm));
+	if (bitmap == NULL)
+		MKFS_FATAL("could not allocate memory for %s\n", name);
+	memset(bitmap, 0, sizeof(alloc_bm));
+
+	bitmap->buf = memalign(blocksize, bitmap_len);
+	if (bitmap->buf == NULL)
+		MKFS_FATAL("could not allocate memory for %s\n", name);
+	memset(bitmap->buf, 0, bitmap_len);
+
+	bitmap->valid_bits = bits;
+	bitmap->unit_bits = unit_bits;
+	bitmap->unit = 1 << unit_bits;
+	bitmap->name = strdup(name);
+	if (bitmap->name == NULL)
+		MKFS_FATAL("could not allocate memory for %s\n", name);
+
+	bm_rec->file_size = bitmap_len;
+	bm_rec->fe_off = 0ULL; // set later
+	bm_rec->bi.used_bits = 0;
+	bm_rec->bi.total_bits = bits;
+
+	/* widen before shifting: bits << unit_bits can exceed 32 bits */
+	alloc_rec->file_size = ((__u64)bits) << unit_bits;
+	alloc_rec->fe_off = 0ULL; // set later
+
+	bitmap->bm_record = bm_rec;
+	bitmap->alloc_record = alloc_rec;
+
+	return bitmap;
+}
+
+/*
+ * Release a bitmap created by initialize_bitmap(): the name copy made
+ * with strdup(), the bitmap buffer and the descriptor itself.  The
+ * records the bitmap points at are not owned by it and are left alone.
+ * A NULL bitmap is ignored.
+ */
+void destroy_bitmap (alloc_bm *bm)
+{
+	if (bm == NULL)
+		return;
+
+	free(bm->name);
+	free(bm->buf);
+	free(bm);
+}
+
+
+/*
+ * Search the bitmap, starting at bit `offset', for a run of `numBits'
+ * consecutive clear bits.
+ *
+ * Returns the index of the first bit of the run, or -1 when no such run
+ * exists inside the valid part of the bitmap.  Callers in this file
+ * compare the result against (__u32)-1.
+ */
+int find_clear_bits (alloc_bm * bitmap, __u32 numBits, __u32 offset)
+{
+	__u32 next_zero, off, count, size, first_zero = -1;
+	void *buffer;
+
+	buffer = bitmap->buf;
+	size = bitmap->valid_bits;
+	count = 0;
+	off = offset;
+
+	/* keep going while enough bits remain to complete the run */
+	while ((size - off + count >= numBits) &&
+	       (next_zero = find_next_zero_bit (buffer, size, off)) != size) {
+		if (next_zero >= bitmap->valid_bits)
+			break;
+
+		if (next_zero != off) {
+			/* skipped over set bits: the run restarts here */
+			first_zero = next_zero;
+			off = next_zero + 1;
+			count = 0;
+		} else {
+			/* the run continues (or begins) at off */
+			off++;
+			if (count == 0)
+				first_zero = next_zero;
+		}
+
+		count++;
+
+		if (count == numBits)
+			goto bail;
+	}
+	first_zero = -1;
+
+ bail:
+	/* sanity check: a start index equal to valid_bits is out of range too */
+	if (first_zero != (__u32)-1 && first_zero >= bitmap->valid_bits) {
+		fprintf(stderr, "um... first_zero>=bitmap->valid_bits (%u >= %u)\n",
+			first_zero, bitmap->valid_bits);
+		first_zero = -1;
+	}
+	return first_zero;
+}
+
+/*
+ * Count the set bits among the first valid_bits bits of the bitmap.
+ *
+ * Whole bytes are counted with BITCOUNT; when valid_bits is not a
+ * multiple of 8 the remaining low bits of the following byte are
+ * counted as well instead of being dropped.
+ *
+ * NOTE(review): the tail handling assumes bit i of the bitmap is bit
+ * (i & 7) of byte (i >> 3), as on little-endian hosts - confirm for
+ * other byte orders.
+ */
+int count_bits (alloc_bm * bitmap)
+{
+	const __u8 *buffer = bitmap->buf;
+	__u32 whole_bytes = bitmap->valid_bits >> 3;
+	__u32 tail_bits = bitmap->valid_bits & 7;
+	__u32 count = 0, off;
+
+	for (off = 0; off < whole_bytes; off++) {
+		unsigned char tmp = buffer[off];
+
+		count += BITCOUNT (tmp);
+	}
+
+	if (tail_bits) {
+		unsigned char tmp = buffer[whole_bytes] & ((1U << tail_bits) - 1);
+
+		count += BITCOUNT (tmp);
+	}
+
+	return count;
+}
+
+
+/*
+ * Allocate enough bits from bm to cover `bytes' bytes, rounding up to
+ * the bitmap's unit.  The work is done by alloc_from_bitmap(), so
+ * *start and *num come back in bytes (to avoid any confusion) and a
+ * failed allocation ends the program with the same message.
+ *
+ * Always returns 0.
+ */
+int alloc_bytes_from_bitmap (__u64 bytes, alloc_bm *bm, __u64 *start, __u64 *num)
+{
+	__u64 numbits = (bytes + bm->unit - 1) >> bm->unit_bits;
+
+	return alloc_from_bitmap (numbits, bm, start, num);
+}
+
+/*
+ * Allocate `numbits' consecutive bits from bm.
+ *
+ * On success *start and *num hold the byte offset and byte length of
+ * the allocation (bit counts shifted by the bitmap's unit_bits), the
+ * matching range of the device mapping is zeroed, the bits are set and
+ * the bitmap record's used_bits is bumped.  Exits via MKFS_FATAL when
+ * no free run is found.
+ *
+ * Always returns 0.
+ */
+int alloc_from_bitmap (__u64 numbits, alloc_bm *bm, __u64 *start, __u64 *num)
+{
+	__u32 bit = find_clear_bits (bm, numbits, 0);
+	__u64 left;
+
+	if (bit == (__u32)-1)
+		MKFS_FATAL("could not allocate %llu bits from %s bitmap\n",
+			   numbits, bm->name);
+
+	*start = ((__u64)bit) << bm->unit_bits;
+	*num = ((__u64)numbits) << bm->unit_bits;
+	bm->bm_record->bi.used_bits += numbits;
+
+	/* clear the freshly allocated range on the device mapping */
+	memset(mapping + *start, 0, *num);
+
+	for (left = numbits; left != 0; left--)
+		set_bit (bit++, bm->buf);
+	return 0;
+}
+
+/*
+ * Allocate numblocks bits from the system bitmap (system_bm) and return
+ * the byte offset reported by alloc_from_bitmap().  The byte length of
+ * the allocation is discarded.
+ */
+__u64 alloc_inode (int numblocks)
+{
+ __u64 ret, num;
+ alloc_from_bitmap (numblocks, system_bm, &ret, &num);
+ return ret;
+}
+
+/*
+ * Allocate a zero-filled funky_dir descriptor.  Exits via
+ * MKFS_FATAL_STR when memory is exhausted, so the result is never NULL.
+ */
+funky_dir * alloc_directory(void)
+{
+	funky_dir *dir = calloc(1, sizeof(funky_dir));
+
+	if (dir == NULL)
+		MKFS_FATAL_STR("could not allocate memory for directory");
+	return dir;
+}
+
+/*
+ * Append an entry called `name', pointing at inode `ino' with file type
+ * `type', to the in-memory directory `dir'.
+ *
+ * Only the most recently added entry (at dir->last_off) is examined for
+ * free space: if it is unused and large enough, or has enough slack
+ * after its name, the new entry is placed there.  Otherwise the buffer
+ * is regrown by one block and the new entry takes the whole new block.
+ * dir->last_off is updated, and dir->record->links is incremented when
+ * the new entry is a directory.
+ *
+ * NOTE(review): name is copied with strcpy() and its length stored in a
+ * __u8; nothing here checks it against OCFS2_MAX_FILENAME_LENGTH.
+ */
+void add_entry_to_directory(funky_dir *dir, char *name, __u64 ino, __u8 type)
+{
+ struct ocfs2_dir_entry *de, *de1;
+ int new_rec_len;
+ void *newbuf, *p;
+ int newsize, reclen, reallen;
+
+ new_rec_len = OCFS2_DIR_REC_LEN(strlen(name));
+
+ if (dir->buf) {
+ de = (struct ocfs2_dir_entry *)(dir->buf + dir->last_off);
+ /* reclen: space the last entry owns; reallen: space it really needs */
+ reclen = le16_to_cpu(de->rec_len);
+ reallen = OCFS2_DIR_REC_LEN(de->name_len);
+
+ /* find an area with large enough reclen */
+ if ((le64_to_cpu(de->inode) == 0 && reclen >= new_rec_len) ||
+ (reclen >= reallen + new_rec_len)) {
+ if (le64_to_cpu(de->inode)) {
+ // move ahead just past the last entry
+ de1 = (struct ocfs2_dir_entry *) ((char *) de + reallen);
+ // set the next entry's rec_len to the rest of the block
+ de1->rec_len = cpu_to_le16(le16_to_cpu(de->rec_len) - reallen);
+ // shorten the last entry
+ de->rec_len = cpu_to_le16(reallen);
+ de = de1;
+ }
+ goto got_it;
+ }
+ /* no space, add more */
+ newsize = dir->record->file_size + blocksize; // add one block
+ } else
+ newsize = blocksize; // add one block
+
+ newbuf = memalign(blocksize, newsize);
+ if (newbuf == NULL)
+ MKFS_FATAL_STR("failed to grow directory");
+
+ if (dir->buf) {
+ /* keep the old contents and zero only the block just added */
+ memcpy(newbuf, dir->buf, dir->record->file_size);
+ free(dir->buf);
+ p = newbuf + dir->record->file_size;
+ memset(p, 0, blocksize);
+ } else {
+ p = newbuf;
+ memset(newbuf, 0, newsize);
+ }
+
+ dir->buf = newbuf;
+ dir->record->file_size = newsize;
+
+ /* the new block starts out as one unused entry spanning all of it */
+ de = (struct ocfs2_dir_entry *)p;
+ de->inode = 0;
+ de->rec_len = cpu_to_le16(blocksize);
+
+got_it:
+ de->name_len = strlen(name);
+ de->inode = cpu_to_le64(ino);
+ de->file_type = type;
+ strcpy(de->name, name);
+ dir->last_off = ((char *)de - (char *)dir->buf);
+ if (type == OCFS2_FT_DIR)
+ dir->record->links++;
+}
+
+
+
+#define SYSTEM_FILE_NAME_MAX 40
+
+/*
+ * Estimate of the number of blocks reserved for the initial layout.
+ */
+static inline __u32 blocks_needed(void)
+{
+	/*
+	 * Fixed part:
+	 *   leading space ???
+	 *   superblock
+	 *   file entries: global bm, system bm, system alloc, root inode,
+	 *                 system inode, autoconf, publish, vote
+	 *   slop ;-)
+	 */
+	__u32 num = LEADING_SPACE_BLOCKS + SUPERBLOCK_BLOCKS +
+		    FILE_ENTRY_BLOCKS + SLOP_BLOCKS;
+
+	/* autoconf, publish and vote sectors */
+	num += AUTOCONF_BLOCKS(initial_nodes, 32);
+	num += PUBLISH_BLOCKS(initial_nodes, 32);
+	num += VOTE_BLOCKS(initial_nodes, 32);
+
+	/*
+	 * (extent_alloc, extent_alloc_bitmap, inode_alloc,
+	 *  inode_alloc_bitmap, journal) x initial_nodes
+	 */
+	num += (initial_nodes * NUM_LOCAL_SYSTEM_FILES);
+
+	return num;
+}
+
+/*
+ * Space to reserve for the system directory, returned in clusters
+ * (the byte count is rounded up to a whole cluster).
+ *
+ * The directory is sized as if it had blocks_needed() entries, each with a
+ * name of SYSTEM_FILE_NAME_MAX bytes; blocks_needed() is way more than the
+ * number of filenames, so this is a generous upper bound.
+ */
+static inline __u32 system_dir_blocks_needed(void)
+{
+    int bytes_needed = 0;
+    int each = OCFS2_DIR_REC_LEN(SYSTEM_FILE_NAME_MAX);
+    int entries_per_block = blocksize / each;
+    __u32 dir_blocks;
+
+    /*
+     * Ceiling division.  The parentheses matter: without them
+     * "1 / entries_per_block" is evaluated first and the division
+     * never applies to the sum.
+     */
+    dir_blocks = (blocks_needed() + entries_per_block - 1) / entries_per_block;
+    bytes_needed = dir_blocks << blocksize_bits;
+    return (bytes_needed + cluster_size - 1) >> cluster_size_bits;
+}
+
+/*
+ * Shrink the volume size globals: take MIN_RESERVED_TAIL_BLOCKS off the end,
+ * round the remainder down to the largest of the page, block and cluster
+ * sizes, and record whatever was cut off in reserved_tail_size.
+ */
+void adjust_volume_size()
+{
+    __u64 usable;
+    __u32 align_bits;
+
+    /* coarsest of the three granularities */
+    align_bits = MAX(pagesize_bits, blocksize_bits);
+    align_bits = MAX(align_bits, cluster_size_bits);
+
+    /* drop the reserved tail, then clear the low align_bits bits */
+    usable = volume_size_in_bytes -
+        (MIN_RESERVED_TAIL_BLOCKS << blocksize_bits);
+    usable >>= align_bits;
+    usable <<= align_bits;
+
+    /* reserved_tail_size must be computed before the byte size is replaced */
+    reserved_tail_size = volume_size_in_bytes - usable;
+    volume_size_in_bytes = usable;
+    volume_size_in_blocks = usable >> blocksize_bits;
+    volume_size_in_clusters = usable >> cluster_size_bits;
+}
+
+/* the prototype must agree with the definition below (size_t, not __u32) */
+static inline size_t mmap_len(void);
+
+/*
+ * Length in bytes of the device mapping used by map_device(), sync_device()
+ * and unmap_device(): one default-sized journal per initial node plus
+ * 10 megabytes.  This is a total guess.
+ */
+static inline size_t mmap_len(void)
+{
+    size_t ret;
+
+    ret = initial_nodes * OCFS2_DEFAULT_JOURNAL_SIZE;
+    ret += (10 * ONE_MEGA_BYTE);
+    return ret;
+}
+
+/* Map the first mmap_len() bytes of the open device (fd) read/write and shared. */
+void map_device()
+{
+    const int prot = PROT_READ | PROT_WRITE;
+    const int flags = MAP_NORESERVE | MAP_SHARED;
+
+    mapping = mmap(NULL, mmap_len(), prot, flags, fd, 0);
+    if (mapping == MAP_FAILED)
+        MKFS_FATAL("could not mmap the device: %s", strerror(errno));
+}
+/* Synchronously flush the whole mapping back to the device. */
+void sync_device()
+{
+    int rc = msync(mapping, mmap_len(), MS_SYNC);
+
+    if (rc)
+        MKFS_FATAL_STR("could not sync the device");
+}
+
+/* Tear down the mapping created by map_device(). */
+void unmap_device()
+{
+    int rc = munmap(mapping, mmap_len());
+
+    if (rc)
+        MKFS_FATAL_STR("could not munmap the device");
+}
+
+
+/* Record the current wall-clock time in format_time. */
+void init_format_time()
+{
+    time_t now = time(NULL);
+
+    format_time = now;
+}
+
+
+
+/*
+ * Write the superblock inode into the mapping at rec->fe_off.
+ *
+ * rec      - record whose fe_off is the byte offset of the superblock
+ * root_rec - root directory record; its fe_off becomes s_root_blkno
+ * sys_rec  - system directory record; its fe_off becomes s_system_dir_blkno
+ *
+ * Many of the fields are unused for now, but they are at least initialised
+ * to sane values.
+ */
+void format_superblock(system_file_disk_record *rec, system_file_disk_record *root_rec, system_file_disk_record *sys_rec)
+{
+    __u64 super_off = rec->fe_off;
+    __u64 super_blkno = super_off >> blocksize_bits;
+    ocfs2_dinode *di = mapping + super_off;
+
+    memset(di, 0, blocksize);
+
+    /* generic inode header */
+    strcpy (di->i_signature, OCFS2_SUPER_BLOCK_SIGNATURE);
+    di->i_suballoc_node = cpu_to_le16((__u16)-1);
+    di->i_suballoc_blkno = cpu_to_le64(super_blkno);
+    di->i_blkno = cpu_to_le64(super_blkno);
+    di->i_flags = cpu_to_le32(OCFS2_VALID_FL | OCFS2_SYSTEM_FL | OCFS2_SUPER_BLOCK_FL);
+
+    /* timestamps: atime is unused, ctime doubles as s_wtime, mtime as s_mtime */
+    di->i_atime = 0;
+    di->i_ctime = cpu_to_le64(format_time);
+    di->i_mtime = cpu_to_le64(format_time);
+
+    /* superblock payload */
+    di->id2.i_super.s_major_rev_level = cpu_to_le16(OCFS2_MAJOR_REV_LEVEL);
+    di->id2.i_super.s_minor_rev_level = cpu_to_le16(OCFS2_MINOR_REV_LEVEL);
+    di->id2.i_super.s_root_blkno = cpu_to_le64(root_rec->fe_off >> blocksize_bits);
+    di->id2.i_super.s_system_dir_blkno = cpu_to_le64(sys_rec->fe_off >> blocksize_bits);
+    di->id2.i_super.s_mnt_count = 0;
+    di->id2.i_super.s_max_mnt_count = cpu_to_le16(OCFS2_DFL_MAX_MNT_COUNT);
+    di->id2.i_super.s_state = 0;
+    di->id2.i_super.s_errors = 0;
+    di->id2.i_super.s_lastcheck = cpu_to_le64(format_time);
+    di->id2.i_super.s_checkinterval = cpu_to_le32(OCFS2_DFL_CHECKINTERVAL);
+    di->id2.i_super.s_creator_os = cpu_to_le32(OCFS2_OS_LINUX);
+    di->id2.i_super.s_blocksize_bits = cpu_to_le32(blocksize_bits);
+    di->id2.i_super.s_clustersize_bits = cpu_to_le32(cluster_size_bits);
+    di->id2.i_super.s_max_nodes = cpu_to_le32(initial_nodes);
+
+    /* the label is checked against 63 bytes before it is copied */
+    if (strlen(vol_label) > 63)
+        MKFS_FATAL_STR("volume label > 63 bytes long");
+    strcpy(di->id2.i_super.s_label, vol_label);
+    memcpy(di->id2.i_super.s_uuid, uuid, 16);
+}
+
+
+/*
+ * Write the on-disk inode for one system file or directory into the mapping
+ * at rec->fe_off.  A record with a non-zero extent_len gets a single extent
+ * record; a record flagged OCFS2_BITMAP_FL also gets its bit counts.
+ */
+void format_file(system_file_disk_record *rec)
+{
+    ocfs2_dinode *di;
+    __u32 clusters;
+    int mode;
+
+    if (rec->dir)
+        mode = default_mode | S_IFDIR;
+    else
+        mode = default_mode | S_IFREG;
+
+    /* extent length rounded up to whole clusters */
+    clusters = (rec->extent_len + cluster_size - 1) >> cluster_size_bits;
+
+    di = mapping + rec->fe_off;
+    memset(di, 0, blocksize);
+
+    /* identity */
+    strcpy (di->i_signature, OCFS2_FILE_ENTRY_SIGNATURE);
+    di->i_generation = 0;
+    di->i_blkno = cpu_to_le64(rec->fe_off >> blocksize_bits);
+    di->i_suballoc_blkno = cpu_to_le64(rec->fe_off >> blocksize_bits);
+    di->i_suballoc_node = cpu_to_le16(-1);
+    di->i_flags = cpu_to_le32(rec->flags);
+
+    /* ownership, mode and size */
+    di->i_uid = cpu_to_le32(default_uid);
+    di->i_gid = cpu_to_le32(default_gid);
+    di->i_mode = cpu_to_le16(mode);
+    di->i_links_count = cpu_to_le16(rec->links);
+    di->i_size = cpu_to_le64(rec->file_size);
+    di->i_clusters = cpu_to_le32(clusters);
+
+    /* all three timestamps are the format time; no deletion time */
+    di->i_atime = di->i_ctime = di->i_mtime = cpu_to_le64(format_time);
+    di->i_dtime = 0;
+
+    /* extent list: empty, or one record covering the whole extent */
+    di->id2.i_list.l_tree_depth = cpu_to_le16(-1);
+    if (rec->extent_len) {
+        di->id2.i_list.l_next_free_rec = cpu_to_le16(1);
+        di->id2.i_list.l_recs[0].e_cpos = 0;
+        di->id2.i_list.l_recs[0].e_clusters = cpu_to_le32(clusters);
+        di->id2.i_list.l_recs[0].e_blkno = cpu_to_le64(rec->extent_off >> blocksize_bits);
+    } else {
+        di->id2.i_list.l_next_free_rec = cpu_to_le16(0);
+    }
+
+    if (rec->flags & OCFS2_BITMAP_FL) {
+        di->id1.bitmap1.i_used = cpu_to_le32(rec->bi.used_bits);
+        di->id1.bitmap1.i_total = cpu_to_le32(rec->bi.total_bits);
+    }
+}
+
+
+/*
+ * Copy a bitmap's in-memory buffer into its extent in the mapping: the whole
+ * extent is zeroed first, then file_size bytes of bm->buf are copied in.
+ */
+void write_bitmap_data(alloc_bm *bm)
+{
+    system_file_disk_record *rec = bm->bm_record;
+    void *dst = mapping + rec->extent_off;
+
+    memset(dst, 0, rec->extent_len);
+    memcpy(dst, bm->buf, rec->file_size);
+}
+
+/*
+ * Copy a directory's in-memory buffer into its extent in the mapping: the
+ * whole extent is zeroed first, then file_size bytes of dir->buf are copied in.
+ */
+void write_directory_data(funky_dir *dir)
+{
+    system_file_disk_record *rec = dir->record;
+    void *dst = mapping + rec->extent_off;
+
+    memset(dst, 0, rec->extent_len);
+    memcpy(dst, dir->buf, rec->file_size);
+}
+
+/*
+ * Stamp the two leading blocks at byte offset start: fill them with the byte
+ * value 2, then write a marker string into the ocfs1-style volume header at
+ * the start and into the ocfs1-style volume label 512 bytes further on.
+ */
+void format_leading_space(__u64 start)
+{
+    static const char marker[] = "this is an ocfs2 volume";
+    int num_blocks = 2;    /* 2 blocks were allocated */
+    char *base = mapping + start;
+    ocfs1_vol_disk_hdr *hdr = (ocfs1_vol_disk_hdr *)base;
+    ocfs1_vol_label *lbl = (ocfs1_vol_label *)(base + 512);
+
+    memset(base, 2, num_blocks << blocksize_bits);
+
+    strcpy(hdr->signature, marker);
+    strcpy(hdr->mount_point, marker);
+
+    strcpy(lbl->label, marker);
+    strcpy(lbl->cluster_name, marker);
+}
+
+/*
+ * Create an empty journal at byte offset journal_off in the mapping: zero
+ * OCFS2_DEFAULT_JOURNAL_SIZE bytes and write a small journal superblock
+ * header at the start.  All header fields are stored via htonl().
+ */
+void replacement_journal_create(__u64 journal_off)
+{
+    journal_superblock_t *jsb;
+
+    jsb = (journal_superblock_t *)(mapping + journal_off);
+    memset(jsb, 0, OCFS2_DEFAULT_JOURNAL_SIZE);
+
+    /* block header */
+    jsb->s_header.h_magic = htonl(JFS_MAGIC_NUMBER);
+    jsb->s_header.h_blocktype = htonl(JFS_SUPERBLOCK_V2);
+
+    /* geometry: length is counted in filesystem blocks */
+    jsb->s_blocksize = htonl(blocksize);
+    jsb->s_maxlen = htonl(OCFS2_DEFAULT_JOURNAL_SIZE >> blocksize_bits);
+
+    /* log position, sequence number and error state */
+    jsb->s_first = htonl(1);
+    jsb->s_start = htonl(1);
+    jsb->s_sequence = htonl(1);
+    jsb->s_errno = htonl(0);
+}
+
+/* Open dev_name read/write and store the descriptor in fd. */
+void init_device(void)
+{
+    fd = open(dev_name, O_RDWR);
+    if (fd == -1)
+        /* dev_name is a string, so it must be printed with %s, not %d */
+        MKFS_FATAL("could not open device %s for read/write", dev_name);
+}
+
+
+#define ONE_GB_SHIFT 30
+
+int initial_nodes_for_volume(__u64 size);
+
+/*
+ * This is just silly guesswork if the user does not provide a number for
+ * initial_nodes.  size is the volume size in bytes:
+ *
+ *   <1gb   ->  2 nodes
+ *   <8gb   ->  4 nodes
+ *   <64gb  ->  8 nodes
+ *   <512gb -> 16 nodes
+ *   512+gb -> 32 nodes
+ */
+int initial_nodes_for_volume(__u64 size)
+{
+    int shift = ONE_GB_SHIFT;
+    int nodes = 2;
+
+    /*
+     * Each step doubles the node count and multiplies the size threshold
+     * by 8.  size is only tested, never modified, so the shifts do not
+     * accumulate from one step to the next.
+     */
+    while (nodes < 32 && (size >> shift) != 0) {
+        nodes *= 2;
+        shift += 3;
+    }
+    return nodes;
+}
+
+/*
+ * Return log2(value) when value is an exact power of two between 2 and 2^32,
+ * otherwise 0.  The shift is done on a 64-bit constant so that shifting by
+ * 31 or 32 is well defined.
+ */
+static int size_to_bits(unsigned long long value)
+{
+    int bit;
+
+    for (bit = 1; bit <= 32; bit++) {
+        if ((1ULL << bit) == value)
+            return bit;
+    }
+    return 0;
+}
+
+/*
+ * Derive the size-related globals once the device is open:
+ * pagesize_bits, blocksize / blocksize_bits (sector size from the device
+ * unless set manually), cluster_size_bits, the volume size in bytes, blocks
+ * and clusters (device size unless set manually), and initial_nodes (guessed
+ * from the volume size unless set manually).  Any size that is not a power
+ * of two is fatal.
+ */
+void init_globals(void)
+{
+    size_t pagesize;
+    __u32 tmp;
+    unsigned long long tmp2;
+
+    pagesize = getpagesize();
+    pagesize_bits = size_to_bits(pagesize);
+    if (!pagesize_bits)
+        MKFS_FATAL("could not get pagesize_bits for pagesize %lu", (unsigned long)pagesize);
+
+    if (blocksize) {
+        printf("blocksize was set manually: %lu\n", blocksize);
+    } else {
+        if (ioctl(fd, BLKSSZGET, &tmp) == -1)
+            MKFS_FATAL_STR("could not get sector size for device");
+        blocksize = tmp;
+    }
+    blocksize_bits = size_to_bits(blocksize);
+    if (!blocksize_bits)
+        MKFS_FATAL("could not get blocksize_bits for blocksize %lu", blocksize);
+
+    cluster_size_bits = size_to_bits(cluster_size);
+    if (!cluster_size_bits)
+        MKFS_FATAL("could not get cluster_size_bits for cluster_size %lu", cluster_size);
+
+    /* these will be readjusted later */
+    tmp2 = lseek64(fd, 0, SEEK_END);
+    if (volume_size_in_bytes) {
+        printf("volume size was set manually: %llu, real size: %llu\n",
+               volume_size_in_bytes, tmp2);
+    } else {
+        volume_size_in_bytes = tmp2;
+    }
+    volume_size_in_clusters = volume_size_in_bytes >> cluster_size_bits;
+    volume_size_in_blocks = (volume_size_in_clusters << cluster_size_bits) >> blocksize_bits;
+    reserved_tail_size = 0;
+
+    if (initial_nodes) {
+        if (initial_nodes < 2 || initial_nodes > OCFS2_MAX_NODES)
+            MKFS_FATAL("initial_nodes given (%lu) out of range", initial_nodes);
+        printf("initial_nodes was set manually: %lu\n", initial_nodes);
+    } else {
+        initial_nodes = initial_nodes_for_volume(volume_size_in_bytes);
+        printf("using %lu for initial_nodes\n", initial_nodes);
+    }
+}
+
+/*
+ * Allocate MAX_VOL_ID_LENGTH bytes for the global uuid and fill them from
+ * /dev/urandom.
+ *
+ * NOTE(review): uuid is assigned unconditionally, so a value stored earlier
+ * by process_args() for --uuid is replaced (and not freed) - confirm whether
+ * the option is meant to be honoured.
+ */
+void generate_uuid(void)
+{
+    int randfd;
+    int readlen = 0;
+    int len;
+
+    randfd = open("/dev/urandom", O_RDONLY);
+    if (randfd == -1)
+        MKFS_FATAL("error opening /dev/urandom: %s", strerror(errno));
+
+    uuid = malloc(MAX_VOL_ID_LENGTH);
+    if (!uuid)
+        MKFS_FATAL_STR("could not allocate memory");
+
+    while (readlen < MAX_VOL_ID_LENGTH) {
+        len = read(randfd, uuid + readlen, MAX_VOL_ID_LENGTH - readlen);
+        if (len == -1) {
+            /* an interrupted read is simply retried */
+            if (errno == EINTR)
+                continue;
+            MKFS_FATAL("error reading from /dev/urandom: %s", strerror(errno));
+        }
+        /* a zero-length read would otherwise spin here forever */
+        if (len == 0)
+            MKFS_FATAL_STR("unexpected end of file reading /dev/urandom");
+        readlen += len;
+    }
+
+    close(randfd);
+}
+
+
+/* Print the command line synopsis to stderr and exit with status 1. */
+void usage(void)
+{
+    /* short options accepted: "b:c:v:C:n:g:u:m:d:l:U:" */
+    static const char *const text[] = {
+        "usage: mkfs2 [--blocksize=bytes] [--mode=##] [--uuid=id]\n",
+        " [--volumesize=bytes] [--compatflags=##]\n",
+        " [--nodes=##] [--gid=##] [--uid=##]\n",
+        " --clustersize=bytes --device=/dev/name\n",
+        " --label=\"volume label\"\n",
+        "\n",
+    };
+    size_t k;
+
+    for (k = 0; k < sizeof(text) / sizeof(text[0]); k++)
+        fputs(text[k], stderr);
+    exit(1);
+}
+
+/*
+ * Parse the command line into the option globals (blocksize, cluster_size,
+ * volume_size_in_bytes, compat_flags, initial_nodes, default_gid,
+ * default_uid, default_mode, dev_name, vol_label, uuid).
+ *
+ * A trailing non-option argument overrides --device.  A volume label, a
+ * device and a cluster size are mandatory; if any is missing a warning is
+ * printed and usage() is called.
+ */
+void process_args(int argc, char **argv)
+{
+    /* every option takes a value, --compatflags included */
+    static struct option long_options[] = {
+        {"blocksize", required_argument, 0, 'b'},
+        {"clustersize", required_argument, 0, 'c'},
+        {"volumesize", required_argument, 0, 'v'},
+        {"compatflags", required_argument, 0, 'C'},
+        {"nodes", required_argument, 0, 'n'},
+        {"gid", required_argument, 0, 'g'},
+        {"uid", required_argument, 0, 'u'},
+        {"mode", required_argument, 0, 'm'},
+        {"device", required_argument, 0, 'd'},
+        {"label", required_argument, 0, 'l'},
+        {"uuid", required_argument, 0, 'U'},
+        {0, 0, 0, 0}
+    };
+    int c;
+
+    while (1) {
+        /* 'C:' (compat flags) is distinct from 'c:' (cluster size) */
+        c = getopt_long (argc, argv, "b:c:v:C:n:g:u:m:d:l:U:", long_options, NULL);
+        if (c == -1)
+            break;
+
+        switch (c) {
+        case 'b':
+            blocksize = strtoul(optarg, NULL, 10);
+            break;
+        case 'c':
+            cluster_size = strtoul(optarg, NULL, 10);
+            break;
+        case 'v':
+            volume_size_in_bytes = strtoull(optarg, NULL, 10);
+            break;
+        case 'C':
+            compat_flags = strtoul(optarg, NULL, 10);
+            break;
+        case 'n':
+            initial_nodes = strtoul(optarg, NULL, 10);
+            break;
+        case 'g':
+            default_gid = strtoul(optarg, NULL, 10);
+            break;
+        case 'u':
+            default_uid = strtoul(optarg, NULL, 10);
+            break;
+        case 'm':
+            /* base 0: the mode may be given in octal, hex or decimal */
+            default_mode = strtoul(optarg, NULL, 0);
+            break;
+        case 'd':
+            dev_name = strdup(optarg);
+            break;
+        case 'l':
+            vol_label = strdup(optarg);
+            break;
+        case 'U':
+            uuid = strdup(optarg);
+            break;
+        case '?':
+        default:
+            usage();
+            break;
+        }
+    }
+
+    /* a positional argument wins over --device */
+    if (optind < argc) {
+        if (dev_name)
+            free(dev_name);
+        dev_name = strdup(argv[optind]);
+    }
+    if (!vol_label) {
+        MKFS_WARN_STR("you must give a volume label");
+        usage();
+    }
+    if (!dev_name) {
+        MKFS_WARN_STR("you must give a device name");
+        usage();
+    }
+    if (!cluster_size) {
+        MKFS_WARN_STR("you must give a cluster size");
+        usage();
+    }
+}
+
+/*
+ * Split the single extent currently held by autoconfig_rec into three
+ * back-to-back extents for autoconfig, publish and vote.
+ *
+ * Autoconf and publish get just enough (their *_BLOCKS(initial_nodes, 1)
+ * counts) and vote gets all the rest.  This way we can easily tune up to
+ * 32 nodes without having to move these, and still keep them contiguous
+ * all the time.  Each record's file_size is set equal to its extent_len.
+ */
+void adjust_autoconfig_publish_vote(system_file_disk_record *autoconfig_rec,
+                                    system_file_disk_record *publish_rec,
+                                    system_file_disk_record *vote_rec)
+{
+    /* the whole region was allocated to autoconfig, now divvy it up */
+    __u64 region_off = autoconfig_rec->extent_off;
+    __u64 region_len = autoconfig_rec->extent_len;
+    __u64 ablocks = AUTOCONF_BLOCKS(initial_nodes, 1);
+    __u64 pblocks = PUBLISH_BLOCKS(initial_nodes, 1);
+    __u64 vblocks = ((region_len >> blocksize_bits) - ablocks - pblocks);
+
+    autoconfig_rec->extent_off = region_off;
+    autoconfig_rec->extent_len = ablocks << blocksize_bits;
+    autoconfig_rec->file_size = autoconfig_rec->extent_len;
+
+    publish_rec->extent_off = autoconfig_rec->extent_off + autoconfig_rec->extent_len;
+    publish_rec->extent_len = pblocks << blocksize_bits;
+    publish_rec->file_size = publish_rec->extent_len;
+
+    vote_rec->extent_off = publish_rec->extent_off + publish_rec->extent_len;
+    vote_rec->extent_len = vblocks << blocksize_bits;
+    vote_rec->file_size = vote_rec->extent_len;
+}
+
+/*
+ * Write an empty node-configuration header into the first block of the
+ * autoconfig extent: signature and version set, no nodes, and the disk
+ * lock master set to -1.
+ */
+void write_autoconfig_header(system_file_disk_record *rec)
+{
+    ocfs_node_config_hdr *hdr = (mapping + rec->extent_off);
+
+    memset(hdr, 0, blocksize);
+
+    strcpy(hdr->signature, OCFS2_NODE_CONFIG_HDR_SIGN);
+    hdr->version = OCFS2_NODE_CONFIG_VER;
+
+    hdr->num_nodes = 0;
+    hdr->last_node = 0;
+    hdr->disk_lock.dl_master = -1;
+}
+/*
+ * Reset a system file record to its defaults.
+ *
+ * rec  - record to initialise (zeroed first)
+ * type - one of the sfi_* values; bitmap and local-alloc records get an
+ *        extra flag, the other types get none
+ * dir  - non-zero for a directory; directories start with 0 links,
+ *        everything else with 1
+ */
+void init_record(system_file_disk_record *rec, int type, int dir)
+{
+    memset(rec, 0, sizeof(system_file_disk_record));
+
+    rec->dir = dir;
+    rec->links = dir ? 0 : 1;
+    rec->bi.used_bits = rec->bi.total_bits = 0;
+    rec->flags = (OCFS2_VALID_FL | OCFS2_SYSTEM_FL);
+
+    switch (type) {
+    case sfi_bitmap:
+        rec->flags |= OCFS2_BITMAP_FL;
+        break;
+    case sfi_local_alloc:
+        rec->flags |= OCFS2_LOCAL_ALLOC_FL;
+        break;
+    case sfi_journal:
+    case sfi_alloc:
+    case sfi_other:
+        /* no type-specific flag is set for these */
+        break;
+    }
+}
+
+
+int main(int argc, char **argv) /* format a volume: parse args, lay everything out in the mapping, sync it to the device */
+{
+ __u64 allocated;
+ __u32 need;
+ char fname[SYSTEM_FILE_NAME_MAX];
+ int i, j, num;
+ __u64 leading_space;
+ funky_dir *orphan_dir;
+ funky_dir *root_dir;
+ funky_dir *system_dir;
+ system_file_disk_record *tmprec, *tmprec2, *tmprec3;
+
+ progname = strdup(argv[0]);
+ process_args(argc, argv);
+ init_format_time();
+ init_device();
+ init_globals();
+ adjust_volume_size();
+ map_device();
+ generate_uuid(); /* NOTE(review): assigns uuid unconditionally, replacing any value process_args stored for --uuid */
+
+ /*
+ * ALLOCATE STUFF
+ *
+ * Records are initialised here; most offsets are filled in further
+ * down as space is handed out.
+ */
+ // dummy record representing the whole volume
+ init_record(&global_alloc_rec, sfi_alloc, 0);
+ global_alloc_rec.extent_off = 0;
+ global_alloc_rec.extent_len = volume_size_in_bytes;
+
+ init_record(&superblock_rec, sfi_other, 0);
+ init_record(&root_dir_rec, sfi_other, 1);
+ init_record(&system_dir_rec, sfi_other, 1);
+
+ for (i=0; i<NUM_SYSTEM_INODES; i++) {
+ num = (system_files[i].global ? 1 : initial_nodes); /* one record for a global file, one per node otherwise */
+ record[i] = malloc(sizeof(system_file_disk_record) * num);
+ if (record[i] == NULL)
+ MKFS_FATAL_STR("could not allocate memory for system file disk records");
+ for (j=0; j < num; j++)
+ init_record(&record[i][j], system_files[i].type, system_files[i].dir);
+ }
+
+ root_dir = alloc_directory();
+ system_dir = alloc_directory();
+ orphan_dir = alloc_directory();
+
+ /*
+ * INITIALIZE BITMAPS
+ */
+
+ /* create an alloc_bm for the global bitmap and align bytes up to next whole cluster.
+ extent_off is not yet known, since it must be allocated from itself. */
+ need = (volume_size_in_clusters+7) >> 3; /* one bit per cluster, rounded up to whole bytes */
+ need = ((need + cluster_size - 1) >> cluster_size_bits) << cluster_size_bits;
+ tmprec = &(record[GLOBAL_BITMAP_SYSTEM_INODE][0]);
+ tmprec->extent_off = 0; // need to fill this in later
+ tmprec->extent_len = need;
+
+ global_bm = initialize_bitmap (volume_size_in_clusters, cluster_size_bits,
+ "global bitmap", tmprec, &global_alloc_rec);
+
+ /* assign some space from global_bm to system_bm for data and bitmap blocks */
+ tmprec = &(record[GLOBAL_INODE_ALLOC_SYSTEM_INODE][0]);
+ tmprec2 = &(record[GLOBAL_INODE_ALLOC_BITMAP_SYSTEM_INODE][0]);
+ need = blocks_needed();
+ alloc_bytes_from_bitmap (need << blocksize_bits, global_bm,
+ &(tmprec->extent_off), &(tmprec->extent_len));
+
+ need = ((((need+7) >> 3) + cluster_size - 1) >> cluster_size_bits) << cluster_size_bits; /* one bit per block, in bytes, rounded up to a whole cluster */
+ alloc_bytes_from_bitmap (need, global_bm, &(tmprec2->extent_off),
+ &(tmprec2->extent_len));
+
+ /* create an alloc_bm for the system inode bitmap */
+ system_bm = initialize_bitmap(tmprec->extent_len >> blocksize_bits, blocksize_bits,
+ "system inode bitmap", tmprec2, tmprec);
+
+
+ /*
+ * ALLOCATE INODES AND DIRECTORIES
+ *
+ * The order matters: the leading space must come back at byte 0 and
+ * the superblock at MAGIC_SUPERBLOCK_BLOCK_NUMBER, which the checks
+ * below enforce.
+ */
+
+ /* leading space */
+ leading_space = alloc_inode(LEADING_SPACE_BLOCKS);
+ if (leading_space != 0ULL)
+ MKFS_FATAL("leading space blocks start at byte %llu, must start at 0\n", leading_space);
+
+
+ /* superblock */
+ superblock_rec.fe_off = alloc_inode(SUPERBLOCK_BLOCKS);
+ if (superblock_rec.fe_off != MAGIC_SUPERBLOCK_BLOCK_NUMBER << blocksize_bits)
+ MKFS_FATAL("superblock starts at byte %llu, must start at %llu\n",
+ superblock_rec.fe_off, MAGIC_SUPERBLOCK_BLOCK_NUMBER << blocksize_bits);
+
+
+ /* root directory */
+ alloc_from_bitmap (1, global_bm, &root_dir_rec.extent_off, &root_dir_rec.extent_len);
+ root_dir_rec.fe_off = alloc_inode(1);
+ root_dir->record = &root_dir_rec;
+ add_entry_to_directory(root_dir, ".", root_dir_rec.extent_off, OCFS2_FT_DIR); /* NOTE(review): the inode value passed is extent_off, not fe_off - confirm intended */
+ add_entry_to_directory(root_dir, "..", root_dir_rec.extent_off, OCFS2_FT_DIR);
+
+
+ /* system directory */
+ need = system_dir_blocks_needed();
+ alloc_from_bitmap (need, global_bm, &system_dir_rec.extent_off, &system_dir_rec.extent_len);
+ system_dir_rec.fe_off = alloc_inode(1);
+ system_dir->record = &system_dir_rec;
+ add_entry_to_directory(system_dir, ".", system_dir_rec.extent_off, OCFS2_FT_DIR);
+ add_entry_to_directory(system_dir, "..", system_dir_rec.extent_off, OCFS2_FT_DIR);
+ /* alloc and add all local system file inodes to system directory */
+ for (i=0; i<NUM_SYSTEM_INODES; i++) {
+ num = (system_files[i].global) ? 1 : initial_nodes;
+ for (j=0; j < num; j++) {
+ record[i][j].fe_off = alloc_inode(1);
+ sprintf(fname, system_files[i].name, j); /* NOTE(review): unbounded sprintf into fname[SYSTEM_FILE_NAME_MAX]; relies on every expanded name fitting - TODO confirm */
+ add_entry_to_directory(system_dir, fname, record[i][j].fe_off,
+ system_files[i].dir ? OCFS2_FT_DIR : OCFS2_FT_REG_FILE);
+ }
+ }
+
+ /* autoconfig, publish, vote data */
+ /* XXX: ok this is messy ;-) */
+ /* give everything to autoconfig, then adjust it */
+ tmprec = &(record[AUTOCONFIG_SYSTEM_INODE][0]);
+ tmprec2 = &(record[PUBLISH_SYSTEM_INODE][0]);
+ tmprec3 = &(record[VOTE_SYSTEM_INODE][0]);
+ need = (AUTOCONF_BLOCKS(initial_nodes, 32) +
+ PUBLISH_BLOCKS(initial_nodes, 32) +
+ VOTE_BLOCKS(initial_nodes, 32));
+ tmprec->extent_off = alloc_inode(need);
+ tmprec->extent_len = need << blocksize_bits;
+ adjust_autoconfig_publish_vote(tmprec, tmprec2, tmprec3);
+
+
+ /* orphan dir */
+ tmprec = &record[ORPHAN_DIR_SYSTEM_INODE][0];
+ orphan_dir->record = tmprec;
+ alloc_from_bitmap (1, global_bm, &tmprec->extent_off, &tmprec->extent_len);
+ add_entry_to_directory(orphan_dir, ".", tmprec->extent_off, OCFS2_FT_DIR);
+ add_entry_to_directory(orphan_dir, "..", tmprec->extent_off, OCFS2_FT_DIR);
+
+
+ /* finally, allocate (extent_off) the space for the global bitmap from itself */
+ tmprec = global_bm->bm_record;
+ alloc_bytes_from_bitmap (tmprec->extent_len, global_bm,
+ &(tmprec->extent_off), &allocated);
+
+
+ /*
+ * FORMAT BLOCKS
+ *
+ * Journal extents are allocated from the global bitmap inside this
+ * loop, so the bitmaps are only written out afterwards.
+ */
+ format_leading_space(leading_space);
+ format_superblock(&superblock_rec, &root_dir_rec, &system_dir_rec);
+
+ format_file(&root_dir_rec);
+ format_file(&system_dir_rec);
+
+ for (i=0; i<NUM_SYSTEM_INODES; i++) {
+ num = (system_files[i].global ? 1 : initial_nodes);
+ for (j=0; j<num; j++) {
+ tmprec = &(record[i][j]);
+ if (system_files[i].type == sfi_journal) {
+ alloc_bytes_from_bitmap(OCFS2_DEFAULT_JOURNAL_SIZE, global_bm,
+ &(tmprec->extent_off), &(tmprec->extent_len));
+ replacement_journal_create(tmprec->extent_off);
+ tmprec->file_size = tmprec->extent_len;
+ }
+ format_file(tmprec);
+ }
+ }
+
+ /*
+ * WRITE BITMAPS
+ */
+ write_bitmap_data(global_bm);
+ write_bitmap_data(system_bm);
+
+ /*
+ * WRITE DIRECTORIES
+ */
+ write_directory_data(root_dir);
+ write_directory_data(system_dir);
+ write_directory_data(orphan_dir);
+
+ write_autoconfig_header(&record[AUTOCONFIG_SYSTEM_INODE][0]);
+ /*
+ * SYNC TO DISK
+ */
+ sync_device();
+ unmap_device();
+ close(fd);
+
+ return 0;
+}
+
+
+/*
+ * Print one line to stdout: the given program name followed by the build
+ * version, build date and build checksum.
+ */
+void version(char *progname)
+{
+    printf("%s %s %s (build %s)\n",
+           progname, OCFS2_BUILD_VERSION, OCFS2_BUILD_DATE, OCFS2_BUILD_MD5);
+} /* version */
+
+
More information about the Ocfs-tools-commits
mailing list