[Ocfs2-commits] jlbec commits r2303 - in branches/alloc_inode: . fs/ocfs2 fs/ocfs2/cluster fs/ocfs2/dlm

Fri May 20 22:32:26 CDT 2005

Author: jlbec
Date: 2005-05-20 22:32:25 -0500 (Fri, 20 May 2005)
New Revision: 2303

Modified:
   branches/alloc_inode/README
   branches/alloc_inode/configure.in
   branches/alloc_inode/fs/ocfs2/Makefile
   branches/alloc_inode/fs/ocfs2/alloc.c
   branches/alloc_inode/fs/ocfs2/alloc.h
   branches/alloc_inode/fs/ocfs2/cluster/heartbeat.c
   branches/alloc_inode/fs/ocfs2/cluster/heartbeat.h
   branches/alloc_inode/fs/ocfs2/cluster/tcp.c
   branches/alloc_inode/fs/ocfs2/dcache.c
   branches/alloc_inode/fs/ocfs2/dlm/dlmapi.h
   branches/alloc_inode/fs/ocfs2/dlm/dlmdebug.c
   branches/alloc_inode/fs/ocfs2/dlm/dlmunlock.c
   branches/alloc_inode/fs/ocfs2/dlmglue.c
   branches/alloc_inode/fs/ocfs2/dlmglue.h
   branches/alloc_inode/fs/ocfs2/file.c
   branches/alloc_inode/fs/ocfs2/heartbeat.c
   branches/alloc_inode/fs/ocfs2/heartbeat.h
   branches/alloc_inode/fs/ocfs2/inode.c
   branches/alloc_inode/fs/ocfs2/journal.c
   branches/alloc_inode/fs/ocfs2/mmap.c
   branches/alloc_inode/fs/ocfs2/namei.c
   branches/alloc_inode/fs/ocfs2/ocfs.h
   branches/alloc_inode/fs/ocfs2/ocfs2_fs.h
   branches/alloc_inode/fs/ocfs2/super.c
Log:

Merged 2267:2302 from trunk:
	- [2268] Proper formats for loff_t.
	- [2271] Bump versions.
	- [2272] Update README.
	- [2274] Remove workaround for broken applications.
	- [2275] Turn #warnings into comments.
	- [2279] Factor out oracore workarounds.
	- [2280] Fix oops on failed mount.
	- [2281] Remove incorrect comment.
	- [2282] Fix root inode refcount.
	- [2283] Fail mount instead of BUG() when on-disk clustersize
		 is bad.
	- [2284] Barrier support.
	- [2285] Fix LVB alignment.
	- [2287] Whitespace.
	- [2288] Fix net handler comparisons.
	- [2289] Remove unused code.
	- [2290] Better DLM error output.
	- [2292] Subsecond timestamps.
	- [2293] dlm_lock leaks.
	- [2294] Bump versions.
	- [2295] Truncate log coalescing.
	- [2300] Remove bogus assert.
	- [2301] Pack ocfs2_dir_entry.
	- [2302] Automatic flushing of the truncate log.



Modified: branches/alloc_inode/README
===================================================================

--- branches/alloc_inode/README	2005-05-20 22:02:32 UTC (rev 2302)
+++ branches/alloc_inode/README	2005-05-21 03:32:25 UTC (rev 2303)
@@ -1,92 +1,31 @@
-        OCFS Version 2 BETA 10
+OCFS2 filesystem
+==================
+OCFS2 is a shared disk cluster file system with many similarities to
+ext3. You'll want to install the tools in order to at least get
+"mount.ocfs2".
 
-*** WARNING WARNING WARNING ***
-This is BETA software. It should absolutely NOT be run on production
-systems.  If you are looking to run OCFS on a production system, check
-out OCFS version 1.
+Project web page:    http://oss.oracle.com/projects/ocfs2
+Tools web page:      http://oss.oracle.com/projects/ocfs2-tools
+OCFS2 mailing lists: http://oss.oracle.com/projects/ocfs2/mailman/
 
-WHAT IS OCFS2?
- OCFS2 is the latest version of the Oracle Cluster File System
- software. Where OCFS1 was designed specifically "for Oracle Database
- files", OCFS2 supports a shared ORACLE_HOME installation. New features 
- include:
+All code copyright 2005 Oracle except when otherwise noted.
 
- * Shared ORACLE_HOME
- * Improved performance of meta data operations (space allocation, 
-   locking, etc).
- * Improved meta data caching.
- * Improved data caching (for files such as oracle binaries, libraries, etc)
- * Network based DLM is used by default.
- * Improved journaling / node recovery - we now use the Linux Kernel "JBD" 
-   subsystem
- * Keep the same performance for Oracle data files as OCFS1.
- * CDSL for node specific files
- * POSIX compliance
+CREDITS:
+Lots of code taken from ext3 and other projects.
 
-INSTALLING OCFS2
- Step 1: Download and install OCFS2 and OCFS2-Tools
+Authors in alphabetical order:
+Joel Becker   <joel.becker at oracle.com>
+Zach Brown    <zach.brown at oracle.com>
+Mark Fasheh   <mark.fasheh at oracle.com>
+Kurt Hackel   <kurt.hackel at oracle.com>
+Sunil Mushran <sunil.mushran at oracle.com>
+Manish Singh  <manish.singh at oracle.com>
 
- To install you need to download the OCFS2 Beta RPMS for your system
- from the following location:
+Mount options
+=============
 
-    http://oss.oracle.com/projects/ocfs2/files/
+OCFS2 supports the following mount options:
+(*) == default
 
- You will need the module RPM for your kernel version and the
- ocfs2-support rpm for your distribution.
-
- Also, download the latest OCFS2-Tools rpm (version 0.99.4) from the
- following location:
-
-    http://oss.oracle.com/projects/ocfs2-tools/files/
-
- Install the packages using your package manager. The OCFS2 packages
- are designed to co-exist with an existing OCFS1 installation.
-
- Step 2: Configure a cluster
-
- The easiest way to do this is to use the GUI cluster configuration
- utility in ocfs2console.
-
- If you wish to do this by hand, see the file README.O2CB in the
- ocfs2-tools distribution.
-
- Step 3: Create a file system
-
- The example below assumes you want an OCFS2 file system on
- "/dev/sdb1" and are mounting it at "/ocfs2". Replace those values with
- your own.
-
- Creating a new OCFS2 file system is as easy as running the
- "mkfs.ocfs2" program. As a default it picks a block size of 4K and a
- cluster size of 4K and a file system which can contain up to 4
- nodes. You can specify your own options to override the
- defaults (please see the man page). If you're planning to run Oracle
- in a shared home configuration, we recommend you use a cluster size
- between 4K and 16K.
-
-    $ mkfs.ocfs2 /dev/sdb1
-
- Step 4: Mount the file system
-
-    $ mount -t ocfs2 /dev/sdb1 /ocfs2
-
-REPORTING BUGS
- To report OCFS2 bugs, please use bugzilla:
-
-    http://oss.oracle.com/bugzilla/buglist.cgi?product=OCFS2
-
- Please try to be as verbose in bug reports as possible.
-
-KNOWN ISSUES
- People upgrading from previous versions should upgrade all nodes before
- remounting the file system as some incompatible dlm fixes have gone
- in.
-
- Please see bugzilla for an up to date list of issues.
-
-MAILING LISTS
- Please join the OCFS2 mailing lists at: 
-
-    http://oss.oracle.com/projects/ocfs2/mailman/
-
-
+barrier=1		This enables/disables barriers. barrier=0 disables them,
+			barrier=1 enables them.

Modified: branches/alloc_inode/configure.in
===================================================================
--- branches/alloc_inode/configure.in	2005-05-20 22:02:32 UTC (rev 2302)
+++ branches/alloc_inode/configure.in	2005-05-21 03:32:25 UTC (rev 2303)
@@ -9,14 +9,14 @@
 # Adjust these for the software version.
 MAJOR_VERSION=0
 MINOR_VERSION=99
-MICRO_VERSION=8
-EXTRA_VERSION=BETA12
+MICRO_VERSION=9
+EXTRA_VERSION=BETA13
 
 # Adjust this only to bump the RPM packaging version
 RPM_VERSION=1
 
 # Required version of ocfs-tools
-TOOLS_REQUIRED_VERSION=0.99.6
+TOOLS_REQUIRED_VERSION=0.99.8
 
 DIST_VERSION=$MAJOR_VERSION.$MINOR_VERSION.$MICRO_VERSION
 VERSION=$DIST_VERSION-$EXTRA_VERSION

Modified: branches/alloc_inode/fs/ocfs2/Makefile
===================================================================
--- branches/alloc_inode/fs/ocfs2/Makefile	2005-05-20 22:02:32 UTC (rev 2302)
+++ branches/alloc_inode/fs/ocfs2/Makefile	2005-05-21 03:32:25 UTC (rev 2303)
@@ -21,7 +21,7 @@
 EXTRA_CFLAGS += -DDEBUG
 endif
 
-EXTRA_CFLAGS += -DCATCH_BH_JBD_RACES
+EXTRA_CFLAGS += -DCATCH_BH_JBD_RACES -DOCFS2_ORACORE_WORKAROUNDS
 
 ifdef JOURNAL_ACCESS_WITH_CREDITS
 EXTRA_CFLAGS += -DJOURNAL_ACCESS_WITH_CREDITS

Modified: branches/alloc_inode/fs/ocfs2/alloc.c
===================================================================
--- branches/alloc_inode/fs/ocfs2/alloc.c	2005-05-20 22:02:32 UTC (rev 2302)
+++ branches/alloc_inode/fs/ocfs2/alloc.c	2005-05-21 03:32:25 UTC (rev 2303)
@@ -871,6 +871,22 @@
 	return le16_to_cpu(tl->tl_used) == le16_to_cpu(tl->tl_count);
 }
 
+/* Nonzero iff new_start begins right where the newest record ends. */
+static int ocfs2_truncate_log_can_coalesce(ocfs2_truncate_log *tl,
+					   unsigned int new_start)
+{
+	unsigned int used = le16_to_cpu(tl->tl_used);
+
+	/* No records, nothing to coalesce */
+	if (!used)
+		return 0;
+
+	/* Records 0..used-1 are in use, so the newest one is
+	 * tl_recs[used - 1]; tl_recs[used] is the next free slot. */
+	return (le32_to_cpu(tl->tl_recs[used - 1].t_start) +
+		le32_to_cpu(tl->tl_recs[used - 1].t_clusters)) == new_start;
+}
+
 static int ocfs2_truncate_log_append(ocfs_super *osb,
 				     ocfs_journal_handle *handle,
 				     u64 start_blk,
@@ -919,11 +935,17 @@
 	     "%"MLFu64" (index = %d)\n", num_clusters, start_cluster,
 	     OCFS2_I(tl_inode)->ip_blkno, index);
 
-	/* TODO: Do we bother searching the truncate records for a
-	 * contiguous one and coalesce? */
-	tl->tl_recs[index].t_start = cpu_to_le32(start_cluster);
+	if (ocfs2_truncate_log_can_coalesce(tl, start_cluster)) {
+		index--; /* step back to the newest record and grow it */
+		num_clusters += le32_to_cpu(tl->tl_recs[index].t_clusters);
+		mlog(0, "Coalesce with index %u (start = %u, clusters = %u)\n",
+		     index, le32_to_cpu(tl->tl_recs[index].t_start),
+		     num_clusters);
+	} else {
+		tl->tl_recs[index].t_start = cpu_to_le32(start_cluster);
+		tl->tl_used = cpu_to_le16(index + 1);
+	}
 	tl->tl_recs[index].t_clusters = cpu_to_le32(num_clusters);
-	tl->tl_used = cpu_to_le16(index + 1);
 
 	status = ocfs_journal_dirty(handle, tl_bh);
 	if (status < 0) {
@@ -989,17 +1011,20 @@
 						    le32_to_cpu(rec.t_start));
 		num_clusters = le32_to_cpu(rec.t_clusters);
 
-		mlog(0, "free record %d, start = %u, clusters = %u\n", i,
-		     le32_to_cpu(rec.t_start), num_clusters);
+		/* if start_blk is not set, we ignore the record as
+		 * invalid. */
+		if (start_blk) {
+			mlog(0, "free record %d, start = %u, clusters = %u\n",
+			     i, le32_to_cpu(rec.t_start), num_clusters);
 
-		status = ocfs_free_clusters(handle, data_alloc_inode,
-					    data_alloc_bh, start_blk,
-					    num_clusters);
-		if (status < 0) {
-			mlog_errno(status);
-			goto bail;
+			status = ocfs_free_clusters(handle, data_alloc_inode,
+						    data_alloc_bh, start_blk,
+						    num_clusters);
+			if (status < 0) {
+				mlog_errno(status);
+				goto bail;
+			}
 		}
-
 		i--;
 	}
 
@@ -1009,7 +1034,7 @@
 }
 
 /* Expects you to already be holding tl_inode->i_sem */
-static int ocfs2_flush_truncate_log(ocfs_super *osb)
+static int __ocfs2_flush_truncate_log(ocfs_super *osb)
 {
 	int status;
 	unsigned int num_to_flush;
@@ -1086,6 +1111,47 @@
 	return status;
 }
 
+int ocfs2_flush_truncate_log(ocfs_super *osb)
+{
+	int status;
+	struct inode *tl_inode = osb->osb_tl_inode;
+
+	down(&tl_inode->i_sem);
+	status = __ocfs2_flush_truncate_log(osb);
+	up(&tl_inode->i_sem);
+
+	return status;
+}
+
+static void ocfs2_truncate_log_worker(void *data)
+{
+	int status;
+	ocfs_super *osb = data;
+
+	mlog_entry();
+
+	status = ocfs2_flush_truncate_log(osb);
+	if (status < 0)
+		mlog_errno(status);
+
+	mlog_exit(status);
+}
+
+#define OCFS2_TRUNCATE_LOG_FLUSH_INTERVAL (2 * HZ)
+void ocfs2_schedule_truncate_log_flush(ocfs_super *osb,
+				       int cancel)
+{
+	if (osb->osb_tl_inode) {
+		/* We want to push off log flushes while truncates are
+		 * still running. */
+		if (cancel)
+			cancel_delayed_work(&osb->osb_truncate_log_wq);
+
+		schedule_delayed_work(&osb->osb_truncate_log_wq,
+				      OCFS2_TRUNCATE_LOG_FLUSH_INTERVAL);
+	}
+}
+
 static int ocfs2_get_truncate_log_info(ocfs_super *osb,
 				       int slot_num,
 				       struct inode **tl_inode,
@@ -1214,7 +1280,7 @@
 	down(&tl_inode->i_sem);
 	for(i = 0; i < num_recs; i++) {
 		if (ocfs2_truncate_log_needs_flush(osb)) {
-			status = ocfs2_flush_truncate_log(osb);
+			status = __ocfs2_flush_truncate_log(osb);
 			if (status < 0) {
 				mlog_errno(status);
 				goto bail_up;
@@ -1256,20 +1322,18 @@
 
 	mlog_entry();
 
-	if (!tl_inode)
-		return;
+	if (tl_inode) {
+		cancel_delayed_work(&osb->osb_truncate_log_wq);
+		flush_scheduled_work();
 
-	down(&tl_inode->i_sem);
+		status = ocfs2_flush_truncate_log(osb);
+		if (status < 0)
+			mlog_errno(status);
 
-	status = ocfs2_flush_truncate_log(osb);
-	if (status)
-		mlog_errno(status);
+		brelse(osb->osb_tl_bh);
+		iput(osb->osb_tl_inode);
+	}
 
-	up(&tl_inode->i_sem);
-
-	brelse(osb->osb_tl_bh);
-	iput(osb->osb_tl_inode);
-
 	mlog_exit_void();
 }
 
@@ -1291,8 +1355,9 @@
 	/* ocfs2_truncate_log_shutdown keys on the existence of
 	 * osb->osb_tl_inode so we don't set any of the osb variables
 	 * until we're sure all is well. */
+	INIT_WORK(&osb->osb_truncate_log_wq, ocfs2_truncate_log_worker, osb);
+	osb->osb_tl_bh    = tl_bh;
 	osb->osb_tl_inode = tl_inode;
-	osb->osb_tl_bh    = tl_bh;
 
 	mlog_exit(status);
 	return status;
@@ -1423,6 +1488,7 @@
 	spin_unlock(&OCFS2_I(inode)->ip_lock);
 	fe->i_clusters -= clusters_to_del;
 	fe->i_mtime = CURRENT_TIME.tv_sec;
+	fe->i_mtime_nsec = cpu_to_le32(CURRENT_TIME.tv_nsec);
 
 	i = el->l_next_free_rec - 1;
 
@@ -1671,7 +1737,7 @@
 	 * record is free for use. If there isn't any, we flush to get
 	 * an empty truncate log.  */
 	if (ocfs2_truncate_log_needs_flush(osb)) {
-		status = ocfs2_flush_truncate_log(osb);
+		status = __ocfs2_flush_truncate_log(osb);
 		if (status < 0) {
 			mlog_errno(status);
 			goto bail;
@@ -1719,6 +1785,8 @@
 bail:
 	up_write(&OCFS_I(inode)->ip_alloc_sem);
 
+	ocfs2_schedule_truncate_log_flush(osb, 1);
+
 	if (tl_sem)
 		up(&tl_inode->i_sem);
 

Modified: branches/alloc_inode/fs/ocfs2/alloc.h
===================================================================
--- branches/alloc_inode/fs/ocfs2/alloc.h	2005-05-20 22:02:32 UTC (rev 2302)
+++ branches/alloc_inode/fs/ocfs2/alloc.h	2005-05-21 03:32:25 UTC (rev 2303)
@@ -53,6 +53,9 @@
 
 int ocfs2_truncate_log_init(ocfs_super *osb);
 void ocfs2_truncate_log_shutdown(ocfs_super *osb);
+void ocfs2_schedule_truncate_log_flush(ocfs_super *osb,
+				       int cancel);
+int ocfs2_flush_truncate_log(ocfs_super *osb);
 int ocfs2_begin_truncate_log_recovery(ocfs_super *osb,
 				      int slot_num,
 				      ocfs2_dinode **tl_copy);

Modified: branches/alloc_inode/fs/ocfs2/cluster/heartbeat.c
===================================================================
--- branches/alloc_inode/fs/ocfs2/cluster/heartbeat.c	2005-05-20 22:02:32 UTC (rev 2302)
+++ branches/alloc_inode/fs/ocfs2/cluster/heartbeat.c	2005-05-21 03:32:25 UTC (rev 2303)
@@ -1266,6 +1266,7 @@
 	hc->hc_data = data;
 	hc->hc_priority = priority;
 	hc->hc_type = type;
+	hc->hc_magic = HB_CB_MAGIC;
 }
 EXPORT_SYMBOL(hb_setup_callback);
 
@@ -1275,6 +1276,7 @@
 	struct list_head *iter;
 	struct hb_callback *hbcall;
 
+	BUG_ON(hc->hc_magic != HB_CB_MAGIC);
 	BUG_ON(!list_empty(&hc->hc_item));
 
 	hbcall = hbcall_from_type(hc->hc_type);
@@ -1301,8 +1303,11 @@
 
 int hb_unregister_callback(struct hb_callback_func *hc)
 {
-	BUG_ON(list_empty(&hc->hc_item));
+	BUG_ON(hc->hc_magic != HB_CB_MAGIC);
 
+	if (list_empty(&hc->hc_item))
+		return 0;
+
 	down_write(&hb_callback_sem);
 
 	list_del_init(&hc->hc_item);

Modified: branches/alloc_inode/fs/ocfs2/cluster/heartbeat.h
===================================================================
--- branches/alloc_inode/fs/ocfs2/cluster/heartbeat.h	2005-05-20 22:02:32 UTC (rev 2302)
+++ branches/alloc_inode/fs/ocfs2/cluster/heartbeat.h	2005-05-21 03:32:25 UTC (rev 2303)
@@ -29,6 +29,8 @@
 
 #include "ocfs2_heartbeat.h"
 
+#define HB_CB_MAGIC		0x51d1e4ec
+
 /* callback stuff */
 enum hb_callback_type {
 	HB_NODE_DOWN_CB = 0,
@@ -40,6 +42,7 @@
 typedef void (hb_cb_func)(struct nm_node *, int, void *);
 
 struct hb_callback_func {
+	u32			hc_magic;
 	struct list_head	hc_item;
 	hb_cb_func		*hc_func;
 	void			*hc_data;

Modified: branches/alloc_inode/fs/ocfs2/cluster/tcp.c
===================================================================
--- branches/alloc_inode/fs/ocfs2/cluster/tcp.c	2005-05-20 22:02:32 UTC (rev 2302)
+++ branches/alloc_inode/fs/ocfs2/cluster/tcp.c	2005-05-21 03:32:25 UTC (rev 2303)
@@ -472,13 +472,14 @@
 	return 0;
 }
 
-static int net_handler_cmp(struct net_msg_handler *nmh,
-			   u32 msg_type, u32 key)
+static int net_handler_cmp(struct net_msg_handler *nmh, u32 msg_type, u32 key)
 {
-	if (nmh->nh_msg_type != msg_type)
-		return nmh->nh_msg_type - msg_type;
+	int ret = memcmp(&nmh->nh_key, &key, sizeof(key));
 
-	return nmh->nh_key - key;
+	if (ret == 0)
+		ret = memcmp(&nmh->nh_msg_type, &msg_type, sizeof(msg_type));
+
+	return ret;
 }
 
 static struct net_msg_handler *

Modified: branches/alloc_inode/fs/ocfs2/dcache.c
===================================================================
--- branches/alloc_inode/fs/ocfs2/dcache.c	2005-05-20 22:02:32 UTC (rev 2302)
+++ branches/alloc_inode/fs/ocfs2/dcache.c	2005-05-21 03:32:25 UTC (rev 2303)
@@ -69,7 +69,7 @@
 	}
 	spin_unlock(&OCFS2_I(inode)->ip_lock);
 
-#warning "should we do this for all files?"
+	/* XXX: Should we do this for all files? */
 	if (S_ISDIR(inode->i_mode) && (!inode->i_nlink)) {
 		mlog(0, "dir inode (%"MLFu64") orphaned, returning false\n",
 		     OCFS2_I(inode)->ip_blkno);

Modified: branches/alloc_inode/fs/ocfs2/dlm/dlmapi.h
===================================================================
--- branches/alloc_inode/fs/ocfs2/dlm/dlmapi.h	2005-05-20 22:02:32 UTC (rev 2302)
+++ branches/alloc_inode/fs/ocfs2/dlm/dlmapi.h	2005-05-21 03:32:25 UTC (rev 2303)
@@ -185,4 +185,6 @@
 
 void dlm_unregister_domain(dlm_ctxt *dlm);
 
+void dlm_print_one_lock(dlm_lock *lockid);
+
 #endif /* DLMAPI_H */

Modified: branches/alloc_inode/fs/ocfs2/dlm/dlmdebug.c
===================================================================
--- branches/alloc_inode/fs/ocfs2/dlm/dlmdebug.c	2005-05-20 22:02:32 UTC (rev 2302)
+++ branches/alloc_inode/fs/ocfs2/dlm/dlmdebug.c	2005-05-21 03:32:25 UTC (rev 2303)
@@ -124,11 +124,58 @@
 	spin_unlock(&dlm_domain_lock);
 }
 
+static void dlm_print_one_lock_resource(dlm_lock_resource *res)
+{
+	struct list_head *iter2;
+	dlm_lock *lock;
+
+	printk("lockres: %.*s, owner=%u, state=%u\n", 
+	       res->lockname.len, res->lockname.name, 
+	       res->owner, res->state);
+	spin_lock(&res->spinlock);
+	printk("  granted queue: \n");
+	list_for_each(iter2, &res->granted) {
+		lock = list_entry(iter2, dlm_lock, list);
+		spin_lock(&lock->spinlock);
+		printk("    type=%d, conv=%d, node=%u, " 
+		       "cookie=%"MLFu64"\n", lock->ml.type, 
+		       lock->ml.convert_type, lock->ml.node, 
+		       lock->ml.cookie);
+		spin_unlock(&lock->spinlock);
+	}
+	printk("  converting queue: \n");
+	list_for_each(iter2, &res->converting) {
+		lock = list_entry(iter2, dlm_lock, list);
+		spin_lock(&lock->spinlock);
+		printk("    type=%d, conv=%d, node=%u, " 
+		       "cookie=%"MLFu64"\n", lock->ml.type, 
+		       lock->ml.convert_type, lock->ml.node, 
+		       lock->ml.cookie);
+		spin_unlock(&lock->spinlock);
+	}
+	printk("  blocked queue: \n");
+	list_for_each(iter2, &res->blocked) {
+		lock = list_entry(iter2, dlm_lock, list);
+		spin_lock(&lock->spinlock);
+		printk("    type=%d, conv=%d, node=%u, " 
+		       "cookie=%"MLFu64"\n", lock->ml.type, 
+		       lock->ml.convert_type, lock->ml.node, 
+		       lock->ml.cookie);
+		spin_unlock(&lock->spinlock);
+	}
+	spin_unlock(&res->spinlock);
+}
+
+void dlm_print_one_lock(dlm_lock *lockid)
+{
+	dlm_print_one_lock_resource(lockid->lockres);
+}
+EXPORT_SYMBOL(dlm_print_one_lock);
+
 void dlm_dump_lock_resources(dlm_ctxt *dlm)
 {
 	dlm_lock_resource *res;
-	dlm_lock *lock;
-	struct list_head *iter, *iter2;
+	struct list_head *iter;
 	struct list_head *bucket;
 	int i;
 
@@ -145,41 +192,7 @@
 		bucket = &(dlm->resources[i]);
 		list_for_each(iter, bucket) {
 			res = list_entry(iter, dlm_lock_resource, list);
-			printk("lockres: %.*s, owner=%u, state=%u\n", 
-			       res->lockname.len, res->lockname.name, 
-			       res->owner, res->state);
-			spin_lock(&res->spinlock);
-			printk("  granted queue: \n");
-			list_for_each(iter2, &res->granted) {
-				lock = list_entry(iter2, dlm_lock, list);
-				spin_lock(&lock->spinlock);
-				printk("    type=%d, conv=%d, node=%u, " 
-				       "cookie=%"MLFu64"\n", lock->ml.type, 
-				       lock->ml.convert_type, lock->ml.node, 
-				       lock->ml.cookie);
-				spin_unlock(&lock->spinlock);
-			}
-			printk("  converting queue: \n");
-			list_for_each(iter2, &res->converting) {
-				lock = list_entry(iter2, dlm_lock, list);
-				spin_lock(&lock->spinlock);
-				printk("    type=%d, conv=%d, node=%u, " 
-				       "cookie=%"MLFu64"\n", lock->ml.type, 
-				       lock->ml.convert_type, lock->ml.node, 
-				       lock->ml.cookie);
-				spin_unlock(&lock->spinlock);
-			}
-			printk("  blocked queue: \n");
-			list_for_each(iter2, &res->blocked) {
-				lock = list_entry(iter2, dlm_lock, list);
-				spin_lock(&lock->spinlock);
-				printk("    type=%d, conv=%d, node=%u, " 
-				       "cookie=%"MLFu64"\n", lock->ml.type, 
-				       lock->ml.convert_type, lock->ml.node, 
-				       lock->ml.cookie);
-				spin_unlock(&lock->spinlock);
-			}
-			spin_unlock(&res->spinlock);
+			dlm_print_one_lock_resource(res);
 		}
 	}
 	spin_unlock(&dlm->spinlock);

Modified: branches/alloc_inode/fs/ocfs2/dlm/dlmunlock.c
===================================================================
--- branches/alloc_inode/fs/ocfs2/dlm/dlmunlock.c	2005-05-20 22:02:32 UTC (rev 2302)
+++ branches/alloc_inode/fs/ocfs2/dlm/dlmunlock.c	2005-05-21 03:32:25 UTC (rev 2303)
@@ -195,14 +195,13 @@
 	spin_unlock(&res->spinlock);
 	wake_up(&res->wq);
 
-	/* let the caller's final dlm_lock_put handle the actual kfree
-	 * NOTE: this silly block and the FREE_LOCK constant
-	 * can go once the lock refcounting stuff is tested */
+	/* let the caller's final dlm_lock_put handle the actual kfree */
 	if (actions & DLM_UNLOCK_FREE_LOCK) {
 		/* this should always be coupled with list removal */
 		DLM_ASSERT(actions & DLM_UNLOCK_REMOVE_LOCK);
 		mlog(0, "lock %"MLFu64" should be gone now! refs=%d\n",
-		     lock->ml.cookie, atomic_read(&lock->lock_refs.refcount));
+		     lock->ml.cookie, atomic_read(&lock->lock_refs.refcount)-1);
+		dlm_lock_put(lock);
 	}
 	if (actions & DLM_UNLOCK_CALL_AST)
 		*call_ast = 1;
@@ -448,8 +447,7 @@
 		/* cancel this outright */
 		lksb->status = DLM_NORMAL;
 		status = DLM_NORMAL;
-		*actions = (DLM_UNLOCK_FREE_LOCK |
-			    DLM_UNLOCK_CALL_AST |
+		*actions = (DLM_UNLOCK_CALL_AST |
 			    DLM_UNLOCK_REMOVE_LOCK);
 	} else if (dlm_lock_on_list(&res->converting, lock)) {
 		/* cancel the request, put back on granted */

Modified: branches/alloc_inode/fs/ocfs2/dlmglue.c
===================================================================
--- branches/alloc_inode/fs/ocfs2/dlmglue.c	2005-05-20 22:02:32 UTC (rev 2302)
+++ branches/alloc_inode/fs/ocfs2/dlmglue.c	2005-05-21 03:32:25 UTC (rev 2303)
@@ -828,7 +828,8 @@
 			 lockres,
 			 lockres->l_ops->bast);
 	if (status != DLM_NORMAL) {
-		mlog(ML_ERROR, "Dlm returns %d\n", status);
+		mlog(ML_ERROR, "Dlm returns %d for lock %s\n", status,
+		     lockres->l_name);
 		ret = -ENOENT;
 		ocfs2_recover_from_dlm_error(lockres, 1);
 	}
@@ -1013,7 +1014,9 @@
 			    (status == DLM_NOTQUEUED))
 				ret = -EAGAIN;
 			else {
-				mlog(ML_ERROR, "Dlm returns %d\n", status);
+				mlog(ML_ERROR,
+				     "Dlm returns %d for lock %s\n",
+				     status, lockres->l_name);
 				ret = -ENOENT;
 			}
 			ocfs2_recover_from_dlm_error(lockres, 1);
@@ -1196,6 +1199,22 @@
 	mlog_exit_void();
 }
 
+#define OCFS2_SEC_BITS   34
+#define OCFS2_SEC_SHIFT  (64 - OCFS2_SEC_BITS)
+#define OCFS2_NSEC_MASK  ((1ULL << OCFS2_SEC_SHIFT) - 1)
+
+/* LVB only has room for 64 bits of time here so we pack it for
+ * now: seconds go in the upper OCFS2_SEC_BITS bits, nanoseconds in
+ * the lower OCFS2_SEC_SHIFT (30) bits. A normalized tv_nsec is below
+ * 10^9 < 2^30, so it fits under OCFS2_NSEC_MASK. */
+static u64 ocfs2_pack_timespec(struct timespec *spec)
+{
+	u64 sec = spec->tv_sec;
+	u32 nsec = spec->tv_nsec;
+
+	return (sec << OCFS2_SEC_SHIFT) | (nsec & OCFS2_NSEC_MASK);
+}
+
 /* Call this with the lockres locked. I am reasonably sure we don't
  * need ip_lock in this function as anyone who would be changing those
  * values is supposed to be blocked in ocfs2_meta_lock right now.
@@ -1214,13 +1233,20 @@
 	lvb->lvb_isize     = cpu_to_be64(i_size_read(inode));
 	lvb->lvb_imode     = cpu_to_be16(inode->i_mode);
 	lvb->lvb_inlink    = cpu_to_be16(inode->i_nlink);
-	lvb->lvb_iatime    = cpu_to_be64(inode->i_atime.tv_sec);
-	lvb->lvb_ictime    = cpu_to_be64(inode->i_ctime.tv_sec);
-	lvb->lvb_imtime    = cpu_to_be64(inode->i_mtime.tv_sec);
+	lvb->lvb_iatime    = cpu_to_be64(ocfs2_pack_timespec(&inode->i_atime));
+	lvb->lvb_ictime    = cpu_to_be64(ocfs2_pack_timespec(&inode->i_ctime));
+	lvb->lvb_imtime    = cpu_to_be64(ocfs2_pack_timespec(&inode->i_mtime));
 
 	mlog_exit_void();
 }
 
+static void ocfs2_unpack_timespec(struct timespec *spec,
+				  u64 packed_time)
+{
+	spec->tv_sec = packed_time >> OCFS2_SEC_SHIFT;
+	spec->tv_nsec = packed_time & OCFS2_NSEC_MASK;
+}
+
 static void ocfs2_refresh_inode_from_lvb(struct inode *inode)
 {
 	ocfs2_lock_res *lockres = &OCFS2_I(inode)->ip_meta_lockres;
@@ -1252,9 +1278,9 @@
 		inode->i_blocks = 0;
 	else
 		inode->i_blocks  = ocfs2_align_bytes_to_sectors(i_size_read(inode));
-	inode->i_atime.tv_sec = be64_to_cpu(lvb->lvb_iatime);
-	inode->i_ctime.tv_sec = be64_to_cpu(lvb->lvb_ictime);
-	inode->i_mtime.tv_sec = be64_to_cpu(lvb->lvb_imtime);
+	ocfs2_unpack_timespec(&inode->i_atime, be64_to_cpu(lvb->lvb_iatime));
+	ocfs2_unpack_timespec(&inode->i_mtime, be64_to_cpu(lvb->lvb_imtime));
+	ocfs2_unpack_timespec(&inode->i_ctime, be64_to_cpu(lvb->lvb_ictime));
 	spin_unlock(&OCFS2_I(inode)->ip_lock);
 
 	mlog_exit_void();
@@ -1561,51 +1587,6 @@
 	return status;
 }
 
-#if 0
-/* grabs the meta lock synchronusly.  */
-int ocfs2_meta_lock_flags(struct inode *inode,
-			  ocfs_journal_handle *handle,
-			  struct buffer_head **ret_bh,
-			  int ex,
-			  int flags)
-{
-	int status;
-
-	mlog_entry_void();
-
-	BUG_ON(handle && !ex);
-
-	status = ocfs2_meta_lock_flags_async(inode, ret_bh, ex, flags,
-					     NULL, 0);
-	if (status)
-		goto bail;
-
-	if (ret_bh && !(*ret_bh)) {
-		/* caller wants a buffer head but we haven't read it yet. */
-		status = ocfs_read_block(OCFS2_SB(inode->i_sb),
-					 OCFS2_I(inode)->ip_blkno, ret_bh,
-					 OCFS_BH_CACHED, inode);
-		if (status < 0) {
-			mlog_errno(status);
-			goto bail;
-		}
-	}
-	if (handle) {
-		status = ocfs_handle_add_lock(handle, inode);
-		if (status < 0)
-			mlog_errno(status);
-	}
-bail:
-	if (status < 0 && ret_bh && (*ret_bh)) {
-		brelse(*ret_bh);
-		ret_bh = NULL;
-	}
-
-	mlog_exit(status);
-	return status;
-}
-#endif
-
 void ocfs2_meta_unlock(struct inode *inode,
 		       int ex)
 {
@@ -1619,6 +1600,7 @@
 	     ex ? "EXMODE" : "PRMODE");
 
 	ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres, level);
+
 	mlog_exit_void();
 }
 
@@ -1719,6 +1701,7 @@
 void ocfs2_dlm_shutdown(ocfs_super *osb)
 {
 	mlog_entry_void();
+
 	ocfs2_drop_super_lock(osb);
 
 	if (osb->vote_task) {
@@ -1743,7 +1726,8 @@
 	     lockres->l_unlock_action);
 
 	if (status != DLM_NORMAL)
-		mlog(ML_ERROR, "Dlm returns status %d\n", status);
+		mlog(ML_ERROR, "Dlm returns status %d for lock %s\n",
+		     status, lockres->l_name);
 
 	spin_lock(&lockres->l_lock);
 	switch(lockres->l_unlock_action) {
@@ -1765,6 +1749,7 @@
 	mlog_exit_void();
 }
 
+
 /* BEWARE: called with lockres lock, and always drops it. Caller
  * should not be calling us with a busy lock... */
 static int __ocfs2_drop_lock(ocfs_super *osb,
@@ -1774,7 +1759,8 @@
 	dlm_status status;
 
 	if (lockres->l_flags & OCFS2_LOCK_BUSY)
-		mlog(0,"destroying busy lock: \"%s\"\n", lockres->l_name);
+		mlog(ML_ERROR, "destroying busy lock: \"%s\"\n",
+		     lockres->l_name);
 	if (lockres->l_flags & OCFS2_LOCK_BLOCKED)
 		mlog(0, "destroying blocked lock: \"%s\"\n", lockres->l_name);
 
@@ -1802,9 +1788,10 @@
 			   lockres->l_ops->unlock_ast,
 			   lockres);
 	if (status != DLM_NORMAL) {
-		mlog(ML_ERROR, "Dlm returns %d\n", status);
-		ret = -ENOENT;
-		goto bail;
+		mlog(ML_ERROR, "Dlm returns %d for lock %s\n", status,
+		     lockres->l_name);
+		dlm_print_one_lock(lockres->l_lksb.lockid);
+		BUG();
 	}
 	mlog(0, "lock %s, successfull return from dlmunlock\n",
 	     lockres->l_name);
@@ -1945,7 +1932,8 @@
 			 lockres,
 			 lockres->l_ops->bast);
 	if (status != DLM_NORMAL) {
-		mlog(ML_ERROR, "Dlm returns %d\n", status);
+		mlog(ML_ERROR, "Dlm returns %d for lock %s\n", status,
+		     lockres->l_name);
 		ret = -ENOENT;
 		ocfs2_recover_from_dlm_error(lockres, 1);
 		goto bail;
@@ -1993,7 +1981,8 @@
 			lockres->l_unlock_action = OCFS2_UNLOCK_INVALID;
 		spin_unlock(&lockres->l_lock);
 	} else if (status != DLM_NORMAL) {
-		mlog(ML_ERROR, "Dlm returns %d\n", status);
+		mlog(ML_ERROR, "Dlm returns %d for lock %s\n", status,
+		     lockres->l_name);
 		ret = -ENOENT;
 		ocfs2_recover_from_dlm_error(lockres, 0);
 	}
@@ -2122,7 +2111,6 @@
 	spin_lock(&lockres->l_lock);
 
 	OCFS_ASSERT(lockres->l_flags & OCFS2_LOCK_BLOCKED);
-	OCFS_ASSERT(!(lockres->l_flags & OCFS2_LOCK_BUSY));
 
 recheck:
 	if (lockres->l_flags & OCFS2_LOCK_BUSY) {

Modified: branches/alloc_inode/fs/ocfs2/dlmglue.h
===================================================================
--- branches/alloc_inode/fs/ocfs2/dlmglue.h	2005-05-20 22:02:32 UTC (rev 2302)
+++ branches/alloc_inode/fs/ocfs2/dlmglue.h	2005-05-21 03:32:25 UTC (rev 2303)
@@ -69,9 +69,9 @@
 	u32       lvb_iclusters;
 	u32       lvb_iuid;
 	u32       lvb_igid;
-	u64       lvb_isize;
 	u16       lvb_imode;
 	u16       lvb_inlink;
+	u64       lvb_isize;
 	u64       lvb_iatime;
 	u64       lvb_ictime;
 	u64       lvb_imtime;

Modified: branches/alloc_inode/fs/ocfs2/file.c
===================================================================
--- branches/alloc_inode/fs/ocfs2/file.c	2005-05-20 22:02:32 UTC (rev 2302)
+++ branches/alloc_inode/fs/ocfs2/file.c	2005-05-21 03:32:25 UTC (rev 2303)
@@ -171,8 +171,8 @@
 void ocfs2_file_finish_extension(struct inode *inode, loff_t newsize,
 				 unsigned should_zero)
 {
-	mlog(0, "inode %"MLFu64", newsize = %"MLFu64" should_zero = %u\n",
-	     OCFS2_I(inode)->ip_blkno, newsize, should_zero);
+	mlog(0, "inode %"MLFu64", newsize = %lld, should_zero = %u\n",
+	     OCFS2_I(inode)->ip_blkno, (long long)newsize, should_zero);
 
 	ocfs2_update_inode_size(inode, newsize);
 
@@ -196,22 +196,23 @@
 
 /*
  * ocfs_file_write()
- * Linux 2.6 TODO: Remove all O_DIRECT conditionals here, they are no longer
- * needed.
  */
-static ssize_t ocfs_file_write(struct file *filp, const char __user *buf,
-		size_t count, loff_t *ppos)
+static ssize_t ocfs_file_write(struct file *filp,
+			       const char __user *buf,
+			       size_t count,
+			       loff_t *ppos)
 {
 	struct iovec local_iov = { .iov_base = (void __user *)buf, 
 				   .iov_len = count };
-	unsigned int savedflags;
 	int ret = 0;
 	ocfs_super *osb = NULL;
 	struct dentry *dentry = filp->f_dentry;
 	struct inode *inode = dentry->d_inode;
-	int sector_size;
 	struct ocfs2_write_lock_info info = {0, };
 	DECLARE_BUFFER_LOCK_CTXT(ctxt);
+#ifdef OCFS2_ORACORE_WORKAROUNDS
+	unsigned int saved_flags;
+#endif
 
 	mlog_entry ("(0x%p, 0x%p, %u, '%.*s')\n", filp, buf,
 			(unsigned int)count,
@@ -231,7 +232,6 @@
 	}
 
 	osb = OCFS2_SB(inode->i_sb);
-	sector_size = 1 << osb->s_sectsize_bits;
 
 	ret = ocfs_write_lock_maybe_extend(filp, buf, count, ppos, &info,
 					   &ctxt);
@@ -240,14 +240,17 @@
 
 	down_read(&OCFS_I(inode)->ip_alloc_sem);
 
-	/* ick.  seems to be our only way of toggling directio */
-	savedflags = filp->f_flags;
+#ifdef OCFS2_ORACORE_WORKAROUNDS
+	saved_flags = filp->f_flags;
 	if (info.wl_do_direct_io) 
 		filp->f_flags |= O_DIRECT;
 	else
 		filp->f_flags &= ~O_DIRECT;
-	ret = generic_file_write_nolock (filp, &local_iov, 1, ppos);
-	filp->f_flags = savedflags;
+#endif
+	ret = generic_file_write_nolock(filp, &local_iov, 1, ppos);
+#ifdef OCFS2_ORACORE_WORKAROUNDS
+	filp->f_flags = saved_flags;
+#endif
 
 	up_read(&OCFS_I(inode)->ip_alloc_sem);
 
@@ -268,20 +271,17 @@
 
 /*
  * ocfs_file_read()
- * Linux 2.6 TODO: Remove all O_DIRECT conditionals here, they are no longer
- * needed.
  */
-static ssize_t ocfs_file_read(struct file *filp, char __user *buf,
-		size_t count, loff_t *ppos)
+static ssize_t ocfs_file_read(struct file *filp,
+			      char __user *buf,
+			      size_t count,
+			      loff_t *ppos)
 {
-	unsigned int savedflags;
 	int ret = 0;
 	ocfs_super *osb = NULL;
 	struct dentry *dentry = filp->f_dentry;
 	struct inode *inode = dentry->d_inode;
 	ocfs2_backing_inode *target_binode;
-	int do_direct_io = 0;
-	int sector_size;
 	DECLARE_BUFFER_LOCK_CTXT(ctxt);
 
 	mlog_entry ("(0x%p, 0x%p, %u, '%.*s')\n", filp, buf,
@@ -294,19 +294,19 @@
 	}
 
 	osb = OCFS2_SB(inode->i_sb);
-	sector_size = 1 << osb->s_sectsize_bits;
 
+#ifdef OCFS2_ORACORE_WORKAROUNDS
 	if (filp->f_flags & O_DIRECT) {
-		/* anything special for o_direct? */
-		mlog(0, "O_DIRECT\n");
-		if (((*ppos) & (sector_size - 1)) || (count & (sector_size - 1)) || 
-		    ((unsigned long)buf & (sector_size - 1)) || (i_size_read(inode) & (sector_size -1))) {
-			do_direct_io = 0;
+		int sector_size = 1 << osb->s_sectsize_bits;
+
+		if (((*ppos) & (sector_size - 1)) ||
+		    (count & (sector_size - 1)) || 
+		    ((unsigned long)buf & (sector_size - 1)) ||
+		    (i_size_read(inode) & (sector_size -1))) {
 			filp->f_flags &= ~O_DIRECT;
-		} else {
-			do_direct_io = 1;
 		}
 	}
+#endif
 
 	ret = ocfs2_setup_io_locks(inode->i_sb, inode, buf, count, &ctxt,
 				   &target_binode);
@@ -315,7 +315,7 @@
 		goto bail;
 	}
 
-	target_binode->ba_lock_data = do_direct_io ? 0 : 1;
+	target_binode->ba_lock_data = (filp->f_flags & O_DIRECT) ? 0 : 1;
 
 	ret = ocfs2_lock_buffer_inodes(&ctxt, NULL);
 	if (ret < 0) {
@@ -325,14 +325,7 @@
 
 	down_read(&OCFS_I(inode)->ip_alloc_sem);
 
-	/* ick.  seems to be our only way of toggling directio */
-	savedflags = filp->f_flags;
-	if (do_direct_io) 
-		filp->f_flags |= O_DIRECT;
-	else
-		filp->f_flags &= ~O_DIRECT;
 	ret = generic_file_read (filp, buf, count, ppos);
-	filp->f_flags = savedflags;
 
 	up_read(&OCFS_I(inode)->ip_alloc_sem);
 
@@ -850,6 +843,7 @@
 			fe->i_size = new_fe_size;
 
 			fe->i_mtime = CURRENT_TIME.tv_sec;
+			fe->i_mtime_nsec = cpu_to_le32(CURRENT_TIME.tv_nsec);
 
 			status = ocfs_journal_dirty(handle, bh);
 			if (status < 0) {
@@ -896,6 +890,7 @@
 	     OCFS2_I(inode)->ip_clusters, i_size_read(inode));
 
 	fe->i_ctime = fe->i_mtime = CURRENT_TIME.tv_sec;
+	fe->i_ctime_nsec = fe->i_mtime_nsec = cpu_to_le32(CURRENT_TIME.tv_nsec);
 
 	status = ocfs_journal_dirty(handle, bh);
 	if (status < 0) {

Modified: branches/alloc_inode/fs/ocfs2/heartbeat.c
===================================================================
--- branches/alloc_inode/fs/ocfs2/heartbeat.c	2005-05-20 22:02:32 UTC (rev 2302)
+++ branches/alloc_inode/fs/ocfs2/heartbeat.c	2005-05-21 03:32:25 UTC (rev 2303)
@@ -102,21 +102,26 @@
 	ocfs_node_map_clear_bit(osb, &osb->umount_map, node_num);
 }
 
+void ocfs2_setup_hb_callbacks(ocfs_super *osb)
+{
+	hb_setup_callback(&osb->osb_hb_down, HB_NODE_DOWN_CB,
+			  ocfs2_hb_node_down_cb, osb, OCFS2_HB_NODE_DOWN_PRI);
+
+	hb_setup_callback(&osb->osb_hb_up, HB_NODE_UP_CB, ocfs2_hb_node_up_cb,
+			  osb, OCFS2_HB_NODE_UP_PRI);
+}
+
 /* Most functions here are just stubs for now... */
 int ocfs2_register_hb_callbacks(ocfs_super *osb)
 {
 	int status;
 
-	hb_setup_callback(&osb->osb_hb_down, HB_NODE_DOWN_CB,
-			  ocfs2_hb_node_down_cb, osb, OCFS2_HB_NODE_DOWN_PRI);
 	status = hb_register_callback(&osb->osb_hb_down);
 	if (status < 0) {
 		mlog_errno(status);
 		goto bail;
 	}
 
-	hb_setup_callback(&osb->osb_hb_up, HB_NODE_UP_CB, ocfs2_hb_node_up_cb,
-			  osb, OCFS2_HB_NODE_UP_PRI);
 	status = hb_register_callback(&osb->osb_hb_up);
 	if (status < 0)
 		mlog_errno(status);

Modified: branches/alloc_inode/fs/ocfs2/heartbeat.h
===================================================================
--- branches/alloc_inode/fs/ocfs2/heartbeat.h	2005-05-20 22:02:32 UTC (rev 2302)
+++ branches/alloc_inode/fs/ocfs2/heartbeat.h	2005-05-21 03:32:25 UTC (rev 2303)
@@ -28,6 +28,7 @@
 
 void ocfs2_init_node_maps(ocfs_super *osb);
 
+void ocfs2_setup_hb_callbacks(ocfs_super *osb);
 int ocfs2_register_hb_callbacks(ocfs_super *osb);
 void ocfs2_clear_hb_callbacks(ocfs_super *osb);
 void ocfs2_stop_heartbeat(ocfs_super *osb);

Modified: branches/alloc_inode/fs/ocfs2/inode.c
===================================================================
--- branches/alloc_inode/fs/ocfs2/inode.c	2005-05-20 22:02:32 UTC (rev 2302)
+++ branches/alloc_inode/fs/ocfs2/inode.c	2005-05-21 03:32:25 UTC (rev 2303)
@@ -278,8 +278,11 @@
 	inode->i_mapping->a_ops = &ocfs_aops;
 	inode->i_flags |= S_NOATIME;
 	inode->i_atime.tv_sec = fe->i_atime;
+	inode->i_atime.tv_nsec = le32_to_cpu(fe->i_atime_nsec);
 	inode->i_mtime.tv_sec = fe->i_mtime;
+	inode->i_mtime.tv_nsec = le32_to_cpu(fe->i_mtime_nsec);
 	inode->i_ctime.tv_sec = fe->i_ctime;
+	inode->i_ctime.tv_nsec = le32_to_cpu(fe->i_ctime_nsec);
 
 	if (OCFS2_I(inode)->ip_blkno != fe->i_blkno)
 		mlog(ML_ERROR,
@@ -728,6 +731,9 @@
 	mlog(0, "Clearing inode: %"MLFu64", nlink = %u)\n",
 	     OCFS2_I(inode)->ip_blkno, inode->i_nlink);
 
+	mlog_bug_on_msg(OCFS2_SB(inode->i_sb) == NULL,
+			"Inode=%lu\n", inode->i_ino);
+
 	if (!inode->u.generic_ip) {
 		mlog(ML_ERROR, "inode %lu has no generic_ip!\n", inode->i_ino);
 		goto bail;
@@ -810,8 +816,9 @@
 	u64 p_blkno;
 	int readflags = OCFS_BH_CACHED;
 
-#warning only turn this on if we know we can deal with read_block returning nothing
 #if 0
+	/* only turn this on if we know we can deal with read_block
+	 * returning nothing */
 	if (reada)
 		readflags |= OCFS_BH_READAHEAD;
 #endif
@@ -924,10 +931,13 @@
 	fe->i_gid = inode->i_gid;
 	fe->i_mode = inode->i_mode;
 	fe->i_atime = inode->i_atime.tv_sec;
+	fe->i_atime_nsec = cpu_to_le32(inode->i_atime.tv_nsec);
 	fe->i_ctime = inode->i_ctime.tv_sec;
+	fe->i_ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec);
 	fe->i_mtime = inode->i_mtime.tv_sec;
-#warning "do we want to update these here?"
-//	fe->i_dtime = inode->i_dtime.tv_sec;
+	fe->i_mtime_nsec = cpu_to_le32(inode->i_mtime.tv_nsec);
+	/* XXX: Do we want to update i_dtime here? */
+	/* fe->i_dtime = inode->i_dtime.tv_sec; */
 
 	status = ocfs_journal_dirty(handle, bh);
 	if (status < 0)
@@ -968,10 +978,11 @@
 	else
 		inode->i_blocks = ocfs2_align_bytes_to_sectors(i_size_read(inode));
 	inode->i_atime.tv_sec = fe->i_atime;
+	inode->i_atime.tv_nsec = le32_to_cpu(fe->i_atime_nsec);
 	inode->i_mtime.tv_sec = fe->i_mtime;
+	inode->i_mtime.tv_nsec = le32_to_cpu(fe->i_mtime_nsec);
 	inode->i_ctime.tv_sec = fe->i_ctime;
+	inode->i_ctime.tv_nsec = le32_to_cpu(fe->i_ctime_nsec);
 
 	spin_unlock(&OCFS2_I(inode)->ip_lock);
 }				/* ocfs_refresh_inode */
-
-

Modified: branches/alloc_inode/fs/ocfs2/journal.c
===================================================================
--- branches/alloc_inode/fs/ocfs2/journal.c	2005-05-20 22:02:32 UTC (rev 2302)
+++ branches/alloc_inode/fs/ocfs2/journal.c	2005-05-21 03:32:25 UTC (rev 2303)
@@ -56,7 +56,7 @@
 static int ocfs_recover_node(ocfs_super *osb,
 			     int node_num);
 static int __ocfs_recovery_thread(void *arg);
-static int ocfs_commit_cache (ocfs_super * osb);
+static int ocfs_commit_cache(ocfs_super *osb);
 static int ocfs_wait_on_mount(ocfs_super *osb);
 static void ocfs_handle_cleanup_locks(ocfs_journal *journal, 
 				      ocfs_journal_handle *handle,
@@ -78,7 +78,7 @@
 	int status = 0;
 	unsigned int flushed;
 	unsigned long old_id;
-	ocfs_journal * journal = NULL;
+	ocfs_journal *journal = NULL;
 
 	mlog_entry_void();
 
@@ -114,13 +114,13 @@
 	ocfs2_kick_vote_thread(osb);
 	wake_up(&journal->j_checkpointed);
 finally:
-	mlog_exit (status);
+	mlog_exit(status);
 	return status;
 }				/* ocfs_commit_cache */
 
-ocfs_journal_handle * ocfs_alloc_handle(ocfs_super *osb)
+ocfs_journal_handle *ocfs_alloc_handle(ocfs_super *osb)
 {
-	ocfs_journal_handle * retval = NULL;
+	ocfs_journal_handle *retval = NULL;
 
 	retval = kmalloc(sizeof(*retval), GFP_KERNEL);
 	if (!retval) {
@@ -134,8 +134,8 @@
 	retval->num_locks = 0;
 	retval->k_handle = NULL;
 
-	INIT_LIST_HEAD(&(retval->locks));
-	INIT_LIST_HEAD(&(retval->inode_list));
+	INIT_LIST_HEAD(&retval->locks);
+	INIT_LIST_HEAD(&retval->inode_list);
 	retval->journal = osb->journal;
 
 	return retval;
@@ -144,13 +144,13 @@
 /* pass it NULL and it will allocate a new handle object for you.  If
  * you pass it a handle however, it may still return NULL, in which
  * case it has free'd the passed handle for you. */
-ocfs_journal_handle * ocfs_start_trans(ocfs_super *osb, 
+ocfs_journal_handle *ocfs_start_trans(ocfs_super *osb, 
 				       ocfs_journal_handle *handle, 
 				       int max_buffs) 
 {
-	journal_t * journal = osb->journal->j_journal;
+	journal_t *journal = osb->journal->j_journal;
 
-	mlog_entry ("(max_buffs = %d)\n", max_buffs);
+	mlog_entry("(max_buffs = %d)\n", max_buffs);
 
 	if (!osb || !osb->journal->j_journal)
 		BUG();
@@ -261,7 +261,7 @@
 /*
  * ocfs_commit_trans
  */
-void ocfs_commit_trans(ocfs_journal_handle * handle) 
+void ocfs_commit_trans(ocfs_journal_handle *handle) 
 {
 	handle_t *jbd_handle;
 	int retval;
@@ -377,12 +377,12 @@
 	OCFS_ASSERT(bh);
 	OCFS_ASSERT((handle->flags & OCFS_HANDLE_STARTED));
 
-	mlog_entry("(bh->b_blocknr=%llu, type=%d (\"%s\"), "
-		       "bh->b_size = %hu)\n", 
-		       (unsigned long long)bh->b_blocknr, type, 
-		       (type == OCFS_JOURNAL_ACCESS_CREATE) ? 
-		       "OCFS_JOURNAL_ACCESS_CREATE" : 
-		       "OCFS_JOURNAL_ACCESS_WRITE", bh->b_size);
+	mlog_entry("(bh->b_blocknr=%llu, type=%d (\"%s\"), bh->b_size = %hu)\n",
+		   (unsigned long long)bh->b_blocknr, type, 
+		   (type == OCFS_JOURNAL_ACCESS_CREATE) ? 
+			"OCFS_JOURNAL_ACCESS_CREATE" : 
+			"OCFS_JOURNAL_ACCESS_WRITE",
+		   bh->b_size);
 
 	/* we can safely remove this assertion after testing. */
 	if (!buffer_uptodate(bh)) {
@@ -412,6 +412,7 @@
 	if (status < 0)
 		mlog(ML_ERROR, "Error %d getting %d access to buffer!\n",
 		     status, type);
+
 	mlog_exit(status);
 	return status;
 } /* ocfs_journal_access */
@@ -428,7 +429,7 @@
 	OCFS_ASSERT((handle->flags & OCFS_HANDLE_STARTED));
 
 	mlog_entry("(bh->b_blocknr=%llu)\n",
-			(unsigned long long)bh->b_blocknr);
+		   (unsigned long long)bh->b_blocknr);
 
 	status = journal_dirty_metadata(handle->k_handle, bh);
 	if (status < 0)
@@ -448,6 +449,7 @@
 	ocfs_journal_lock *lock;
 
 	OCFS_ASSERT(inode);
+
 	lock = kmem_cache_alloc(ocfs2_lock_cache, GFP_NOFS);
 	if (!lock) {
 		status = -ENOMEM;
@@ -486,8 +488,9 @@
 			ocfs_set_inode_lock_trans(journal, inode);
 		ocfs2_meta_unlock(inode, 1);
 		if (atomic_read(&inode->i_count) == 1)
-			mlog(ML_ERROR, "Inode %"MLFu64", I'm doing a last iput "
-			     "for!", OCFS2_I(inode)->ip_blkno);
+			mlog(ML_ERROR,
+			     "Inode %"MLFu64", I'm doing a last iput for!",
+			     OCFS2_I(inode)->ip_blkno);
 		iput(inode);
 		kmem_cache_free(ocfs2_lock_cache, lock);
 	}
@@ -502,7 +505,7 @@
 {
 	int status = -1;
 	struct inode *inode = NULL; /* the journal inode */
-	journal_t * j_journal = NULL;
+	journal_t *j_journal = NULL;
 	ocfs2_dinode *fe = NULL;
 	struct buffer_head *bh = NULL;
 	ocfs_super *osb;
@@ -521,9 +524,9 @@
 		mlog_errno(status);
 		goto done;
 	}
-	if (is_bad_inode (inode)) {
+	if (is_bad_inode(inode)) {
 		mlog(ML_ERROR, "access error (bad inode)\n");
-		iput (inode);
+		iput(inode);
 		inode = NULL;
 		status = -EACCES;
 		goto done;
@@ -537,7 +540,7 @@
 			mlog(ML_ERROR, "Could not get lock on journal!\n");
 		goto done;
 	}
-	fe = (ocfs2_dinode *) bh->b_data;
+	fe = (ocfs2_dinode *)bh->b_data;
 
 	if (fe->i_size <  OCFS2_MIN_JOURNAL_SIZE) {
 		mlog(ML_ERROR, "Journal file size (%"MLFu64") is too small!",
@@ -555,8 +558,8 @@
 		mlog(ML_ERROR, "inode and fe alloc sizes differ! (%u != %u",
 		     OCFS2_I(inode)->ip_clusters, fe->i_clusters);
 	if (inode->i_size != fe->i_size)
-		mlog(ML_ERROR, "inode and fe i_size's differ! "
-			       "(%lld != %"MLFu64")",
+		mlog(ML_ERROR,
+		     "inode and fe i_size's differ! (%lld != %"MLFu64")",
 		     inode->i_size, fe->i_size);
 
 	OCFS2_I(inode)->ip_open_count++;
@@ -573,6 +576,13 @@
 	mlog(0, "j_journal->j_maxlen = %u\n", j_journal->j_maxlen);
 	j_journal->j_commit_interval = OCFS_DEFAULT_COMMIT_INTERVAL;
 
+	spin_lock(&j_journal->j_state_lock);
+	if (osb->s_mount_opt & OCFS2_MOUNT_BARRIER)
+		j_journal->j_flags |= JFS_BARRIER;
+	else
+		j_journal->j_flags &= ~JFS_BARRIER;
+	spin_unlock(&j_journal->j_state_lock);
+
 	*dirty = (le32_to_cpu(fe->id1.journal1.ij_flags) &
 		  OCFS2_JOURNAL_DIRTY_FL);
 
@@ -603,13 +613,13 @@
 {
 	int status;
 	unsigned int flags;
-	ocfs_journal * journal = osb->journal;
+	ocfs_journal *journal = osb->journal;
 	struct buffer_head *bh = journal->j_bh;
 	ocfs2_dinode *fe;
 
 	mlog_entry_void();
 
-	fe = (ocfs2_dinode *) bh->b_data;
+	fe = (ocfs2_dinode *)bh->b_data;
 	OCFS2_BUG_ON_INVALID_DINODE(fe);
 
 	flags = le32_to_cpu(fe->id1.journal1.ij_flags);
@@ -632,9 +642,9 @@
 */
 void ocfs_journal_shutdown(ocfs_super *osb) 
 {
-	ocfs_journal * journal = NULL;
+	ocfs_journal *journal = NULL;
 	int status = 0;
-	struct inode * inode = NULL;
+	struct inode *inode = NULL;
 	int num_running_trans = 0;
 
 	mlog_entry_void();
@@ -657,8 +667,9 @@
 
 	num_running_trans = atomic_read(&(osb->journal->j_num_trans));
 	if (num_running_trans > 0)
-		mlog(0, "Shutting down journal: must wait on %d"
-		     " running transactions!\n", num_running_trans);
+		mlog(0, "Shutting down journal: must wait on %d "
+		     "running transactions!\n",
+		     num_running_trans);
 
 	/* Do a commit_cache here. It will flush our journal, *and*
 	 * release any locks that are still held.
@@ -690,7 +701,7 @@
 	/* unlock our journal */
 	ocfs2_meta_unlock(inode, 1);
 
-	brelse (journal->j_bh);
+	brelse(journal->j_bh);
 	journal->j_bh = NULL;
 
 	journal->j_state = OCFS_JOURNAL_FREE;
@@ -1007,9 +1018,10 @@
 
 		status = ocfs_recover_node(osb, node_num);
 		if (status < 0) {
-			mlog(ML_ERROR, "Error %d recovering node %d on device "
-			     "(%u,%u)!\n", status, node_num,
-			     MAJOR(osb->sb->s_dev),MINOR(osb->sb->s_dev));
+			mlog(ML_ERROR,
+			     "Error %d recovering node %d on device (%u,%u)!\n",
+			     status, node_num,
+			     MAJOR(osb->sb->s_dev), MINOR(osb->sb->s_dev));
 			mlog(ML_ERROR, "Volume requires unmount.\n");
 			continue;
 		}
@@ -1048,7 +1060,7 @@
 void ocfs_recovery_thread(ocfs_super *osb, int node_num)
 {
 	mlog_entry("(node_num=%d, osb->node_num = %d)\n",
-		       node_num, osb->node_num);
+		   node_num, osb->node_num);
 
 	down(&osb->recovery_lock);
 	if (osb->disable_recovery)
@@ -1097,9 +1109,9 @@
 		mlog_errno(status);
 		goto done;
 	}
-	if (is_bad_inode (inode)) {
+	if (is_bad_inode(inode)) {
 		status = -EACCES;
-		iput (inode);
+		iput(inode);
 		inode = NULL;
 		mlog_errno(status);
 		goto done;
@@ -1126,8 +1138,8 @@
 	}
 
 	mlog(ML_NOTICE, "Recovering node %d from slot %d on device (%u,%u)\n",
-	     node_num, slot_num, MAJOR(osb->sb->s_dev),
-	     MINOR(osb->sb->s_dev));
+	     node_num, slot_num,
+	     MAJOR(osb->sb->s_dev), MINOR(osb->sb->s_dev));
 
 	OCFS2_I(inode)->ip_clusters = fe->i_clusters;
 
@@ -1214,7 +1226,7 @@
 	ocfs2_dinode *tl_copy = NULL;
 
 	mlog_entry("(node_num=%d, osb->node_num = %d)\n",
-		       node_num, osb->node_num);
+		   node_num, osb->node_num);
 
 	mlog(0, "checking node %d\n", node_num);
 
@@ -1286,9 +1298,9 @@
 		status = -EACCES;
 		goto bail;
 	}
-	if (is_bad_inode (inode)) {
+	if (is_bad_inode(inode)) {
 		mlog(ML_ERROR, "access error (bad inode)\n");
-		iput (inode);
+		iput(inode);
 		inode = NULL;
 		status = -EACCES;
 		goto bail;
@@ -1415,7 +1427,7 @@
 				up(&orphan_dir_inode->i_sem);
 				status = -EINVAL;
 				mlog_errno(status);
-				brelse (bh);
+				brelse(bh);
 				goto bail;
 			}
 
@@ -1426,11 +1438,12 @@
 			if (!le64_to_cpu(de->inode))
 				continue;
 			if (de->file_type > OCFS2_FT_MAX) {
-				mlog(ML_ERROR, "block %llu contains invalid "
-				     "de: inode = %"MLFu64", rec_len = %u, "
+				mlog(ML_ERROR,
+				     "block %llu contains invalid de: "
+				     "inode = %"MLFu64", rec_len = %u, "
 				     "name_len = %u, file_type = %u, "
 				     "name='%.*s'\n", 
-				     (unsigned long long) bh->b_blocknr, 
+				     (unsigned long long)bh->b_blocknr, 
 				     le64_to_cpu(de->inode),
 				     le16_to_cpu(de->rec_len),
 				     de->name_len, 
@@ -1531,8 +1544,9 @@
 			mlog_errno(status);
 
 		if (kthread_should_stop() && atomic_read(&journal->j_num_trans)){
-			mlog(ML_KTHREAD, "commit_thread: %u transactions "
-			     "pending on shutdown\n",
+			mlog(ML_KTHREAD,
+			     "commit_thread: %u transactions pending on "
+			     "shutdown\n",
 			     atomic_read(&journal->j_num_trans));
 		}
 	}

Modified: branches/alloc_inode/fs/ocfs2/mmap.c
===================================================================
--- branches/alloc_inode/fs/ocfs2/mmap.c	2005-05-20 22:02:32 UTC (rev 2302)
+++ branches/alloc_inode/fs/ocfs2/mmap.c	2005-05-21 03:32:25 UTC (rev 2303)
@@ -447,13 +447,11 @@
 	struct dentry *dentry = filp->f_dentry;
 	struct inode *inode = dentry->d_inode;
 	int status;
-	int sector_size;
 	int level = filp->f_flags & O_APPEND;
 	loff_t saved_ppos;
 	u64 bytes_added = 0;
 
 	osb = OCFS2_SB(inode->i_sb);
-	sector_size = 1 << osb->s_sectsize_bits;
 
 	/* the target inode is different from the other inodes.  in o_direct it
 	 * doesn't get a data lock and when appending it gets a level 1 meta
@@ -513,16 +511,18 @@
 		saved_ppos = i_size_read(inode);
 		mlog(0, "O_APPEND: inode->i_size=%llu\n", saved_ppos);
 
+#ifdef OCFS2_ORACORE_WORKAROUNDS
 		/* ugh, work around some applications which open
 		 * everything O_DIRECT + O_APPEND and really don't
 		 * mean to use O_DIRECT. */
-#warning this is wrong wrong wrong
 		filp->f_flags &= ~O_DIRECT;
+#endif
 	}
 
 	if (filp->f_flags & O_DIRECT) {
-		/* anything special for o_direct? */
-		mlog(0, "O_DIRECT\n");
+#ifdef OCFS2_ORACORE_WORKAROUNDS
+		int sector_size = 1 << osb->s_sectsize_bits;
+
 		if ((saved_ppos & (sector_size - 1)) || 
 		    (count & (sector_size - 1)) || 
 		    ((unsigned long)buf & (sector_size - 1))) {
@@ -531,7 +531,12 @@
 		} else {
 			info->wl_do_direct_io = 1;
 		}
+#else
+		info->wl_do_direct_io = 1;
+#endif
+		mlog(0, "O_DIRECT\n");
 	}
+
 	info->wl_target_binode->ba_lock_data = info->wl_do_direct_io ? 0 : 1;
 
 	info->wl_newsize = count + saved_ppos;

Modified: branches/alloc_inode/fs/ocfs2/namei.c
===================================================================
--- branches/alloc_inode/fs/ocfs2/namei.c	2005-05-20 22:02:32 UTC (rev 2302)
+++ branches/alloc_inode/fs/ocfs2/namei.c	2005-05-21 03:32:25 UTC (rev 2303)
@@ -577,6 +577,8 @@
 	strcpy (fe->i_signature, OCFS2_INODE_SIGNATURE);
 	fe->i_flags |= OCFS2_VALID_FL;
 	fe->i_atime = fe->i_ctime = fe->i_mtime = CURRENT_TIME.tv_sec;
+	fe->i_mtime_nsec = fe->i_ctime_nsec = fe->i_atime_nsec = 
+		cpu_to_le32(CURRENT_TIME.tv_nsec);
 	fe->i_dtime = 0;
 
 	fel = &fe->id2.i_list;
@@ -713,10 +715,11 @@
 		goto bail;
 	}
 
-	fe->i_links_count++;
-	fe->i_ctime = CURRENT_TIME.tv_sec;
-	inode->i_nlink = fe->i_links_count;
-	inode->i_ctime.tv_sec = fe->i_ctime;
+	inode->i_nlink++;
+	inode->i_ctime = CURRENT_TIME;
+	fe->i_links_count = inode->i_nlink;
+	fe->i_ctime = inode->i_ctime.tv_sec;
+	fe->i_ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec);
 
 	err = ocfs_journal_dirty(handle, fe_bh);
 	if (err < 0) {
@@ -1916,7 +1919,7 @@
 				}
 				num++;
 		
-#warning questionable readahead stuff here	
+				/* XXX: questionable readahead stuff here */
 				bh = ocfs_bread(dir, b++, &err, 1);
 				bh_use[ra_max] = bh;
 #if 0		// ???

Modified: branches/alloc_inode/fs/ocfs2/ocfs.h
===================================================================
--- branches/alloc_inode/fs/ocfs2/ocfs.h	2005-05-20 22:02:32 UTC (rev 2302)
+++ branches/alloc_inode/fs/ocfs2/ocfs.h	2005-05-21 03:32:25 UTC (rev 2303)
@@ -238,6 +238,12 @@
 	OCFS2_LA_DISABLED
 };
 
+enum ocfs2_mount_options
+{
+	OCFS2_MOUNT_HB_OK   = 1 << 0,	/* Heartbeat started */
+	OCFS2_MOUNT_BARRIER = 1 << 1	/* Use block barriers */
+};
+
 struct _ocfs_journal;
 struct _ocfs2_slot_info;
 
@@ -280,6 +286,8 @@
 	spinlock_t s_next_gen_lock;
 	u32 s_next_generation;
 
+	unsigned long s_mount_opt;
+
 	u16 max_nodes;
 	u16 num_nodes;
 	s16 node_num;
@@ -349,6 +357,7 @@
 	/* Truncate log info */
 	struct inode			*osb_tl_inode;
 	struct buffer_head		*osb_tl_bh;
+	struct work_struct		osb_truncate_log_wq;
 };
 
 #define NAMEI_RA_CHUNKS  2

Modified: branches/alloc_inode/fs/ocfs2/ocfs2_fs.h
===================================================================
--- branches/alloc_inode/fs/ocfs2/ocfs2_fs.h	2005-05-20 22:02:32 UTC (rev 2302)
+++ branches/alloc_inode/fs/ocfs2/ocfs2_fs.h	2005-05-21 03:32:25 UTC (rev 2303)
@@ -402,8 +402,10 @@
 	__u64 i_last_eb_blk;		/* Pointer to last extent
 					   block */
 /*60*/	__u32 i_fs_generation;		/* Generation per fs-instance */
-	__u32 i_reserved1;		/* Generation per fs-instance */
-/*68*/	__u64 i_reserved2[10];
+	__u32 i_atime_nsec;
+	__u32 i_ctime_nsec;
+	__u32 i_mtime_nsec;
+/*70*/	__u64 i_reserved1[9];
 /*B8*/	union {
 		__u64 i_pad1;		/* Generic way to refer to this
 					   64bit union */
@@ -435,6 +437,8 @@
 
 /*
  * On-disk directory entry structure for OCFS2
+ *
+ * Packed as this structure could be accessed unaligned on 64-bit platforms
  */
 struct ocfs2_dir_entry {
 /*00*/	__u64   inode;                  /* Inode number */
@@ -443,7 +447,7 @@
 	__u8    file_type;
 /*0C*/	char    name[OCFS2_MAX_FILENAME_LEN];   /* File name */
 /* Actual on-disk length specified by rec_len */
-};
+} __attribute__ ((packed));
 
 /*
  * On disk allocator group structure for OCFS2

Modified: branches/alloc_inode/fs/ocfs2/super.c
===================================================================
--- branches/alloc_inode/fs/ocfs2/super.c	2005-05-20 22:02:32 UTC (rev 2302)
+++ branches/alloc_inode/fs/ocfs2/super.c	2005-05-21 03:32:25 UTC (rev 2303)
@@ -36,6 +36,7 @@
 #include <linux/blkdev.h>
 #include <linux/socket.h>
 #include <linux/inet.h>
+#include <linux/parser.h>
 
 #include <cluster/nodemanager.h>
 
@@ -79,27 +80,28 @@
 MODULE_AUTHOR("Oracle");
 MODULE_LICENSE("GPL");
 
+static int ocfs2_parse_options(char *options, unsigned long *mount_opt);
 static void ocfs_put_super(struct super_block *sb);
-static int ocfs_mount_volume(struct super_block *sb);
-static void ocfs_dismount_volume(struct super_block *sb);
+static int ocfs_mount_volume(struct super_block *sb, unsigned long mount_opt);
+static void ocfs_dismount_volume(struct super_block *sb, int mnt_err);
 static int ocfs_initialize_mem_caches(void);
 static void ocfs_free_mem_caches(void);
-static void ocfs_delete_osb(ocfs_super * osb);
+static void ocfs_delete_osb(ocfs_super *osb);
 
 static int ocfs_statfs(struct super_block *sb, struct kstatfs *buf);
 
-static int ocfs_sync_fs(struct super_block *sb, int wait);
+static int ocfs2_sync_fs(struct super_block *sb, int wait);
 
 static int ocfs_init_global_system_inodes(ocfs_super *osb);
 static int ocfs_init_local_system_inodes(ocfs_super *osb);
 static int ocfs_release_system_inodes(ocfs_super *osb);
 static int ocfs2_fill_local_node_info(ocfs_super *osb);
-static int ocfs_check_volume(ocfs_super * osb);
+static int ocfs_check_volume(ocfs_super *osb);
 static int ocfs_verify_volume(ocfs2_dinode *di, struct buffer_head *bh,
 			      u32 sectsize);
 static int ocfs_initialize_osb(ocfs_super *osb, struct buffer_head *bh);
 static int ocfs2_get_sector(struct super_block *sb, struct buffer_head **bh, int block, int sect_size);
-static void ocfs_write_super(struct super_block * sb);
+static void ocfs_write_super(struct super_block *sb);
 static struct inode *ocfs2_alloc_inode(struct super_block *sb);
 static void ocfs2_destroy_inode(struct inode *inode);
 
@@ -111,7 +113,7 @@
 	.destroy_inode	= ocfs2_destroy_inode,
 	.clear_inode	= ocfs2_clear_inode,
 	.delete_inode	= ocfs_delete_inode, 
-	.sync_fs	= ocfs_sync_fs,
+	.sync_fs	= ocfs2_sync_fs,
 	.write_super	= ocfs_write_super,
 	.put_super	= ocfs_put_super,
 };
@@ -121,21 +123,44 @@
 	.get_parent	= ocfs_get_parent,
 };
 
+enum {
+	Opt_hbok,
+	Opt_barrier,
+	Opt_err,
+};
+
+static match_table_t tokens = {
+	{Opt_hbok, OCFS2_HB_OK},
+	{Opt_barrier, "barrier=%u"},
+	{Opt_err, NULL}
+};
+
 /*
  * write_super and sync_fs ripped right out of ext3.
  */
-static void ocfs_write_super (struct super_block * sb)
+static void ocfs_write_super(struct super_block *sb)
 {
 	if (down_trylock(&sb->s_lock) == 0)
 		BUG();
 	sb->s_dirt = 0;
 }
 
-static int ocfs_sync_fs(struct super_block *sb, int wait)
+static int ocfs2_sync_fs(struct super_block *sb, int wait)
 {
+	int status = 0;
 	tid_t target;
+	ocfs_super *osb = OCFS2_SB(sb);
 
 	sb->s_dirt = 0;
+
+	if (wait) {
+		status = ocfs2_flush_truncate_log(osb);
+		if (status < 0)
+			mlog_errno(status);
+	} else {
+		ocfs2_schedule_truncate_log_flush(osb, 0);
+	}
+
 	if (journal_start_commit(OCFS2_SB(sb)->journal->j_journal, &target)) {
 		if (wait)
 			log_wait_commit(OCFS2_SB(sb)->journal->j_journal, 
@@ -208,7 +233,8 @@
 		if (!new) {
 			ocfs_release_system_inodes(osb);
 			status = -EINVAL;
-			mlog_errno(status);
+			mlog(ML_ERROR, "status=%d, sysfile=%d, slot=%d\n",
+			     status, i, osb->slot_num);
 			goto bail;
 		}
 		// the array now has one ref, so drop this one
@@ -244,6 +270,12 @@
 		osb->sys_root_inode = NULL;
 	}
 
+	inode = osb->root_inode;
+	if (inode) {
+		iput(inode);
+		osb->root_inode = NULL;
+	}
+
 	mlog_exit(status);
 	return status;
 } /* ocfs_release_system_inodes */
@@ -306,8 +338,9 @@
 	int status;
 	struct inode *inode = NULL;
 	ocfs_super *osb = NULL;
+	unsigned long mount_opt = 0;
 
-	mlog_entry ("%p, %p, %i", sb, data, silent);
+	mlog_entry("%p, %p, %i", sb, data, silent);
 
 	/* for now we only have one cluster/node, make sure we see it
 	 * in the heartbeat universe */
@@ -316,24 +349,30 @@
 		goto read_super_error;
 	}
 
-	/* Stopgap check to ensure that mount.ocfs2 mounted the volume */
-	if (!data || strcmp(data, OCFS2_HB_OK)) {
+	if (!ocfs2_parse_options(data, &mount_opt)) {
 		status = -EINVAL;
-		if (data)
-			mlog(ML_ERROR, "Invalid options: %s\n", (char *)data);
 		goto read_super_error;
 	}
 
+	/* Stopgap check to ensure that mount.ocfs2 mounted the volume */
+	if (!(mount_opt & OCFS2_MOUNT_HB_OK)) {
+		  mlog(ML_ERROR, "No heartbeat for device (%s)\n", sb->s_id);
+		  status = -EINVAL;
+		  goto read_super_error;
+	}
+
 	sb->s_magic = OCFS2_SUPER_MAGIC;
 	sb->s_op = &ocfs_sops;
 	sb->s_export_op = &ocfs_export_ops;
 	sb->s_flags |= MS_NOATIME;
 	sb->s_fs_info = NULL;
 
-	status = ocfs_mount_volume(sb);
+	status = ocfs_mount_volume(sb, mount_opt);
 	/* ocfs_mount_volume may set osb even on error so we want to
 	 * pull it off for proper cleanup. */
 	osb = OCFS2_SB(sb);
+	if (osb && osb->root_inode)
+		inode = igrab(osb->root_inode);
 
 	if (status < 0)
 		goto read_super_error;
@@ -346,19 +385,16 @@
 		goto read_super_error;
 	}
 
-	inode = osb->root_inode;
 	if (!inode) {
 		status = -EIO;
 		mlog_errno(status);
 		goto read_super_error;
 	}
 
-	root = d_alloc_root (inode);
+	root = d_alloc_root(inode);
 	if (!root) {
 		status = -ENOMEM;
 		mlog_errno(status);
-		iput (inode);
-		inode = NULL;
 		goto read_super_error;
 	}
 
@@ -377,15 +413,15 @@
 	return status;		
 
 read_super_error:
+	if (inode)
+		iput(inode);
+
 	if (osb) {
 		atomic_set(&osb->vol_state, VOLUME_DISABLED);
 		wake_up(&osb->osb_mount_event);
-		ocfs_dismount_volume (sb);
+		ocfs_dismount_volume(sb, 1);
 	}
 
-	if (inode)
-		iput (inode);
-
 	mlog_exit(status);
 	return status;
 }
@@ -407,11 +443,61 @@
 	.next           = NULL
 };
 
+static int ocfs2_parse_options(char *options, unsigned long *mount_opt)
+{
+	int status;
+	char *p;
+
+	mlog_entry("options: \"%s\"\n", options ? options : "(none)");
+
+	if (!options) {
+		status = 1;
+		goto bail;
+	}
+
+	while ((p = strsep(&options, ",")) != NULL) {
+		int token, option;
+		substring_t args[MAX_OPT_ARGS];
+
+		if (!*p)
+			continue;
+
+		token = match_token(p, tokens, args);
+		switch (token) {
+		case Opt_hbok:
+			*mount_opt |= OCFS2_MOUNT_HB_OK;
+			break;
+		case Opt_barrier:
+			if (match_int(&args[0], &option)) {
+				status = 0;
+				goto bail;
+			}
+			if (option)
+				*mount_opt |= OCFS2_MOUNT_BARRIER;
+			else
+				*mount_opt &= ~OCFS2_MOUNT_BARRIER;
+			break;
+		default:
+			mlog(ML_ERROR,
+			     "Unrecognized mount option \"%s\" "
+			     "or missing value\n", p);
+			status = 0;
+			goto bail;
+		}
+	}
+
+	status = 1;
+
+bail:
+	mlog_exit(status);
+	return status;
+}
+
 static int __init ocfs2_init(void)
 {
 	int status;
 
-	mlog_entry_void ();
+	mlog_entry_void();
 
 	ocfs2_print_version();
 
@@ -425,9 +511,9 @@
 		goto leave;
 	}
 	
-	spin_lock (&ocfs2_globals_lock);
+	spin_lock(&ocfs2_globals_lock);
 	osb_id = 0;
-	spin_unlock (&ocfs2_globals_lock);
+	spin_unlock(&ocfs2_globals_lock);
 
 	/* Initialize the proc interface */
 	ocfs_proc_init();
@@ -438,10 +524,10 @@
 		exit_ocfs2_extent_maps();
 	}
 
-	mlog_exit (status);
+	mlog_exit(status);
 
 	if (status >= 0) {
-		return register_filesystem (&ocfs_fs_type);
+		return register_filesystem(&ocfs_fs_type);
 	} else
 		return -1;
 }
@@ -449,18 +535,18 @@
 static void __exit ocfs2_exit(void)
 {
 
-	mlog_entry_void ();
+	mlog_entry_void();
 
 	ocfs_free_mem_caches();
 
 	/* Deinit the proc interface */
-	ocfs_proc_deinit ();
+	ocfs_proc_deinit();
 
-	unregister_filesystem (&ocfs_fs_type);
+	unregister_filesystem(&ocfs_fs_type);
 
 	exit_ocfs2_extent_maps();
 
-	mlog_exit_void ();
+	mlog_exit_void();
 }
 
 
@@ -468,14 +554,14 @@
  * ocfs_put_super()
  *
  */
-static void ocfs_put_super (struct super_block *sb)
+static void ocfs_put_super(struct super_block *sb)
 {
-	mlog_entry ("(0x%p)\n", sb);
+	mlog_entry("(0x%p)\n", sb);
 
 	ocfs_sync_blockdev(sb);
-	ocfs_dismount_volume (sb);
+	ocfs_dismount_volume(sb, 0);
 
-	mlog_exit_void ();
+	mlog_exit_void();
 }				/* ocfs_put_super */
 
 
@@ -493,7 +579,7 @@
 	struct buffer_head *bh = NULL;
 	struct inode *inode = NULL;
 
-	mlog_entry ("(%p, %p)\n", sb, buf);
+	mlog_entry("(%p, %p)\n", sb, buf);
 
 	osb = OCFS2_SB(sb);
 
@@ -534,9 +620,9 @@
 	status = 0;
 bail:
 	if (inode)
-		iput (inode);
+		iput(inode);
 
-	mlog_exit (status);
+	mlog_exit(status);
 
 	return status;
 }                               /* ocfs_statfs */
@@ -653,8 +739,8 @@
 		     hdr->major_version, hdr->minor_version);
 		status = -EINVAL;
 	}
-	if (memcmp (hdr->signature, OCFS1_VOLUME_SIGNATURE,
-		    strlen (OCFS1_VOLUME_SIGNATURE)) == 0) {
+	if (memcmp(hdr->signature, OCFS1_VOLUME_SIGNATURE,
+		   strlen(OCFS1_VOLUME_SIGNATURE)) == 0) {
 		mlog(ML_ERROR, "incompatible volume signature: %8s\n",
 		     hdr->signature);
 		status = -EINVAL;
@@ -742,14 +828,14 @@
  * ocfs_mount_volume()
  *
  */
-static int ocfs_mount_volume (struct super_block *sb)
+static int ocfs_mount_volume(struct super_block *sb, unsigned long mount_opt)
 {
 	int status, sector_size;
 	int unlock_super = 0;
 	ocfs_super *osb = NULL;
 	struct buffer_head *bh = NULL;
 
-	mlog_entry_void ();
+	mlog_entry_void();
 
 	/* probe for superblock */
 	status = ocfs2_sb_probe(sb, &bh, &sector_size);
@@ -768,6 +854,8 @@
 	sb->s_fs_info = osb;
 	osb->sb = sb;
 
+	osb->s_mount_opt = mount_opt;
+
 	/* Save off for ocfs_rw_direct */
 	osb->s_sectsize_bits = blksize_bits(sector_size);
 	if (!osb->s_sectsize_bits)
@@ -829,12 +917,12 @@
 	}
 
 	/* Add proc entry for this volume */
-	ocfs_proc_add_volume (osb);
+	ocfs_proc_add_volume(osb);
 
 	/* Read the publish sector for this node and cleanup dirent being */
 	/* modified when we crashed. */
 	mlog(0, "ocfs_check_volume...\n");
-	status = ocfs_check_volume (osb);
+	status = ocfs_check_volume(osb);
 	if (status < 0) {
 		mlog_errno(status);
 		goto leave;
@@ -861,7 +949,7 @@
 
 	if (bh != NULL)
 		brelse(bh);
-	mlog_exit (status);
+	mlog_exit(status);
 	return status;
 }				/* ocfs_mount_volume */
 
@@ -879,12 +967,12 @@
  * ocfs_dismount_volume()
  *
  */
-static void ocfs_dismount_volume (struct super_block *sb)
+static void ocfs_dismount_volume(struct super_block *sb, int mnt_err)
 {
 	int tmp;
 	ocfs_super *osb = NULL;
 
-	mlog_entry ("(0x%p)\n", sb);
+	mlog_entry("(0x%p)\n", sb);
 
 	OCFS_ASSERT(sb);
 	osb = OCFS2_SB(sb);
@@ -928,7 +1016,8 @@
 		if (tmp < 0)
 			mlog_errno(tmp);
 
-		ocfs2_put_slot(osb);
+		if (osb->slot_num != OCFS_INVALID_NODE_NUM)
+			ocfs2_put_slot(osb);
 	}
 
 	ocfs_release_system_inodes(osb);
@@ -941,7 +1030,8 @@
 
 	ocfs2_clear_hb_callbacks(osb);
 
-	ocfs2_stop_heartbeat(osb);
+	if (!mnt_err)
+		ocfs2_stop_heartbeat(osb);
 
 	atomic_set(&osb->vol_state, VOLUME_DISMOUNTED);
 
@@ -951,6 +1041,7 @@
 	ocfs_delete_osb(osb);
 	kfree(osb);
 	sb->s_dev = 0;
+	sb->s_fs_info = NULL;
 }				/* ocfs_dismount_volume */
 
 static int osb_setup_uuid(ocfs_super *osb, const unsigned char *uuid,
@@ -972,7 +1063,7 @@
 		ret = snprintf(ptr, 3, "%02X", uuid[i]);
 		if (ret != 2) /* drop super cleans up */
 			return -EINVAL;
-		/* then only advace past the last char */
+		/* then only advance past the last char */
 		ptr += 2;
 	}
 
@@ -992,7 +1083,7 @@
 	struct buffer_head *bitmap_bh = NULL;
 	ocfs_journal *journal;
 
-	mlog_entry_void ();
+	mlog_entry_void();
 
 	INIT_LIST_HEAD(&osb->osb_net_handlers);
 	init_waitqueue_head(&osb->recovery_event);
@@ -1023,14 +1114,14 @@
 	snprintf(osb->dev_str, sizeof(osb->dev_str), "%u,%u",
 		 MAJOR(osb->sb->s_dev), MINOR(osb->sb->s_dev));
 
-	init_MUTEX (&(osb->recovery_lock));
+	init_MUTEX(&osb->recovery_lock);
 
 	osb->disable_recovery = 0;
 	osb->recovery_thread_task = NULL;
 
-	init_waitqueue_head (&osb->checkpoint_event);
-	atomic_set (&osb->needs_checkpoint, 0);
-	atomic_set (&osb->clean_buffer_seq, 1);
+	init_waitqueue_head(&osb->checkpoint_event);
+	atomic_set(&osb->needs_checkpoint, 0);
+	atomic_set(&osb->clean_buffer_seq, 1);
 
 	osb->node_num = OCFS_INVALID_NODE_NUM;
 	osb->slot_num = OCFS_INVALID_NODE_NUM;
@@ -1038,14 +1129,17 @@
 	osb->local_alloc_state = OCFS2_LA_UNUSED;
 	osb->local_alloc_bh = NULL;
 
+	ocfs2_setup_hb_callbacks(osb);
+
 	init_waitqueue_head(&osb->osb_mount_event);
 	
-	osb->vol_label = kmalloc(64, GFP_KERNEL);
+	osb->vol_label = kmalloc(OCFS2_MAX_VOL_LABEL_LEN, GFP_KERNEL);
 	if (!osb->vol_label) {
 		mlog(ML_ERROR, "unable to alloc vol label\n");
 		status = -ENOMEM;
 		goto bail;
 	}
+
 	osb->uuid = kmalloc(OCFS2_VOL_UUID_LEN, GFP_KERNEL);
 	if (!osb->uuid) {
 		mlog(ML_ERROR, "unable to alloc uuid\n");
@@ -1124,11 +1218,19 @@
 	journal->j_state = OCFS_JOURNAL_FREE;
 
 	/* get some pseudo constants for clustersize bits */
-	osb->s_clustersize_bits = le32_to_cpu(di->id2.i_super.s_clustersize_bits);
+	osb->s_clustersize_bits =
+		le32_to_cpu(di->id2.i_super.s_clustersize_bits);
 	osb->s_clustersize = 1 << osb->s_clustersize_bits;
 	mlog(0, "clusterbits=%d\n", osb->s_clustersize_bits);
-	OCFS_ASSERT(osb->s_clustersize_bits);
 
+	if (osb->s_clustersize < OCFS2_MIN_CLUSTERSIZE ||
+	    osb->s_clustersize > OCFS2_MAX_CLUSTERSIZE) {
+		mlog(ML_ERROR, "Volume has invalid cluster size (%d)\n",
+		     osb->s_clustersize);
+		status = -EINVAL;
+		goto bail;
+	}
+
 	if (ocfs2_clusters_to_blocks(osb->sb, di->i_clusters - 1)
 	    > (u32)~0UL) {
 		mlog(ML_ERROR, "Volume might try to write to blocks beyond "
@@ -1148,14 +1250,15 @@
 	osb->vol_label[63] = '\0';
 	osb->root_blkno = le64_to_cpu(di->id2.i_super.s_root_blkno);
 	osb->system_dir_blkno = le64_to_cpu(di->id2.i_super.s_system_dir_blkno);
-	osb->first_cluster_group_blkno = le64_to_cpu(di->id2.i_super.s_first_cluster_group);
+	osb->first_cluster_group_blkno =
+		le64_to_cpu(di->id2.i_super.s_first_cluster_group);
 	osb->fs_generation = le32_to_cpu(di->i_fs_generation);
 	mlog(0, "vol_label: %s\n", osb->vol_label);
 	mlog(0, "uuid: %s\n", osb->uuid_str);
 	mlog(0, "root_blkno=%"MLFu64", system_dir_blkno=%"MLFu64"\n",
 	     osb->root_blkno, osb->system_dir_blkno);
 
-	atomic_set (&osb->vol_state, VOLUME_INIT);
+	atomic_set(&osb->vol_state, VOLUME_INIT);
 
 	/* load root, system_dir, and all global system inodes */
 	status = ocfs_init_global_system_inodes(osb);
@@ -1175,8 +1278,9 @@
 	}
 
 	if (inode->i_size >> osb->sb->s_blocksize_bits < OCFS2_MAX_NODES) {
-		mlog(ML_ERROR, "heartbeat area size incorrect: found=%llu, "
-		     "need=%u\n", inode->i_size,
+		mlog(ML_ERROR,
+		     "heartbeat area size incorrect: found=%llu, need=%u\n",
+		     inode->i_size,
 		     OCFS2_MAX_NODES << osb->sb->s_blocksize_bits);
 		status = -EINVAL;
 		goto bail;
@@ -1223,15 +1327,13 @@
 	if (osb_id < OCFS_MAX_OSB_ID)
 		osb_id++;
 	else {
-		spin_unlock (&ocfs2_globals_lock);
 		mlog(ML_ERROR, "Too many volumes mounted\n");
 		status = -ENOMEM;
-		goto bail;
 	}
-	spin_unlock (&ocfs2_globals_lock);
+	spin_unlock(&ocfs2_globals_lock);
 
 bail:
-	mlog_exit (status);
+	mlog_exit(status);
 	return status;
 }				/* ocfs_initialize_osb */
 
@@ -1246,7 +1348,7 @@
 {
 	int status = -EAGAIN;
 
-	mlog_entry_void ();
+	mlog_entry_void();
 
 	if (memcmp(di->i_signature, OCFS2_SUPER_BLOCK_SIGNATURE,
 		   strlen(OCFS2_SUPER_BLOCK_SIGNATURE)) == 0) {
@@ -1289,7 +1391,7 @@
 		}
 	}
 
-	mlog_exit (status);
+	mlog_exit(status);
 	return status;
 }				/* ocfs_verify_volume */
 
@@ -1297,7 +1399,7 @@
  * ocfs_check_volume()
  *
  */
-static int ocfs_check_volume (ocfs_super * osb)
+static int ocfs_check_volume(ocfs_super *osb)
 {
 	int status = 0;
 	int dirty;
@@ -1305,7 +1407,7 @@
 					   * recover
 					   * ourselves. */
 
-	mlog_entry_void ();
+	mlog_entry_void();
 
 	/* Init our journal object. */
 	status = ocfs_journal_init(osb->journal, &dirty);
@@ -1371,7 +1473,7 @@
 	if (local_alloc)
 		kfree(local_alloc);
 
-	mlog_exit (status);
+	mlog_exit(status);
 	return status;
 }				/* ocfs_check_volume */
 
@@ -1384,9 +1486,9 @@
  * It will remove the osb from the global list and also free up all the
  * initialized resources and fileobject.
  */
-static void ocfs_delete_osb (ocfs_super * osb)
+static void ocfs_delete_osb(ocfs_super *osb)
 {
-	mlog_entry_void ();
+	mlog_entry_void();
 
 	/* This function assumes that the caller has the main osb resource */
 
@@ -1402,9 +1504,9 @@
 	if (osb->local_alloc_copy)
 		kfree(osb->local_alloc_copy);
 	kfree(osb->uuid_str);
-	memset (osb, 0, sizeof (ocfs_super));
+	memset(osb, 0, sizeof(ocfs_super));
 
-	mlog_exit_void ();
+	mlog_exit_void();
 }				/* ocfs_delete_osb */