[Ocfs2-commits] jlbec commits r2269 - in branches/alloc_inode/fs/ocfs2: . cluster dlm

svn-commits at oss.oracle.com svn-commits at oss.oracle.com
Tue May 17 19:19:39 CDT 2005


Author: jlbec
Date: 2005-05-17 19:19:37 -0500 (Tue, 17 May 2005)
New Revision: 2269

Modified:
   branches/alloc_inode/fs/ocfs2/alloc.c
   branches/alloc_inode/fs/ocfs2/alloc.h
   branches/alloc_inode/fs/ocfs2/cluster/nodemanager.c
   branches/alloc_inode/fs/ocfs2/dlm/dlmast.c
   branches/alloc_inode/fs/ocfs2/dlm/dlmdebug.c
   branches/alloc_inode/fs/ocfs2/dlm/dlmdebug.h
   branches/alloc_inode/fs/ocfs2/dlm/dlmmaster.c
   branches/alloc_inode/fs/ocfs2/file.c
   branches/alloc_inode/fs/ocfs2/inode.c
   branches/alloc_inode/fs/ocfs2/journal.c
   branches/alloc_inode/fs/ocfs2/journal.h
   branches/alloc_inode/fs/ocfs2/ocfs.h
   branches/alloc_inode/fs/ocfs2/ocfs2_fs.h
   branches/alloc_inode/fs/ocfs2/super.c
Log:

o Merged 2255:2267 from trunk:
	- [2257] Big endian config of IP address.
	- [2259] Fix third node crash.
	- [2261] Kill the failing node on mastery.
	- [2264] Inform surviving nodes of other node asserts.
	- [2265] Per-node truncate log.



Modified: branches/alloc_inode/fs/ocfs2/alloc.c
===================================================================
--- branches/alloc_inode/fs/ocfs2/alloc.c	2005-05-17 23:57:46 UTC (rev 2268)
+++ branches/alloc_inode/fs/ocfs2/alloc.c	2005-05-18 00:19:37 UTC (rev 2269)
@@ -90,13 +90,6 @@
 				      struct buffer_head *old_last_eb,
 				      struct buffer_head **new_last_eb);
 
-static int ocfs_do_truncate(ocfs_super *osb, 
-			    unsigned int clusters_to_del,
-			    struct inode *inode,
-			    struct buffer_head *fe_bh,
-			    struct buffer_head *old_last_eb_bh,
-			    ocfs2_truncate_context *tc);
-
 static int ocfs_extent_contig(struct inode *inode, ocfs2_extent_rec *ext,
 			      u64 blkno)
 {
@@ -862,6 +855,449 @@
 	return status;
 }
 
+static inline int ocfs2_truncate_log_needs_flush(ocfs_super *osb)
+{
+	struct buffer_head *tl_bh = osb->osb_tl_bh;
+	ocfs2_dinode *di;
+	ocfs2_truncate_log *tl;
+
+	di = (ocfs2_dinode *) tl_bh->b_data;
+	tl = &di->id2.i_dealloc;
+
+	mlog_bug_on_msg(le16_to_cpu(tl->tl_used) > le16_to_cpu(tl->tl_count),
+			"slot %d, invalid truncate log parameters: used = "
+			"%u, count = %u\n", osb->slot_num,
+			le16_to_cpu(tl->tl_used), le16_to_cpu(tl->tl_count));
+	return le16_to_cpu(tl->tl_used) == le16_to_cpu(tl->tl_count);
+}
+
+static int ocfs2_truncate_log_append(ocfs_super *osb,
+				     ocfs_journal_handle *handle,
+				     u64 start_blk,
+				     unsigned int num_clusters)
+{
+	int status, index;
+	unsigned int start_cluster, tl_count;
+	struct inode *tl_inode = osb->osb_tl_inode;
+	struct buffer_head *tl_bh = osb->osb_tl_bh;
+	ocfs2_dinode *di;
+	ocfs2_truncate_log *tl;
+
+	mlog_entry();
+
+	BUG_ON(!down_trylock(&tl_inode->i_sem));
+
+	start_cluster = ocfs2_blocks_to_clusters(osb->sb, start_blk);
+
+	di = (ocfs2_dinode *) tl_bh->b_data;
+	tl = &di->id2.i_dealloc;
+	OCFS2_BUG_ON_INVALID_DINODE(di);
+	tl_count = le16_to_cpu(tl->tl_count);
+	mlog_bug_on_msg(tl_count > ocfs2_truncate_recs_per_inode(osb->sb) || 
+			tl_count == 0,
+			"Truncate record count on #%"MLFu64" invalid ("
+			"wanted %u, actual %u\n", OCFS2_I(tl_inode)->ip_blkno,
+			ocfs2_truncate_recs_per_inode(osb->sb),
+			le16_to_cpu(tl->tl_count));
+
+	/* Caller should have known to flush before calling us. */
+	index = le16_to_cpu(tl->tl_used);
+	if (index >= tl_count) {
+		status = -ENOSPC;
+		mlog_errno(status);
+		goto bail;
+	}
+
+	status = ocfs_journal_access(handle, tl_inode, tl_bh,
+				     OCFS_JOURNAL_ACCESS_WRITE);
+	if (status < 0) {
+		mlog_errno(status);
+		goto bail;
+	}
+
+	mlog(0, "Log truncate of %u clusters starting at cluster %u to "
+	     "%"MLFu64" (index = %d)\n", num_clusters, start_cluster,
+	     OCFS2_I(tl_inode)->ip_blkno, index);
+
+	/* TODO: Do we bother searching the truncate records for a
+	 * contiguous one and coalesce? */
+	tl->tl_recs[index].t_start = cpu_to_le32(start_cluster);
+	tl->tl_recs[index].t_clusters = cpu_to_le32(num_clusters);
+	tl->tl_used = cpu_to_le16(index + 1);
+
+	status = ocfs_journal_dirty(handle, tl_bh);
+	if (status < 0) {
+		mlog_errno(status);
+		goto bail;
+	}
+
+bail:
+	mlog_exit(status);
+	return status;
+}
+
+static int ocfs2_replay_truncate_records(ocfs_super *osb,
+					 ocfs_journal_handle *handle,
+					 struct inode *data_alloc_inode,
+					 struct buffer_head *data_alloc_bh)
+{
+	int status = 0;
+	int i;
+	unsigned int num_clusters;
+	u64 start_blk;
+	ocfs2_truncate_rec rec;
+	ocfs2_dinode *di;
+	ocfs2_truncate_log *tl;
+	struct inode *tl_inode = osb->osb_tl_inode;
+	struct buffer_head *tl_bh = osb->osb_tl_bh;
+
+	mlog_entry();
+
+	di = (ocfs2_dinode *) tl_bh->b_data;
+	tl = &di->id2.i_dealloc;
+	i = le16_to_cpu(tl->tl_used) - 1;
+	while (i >= 0) {
+		/* Caller has given us at least enough credits to
+		 * update the truncate log dinode */
+		status = ocfs_journal_access(handle, tl_inode, tl_bh,
+					     OCFS_JOURNAL_ACCESS_WRITE);
+		if (status < 0) {
+			mlog_errno(status);
+			goto bail;
+		}
+
+		tl->tl_used = cpu_to_le16(i);
+
+		status = ocfs_journal_dirty(handle, tl_bh);
+		if (status < 0) {
+			mlog_errno(status);
+			goto bail;
+		}
+
+		/* TODO: Perhaps we can calculate the bulk of the
+		 * credits up front rather than extending like
+		 * this. */
+		status = ocfs_extend_trans(handle,
+					   OCFS_TRUNCATE_LOG_FLUSH_ONE_REC);
+		if (status < 0) {
+			mlog_errno(status);
+			goto bail;
+		}
+
+		rec = tl->tl_recs[i];
+		start_blk = ocfs2_clusters_to_blocks(data_alloc_inode->i_sb,
+						    le32_to_cpu(rec.t_start));
+		num_clusters = le32_to_cpu(rec.t_clusters);
+
+		mlog(0, "free record %d, start = %u, clusters = %u\n", i,
+		     le32_to_cpu(rec.t_start), num_clusters);
+
+		status = ocfs_free_clusters(handle, data_alloc_inode,
+					    data_alloc_bh, start_blk,
+					    num_clusters);
+		if (status < 0) {
+			mlog_errno(status);
+			goto bail;
+		}
+
+		i--;
+	}
+
+bail:
+	mlog_exit(status);
+	return status;
+}
+
+/* Expects you to already be holding tl_inode->i_sem */
+static int ocfs2_flush_truncate_log(ocfs_super *osb)
+{
+	int status;
+	unsigned int num_to_flush;
+	ocfs_journal_handle *handle = NULL;
+	struct inode *tl_inode = osb->osb_tl_inode;
+	struct inode *data_alloc_inode = NULL;
+	struct buffer_head *tl_bh = osb->osb_tl_bh;
+	struct buffer_head *data_alloc_bh = NULL;
+	ocfs2_dinode *di;
+	ocfs2_truncate_log *tl;
+
+	mlog_entry();
+
+	BUG_ON(!down_trylock(&tl_inode->i_sem));
+
+	di = (ocfs2_dinode *) tl_bh->b_data;
+	tl = &di->id2.i_dealloc;
+	OCFS2_BUG_ON_INVALID_DINODE(di);
+
+	num_to_flush = le32_to_cpu(tl->tl_used);
+	mlog(0, "Flush %u records from truncate log #%"MLFu64"\n",
+	     num_to_flush, OCFS2_I(tl_inode)->ip_blkno);
+	if (!num_to_flush) {
+		status = 0;
+		goto bail;
+	}
+
+	handle = ocfs_alloc_handle(osb);
+	if (!handle) {
+		status = -ENOMEM;
+		mlog_errno(status);
+		goto bail;
+	}
+
+	data_alloc_inode = ocfs_get_system_file_inode(osb, GLOBAL_BITMAP_SYSTEM_INODE, -1);
+	if (!data_alloc_inode) {
+		status = -EINVAL;
+		mlog(ML_ERROR, "Could not get bitmap inode!\n");
+		goto bail;
+	}
+
+	ocfs_handle_add_inode(handle, data_alloc_inode);
+	status = ocfs2_meta_lock(data_alloc_inode, handle, &data_alloc_bh, 1);
+	if (status < 0) {
+		mlog_errno(status);
+		goto bail;
+	}
+
+	handle = ocfs_start_trans(osb, handle, OCFS_TRUNCATE_LOG_UPDATE);
+	if (!handle) {
+		status = -ENOMEM;
+		mlog_errno(status);
+		goto bail;
+	}
+
+	status = ocfs2_replay_truncate_records(osb, handle, data_alloc_inode,
+					       data_alloc_bh);
+	if (status < 0) {
+		mlog_errno(status);
+		goto bail;
+	}
+
+bail:
+	if (handle)
+		ocfs_commit_trans(handle);
+
+	if (data_alloc_inode)
+		iput(data_alloc_inode);
+
+	if (data_alloc_bh)
+		brelse(data_alloc_bh);
+
+	mlog_exit(status);
+	return status;
+}
+
+static int ocfs2_get_truncate_log_info(ocfs_super *osb,
+				       int slot_num,
+				       struct inode **tl_inode,
+				       struct buffer_head **tl_bh)
+{
+	int status;
+	struct inode *inode = NULL;
+	struct buffer_head *bh = NULL;
+
+	inode = ocfs_get_system_file_inode(osb,
+					   TRUNCATE_LOG_SYSTEM_INODE,
+					   slot_num);
+	if (!inode) {
+		status = -EINVAL;
+		mlog(ML_ERROR, "Could not get load truncate log inode!\n");
+		goto bail;
+	}
+
+	status = ocfs_read_block(osb, OCFS2_I(inode)->ip_blkno, &bh,
+				 OCFS_BH_CACHED, inode);
+	if (status < 0) {
+		iput(inode);
+		mlog_errno(status);
+		goto bail;
+	}
+
+	*tl_inode = inode;
+	*tl_bh    = bh;
+bail:
+	mlog_exit(status);
+	return status;
+}
+
+/* called during the 1st stage of node recovery. we stamp a clean
+ * truncate log and pass back a copy for processing later. if the
+ * truncate log does not require processing, a *tl_copy is set to
+ * NULL. */
+int ocfs2_begin_truncate_log_recovery(ocfs_super *osb,
+				      int slot_num,
+				      ocfs2_dinode **tl_copy)
+{
+	int status;
+	struct inode *tl_inode = NULL;
+	struct buffer_head *tl_bh = NULL;
+	ocfs2_dinode *di;
+	ocfs2_truncate_log *tl;
+
+	*tl_copy = NULL;
+
+	mlog(0, "recover truncate log from slot %d\n", slot_num);
+
+	status = ocfs2_get_truncate_log_info(osb, slot_num, &tl_inode, &tl_bh);
+	if (status < 0) {
+		mlog_errno(status);
+		goto bail;
+	}
+
+	di = (ocfs2_dinode *) tl_bh->b_data;
+	tl = &di->id2.i_dealloc;
+	OCFS2_BUG_ON_INVALID_DINODE(di);
+
+	if (le16_to_cpu(tl->tl_used)) {
+		mlog(0, "We'll have %u logs to recover\n",
+		     le16_to_cpu(tl->tl_used));
+
+		*tl_copy = kmalloc(tl_bh->b_size, GFP_KERNEL);
+		if (!(*tl_copy)) {
+			status = -ENOMEM;
+			mlog_errno(status);
+			goto bail;
+		}
+
+		/* Assuming the write-out below goes well, this copy
+		 * will be passed back to recovery for processing. */
+		memcpy(*tl_copy, tl_bh->b_data, tl_bh->b_size);
+
+		/* All we need to do to clear the truncate log is set
+		 * tl_used. */
+		tl->tl_used = 0;
+
+		status = ocfs_write_block(osb, tl_bh, tl_inode);
+		if (status < 0) {
+			mlog_errno(status);
+			goto bail;
+		}
+	}
+
+bail:
+	if (tl_inode)
+		iput(tl_inode);
+	if (tl_bh)
+		brelse(tl_bh);
+
+	if (status < 0 && (*tl_copy)) {
+		kfree(*tl_copy);
+		*tl_copy = NULL;
+	}
+
+	mlog_exit(status);
+	return status;
+}
+
+int ocfs2_complete_truncate_log_recovery(ocfs_super *osb,
+					 ocfs2_dinode *tl_copy)
+{
+	int status = 0;
+	int i;
+	unsigned int clusters, num_recs, start_cluster;
+	u64 start_blk;
+	ocfs_journal_handle *handle;
+	struct inode *tl_inode = osb->osb_tl_inode;
+	ocfs2_truncate_log *tl;
+
+	mlog_entry();
+
+	if (OCFS2_I(tl_inode)->ip_blkno == tl_copy->i_blkno) {
+		mlog(ML_ERROR, "Asked to recover my own truncate log!\n");
+		return -EINVAL;
+	}
+
+	tl = &tl_copy->id2.i_dealloc;
+	num_recs = le16_to_cpu(tl->tl_used);
+	mlog(0, "cleanup %u records from %"MLFu64"\n", num_recs,
+	     tl_copy->i_blkno);
+
+	down(&tl_inode->i_sem);
+	for(i = 0; i < num_recs; i++) {
+		if (ocfs2_truncate_log_needs_flush(osb)) {
+			status = ocfs2_flush_truncate_log(osb);
+			if (status < 0) {
+				mlog_errno(status);
+				goto bail_up;
+			}
+		}
+
+		handle = ocfs_start_trans(osb, NULL,
+					   OCFS_TRUNCATE_LOG_UPDATE);
+		if (!handle) {
+			status = -ENOMEM;
+			mlog_errno(status);
+			goto bail_up;
+		}
+
+		clusters = le32_to_cpu(tl->tl_recs[i].t_clusters);
+		start_cluster = le32_to_cpu(tl->tl_recs[i].t_start);
+		start_blk = ocfs2_clusters_to_blocks(osb->sb, start_cluster);
+
+		status = ocfs2_truncate_log_append(osb, handle,
+						   start_blk, clusters);
+		ocfs_commit_trans(handle);
+		if (status < 0) {
+			mlog_errno(status);
+			goto bail_up;
+		}
+	}
+
+bail_up:
+	up(&tl_inode->i_sem);
+
+	mlog_exit(status);
+	return status;
+}
+
+void ocfs2_truncate_log_shutdown(ocfs_super *osb)
+{
+	int status;
+	struct inode *tl_inode = osb->osb_tl_inode;
+
+	mlog_entry();
+
+	if (!tl_inode)
+		return;
+
+	down(&tl_inode->i_sem);
+
+	status = ocfs2_flush_truncate_log(osb);
+	if (status)
+		mlog_errno(status);
+
+	up(&tl_inode->i_sem);
+
+	brelse(osb->osb_tl_bh);
+	iput(osb->osb_tl_inode);
+
+	mlog_exit_void();
+}
+
+int ocfs2_truncate_log_init(ocfs_super *osb)
+{
+	int status;
+	struct inode *tl_inode = NULL;
+	struct buffer_head *tl_bh = NULL;
+
+	mlog_entry();
+
+	status = ocfs2_get_truncate_log_info(osb,
+					     osb->slot_num,
+					     &tl_inode,
+					     &tl_bh);
+	if (status < 0)
+		mlog_errno(status);
+
+	/* ocfs2_truncate_log_shutdown keys on the existence of
+	 * osb->osb_tl_inode so we don't set any of the osb variables
+	 * until we're sure all is well. */
+	osb->osb_tl_inode = tl_inode;
+	osb->osb_tl_bh    = tl_bh;
+
+	mlog_exit(status);
+	return status;
+}
+
 /* This function will figure out whether the currently last extent
  * block will be deleted, and if it will, what the new last extent
  * block will be so we can update his h_next_leaf_blk field, as well
@@ -946,10 +1382,10 @@
 			    struct inode *inode,
 			    struct buffer_head *fe_bh,
 			    struct buffer_head *old_last_eb_bh,
+			    ocfs_journal_handle *handle,
 			    ocfs2_truncate_context *tc)
 {
 	int status, i, depth;
-	ocfs_journal_handle *handle;
 	ocfs2_dinode *fe;
 	ocfs2_extent_block *eb;
 	ocfs2_extent_block *last_eb = NULL;
@@ -959,7 +1395,6 @@
 	u64 next_eb = 0;
 	u64 delete_blk = 0;
 
-	handle = tc->tc_handle;
 	fe = (ocfs2_dinode *) fe_bh->b_data;
 
 	status = ocfs_find_new_last_ext_blk(osb, 
@@ -1125,10 +1560,9 @@
 		depth--;
 	}
 
-	OCFS_ASSERT(delete_blk);
-	status = ocfs_free_clusters(handle, tc->tc_bitmap_inode,
-				    tc->tc_bitmap_bh, delete_blk, 
-				    clusters_to_del);
+	BUG_ON(!delete_blk);
+	status = ocfs2_truncate_log_append(osb, handle, delete_blk,
+					   clusters_to_del);
 	if (status < 0) {
 		mlog_errno(status);
 		goto bail;
@@ -1149,22 +1583,22 @@
  *
  * This will start, restart and commit your handle for you.
  *
- * WARNING: This will gobble the contexts reference to last_eb_bh
- *          *and* the journal handle.
+ * WARNING: This will kfree the truncate context
  */
 int ocfs_commit_truncate(ocfs_super *osb,
 			 struct inode *inode,
 			 struct buffer_head *fe_bh,
 			 ocfs2_truncate_context *tc)
 {
-	int status, i, credits;
+	int status, i, credits, tl_sem = 0;
 	unsigned int clusters_to_del, target_i_clusters;
 	u64 last_eb = 0;
 	ocfs2_dinode *fe;
 	ocfs2_extent_block *eb;
 	ocfs2_extent_list *el;
 	struct buffer_head *last_eb_bh;
-	ocfs_journal_handle *handle;
+	ocfs_journal_handle *handle = NULL;
+	struct inode *tl_inode = osb->osb_tl_inode;
 
 	mlog_entry_void();
 
@@ -1178,7 +1612,6 @@
 
 	last_eb_bh = tc->tc_last_eb_bh;
 	tc->tc_last_eb_bh = NULL;
-	handle = tc->tc_handle;
 
 	fe = (ocfs2_dinode *) fe_bh->b_data;
 
@@ -1231,49 +1664,73 @@
 				   + el->l_recs[i].e_cpos) - target_i_clusters;
 
 	mlog(0, "clusters_to_del = %u in this pass\n", clusters_to_del);
-	credits = ocfs_calc_tree_trunc_credits(osb->sb, clusters_to_del, 
-					       fe, el);
-	if (!ocfs_handle_started(handle)) {
-		handle = ocfs_start_trans(osb, handle, credits);
-		if (!handle) {
-			status = -ENOMEM;
-			mlog_errno(status);
-			goto bail;
-		}
-		/* Since we got our cluster lock from caller and we
-		 * don't add it to the handle: */
-		ocfs_set_inode_lock_trans(osb->journal, inode);
 
-		inode->i_ctime = inode->i_mtime = CURRENT_TIME;
-		status = ocfs_mark_inode_dirty(handle, inode, fe_bh);
-		if (status < 0)
-			mlog_errno(status);
-	} else {
-		status = ocfs_extend_trans(handle, credits);
+	down(&tl_inode->i_sem);
+	tl_sem = 1;
+	/* ocfs2_truncate_log_needs_flush guarantees us at least one
+	 * record is free for use. If there isn't any, we flush to get
+	 * an empty truncate log.  */
+	if (ocfs2_truncate_log_needs_flush(osb)) {
+		status = ocfs2_flush_truncate_log(osb);
 		if (status < 0) {
 			mlog_errno(status);
 			goto bail;
 		}
 	}
 
+	credits = ocfs_calc_tree_trunc_credits(osb->sb, clusters_to_del, 
+					       fe, el);
+	handle = ocfs_start_trans(osb, NULL, credits);
+	if (!handle) {
+		status = -ENOMEM;
+		mlog_errno(status);
+		goto bail;
+	}
+
+	/* None of the cluster locks for a truncate are added
+	 * to the handle, so we update the last transaction
+	 * info manually. */
+	if (tc->tc_ext_alloc_inode)
+		ocfs_set_inode_lock_trans(osb->journal,
+					  tc->tc_ext_alloc_inode);
+	ocfs_set_inode_lock_trans(osb->journal, inode);
+
+	inode->i_ctime = inode->i_mtime = CURRENT_TIME;
+	status = ocfs_mark_inode_dirty(handle, inode, fe_bh);
+	if (status < 0)
+		mlog_errno(status);
+
 	status = ocfs_do_truncate(osb, clusters_to_del, inode, fe_bh, 
-				  last_eb_bh, tc);
+				  last_eb_bh, handle, tc);
 	if (status < 0) {
 		mlog_errno(status);
 		goto bail;
 	}
 
+	up(&tl_inode->i_sem);
+	tl_sem = 0;
+
+	ocfs_commit_trans(handle);
+	handle = NULL;
+
 	OCFS_ASSERT(fe->i_clusters >= target_i_clusters);
 	if (fe->i_clusters > target_i_clusters)
 		goto start;
 bail:
 	up_write(&OCFS_I(inode)->ip_alloc_sem);
-	ocfs_commit_trans(handle);
-	tc->tc_handle = NULL;
 
+	if (tl_sem)
+		up(&tl_inode->i_sem);
+
+	if (handle)
+		ocfs_commit_trans(handle);
+
 	if (last_eb_bh)
 		brelse(last_eb_bh);
 
+	/* This will drop the ext_alloc cluster lock for us */
+	ocfs_free_truncate_context(tc);
+
 	mlog_exit(status);
 	return status;
 }
@@ -1291,15 +1748,12 @@
 {
 	int status, metadata_delete;
 	unsigned int new_i_clusters;
-	ocfs_journal_handle *handle = NULL;
 	ocfs2_dinode *fe;
 	ocfs2_extent_block *eb;
 	ocfs2_extent_list *el;
 	struct buffer_head *last_eb_bh = NULL;
 	struct inode *ext_alloc_inode = NULL;
 	struct buffer_head *ext_alloc_bh = NULL;
-	struct inode *data_alloc_inode = NULL;
-	struct buffer_head *data_alloc_bh = NULL;
 
 	mlog_entry_void();
 
@@ -1322,13 +1776,6 @@
 	}
 	memset(*tc, 0, sizeof(ocfs2_truncate_context));
 
-	handle = ocfs_alloc_handle(osb);
-	if (handle == NULL) {
-		status = -ENOMEM;
-		mlog_errno(status);
-		goto bail;
-	}
-
 	metadata_delete = 0;
 	if (fe->id2.i_list.l_tree_depth) {
 		/* If we have a tree, then the truncate may result in
@@ -1347,6 +1794,8 @@
 			metadata_delete = 1;
 	}
 
+	(*tc)->tc_last_eb_bh = last_eb_bh;
+
 	if (metadata_delete) {
 		mlog(0, "Will have to delete metadata for this trunc. "
 		     "locking allocator.\n");
@@ -1357,51 +1806,24 @@
 			goto bail;
 		}
 
-		ocfs_handle_add_inode(handle, ext_alloc_inode);
+		down(&ext_alloc_inode->i_sem);
+		(*tc)->tc_ext_alloc_inode = ext_alloc_inode;
+
 		status = ocfs2_meta_lock(ext_alloc_inode,
-					handle,
+					NULL,
 					&ext_alloc_bh,
 					1);
 		if (status < 0) {
 			mlog_errno (status);
 			goto bail;
 		}
+		(*tc)->tc_ext_alloc_bh = ext_alloc_bh;
+		(*tc)->tc_ext_alloc_locked = 1;
 	}
 
-	data_alloc_inode = ocfs_get_system_file_inode(osb, GLOBAL_BITMAP_SYSTEM_INODE, -1);
-	if (!data_alloc_inode) {
-		status = -EINVAL;
-		mlog(ML_ERROR, "Could not get bitmap inode!\n");
-		goto bail;
-	}
-
-	ocfs_handle_add_inode(handle, data_alloc_inode);
-	status = ocfs2_meta_lock(data_alloc_inode, handle, &data_alloc_bh, 1);
-	if (status < 0) {
-		mlog_errno (status);
-		goto bail;
-	}
-
-	(*tc)->tc_bitmap_inode    = data_alloc_inode;
-	(*tc)->tc_bitmap_bh       = data_alloc_bh;
-	(*tc)->tc_ext_alloc_inode = ext_alloc_inode;
-	(*tc)->tc_ext_alloc_bh    = ext_alloc_bh;
-	(*tc)->tc_last_eb_bh      = last_eb_bh;
-	(*tc)->tc_handle          = handle;
+	status = 0;
 bail:
 	if (status < 0) {
-		if (handle)
-			ocfs_commit_trans(handle);
-		if (last_eb_bh)
-			brelse(last_eb_bh);
-		if (ext_alloc_inode)
-			iput(ext_alloc_inode);
-		if (data_alloc_inode)
-			iput(data_alloc_inode);
-		if (ext_alloc_bh)
-			brelse(ext_alloc_bh);
-		if (data_alloc_bh)
-			brelse(data_alloc_bh);
 		if (*tc)
 			ocfs_free_truncate_context(*tc);
 		*tc = NULL;
@@ -1412,19 +1834,19 @@
 
 void ocfs_free_truncate_context(ocfs2_truncate_context *tc)
 {
-	if (tc->tc_bitmap_inode)
-		iput(tc->tc_bitmap_inode);
-	if (tc->tc_bitmap_bh)
-		brelse(tc->tc_bitmap_bh);
-	if (tc->tc_ext_alloc_inode)
+	if (tc->tc_ext_alloc_inode) {
+		if (tc->tc_ext_alloc_locked)
+			ocfs2_meta_unlock(tc->tc_ext_alloc_inode, 1);
+
+		up(&tc->tc_ext_alloc_inode->i_sem);
 		iput(tc->tc_ext_alloc_inode);
+	}
+
 	if (tc->tc_ext_alloc_bh)
 		brelse(tc->tc_ext_alloc_bh);
+
 	if (tc->tc_last_eb_bh)
 		brelse(tc->tc_last_eb_bh);
-	if (tc->tc_handle) {
-		OCFS_ASSERT(!ocfs_handle_started(tc->tc_handle));
-		ocfs_commit_trans(tc->tc_handle);
-	}
+
 	kfree(tc);
 }

Modified: branches/alloc_inode/fs/ocfs2/alloc.h
===================================================================
--- branches/alloc_inode/fs/ocfs2/alloc.h	2005-05-17 23:57:46 UTC (rev 2268)
+++ branches/alloc_inode/fs/ocfs2/alloc.h	2005-05-18 00:19:37 UTC (rev 2269)
@@ -51,14 +51,20 @@
 	return fe->id2.i_list.l_tree_depth + 2;
 }
 
+int ocfs2_truncate_log_init(ocfs_super *osb);
+void ocfs2_truncate_log_shutdown(ocfs_super *osb);
+int ocfs2_begin_truncate_log_recovery(ocfs_super *osb,
+				      int slot_num,
+				      ocfs2_dinode **tl_copy);
+int ocfs2_complete_truncate_log_recovery(ocfs_super *osb,
+					 ocfs2_dinode *tl_copy);
+
 typedef struct _ocfs2_truncate_context {
-	struct inode *tc_bitmap_inode;
-	struct buffer_head *tc_bitmap_bh;
 	struct inode *tc_ext_alloc_inode;
 	struct buffer_head *tc_ext_alloc_bh;
+	int tc_ext_alloc_locked; /* is it cluster locked? */
 	/* these get destroyed once it's passed to ocfs_commit_truncate. */
 	struct buffer_head *tc_last_eb_bh;
-	ocfs_journal_handle *tc_handle;
 } ocfs2_truncate_context;
 
 void ocfs_free_truncate_context(ocfs2_truncate_context *tc);

Modified: branches/alloc_inode/fs/ocfs2/cluster/nodemanager.c
===================================================================
--- branches/alloc_inode/fs/ocfs2/cluster/nodemanager.c	2005-05-17 23:57:46 UTC (rev 2268)
+++ branches/alloc_inode/fs/ocfs2/cluster/nodemanager.c	2005-05-18 00:19:37 UTC (rev 2269)
@@ -361,8 +361,8 @@
 	unsigned int octets[4];
 	u32 ipv4_addr = 0; /* network order */
 
-	ret = sscanf(page, "%3u.%3u.%3u.%3u", &octets[0], &octets[1],
-		     &octets[2], &octets[3]);
+	ret = sscanf(page, "%3u.%3u.%3u.%3u", &octets[3], &octets[2],
+		     &octets[1], &octets[0]);
 	if (ret != 4)
 		return -EINVAL;
 
@@ -371,6 +371,7 @@
 			return -ERANGE;
 		ipv4_addr |= octets[i] << (i * 8);
 	}
+	ipv4_addr = htonl(ipv4_addr);
 
 	ret = 0;
 	write_lock(&cluster->cl_nodes_lock);

Modified: branches/alloc_inode/fs/ocfs2/dlm/dlmast.c
===================================================================
--- branches/alloc_inode/fs/ocfs2/dlm/dlmast.c	2005-05-17 23:57:46 UTC (rev 2268)
+++ branches/alloc_inode/fs/ocfs2/dlm/dlmast.c	2005-05-18 00:19:37 UTC (rev 2269)
@@ -52,7 +52,44 @@
 
 static void dlm_update_lvb(dlm_ctxt *dlm, dlm_lock_resource *res,
 			   dlm_lock *lock);
+static int dlm_should_cancel_bast(dlm_ctxt *dlm, dlm_lock *lock);
 
+/* Should be called as an ast gets queued to see if the new
+ * lock level will obsolete a pending bast.
+ * For example, if dlm_thread queued a bast for an EX lock that 
+ * was blocking another EX, but before sending the bast the 
+ * lock owner downconverted to NL, the bast is now obsolete. 
+ * Only the ast should be sent. 
+ * This is needed because the lock and convert paths can queue
+ * asts out-of-band (not waiting for dlm_thread) in order to 
+ * allow for LKM_NOQUEUE to get immediate responses. */
+static int dlm_should_cancel_bast(dlm_ctxt *dlm, dlm_lock *lock)
+{
+	assert_spin_locked(&dlm->ast_lock);
+	assert_spin_locked(&lock->spinlock);
+
+	if (lock->ml.highest_blocked == LKM_IVMODE)
+		return 0;
+	DLM_ASSERT(lock->ml.highest_blocked != LKM_NLMODE);
+
+	if (lock->bast_pending && 
+	    list_empty(&lock->bast_list))
+		/* old bast already sent, ok */
+		return 0;
+	
+	if (lock->ml.type == LKM_EXMODE)
+		/* EX blocks anything left, any bast still valid */
+		return 0;
+	else if (lock->ml.type == LKM_NLMODE)
+		/* NL blocks nothing, no reason to send any bast, cancel it */
+		return 1;
+	else if (lock->ml.highest_blocked != LKM_EXMODE)
+		/* PR only blocks EX */
+		return 1;
+
+	return 0;
+}
+
 void __dlm_queue_ast(dlm_ctxt *dlm, dlm_lock *lock)
 {
 	mlog_entry_void();
@@ -68,6 +105,13 @@
 	/* putting lock on list, add a ref */
 	dlm_lock_get(lock);
 	spin_lock(&lock->spinlock);
+
+	/* check to see if this ast obsoletes the bast */
+	if (dlm_should_cancel_bast(dlm, lock)) {
+		lock->bast_pending = 0;
+		list_del_init(&lock->bast_list);
+		lock->ml.highest_blocked = LKM_IVMODE;
+	}
 	list_add_tail(&lock->ast_list, &dlm->pending_asts);
 	lock->ast_pending = 1;
 	spin_unlock(&lock->spinlock);

Modified: branches/alloc_inode/fs/ocfs2/dlm/dlmdebug.c
===================================================================
--- branches/alloc_inode/fs/ocfs2/dlm/dlmdebug.c	2005-05-17 23:57:46 UTC (rev 2268)
+++ branches/alloc_inode/fs/ocfs2/dlm/dlmdebug.c	2005-05-18 00:19:37 UTC (rev 2269)
@@ -41,12 +41,12 @@
 #include "dlmdebug.h"
 
 #include "dlmdomain.h"
+#include "dlmdebug.h"
 
 #define MLOG_MASK_PREFIX ML_DLM
 #include "cluster/masklog.h"
 
 static void dlm_dump_all_lock_resources(const char __user *data, int len);
-static void dlm_dump_lock_resources(dlm_ctxt *dlm);
 static void dlm_dump_purge_list(dlm_ctxt *dlm);
 static void dlm_dump_all_purge_lists(const char __user *data, int len);
 static void dlm_trigger_migration(const char __user *data, int len);
@@ -124,7 +124,7 @@
 	spin_unlock(&dlm_domain_lock);
 }
 
-static void dlm_dump_lock_resources(dlm_ctxt *dlm)
+void dlm_dump_lock_resources(dlm_ctxt *dlm)
 {
 	dlm_lock_resource *res;
 	dlm_lock *lock;

Modified: branches/alloc_inode/fs/ocfs2/dlm/dlmdebug.h
===================================================================
--- branches/alloc_inode/fs/ocfs2/dlm/dlmdebug.h	2005-05-17 23:57:46 UTC (rev 2268)
+++ branches/alloc_inode/fs/ocfs2/dlm/dlmdebug.h	2005-05-18 00:19:37 UTC (rev 2269)
@@ -26,5 +26,6 @@
 #define DLMDEBUG_H
 
 void dlm_create_dlm_debug_proc_entry(void);
+void dlm_dump_lock_resources(dlm_ctxt *dlm);
 
 #endif

Modified: branches/alloc_inode/fs/ocfs2/dlm/dlmmaster.c
===================================================================
--- branches/alloc_inode/fs/ocfs2/dlm/dlmmaster.c	2005-05-17 23:57:46 UTC (rev 2268)
+++ branches/alloc_inode/fs/ocfs2/dlm/dlmmaster.c	2005-05-18 00:19:37 UTC (rev 2269)
@@ -46,6 +46,7 @@
 
 #include "dlmapi.h"
 #include "dlmcommon.h"
+#include "dlmdebug.h"
 
 #define MLOG_MASK_PREFIX (ML_DLM|ML_DLM_MASTER)
 #include "cluster/masklog.h"
@@ -134,7 +135,8 @@
 
 
 static int dlm_wait_for_lock_mastery(dlm_ctxt *dlm, dlm_lock_resource *res, 
-				     dlm_master_list_entry *mle);
+				     dlm_master_list_entry *mle,
+				     int blocked);
 static int dlm_restart_lock_mastery(dlm_ctxt *dlm, dlm_lock_resource *res,
 				    dlm_master_list_entry *mle);
 static int dlm_add_migration_mle(dlm_ctxt *dlm, 
@@ -693,7 +695,7 @@
 	/* keep going until the response map includes all nodes */
 	ret = -EAGAIN;
 	while (ret == -EAGAIN) {
-		ret = dlm_wait_for_lock_mastery(dlm, res, mle);
+		ret = dlm_wait_for_lock_mastery(dlm, res, mle, blocked);
 		if (ret == -EINVAL) {
 			mlog(ML_ERROR, "some error occurred.  restarting "
 			     "lock mastery!\n");
@@ -719,7 +721,8 @@
 
 
 static int dlm_wait_for_lock_mastery(dlm_ctxt *dlm, dlm_lock_resource *res, 
-				     dlm_master_list_entry *mle)
+				     dlm_master_list_entry *mle,
+				     int blocked)
 {
 	u8 m;
 	int ret = 0, tmpret, bit;
@@ -765,7 +768,7 @@
 	} else {
 		sleep = 1;
 		/* have all nodes responded? */
-		if (voting_done) {
+		if (voting_done && !blocked) {
 			bit = find_next_bit(mle->maybe_map, NM_MAX_NODES, 0);
 			if (dlm->node_num <= bit) {
 				/* my node number is lowest.
@@ -1152,10 +1155,10 @@
 			ret = tmpret;
 			break;
 		} else if (r < 0) {
-			/* nothing returns this yet */
 			/* ok, something horribly messed.  kill thyself. */
 			mlog(ML_ERROR,"during assert master of %.*s to %u, "
 			     "got %d.\n", namelen, lockname, to, r);
+			dlm_dump_lock_resources(dlm);
 			BUG();
 		}
 	}
@@ -1240,7 +1243,7 @@
 					  "%u! (%.*s)\n",
 				       assert->node_idx, res->owner,
 				       namelen, name);
-				BUG();
+				goto kill;
 			}
 		} else if (mle->type != DLM_MLE_MIGRATION) {
 			if (res->owner != DLM_LOCK_RES_OWNER_UNKNOWN) {
@@ -1248,7 +1251,7 @@
 				     "node %u, but %u is the owner! "
 				     "(%.*s)\n", assert->node_idx, 
 				     res->owner, namelen, name);
-				BUG();
+				goto kill;
 			}
 			if (!(res->state & DLM_LOCK_RES_IN_PROGRESS)) {
 				mlog(ML_ERROR, "got assert from %u, but lock "
@@ -1256,7 +1259,7 @@
 				     "in-progress! (%.*s)\n", 
 				     assert->node_idx,
 				     namelen, name);
-				BUG();
+				goto kill;
 			}
 		} else /* mle->type == DLM_MLE_MIGRATION */ {
 			/* should only be getting an assert from new master */
@@ -1266,7 +1269,7 @@
 				     "was %u (%.*s)\n", 
 				     assert->node_idx, mle->new_master,
 				     mle->master, namelen, name);
-				BUG();
+				goto kill;
 			}
 
 		}
@@ -1303,6 +1306,16 @@
 done:
 	dlm_put(dlm);
 	return 0;
+
+kill:
+	/* kill the caller! */
+	spin_unlock(&res->spinlock);
+	spin_unlock(&dlm->spinlock);
+	mlog(ML_ERROR, "Bad message received from another node.  Dumping state "
+	     "and killing the other node now!  This node is OK and can continue.\n");
+	dlm_dump_lock_resources(dlm);
+	dlm_put(dlm);
+	return -EINVAL;
 }
 
 int dlm_dispatch_assert_master(dlm_ctxt *dlm, dlm_lock_resource *res, 

Modified: branches/alloc_inode/fs/ocfs2/file.c
===================================================================
--- branches/alloc_inode/fs/ocfs2/file.c	2005-05-17 23:57:46 UTC (rev 2268)
+++ branches/alloc_inode/fs/ocfs2/file.c	2005-05-18 00:19:37 UTC (rev 2269)
@@ -539,9 +539,6 @@
 	if (fe_bh)
 		brelse(fe_bh);
 
-	if (tc)
-		ocfs_free_truncate_context(tc);
-
 	mlog_exit (status);
 	return status;
 }  /* ocfs_truncate_file */

Modified: branches/alloc_inode/fs/ocfs2/inode.c
===================================================================
--- branches/alloc_inode/fs/ocfs2/inode.c	2005-05-17 23:57:46 UTC (rev 2268)
+++ branches/alloc_inode/fs/ocfs2/inode.c	2005-05-18 00:19:37 UTC (rev 2269)
@@ -482,9 +482,6 @@
 	if (handle)
 		ocfs_commit_trans(handle);
 
-	if (tc)
-		ocfs_free_truncate_context(tc);
-
 	mlog_exit (status);
 	return status;
 }

Modified: branches/alloc_inode/fs/ocfs2/journal.c
===================================================================
--- branches/alloc_inode/fs/ocfs2/journal.c	2005-05-17 23:57:46 UTC (rev 2268)
+++ branches/alloc_inode/fs/ocfs2/journal.c	2005-05-18 00:19:37 UTC (rev 2269)
@@ -846,7 +846,8 @@
 struct ocfs2_la_recovery_item {
 	struct list_head	lri_list;
 	int			lri_slot;
-	ocfs2_dinode		*lri_dinode;
+	ocfs2_dinode		*lri_la_dinode;
+	ocfs2_dinode		*lri_tl_dinode;
 };
 
 /* Does the second half of the recovery process. By this point, the
@@ -864,7 +865,7 @@
 	int ret;
 	ocfs_super *osb = data;
 	ocfs_journal *journal = osb->journal;
-	ocfs2_dinode *la_dinode;
+	ocfs2_dinode *la_dinode, *tl_dinode;
 	struct ocfs2_la_recovery_item *item;
 	struct list_head *p, *n;
 	LIST_HEAD(tmp_la_list);
@@ -883,7 +884,7 @@
 
 		mlog(0, "Complete recovery for slot %d\n", item->lri_slot);
 
-		la_dinode = item->lri_dinode;
+		la_dinode = item->lri_la_dinode;
 		if (la_dinode) {
 			mlog(0, "Clean up local alloc %"MLFu64"\n",
 			     la_dinode->i_blkno);
@@ -896,6 +897,19 @@
 			kfree(la_dinode);
 		}
 
+		tl_dinode = item->lri_tl_dinode;
+		if (tl_dinode) {
+			mlog(0, "Clean up truncate log %"MLFu64"\n",
+			     tl_dinode->i_blkno);
+
+			ret = ocfs2_complete_truncate_log_recovery(osb,
+								   tl_dinode);
+			if (ret < 0)
+				mlog_errno(ret);
+
+			kfree(tl_dinode);
+		}
+
 		ret = ocfs_recover_orphans(osb, item->lri_slot);
 		if (ret < 0)
 			mlog_errno(ret);
@@ -908,11 +922,13 @@
 	mlog_exit_void();
 }
 
-/* NOTE: This function always eats the reference to la_dinode, either
- * manually on error, or by passing it to ocfs2_complete_recovery */
+/* NOTE: This function always eats your references to la_dinode and
+ * tl_dinode, either manually on error, or by passing them to
+ * ocfs2_complete_recovery */
 static void ocfs2_queue_recovery_completion(ocfs_journal *journal,
 					    int slot_num,
-					    ocfs2_dinode *la_dinode)
+					    ocfs2_dinode *la_dinode,
+					    ocfs2_dinode *tl_dinode)
 {
 	struct ocfs2_la_recovery_item *item;
 
@@ -924,13 +940,17 @@
 		if (la_dinode)
 			kfree(la_dinode);
 
+		if (tl_dinode)
+			kfree(tl_dinode);
+
 		mlog_errno(-ENOMEM);
 		return;
 	}
 
 	INIT_LIST_HEAD(&item->lri_list);
-	item->lri_dinode = la_dinode;
+	item->lri_la_dinode = la_dinode;
 	item->lri_slot = slot_num;
+	item->lri_tl_dinode = tl_dinode;
 
 	spin_lock(&journal->j_lock);
 	list_add_tail(&item->lri_list, &journal->j_la_cleanups);
@@ -945,9 +965,12 @@
 	ocfs_journal *journal = osb->journal;
 
 	if (osb->dirty) {
+		/* No need to queue up our truncate_log as regular
+		 * cleanup will catch that. */
 		ocfs2_queue_recovery_completion(journal,
 						osb->slot_num,
-						osb->local_alloc_copy);
+						osb->local_alloc_copy,
+						NULL);
 		osb->local_alloc_copy = NULL;
 		osb->dirty = 0;
 	}
@@ -998,7 +1021,8 @@
 	/* We always run recovery on our own orphan dir - the dead
 	 * node(s) may have voted "no" on an inode delete earlier. A
 	 * revote is therefore required. */
-	ocfs2_queue_recovery_completion(osb->journal, osb->slot_num, NULL);
+	ocfs2_queue_recovery_completion(osb->journal, osb->slot_num, NULL,
+					NULL);
 
 bail:
 	down(&osb->recovery_lock);
@@ -1172,16 +1196,13 @@
  * Do the most important parts of node recovery:
 *  - Replay its journal
  *  - Stamp a clean local allocator file
+ *  - Stamp a clean truncate log
  *  - Mark the node clean
  *
  * If this function completes without error, a node in OCFS2 can be
  * said to have been safely recovered. As a result, failure during the
 * second part of a node's recovery process (local alloc recovery) is
  * far less concerning.
- * 
- * A copy of the nodes local alloc file is passed back so unused space
- * can be reclaimed once all nodes are recovered. This must be kfree'd
- * by the caller.
  */
 static int ocfs_recover_node(ocfs_super *osb,
 			     int node_num)
@@ -1190,11 +1211,12 @@
 	int slot_num;
 	ocfs2_slot_info *si = osb->slot_info;
 	ocfs2_dinode *la_copy = NULL;
+	ocfs2_dinode *tl_copy = NULL;
 
 	mlog_entry("(node_num=%d, osb->node_num = %d)\n",
 		       node_num, osb->node_num);
 
-	mlog(0, "ocfs2_recover_node: checking node %d\n", node_num);
+	mlog(0, "checking node %d\n", node_num);
 
 	/* Should not ever be called to recover ourselves -- in that
 	 * case we should've called ocfs_journal_load instead. */
@@ -1204,13 +1226,11 @@
 	slot_num = ocfs2_node_num_to_slot(si, node_num);
 	if (slot_num == OCFS_INVALID_NODE_NUM) {
 		status = 0;
-		mlog(0, "ocfs2_recover_node: no slot for this node, so "
-		     "no recovery required.\n");
+		mlog(0, "no slot for this node, so no recovery required.\n");
 		goto done;
 	}
 
-	mlog(0, "ocfs2_recover_node: node %d was using slot %d\n",
-	     node_num, slot_num);
+	mlog(0, "node %d was using slot %d\n", node_num, slot_num);
 
 	status = ocfs2_replay_journal(osb, node_num, slot_num);
 	if (status < 0) {
@@ -1225,15 +1245,23 @@
 		goto done;
 	}
 
-	/* This would be a strange but ultimately not so harmful place
-	 * to get an error... */
+	/* An error from begin_truncate_log_recovery is not
+	 * serious enough to warrant halting the rest of
+	 * recovery. */
+	status = ocfs2_begin_truncate_log_recovery(osb, slot_num, &tl_copy);
+	if (status < 0)
+		mlog_errno(status);
+
+	/* Likewise, this would be a strange but ultimately not so
+	 * harmful place to get an error... */
 	ocfs2_clear_slot(si, slot_num);
 	status = ocfs2_update_disk_slots(osb, si);
 	if (status < 0)
 		mlog_errno(status);
 
-	/* This will gobble the memory pointed to by la_copy */
-	ocfs2_queue_recovery_completion(osb->journal, slot_num, la_copy);
+	/* This will kfree the memory pointed to by la_copy and tl_copy */
+	ocfs2_queue_recovery_completion(osb->journal, slot_num, la_copy,
+					tl_copy);
 
 	status = 0;
 done:

Modified: branches/alloc_inode/fs/ocfs2/journal.h
===================================================================
--- branches/alloc_inode/fs/ocfs2/journal.h	2005-05-17 23:57:46 UTC (rev 2268)
+++ branches/alloc_inode/fs/ocfs2/journal.h	2005-05-18 00:19:37 UTC (rev 2269)
@@ -333,6 +333,10 @@
 /* dinode + group descriptor update. We don't relink on free yet. */
 #define OCFS_SUBALLOC_FREE  (2)
 
+#define OCFS_TRUNCATE_LOG_UPDATE OCFS_INODE_UPDATE_CREDITS
+#define OCFS_TRUNCATE_LOG_FLUSH_ONE_REC (OCFS_SUBALLOC_FREE 		      \
+					 + OCFS_TRUNCATE_LOG_UPDATE)
+
 /* data block for new dir/symlink, 2 for bitmap updates (bitmap fe +
  * bitmap block for the new bit) */
 #define OCFS_DIR_LINK_ADDITIONAL_CREDITS (1 + 2)
@@ -422,9 +426,9 @@
 					       ocfs2_dinode *fe,
 					       ocfs2_extent_list *last_el)
 {
- 	/* for file entry + all headers in this pass + update to next leaf */
+ 	/* for dinode + all headers in this pass + update to next leaf */
 	int credits = 1 + fe->id2.i_list.l_tree_depth + 1;
-	int bitmap_blocks, i;
+	int i;
 
 	i = last_el->l_next_free_rec - 1;
 	OCFS_ASSERT(i >= 0);
@@ -436,11 +440,9 @@
 	    && ((last_el->l_recs[i].e_clusters - clusters_to_del) == 0))
 		credits += 1 + fe->id2.i_list.l_tree_depth;
 
-	/* bitmap fe + group descriptor */
-	bitmap_blocks = OCFS_SUBALLOC_FREE;
+	/* update to the truncate log. */
+	credits += OCFS_TRUNCATE_LOG_UPDATE;
 
-	credits += bitmap_blocks;
-
 	return credits;
 }
 

Modified: branches/alloc_inode/fs/ocfs2/ocfs.h
===================================================================
--- branches/alloc_inode/fs/ocfs2/ocfs.h	2005-05-17 23:57:46 UTC (rev 2268)
+++ branches/alloc_inode/fs/ocfs2/ocfs.h	2005-05-18 00:19:37 UTC (rev 2269)
@@ -356,6 +356,10 @@
 	wait_queue_head_t		osb_okp_pending_wq;
 
 	wait_queue_head_t		osb_mount_event;
+
+	/* Truncate log info */
+	struct inode			*osb_tl_inode;
+	struct buffer_head		*osb_tl_bh;
 };
 
 #define NAMEI_RA_CHUNKS  2

Modified: branches/alloc_inode/fs/ocfs2/ocfs2_fs.h
===================================================================
--- branches/alloc_inode/fs/ocfs2/ocfs2_fs.h	2005-05-17 23:57:46 UTC (rev 2268)
+++ branches/alloc_inode/fs/ocfs2/ocfs2_fs.h	2005-05-18 00:19:37 UTC (rev 2269)
@@ -116,6 +116,7 @@
 #define OCFS2_JOURNAL_FL	(0x00000100)	/* Node journal */
 #define OCFS2_HEARTBEAT_FL	(0x00000200)	/* Heartbeat area */
 #define OCFS2_CHAIN_FL		(0x00000400)	/* Chain allocator */
+#define OCFS2_DEALLOC_FL	(0x00000800)	/* Truncate log */
 
 /*
  * Journal Flags (ocfs2_dinode.id1.journal1.i_flags)
@@ -163,6 +164,7 @@
 	INODE_ALLOC_SYSTEM_INODE,
 	JOURNAL_SYSTEM_INODE,
 	LOCAL_ALLOC_SYSTEM_INODE,
+	TRUNCATE_LOG_SYSTEM_INODE,
 	NUM_SYSTEM_INODES
 };
 
@@ -182,7 +184,8 @@
 	[EXTENT_ALLOC_SYSTEM_INODE]		= { "extent_alloc:%04d", OCFS2_BITMAP_FL | OCFS2_CHAIN_FL, S_IFREG | 0644 },
 	[INODE_ALLOC_SYSTEM_INODE]		= { "inode_alloc:%04d", OCFS2_BITMAP_FL | OCFS2_CHAIN_FL, S_IFREG | 0644 },
 	[JOURNAL_SYSTEM_INODE]			= { "journal:%04d", OCFS2_JOURNAL_FL, S_IFREG | 0644 },
-	[LOCAL_ALLOC_SYSTEM_INODE]		= { "local_alloc:%04d", OCFS2_BITMAP_FL | OCFS2_LOCAL_ALLOC_FL, S_IFREG | 0644 }
+	[LOCAL_ALLOC_SYSTEM_INODE]		= { "local_alloc:%04d", OCFS2_BITMAP_FL | OCFS2_LOCAL_ALLOC_FL, S_IFREG | 0644 },
+	[TRUNCATE_LOG_SYSTEM_INODE]		= { "truncate_log:%04d", OCFS2_DEALLOC_FL, S_IFREG | 0644 }
 };
 
 /* Parameter passed from mount.ocfs2 to module */
@@ -253,6 +256,11 @@
 	__u64 c_blkno;	/* Physical disk offset (blocks) of 1st group */
 } ocfs2_chain_rec;
 
+typedef struct _ocfs2_truncate_rec {
+	__u32 t_start;		/* 1st cluster in this log */
+	__u32 t_clusters;	/* Number of total clusters covered */
+} ocfs2_truncate_rec;
+
 /*
  * On disk extent list for OCFS2 (node in the tree).  Note that this
  * is contained inside ocfs2_dinode or ocfs2_extent_block, so the
@@ -287,6 +295,18 @@
 } ocfs2_chain_list;
 
 /*
+ * On disk deallocation log for OCFS2.  Note that this is
+ * contained inside ocfs2_dinode, so the offsets are relative to
+ * ocfs2_dinode.id2.i_dealloc.
+ */
+typedef struct _ocfs2_truncate_log {
+/*00*/	__u16 tl_count;			/* Total records in this log */
+	__u16 tl_used;			/* Number of records in use */
+	__u32 tl_reserved1;
+/*08*/	ocfs2_truncate_rec tl_recs[0];	/* Truncate records */
+} ocfs2_truncate_log;
+
+/*
  * On disk extent block (indirect block) for OCFS2
  */
 typedef struct _ocfs2_extent_block
@@ -403,11 +423,12 @@
 		} journal1;
 	} id1;				/* Inode type dependent 1 */
 /*C0*/	union {
-		ocfs2_super_block i_super;
-		ocfs2_local_alloc i_lab;
-		ocfs2_chain_list  i_chain;
-		ocfs2_extent_list i_list;
-		__u8              i_symlink[0];
+		ocfs2_super_block  i_super;
+		ocfs2_local_alloc  i_lab;
+		ocfs2_chain_list   i_chain;
+		ocfs2_extent_list  i_list;
+		ocfs2_truncate_log i_dealloc;
+		__u8               i_symlink[0];
 	} id2;
 /* Actual on-disk size is one block */
 } ocfs2_dinode;
@@ -503,6 +524,16 @@
 
 	return size;
 }
+
+static inline int ocfs2_truncate_recs_per_inode(struct super_block *sb)
+{
+	int size;
+
+	size = sb->s_blocksize -
+		offsetof(struct _ocfs2_dinode, id2.i_dealloc.tl_recs);
+
+	return size / sizeof(struct _ocfs2_truncate_rec);
+}
 #else
 static inline int ocfs2_fast_symlink_chars(int blocksize)
 {
@@ -558,6 +589,16 @@
 
 	return size;
 }
+
+static inline int ocfs2_truncate_recs_per_inode(int blocksize)
+{
+	int size;
+
+	size = blocksize -
+		offsetof(struct _ocfs2_dinode, id2.i_dealloc.tl_recs);
+
+	return size / sizeof(struct _ocfs2_truncate_rec);
+}
 #endif  /* __KERNEL__ */
 
 

Modified: branches/alloc_inode/fs/ocfs2/super.c
===================================================================
--- branches/alloc_inode/fs/ocfs2/super.c	2005-05-17 23:57:46 UTC (rev 2268)
+++ branches/alloc_inode/fs/ocfs2/super.c	2005-05-18 00:19:37 UTC (rev 2269)
@@ -840,6 +840,12 @@
 		goto leave;
 	}
 
+	status = ocfs2_truncate_log_init(osb);
+	if (status < 0) {
+		mlog_errno(status);
+		goto leave;
+	}
+
 	/* This should be sent *after* we recovered our journal as it
 	 * will cause other nodes to unmark us as needing
 	 * recovery. However, we need to send it *before* dropping the
@@ -888,6 +894,8 @@
 
 	ocfs_shutdown_local_alloc(osb);
 
+	ocfs2_truncate_log_shutdown(osb);
+
 	/* disable any new recovery threads and wait for any currently
 	 * running ones to exit. Do this before setting the vol_state. */
 	down(&osb->recovery_lock);



More information about the Ocfs2-commits mailing list