[Ocfs2-commits] mfasheh commits r2265 - trunk/fs/ocfs2
svn-commits at oss.oracle.com
svn-commits at oss.oracle.com
Tue May 17 17:00:37 CDT 2005
Author: mfasheh
Signed-off-by: jlbec
Date: 2005-05-17 17:00:36 -0500 (Tue, 17 May 2005)
New Revision: 2265
Modified:
trunk/fs/ocfs2/alloc.c
trunk/fs/ocfs2/alloc.h
trunk/fs/ocfs2/file.c
trunk/fs/ocfs2/inode.c
trunk/fs/ocfs2/journal.c
trunk/fs/ocfs2/journal.h
trunk/fs/ocfs2/ocfs.h
trunk/fs/ocfs2/ocfs2_fs.h
trunk/fs/ocfs2/super.c
Log:
* support a per-node truncate log; this speeds up multi-node truncate
performance, making it roughly equivalent to the single-node case in time.
Signed-off-by: jlbec
Modified: trunk/fs/ocfs2/alloc.c
===================================================================
--- trunk/fs/ocfs2/alloc.c 2005-05-17 21:57:00 UTC (rev 2264)
+++ trunk/fs/ocfs2/alloc.c 2005-05-17 22:00:36 UTC (rev 2265)
@@ -90,13 +90,6 @@
struct buffer_head *old_last_eb,
struct buffer_head **new_last_eb);
-static int ocfs_do_truncate(ocfs_super *osb,
- unsigned int clusters_to_del,
- struct inode *inode,
- struct buffer_head *fe_bh,
- struct buffer_head *old_last_eb_bh,
- ocfs2_truncate_context *tc);
-
static int ocfs_extent_contig(struct inode *inode, ocfs2_extent_rec *ext,
u64 blkno)
{
@@ -862,6 +855,449 @@
return status;
}
+static inline int ocfs2_truncate_log_needs_flush(ocfs_super *osb)
+{
+ struct buffer_head *tl_bh = osb->osb_tl_bh;
+ ocfs2_dinode *di;
+ ocfs2_truncate_log *tl;
+
+ di = (ocfs2_dinode *) tl_bh->b_data;
+ tl = &di->id2.i_dealloc;
+
+ mlog_bug_on_msg(le16_to_cpu(tl->tl_used) > le16_to_cpu(tl->tl_count),
+ "slot %d, invalid truncate log parameters: used = "
+ "%u, count = %u\n", osb->slot_num,
+ le16_to_cpu(tl->tl_used), le16_to_cpu(tl->tl_count));
+ return le16_to_cpu(tl->tl_used) == le16_to_cpu(tl->tl_count);
+}
+
+static int ocfs2_truncate_log_append(ocfs_super *osb,
+ ocfs_journal_handle *handle,
+ u64 start_blk,
+ unsigned int num_clusters)
+{
+ int status, index;
+ unsigned int start_cluster, tl_count;
+ struct inode *tl_inode = osb->osb_tl_inode;
+ struct buffer_head *tl_bh = osb->osb_tl_bh;
+ ocfs2_dinode *di;
+ ocfs2_truncate_log *tl;
+
+ mlog_entry();
+
+ BUG_ON(!down_trylock(&tl_inode->i_sem));
+
+ start_cluster = ocfs2_blocks_to_clusters(osb->sb, start_blk);
+
+ di = (ocfs2_dinode *) tl_bh->b_data;
+ tl = &di->id2.i_dealloc;
+ OCFS2_BUG_ON_INVALID_DINODE(di);
+ tl_count = le16_to_cpu(tl->tl_count);
+ mlog_bug_on_msg(tl_count > ocfs2_truncate_recs_per_inode(osb->sb) ||
+ tl_count == 0,
+ "Truncate record count on #%"MLFu64" invalid ("
+ "wanted %u, actual %u\n", OCFS_I(tl_inode)->ip_blkno,
+ ocfs2_truncate_recs_per_inode(osb->sb),
+ le16_to_cpu(tl->tl_count));
+
+ /* Caller should have known to flush before calling us. */
+ index = le16_to_cpu(tl->tl_used);
+ if (index >= tl_count) {
+ status = -ENOSPC;
+ mlog_errno(status);
+ goto bail;
+ }
+
+ status = ocfs_journal_access(handle, tl_inode, tl_bh,
+ OCFS_JOURNAL_ACCESS_WRITE);
+ if (status < 0) {
+ mlog_errno(status);
+ goto bail;
+ }
+
+ mlog(0, "Log truncate of %u clusters starting at cluster %u to "
+ "%"MLFu64" (index = %d)\n", num_clusters, start_cluster,
+ OCFS_I(tl_inode)->ip_blkno, index);
+
+ /* TODO: Do we bother searching the truncate records for a
+ * contiguous one and coalesce? */
+ tl->tl_recs[index].t_start = cpu_to_le32(start_cluster);
+ tl->tl_recs[index].t_clusters = cpu_to_le32(num_clusters);
+ tl->tl_used = cpu_to_le16(index + 1);
+
+ status = ocfs_journal_dirty(handle, tl_bh);
+ if (status < 0) {
+ mlog_errno(status);
+ goto bail;
+ }
+
+bail:
+ mlog_exit(status);
+ return status;
+}
+
+/* Free every record in the truncate log back to the global bitmap
+ * (data_alloc_inode), newest first.  tl_used is shrunk and journaled
+ * *before* each free, so replaying after a crash part-way through
+ * never frees the same clusters twice. */
+static int ocfs2_replay_truncate_records(ocfs_super *osb,
+ ocfs_journal_handle *handle,
+ struct inode *data_alloc_inode,
+ struct buffer_head *data_alloc_bh)
+{
+ int status = 0;
+ int i;
+ unsigned int num_clusters;
+ u64 start_blk;
+ ocfs2_truncate_rec rec;
+ ocfs2_dinode *di;
+ ocfs2_truncate_log *tl;
+ struct inode *tl_inode = osb->osb_tl_inode;
+ struct buffer_head *tl_bh = osb->osb_tl_bh;
+
+ mlog_entry();
+
+ di = (ocfs2_dinode *) tl_bh->b_data;
+ tl = &di->id2.i_dealloc;
+ /* Walk from the last used record down to index 0. */
+ i = le16_to_cpu(tl->tl_used) - 1;
+ while (i >= 0) {
+ /* Caller has given us at least enough credits to
+ * update the truncate log dinode */
+ status = ocfs_journal_access(handle, tl_inode, tl_bh,
+ OCFS_JOURNAL_ACCESS_WRITE);
+ if (status < 0) {
+ mlog_errno(status);
+ goto bail;
+ }
+
+ /* Drop the record from the log first; see header comment. */
+ tl->tl_used = cpu_to_le16(i);
+
+ status = ocfs_journal_dirty(handle, tl_bh);
+ if (status < 0) {
+ mlog_errno(status);
+ goto bail;
+ }
+
+ /* TODO: Perhaps we can calculate the bulk of the
+ * credits up front rather than extending like
+ * this. */
+ status = ocfs_extend_trans(handle,
+ OCFS_TRUNCATE_LOG_FLUSH_ONE_REC);
+ if (status < 0) {
+ mlog_errno(status);
+ goto bail;
+ }
+
+ rec = tl->tl_recs[i];
+ start_blk = ocfs2_clusters_to_blocks(data_alloc_inode->i_sb,
+ le32_to_cpu(rec.t_start));
+ num_clusters = le32_to_cpu(rec.t_clusters);
+
+ mlog(0, "free record %d, start = %u, clusters = %u\n", i,
+ le32_to_cpu(rec.t_start), num_clusters);
+
+ status = ocfs_free_clusters(handle, data_alloc_inode,
+ data_alloc_bh, start_blk,
+ num_clusters);
+ if (status < 0) {
+ mlog_errno(status);
+ goto bail;
+ }
+
+ i--;
+ }
+
+bail:
+ mlog_exit(status);
+ return status;
+}
+
+/* Expects you to already be holding tl_inode->i_sem */
+static int ocfs2_flush_truncate_log(ocfs_super *osb)
+{
+ int status;
+ unsigned int num_to_flush;
+ ocfs_journal_handle *handle = NULL;
+ struct inode *tl_inode = osb->osb_tl_inode;
+ struct inode *data_alloc_inode = NULL;
+ struct buffer_head *tl_bh = osb->osb_tl_bh;
+ struct buffer_head *data_alloc_bh = NULL;
+ ocfs2_dinode *di;
+ ocfs2_truncate_log *tl;
+
+ mlog_entry();
+
+ BUG_ON(!down_trylock(&tl_inode->i_sem));
+
+ di = (ocfs2_dinode *) tl_bh->b_data;
+ tl = &di->id2.i_dealloc;
+ OCFS2_BUG_ON_INVALID_DINODE(di);
+
+ num_to_flush = le32_to_cpu(tl->tl_used);
+ mlog(0, "Flush %u records from truncate log #%"MLFu64"\n",
+ num_to_flush, OCFS_I(tl_inode)->ip_blkno);
+ if (!num_to_flush) {
+ status = 0;
+ goto bail;
+ }
+
+ handle = ocfs_alloc_handle(osb);
+ if (!handle) {
+ status = -ENOMEM;
+ mlog_errno(status);
+ goto bail;
+ }
+
+ data_alloc_inode = ocfs_get_system_file_inode(osb, GLOBAL_BITMAP_SYSTEM_INODE, -1);
+ if (!data_alloc_inode) {
+ status = -EINVAL;
+ mlog(ML_ERROR, "Could not get bitmap inode!\n");
+ goto bail;
+ }
+
+ ocfs_handle_add_inode(handle, data_alloc_inode);
+ status = ocfs2_meta_lock(data_alloc_inode, handle, &data_alloc_bh, 1);
+ if (status < 0) {
+ mlog_errno(status);
+ goto bail;
+ }
+
+ handle = ocfs_start_trans(osb, handle, OCFS_TRUNCATE_LOG_UPDATE);
+ if (!handle) {
+ status = -ENOMEM;
+ mlog_errno(status);
+ goto bail;
+ }
+
+ status = ocfs2_replay_truncate_records(osb, handle, data_alloc_inode,
+ data_alloc_bh);
+ if (status < 0) {
+ mlog_errno(status);
+ goto bail;
+ }
+
+bail:
+ if (handle)
+ ocfs_commit_trans(handle);
+
+ if (data_alloc_inode)
+ iput(data_alloc_inode);
+
+ if (data_alloc_bh)
+ brelse(data_alloc_bh);
+
+ mlog_exit(status);
+ return status;
+}
+
+static int ocfs2_get_truncate_log_info(ocfs_super *osb,
+ int slot_num,
+ struct inode **tl_inode,
+ struct buffer_head **tl_bh)
+{
+ int status;
+ struct inode *inode = NULL;
+ struct buffer_head *bh = NULL;
+
+ inode = ocfs_get_system_file_inode(osb,
+ TRUNCATE_LOG_SYSTEM_INODE,
+ slot_num);
+ if (!inode) {
+ status = -EINVAL;
+ mlog(ML_ERROR, "Could not get load truncate log inode!\n");
+ goto bail;
+ }
+
+ status = ocfs_read_block(osb, OCFS_I(inode)->ip_blkno, &bh,
+ OCFS_BH_CACHED, inode);
+ if (status < 0) {
+ iput(inode);
+ mlog_errno(status);
+ goto bail;
+ }
+
+ *tl_inode = inode;
+ *tl_bh = bh;
+bail:
+ mlog_exit(status);
+ return status;
+}
+
+/* called during the 1st stage of node recovery. we stamp a clean
+ * truncate log and pass back a copy for processing later. if the
+ * truncate log does not require processing, a *tl_copy is set to
+ * NULL. */
+int ocfs2_begin_truncate_log_recovery(ocfs_super *osb,
+ int slot_num,
+ ocfs2_dinode **tl_copy)
+{
+ int status;
+ struct inode *tl_inode = NULL;
+ struct buffer_head *tl_bh = NULL;
+ ocfs2_dinode *di;
+ ocfs2_truncate_log *tl;
+
+ *tl_copy = NULL;
+
+ mlog(0, "recover truncate log from slot %d\n", slot_num);
+
+ status = ocfs2_get_truncate_log_info(osb, slot_num, &tl_inode, &tl_bh);
+ if (status < 0) {
+ mlog_errno(status);
+ goto bail;
+ }
+
+ di = (ocfs2_dinode *) tl_bh->b_data;
+ tl = &di->id2.i_dealloc;
+ OCFS2_BUG_ON_INVALID_DINODE(di);
+
+ if (le16_to_cpu(tl->tl_used)) {
+ mlog(0, "We'll have %u logs to recover\n",
+ le16_to_cpu(tl->tl_used));
+
+ *tl_copy = kmalloc(tl_bh->b_size, GFP_KERNEL);
+ if (!(*tl_copy)) {
+ status = -ENOMEM;
+ mlog_errno(status);
+ goto bail;
+ }
+
+ /* Assuming the write-out below goes well, this copy
+ * will be passed back to recovery for processing. */
+ memcpy(*tl_copy, tl_bh->b_data, tl_bh->b_size);
+
+ /* All we need to do to clear the truncate log is set
+ * tl_used. */
+ tl->tl_used = 0;
+
+ status = ocfs_write_block(osb, tl_bh, tl_inode);
+ if (status < 0) {
+ mlog_errno(status);
+ goto bail;
+ }
+ }
+
+bail:
+ if (tl_inode)
+ iput(tl_inode);
+ if (tl_bh)
+ brelse(tl_bh);
+
+ if (status < 0 && (*tl_copy)) {
+ kfree(*tl_copy);
+ *tl_copy = NULL;
+ }
+
+ mlog_exit(status);
+ return status;
+}
+
+int ocfs2_complete_truncate_log_recovery(ocfs_super *osb,
+ ocfs2_dinode *tl_copy)
+{
+ int status = 0;
+ int i;
+ unsigned int clusters, num_recs, start_cluster;
+ u64 start_blk;
+ ocfs_journal_handle *handle;
+ struct inode *tl_inode = osb->osb_tl_inode;
+ ocfs2_truncate_log *tl;
+
+ mlog_entry();
+
+ if (OCFS_I(tl_inode)->ip_blkno == tl_copy->i_blkno) {
+ mlog(ML_ERROR, "Asked to recover my own truncate log!\n");
+ return -EINVAL;
+ }
+
+ tl = &tl_copy->id2.i_dealloc;
+ num_recs = le16_to_cpu(tl->tl_used);
+ mlog(0, "cleanup %u records from %"MLFu64"\n", num_recs,
+ tl_copy->i_blkno);
+
+ down(&tl_inode->i_sem);
+ for(i = 0; i < num_recs; i++) {
+ if (ocfs2_truncate_log_needs_flush(osb)) {
+ status = ocfs2_flush_truncate_log(osb);
+ if (status < 0) {
+ mlog_errno(status);
+ goto bail_up;
+ }
+ }
+
+ handle = ocfs_start_trans(osb, NULL,
+ OCFS_TRUNCATE_LOG_UPDATE);
+ if (!handle) {
+ status = -ENOMEM;
+ mlog_errno(status);
+ goto bail_up;
+ }
+
+ clusters = le32_to_cpu(tl->tl_recs[i].t_clusters);
+ start_cluster = le32_to_cpu(tl->tl_recs[i].t_start);
+ start_blk = ocfs2_clusters_to_blocks(osb->sb, start_cluster);
+
+ status = ocfs2_truncate_log_append(osb, handle,
+ start_blk, clusters);
+ ocfs_commit_trans(handle);
+ if (status < 0) {
+ mlog_errno(status);
+ goto bail_up;
+ }
+ }
+
+bail_up:
+ up(&tl_inode->i_sem);
+
+ mlog_exit(status);
+ return status;
+}
+
+void ocfs2_truncate_log_shutdown(ocfs_super *osb)
+{
+ int status;
+ struct inode *tl_inode = osb->osb_tl_inode;
+
+ mlog_entry();
+
+ if (!tl_inode)
+ return;
+
+ down(&tl_inode->i_sem);
+
+ status = ocfs2_flush_truncate_log(osb);
+ if (status)
+ mlog_errno(status);
+
+ up(&tl_inode->i_sem);
+
+ brelse(osb->osb_tl_bh);
+ iput(osb->osb_tl_inode);
+
+ mlog_exit_void();
+}
+
+int ocfs2_truncate_log_init(ocfs_super *osb)
+{
+ int status;
+ struct inode *tl_inode = NULL;
+ struct buffer_head *tl_bh = NULL;
+
+ mlog_entry();
+
+ status = ocfs2_get_truncate_log_info(osb,
+ osb->slot_num,
+ &tl_inode,
+ &tl_bh);
+ if (status < 0)
+ mlog_errno(status);
+
+ /* ocfs2_truncate_log_shutdown keys on the existence of
+ * osb->osb_tl_inode so we don't set any of the osb variables
+ * until we're sure all is well. */
+ osb->osb_tl_inode = tl_inode;
+ osb->osb_tl_bh = tl_bh;
+
+ mlog_exit(status);
+ return status;
+}
+
/* This function will figure out whether the currently last extent
* block will be deleted, and if it will, what the new last extent
* block will be so we can update his h_next_leaf_blk field, as well
@@ -946,10 +1382,10 @@
struct inode *inode,
struct buffer_head *fe_bh,
struct buffer_head *old_last_eb_bh,
+ ocfs_journal_handle *handle,
ocfs2_truncate_context *tc)
{
int status, i, depth;
- ocfs_journal_handle *handle;
ocfs2_dinode *fe;
ocfs2_extent_block *eb;
ocfs2_extent_block *last_eb = NULL;
@@ -959,7 +1395,6 @@
u64 next_eb = 0;
u64 delete_blk = 0;
- handle = tc->tc_handle;
fe = (ocfs2_dinode *) fe_bh->b_data;
status = ocfs_find_new_last_ext_blk(osb,
@@ -1125,10 +1560,9 @@
depth--;
}
- OCFS_ASSERT(delete_blk);
- status = ocfs_free_clusters(handle, tc->tc_bitmap_inode,
- tc->tc_bitmap_bh, delete_blk,
- clusters_to_del);
+ BUG_ON(!delete_blk);
+ status = ocfs2_truncate_log_append(osb, handle, delete_blk,
+ clusters_to_del);
if (status < 0) {
mlog_errno(status);
goto bail;
@@ -1149,22 +1583,22 @@
*
* This will start, restart and commit your handle for you.
*
- * WARNING: This will gobble the contexts reference to last_eb_bh
- * *and* the journal handle.
+ * WARNING: This will kfree the truncate context
*/
int ocfs_commit_truncate(ocfs_super *osb,
struct inode *inode,
struct buffer_head *fe_bh,
ocfs2_truncate_context *tc)
{
- int status, i, credits;
+ int status, i, credits, tl_sem = 0;
unsigned int clusters_to_del, target_i_clusters;
u64 last_eb = 0;
ocfs2_dinode *fe;
ocfs2_extent_block *eb;
ocfs2_extent_list *el;
struct buffer_head *last_eb_bh;
- ocfs_journal_handle *handle;
+ ocfs_journal_handle *handle = NULL;
+ struct inode *tl_inode = osb->osb_tl_inode;
mlog_entry_void();
@@ -1178,7 +1612,6 @@
last_eb_bh = tc->tc_last_eb_bh;
tc->tc_last_eb_bh = NULL;
- handle = tc->tc_handle;
fe = (ocfs2_dinode *) fe_bh->b_data;
@@ -1231,49 +1664,73 @@
+ el->l_recs[i].e_cpos) - target_i_clusters;
mlog(0, "clusters_to_del = %u in this pass\n", clusters_to_del);
- credits = ocfs_calc_tree_trunc_credits(osb->sb, clusters_to_del,
- fe, el);
- if (!ocfs_handle_started(handle)) {
- handle = ocfs_start_trans(osb, handle, credits);
- if (!handle) {
- status = -ENOMEM;
- mlog_errno(status);
- goto bail;
- }
- /* Since we got our cluster lock from caller and we
- * don't add it to the handle: */
- ocfs_set_inode_lock_trans(osb->journal, inode);
- inode->i_ctime = inode->i_mtime = CURRENT_TIME;
- status = ocfs_mark_inode_dirty(handle, inode, fe_bh);
- if (status < 0)
- mlog_errno(status);
- } else {
- status = ocfs_extend_trans(handle, credits);
+ down(&tl_inode->i_sem);
+ tl_sem = 1;
+ /* ocfs2_truncate_log_needs_flush guarantees us at least one
+ * record is free for use. If there isn't any, we flush to get
+ * an empty truncate log. */
+ if (ocfs2_truncate_log_needs_flush(osb)) {
+ status = ocfs2_flush_truncate_log(osb);
if (status < 0) {
mlog_errno(status);
goto bail;
}
}
+ credits = ocfs_calc_tree_trunc_credits(osb->sb, clusters_to_del,
+ fe, el);
+ handle = ocfs_start_trans(osb, NULL, credits);
+ if (!handle) {
+ status = -ENOMEM;
+ mlog_errno(status);
+ goto bail;
+ }
+
+ /* None of the cluster locks for a truncate are added
+ * to the handle, so we update the last transaction
+ * info manually. */
+ if (tc->tc_ext_alloc_inode)
+ ocfs_set_inode_lock_trans(osb->journal,
+ tc->tc_ext_alloc_inode);
+ ocfs_set_inode_lock_trans(osb->journal, inode);
+
+ inode->i_ctime = inode->i_mtime = CURRENT_TIME;
+ status = ocfs_mark_inode_dirty(handle, inode, fe_bh);
+ if (status < 0)
+ mlog_errno(status);
+
status = ocfs_do_truncate(osb, clusters_to_del, inode, fe_bh,
- last_eb_bh, tc);
+ last_eb_bh, handle, tc);
if (status < 0) {
mlog_errno(status);
goto bail;
}
+ up(&tl_inode->i_sem);
+ tl_sem = 0;
+
+ ocfs_commit_trans(handle);
+ handle = NULL;
+
OCFS_ASSERT(fe->i_clusters >= target_i_clusters);
if (fe->i_clusters > target_i_clusters)
goto start;
bail:
up_write(&OCFS_I(inode)->ip_alloc_sem);
- ocfs_commit_trans(handle);
- tc->tc_handle = NULL;
+ if (tl_sem)
+ up(&tl_inode->i_sem);
+
+ if (handle)
+ ocfs_commit_trans(handle);
+
if (last_eb_bh)
brelse(last_eb_bh);
+ /* This will drop the ext_alloc cluster lock for us */
+ ocfs_free_truncate_context(tc);
+
mlog_exit(status);
return status;
}
@@ -1291,15 +1748,12 @@
{
int status, metadata_delete;
unsigned int new_i_clusters;
- ocfs_journal_handle *handle = NULL;
ocfs2_dinode *fe;
ocfs2_extent_block *eb;
ocfs2_extent_list *el;
struct buffer_head *last_eb_bh = NULL;
struct inode *ext_alloc_inode = NULL;
struct buffer_head *ext_alloc_bh = NULL;
- struct inode *data_alloc_inode = NULL;
- struct buffer_head *data_alloc_bh = NULL;
mlog_entry_void();
@@ -1322,13 +1776,6 @@
}
memset(*tc, 0, sizeof(ocfs2_truncate_context));
- handle = ocfs_alloc_handle(osb);
- if (handle == NULL) {
- status = -ENOMEM;
- mlog_errno(status);
- goto bail;
- }
-
metadata_delete = 0;
if (fe->id2.i_list.l_tree_depth) {
/* If we have a tree, then the truncate may result in
@@ -1347,6 +1794,8 @@
metadata_delete = 1;
}
+ (*tc)->tc_last_eb_bh = last_eb_bh;
+
if (metadata_delete) {
mlog(0, "Will have to delete metadata for this trunc. "
"locking allocator.\n");
@@ -1357,51 +1806,24 @@
goto bail;
}
- ocfs_handle_add_inode(handle, ext_alloc_inode);
+ down(&ext_alloc_inode->i_sem);
+ (*tc)->tc_ext_alloc_inode = ext_alloc_inode;
+
status = ocfs2_meta_lock(ext_alloc_inode,
- handle,
+ NULL,
&ext_alloc_bh,
1);
if (status < 0) {
mlog_errno (status);
goto bail;
}
+ (*tc)->tc_ext_alloc_bh = ext_alloc_bh;
+ (*tc)->tc_ext_alloc_locked = 1;
}
- data_alloc_inode = ocfs_get_system_file_inode(osb, GLOBAL_BITMAP_SYSTEM_INODE, -1);
- if (!data_alloc_inode) {
- status = -EINVAL;
- mlog(ML_ERROR, "Could not get bitmap inode!\n");
- goto bail;
- }
-
- ocfs_handle_add_inode(handle, data_alloc_inode);
- status = ocfs2_meta_lock(data_alloc_inode, handle, &data_alloc_bh, 1);
- if (status < 0) {
- mlog_errno (status);
- goto bail;
- }
-
- (*tc)->tc_bitmap_inode = data_alloc_inode;
- (*tc)->tc_bitmap_bh = data_alloc_bh;
- (*tc)->tc_ext_alloc_inode = ext_alloc_inode;
- (*tc)->tc_ext_alloc_bh = ext_alloc_bh;
- (*tc)->tc_last_eb_bh = last_eb_bh;
- (*tc)->tc_handle = handle;
+ status = 0;
bail:
if (status < 0) {
- if (handle)
- ocfs_commit_trans(handle);
- if (last_eb_bh)
- brelse(last_eb_bh);
- if (ext_alloc_inode)
- iput(ext_alloc_inode);
- if (data_alloc_inode)
- iput(data_alloc_inode);
- if (ext_alloc_bh)
- brelse(ext_alloc_bh);
- if (data_alloc_bh)
- brelse(data_alloc_bh);
if (*tc)
ocfs_free_truncate_context(*tc);
*tc = NULL;
@@ -1412,19 +1834,19 @@
void ocfs_free_truncate_context(ocfs2_truncate_context *tc)
{
- if (tc->tc_bitmap_inode)
- iput(tc->tc_bitmap_inode);
- if (tc->tc_bitmap_bh)
- brelse(tc->tc_bitmap_bh);
- if (tc->tc_ext_alloc_inode)
+ if (tc->tc_ext_alloc_inode) {
+ if (tc->tc_ext_alloc_locked)
+ ocfs2_meta_unlock(tc->tc_ext_alloc_inode, 1);
+
+ up(&tc->tc_ext_alloc_inode->i_sem);
iput(tc->tc_ext_alloc_inode);
+ }
+
if (tc->tc_ext_alloc_bh)
brelse(tc->tc_ext_alloc_bh);
+
if (tc->tc_last_eb_bh)
brelse(tc->tc_last_eb_bh);
- if (tc->tc_handle) {
- OCFS_ASSERT(!ocfs_handle_started(tc->tc_handle));
- ocfs_commit_trans(tc->tc_handle);
- }
+
kfree(tc);
}
Modified: trunk/fs/ocfs2/alloc.h
===================================================================
--- trunk/fs/ocfs2/alloc.h 2005-05-17 21:57:00 UTC (rev 2264)
+++ trunk/fs/ocfs2/alloc.h 2005-05-17 22:00:36 UTC (rev 2265)
@@ -51,14 +51,20 @@
return fe->id2.i_list.l_tree_depth + 2;
}
+int ocfs2_truncate_log_init(ocfs_super *osb);
+void ocfs2_truncate_log_shutdown(ocfs_super *osb);
+int ocfs2_begin_truncate_log_recovery(ocfs_super *osb,
+ int slot_num,
+ ocfs2_dinode **tl_copy);
+int ocfs2_complete_truncate_log_recovery(ocfs_super *osb,
+ ocfs2_dinode *tl_copy);
+
typedef struct _ocfs2_truncate_context {
- struct inode *tc_bitmap_inode;
- struct buffer_head *tc_bitmap_bh;
struct inode *tc_ext_alloc_inode;
struct buffer_head *tc_ext_alloc_bh;
+ int tc_ext_alloc_locked; /* is it cluster locked? */
/* these get destroyed once it's passed to ocfs_commit_truncate. */
struct buffer_head *tc_last_eb_bh;
- ocfs_journal_handle *tc_handle;
} ocfs2_truncate_context;
void ocfs_free_truncate_context(ocfs2_truncate_context *tc);
Modified: trunk/fs/ocfs2/file.c
===================================================================
--- trunk/fs/ocfs2/file.c 2005-05-17 21:57:00 UTC (rev 2264)
+++ trunk/fs/ocfs2/file.c 2005-05-17 22:00:36 UTC (rev 2265)
@@ -538,9 +538,6 @@
if (fe_bh)
brelse(fe_bh);
- if (tc)
- ocfs_free_truncate_context(tc);
-
mlog_exit (status);
return status;
} /* ocfs_truncate_file */
Modified: trunk/fs/ocfs2/inode.c
===================================================================
--- trunk/fs/ocfs2/inode.c 2005-05-17 21:57:00 UTC (rev 2264)
+++ trunk/fs/ocfs2/inode.c 2005-05-17 22:00:36 UTC (rev 2265)
@@ -491,9 +491,6 @@
if (handle)
ocfs_commit_trans(handle);
- if (tc)
- ocfs_free_truncate_context(tc);
-
mlog_exit (status);
return status;
}
Modified: trunk/fs/ocfs2/journal.c
===================================================================
--- trunk/fs/ocfs2/journal.c 2005-05-17 21:57:00 UTC (rev 2264)
+++ trunk/fs/ocfs2/journal.c 2005-05-17 22:00:36 UTC (rev 2265)
@@ -845,7 +845,8 @@
struct ocfs2_la_recovery_item {
struct list_head lri_list;
int lri_slot;
- ocfs2_dinode *lri_dinode;
+ ocfs2_dinode *lri_la_dinode;
+ ocfs2_dinode *lri_tl_dinode;
};
/* Does the second half of the recovery process. By this point, the
@@ -863,7 +864,7 @@
int ret;
ocfs_super *osb = data;
ocfs_journal *journal = osb->journal;
- ocfs2_dinode *la_dinode;
+ ocfs2_dinode *la_dinode, *tl_dinode;
struct ocfs2_la_recovery_item *item;
struct list_head *p, *n;
LIST_HEAD(tmp_la_list);
@@ -882,7 +883,7 @@
mlog(0, "Complete recovery for slot %d\n", item->lri_slot);
- la_dinode = item->lri_dinode;
+ la_dinode = item->lri_la_dinode;
if (la_dinode) {
mlog(0, "Clean up local alloc %"MLFu64"\n",
la_dinode->i_blkno);
@@ -895,6 +896,19 @@
kfree(la_dinode);
}
+ tl_dinode = item->lri_tl_dinode;
+ if (tl_dinode) {
+ mlog(0, "Clean up truncate log %"MLFu64"\n",
+ tl_dinode->i_blkno);
+
+ ret = ocfs2_complete_truncate_log_recovery(osb,
+ tl_dinode);
+ if (ret < 0)
+ mlog_errno(ret);
+
+ kfree(tl_dinode);
+ }
+
ret = ocfs_recover_orphans(osb, item->lri_slot);
if (ret < 0)
mlog_errno(ret);
@@ -907,11 +921,13 @@
mlog_exit_void();
}
-/* NOTE: This function always eats the reference to la_dinode, either
- * manually on error, or by passing it to ocfs2_complete_recovery */
+/* NOTE: This function always eats your references to la_dinode and
+ * tl_dinode, either manually on error, or by passing them to
+ * ocfs2_complete_recovery */
static void ocfs2_queue_recovery_completion(ocfs_journal *journal,
int slot_num,
- ocfs2_dinode *la_dinode)
+ ocfs2_dinode *la_dinode,
+ ocfs2_dinode *tl_dinode)
{
struct ocfs2_la_recovery_item *item;
@@ -923,13 +939,17 @@
if (la_dinode)
kfree(la_dinode);
+ if (tl_dinode)
+ kfree(tl_dinode);
+
mlog_errno(-ENOMEM);
return;
}
INIT_LIST_HEAD(&item->lri_list);
- item->lri_dinode = la_dinode;
+ item->lri_la_dinode = la_dinode;
item->lri_slot = slot_num;
+ item->lri_tl_dinode = tl_dinode;
spin_lock(&journal->j_lock);
list_add_tail(&item->lri_list, &journal->j_la_cleanups);
@@ -944,9 +964,12 @@
ocfs_journal *journal = osb->journal;
if (osb->dirty) {
+ /* No need to queue up our truncate_log as regular
+ * cleanup will catch that. */
ocfs2_queue_recovery_completion(journal,
osb->slot_num,
- osb->local_alloc_copy);
+ osb->local_alloc_copy,
+ NULL);
osb->local_alloc_copy = NULL;
osb->dirty = 0;
}
@@ -997,7 +1020,8 @@
/* We always run recovery on our own orphan dir - the dead
* node(s) may have voted "no" on an inode delete earlier. A
* revote is therefore required. */
- ocfs2_queue_recovery_completion(osb->journal, osb->slot_num, NULL);
+ ocfs2_queue_recovery_completion(osb->journal, osb->slot_num, NULL,
+ NULL);
bail:
down(&osb->recovery_lock);
@@ -1171,16 +1195,13 @@
* Do the most important parts of node recovery:
* - Replay it's journal
* - Stamp a clean local allocator file
+ * - Stamp a clean truncate log
* - Mark the node clean
*
* If this function completes without error, a node in OCFS2 can be
* said to have been safely recovered. As a result, failure during the
* second part of a nodes recovery process (local alloc recovery) is
* far less concerning.
- *
- * A copy of the nodes local alloc file is passed back so unused space
- * can be reclaimed once all nodes are recovered. This must be kfree'd
- * by the caller.
*/
static int ocfs_recover_node(ocfs_super *osb,
int node_num)
@@ -1189,11 +1210,12 @@
int slot_num;
ocfs2_slot_info *si = osb->slot_info;
ocfs2_dinode *la_copy = NULL;
+ ocfs2_dinode *tl_copy = NULL;
mlog_entry("(node_num=%d, osb->node_num = %d)\n",
node_num, osb->node_num);
- mlog(0, "ocfs2_recover_node: checking node %d\n", node_num);
+ mlog(0, "checking node %d\n", node_num);
/* Should not ever be called to recover ourselves -- in that
* case we should've called ocfs_journal_load instead. */
@@ -1203,13 +1225,11 @@
slot_num = ocfs2_node_num_to_slot(si, node_num);
if (slot_num == OCFS_INVALID_NODE_NUM) {
status = 0;
- mlog(0, "ocfs2_recover_node: no slot for this node, so "
- "no recovery required.\n");
+ mlog(0, "no slot for this node, so no recovery required.\n");
goto done;
}
- mlog(0, "ocfs2_recover_node: node %d was using slot %d\n",
- node_num, slot_num);
+ mlog(0, "node %d was using slot %d\n", node_num, slot_num);
status = ocfs2_replay_journal(osb, node_num, slot_num);
if (status < 0) {
@@ -1224,15 +1244,23 @@
goto done;
}
- /* This would be a strange but ultimately not so harmful place
- * to get an error... */
+ /* An error from begin_truncate_log_recovery is not
+ * serious enough to warrant halting the rest of
+ * recovery. */
+ status = ocfs2_begin_truncate_log_recovery(osb, slot_num, &tl_copy);
+ if (status < 0)
+ mlog_errno(status);
+
+ /* Likewise, this would be a strange but ultimately not so
+ * harmful place to get an error... */
ocfs2_clear_slot(si, slot_num);
status = ocfs2_update_disk_slots(osb, si);
if (status < 0)
mlog_errno(status);
- /* This will gobble the memory pointed to by la_copy */
- ocfs2_queue_recovery_completion(osb->journal, slot_num, la_copy);
+ /* This will kfree the memory pointed to by la_copy and tl_copy */
+ ocfs2_queue_recovery_completion(osb->journal, slot_num, la_copy,
+ tl_copy);
status = 0;
done:
Modified: trunk/fs/ocfs2/journal.h
===================================================================
--- trunk/fs/ocfs2/journal.h 2005-05-17 21:57:00 UTC (rev 2264)
+++ trunk/fs/ocfs2/journal.h 2005-05-17 22:00:36 UTC (rev 2265)
@@ -333,6 +333,10 @@
/* dinode + group descriptor update. We don't relink on free yet. */
#define OCFS_SUBALLOC_FREE (2)
+#define OCFS_TRUNCATE_LOG_UPDATE OCFS_INODE_UPDATE_CREDITS
+#define OCFS_TRUNCATE_LOG_FLUSH_ONE_REC (OCFS_SUBALLOC_FREE \
+ + OCFS_TRUNCATE_LOG_UPDATE)
+
/* data block for new dir/symlink, 2 for bitmap updates (bitmap fe +
* bitmap block for the new bit) */
#define OCFS_DIR_LINK_ADDITIONAL_CREDITS (1 + 2)
@@ -422,9 +426,9 @@
ocfs2_dinode *fe,
ocfs2_extent_list *last_el)
{
- /* for file entry + all headers in this pass + update to next leaf */
+ /* for dinode + all headers in this pass + update to next leaf */
int credits = 1 + fe->id2.i_list.l_tree_depth + 1;
- int bitmap_blocks, i;
+ int i;
i = last_el->l_next_free_rec - 1;
OCFS_ASSERT(i >= 0);
@@ -436,11 +440,9 @@
&& ((last_el->l_recs[i].e_clusters - clusters_to_del) == 0))
credits += 1 + fe->id2.i_list.l_tree_depth;
- /* bitmap fe + group descriptor */
- bitmap_blocks = OCFS_SUBALLOC_FREE;
+ /* update to the truncate log. */
+ credits += OCFS_TRUNCATE_LOG_UPDATE;
- credits += bitmap_blocks;
-
return credits;
}
Modified: trunk/fs/ocfs2/ocfs.h
===================================================================
--- trunk/fs/ocfs2/ocfs.h 2005-05-17 21:57:00 UTC (rev 2264)
+++ trunk/fs/ocfs2/ocfs.h 2005-05-17 22:00:36 UTC (rev 2265)
@@ -400,6 +400,10 @@
wait_queue_head_t osb_okp_pending_wq;
wait_queue_head_t osb_mount_event;
+
+ /* Truncate log info */
+ struct inode *osb_tl_inode;
+ struct buffer_head *osb_tl_bh;
};
#define NAMEI_RA_CHUNKS 2
Modified: trunk/fs/ocfs2/ocfs2_fs.h
===================================================================
--- trunk/fs/ocfs2/ocfs2_fs.h 2005-05-17 21:57:00 UTC (rev 2264)
+++ trunk/fs/ocfs2/ocfs2_fs.h 2005-05-17 22:00:36 UTC (rev 2265)
@@ -116,6 +116,7 @@
#define OCFS2_JOURNAL_FL (0x00000100) /* Node journal */
#define OCFS2_HEARTBEAT_FL (0x00000200) /* Heartbeat area */
#define OCFS2_CHAIN_FL (0x00000400) /* Chain allocator */
+#define OCFS2_DEALLOC_FL (0x00000800) /* Truncate log */
/*
* Journal Flags (ocfs2_dinode.id1.journal1.i_flags)
@@ -163,6 +164,7 @@
INODE_ALLOC_SYSTEM_INODE,
JOURNAL_SYSTEM_INODE,
LOCAL_ALLOC_SYSTEM_INODE,
+ TRUNCATE_LOG_SYSTEM_INODE,
NUM_SYSTEM_INODES
};
@@ -182,7 +184,8 @@
[EXTENT_ALLOC_SYSTEM_INODE] = { "extent_alloc:%04d", OCFS2_BITMAP_FL | OCFS2_CHAIN_FL, S_IFREG | 0644 },
[INODE_ALLOC_SYSTEM_INODE] = { "inode_alloc:%04d", OCFS2_BITMAP_FL | OCFS2_CHAIN_FL, S_IFREG | 0644 },
[JOURNAL_SYSTEM_INODE] = { "journal:%04d", OCFS2_JOURNAL_FL, S_IFREG | 0644 },
- [LOCAL_ALLOC_SYSTEM_INODE] = { "local_alloc:%04d", OCFS2_BITMAP_FL | OCFS2_LOCAL_ALLOC_FL, S_IFREG | 0644 }
+ [LOCAL_ALLOC_SYSTEM_INODE] = { "local_alloc:%04d", OCFS2_BITMAP_FL | OCFS2_LOCAL_ALLOC_FL, S_IFREG | 0644 },
+ [TRUNCATE_LOG_SYSTEM_INODE] = { "truncate_log:%04d", OCFS2_DEALLOC_FL, S_IFREG | 0644 }
};
/* Parameter passed from mount.ocfs2 to module */
@@ -253,6 +256,11 @@
__u64 c_blkno; /* Physical disk offset (blocks) of 1st group */
} ocfs2_chain_rec;
+/* One on-disk truncate log entry: a contiguous run of t_clusters
+ * clusters starting at cluster t_start, queued to be freed back to
+ * the global bitmap.  Both fields are little-endian on disk. */
+typedef struct _ocfs2_truncate_rec {
+ __u32 t_start; /* 1st cluster in this log */
+ __u32 t_clusters; /* Number of total clusters covered */
+} ocfs2_truncate_rec;
+
/*
* On disk extent list for OCFS2 (node in the tree). Note that this
* is contained inside ocfs2_dinode or ocfs2_extent_block, so the
@@ -287,6 +295,18 @@
} ocfs2_chain_list;
/*
+ * On disk deallocation log for OCFS2. Note that this is
+ * contained inside ocfs2_dinode, so the offsets are relative to
+ * ocfs2_dinode.id2.i_dealloc.
+ */
+typedef struct _ocfs2_truncate_log {
+/*00*/ __u16 tl_count; /* Total records in this log */
+ __u16 tl_used; /* Number of records in use */
+ __u32 tl_reserved1;
+/*08*/ ocfs2_truncate_rec tl_recs[0]; /* Truncate records */
+} ocfs2_truncate_log;
+
+/*
* On disk extent block (indirect block) for OCFS2
*/
typedef struct _ocfs2_extent_block
@@ -403,11 +423,12 @@
} journal1;
} id1; /* Inode type dependant 1 */
/*C0*/ union {
- ocfs2_super_block i_super;
- ocfs2_local_alloc i_lab;
- ocfs2_chain_list i_chain;
- ocfs2_extent_list i_list;
- __u8 i_symlink[0];
+ ocfs2_super_block i_super;
+ ocfs2_local_alloc i_lab;
+ ocfs2_chain_list i_chain;
+ ocfs2_extent_list i_list;
+ ocfs2_truncate_log i_dealloc;
+ __u8 i_symlink[0];
} id2;
/* Actual on-disk size is one block */
} ocfs2_dinode;
@@ -503,6 +524,16 @@
return size;
}
+
+static inline int ocfs2_truncate_recs_per_inode(struct super_block *sb)
+{
+ int size;
+
+ size = sb->s_blocksize -
+ offsetof(struct _ocfs2_dinode, id2.i_dealloc.tl_recs);
+
+ return size / sizeof(struct _ocfs2_truncate_rec);
+}
#else
static inline int ocfs2_fast_symlink_chars(int blocksize)
{
@@ -558,6 +589,16 @@
return size;
}
+
+static inline int ocfs2_truncate_recs_per_inode(int blocksize)
+{
+ int size;
+
+ size = blocksize -
+ offsetof(struct _ocfs2_dinode, id2.i_dealloc.tl_recs);
+
+ return size / sizeof(struct _ocfs2_truncate_rec);
+}
#endif /* __KERNEL__ */
Modified: trunk/fs/ocfs2/super.c
===================================================================
--- trunk/fs/ocfs2/super.c 2005-05-17 21:57:00 UTC (rev 2264)
+++ trunk/fs/ocfs2/super.c 2005-05-17 22:00:36 UTC (rev 2265)
@@ -780,6 +780,12 @@
goto leave;
}
+ status = ocfs2_truncate_log_init(osb);
+ if (status < 0) {
+ mlog_errno(status);
+ goto leave;
+ }
+
/* This should be sent *after* we recovered our journal as it
* will cause other nodes to unmark us as needing
* recovery. However, we need to send it *before* dropping the
@@ -828,6 +834,8 @@
ocfs_shutdown_local_alloc(osb);
+ ocfs2_truncate_log_shutdown(osb);
+
/* disable any new recovery threads and wait for any currently
* running ones to exit. Do this before setting the vol_state. */
down(&osb->recovery_lock);
More information about the Ocfs2-commits
mailing list