[Ocfs2-commits] mfasheh commits r1549 - trunk/src
svn-commits at oss.oracle.com
svn-commits at oss.oracle.com
Wed Oct 6 20:38:02 CDT 2004
Author: mfasheh
Date: 2004-10-06 20:38:01 -0500 (Wed, 06 Oct 2004)
New Revision: 1549
Modified:
trunk/src/alloc.c
trunk/src/alloc.h
trunk/src/file.c
trunk/src/file.h
trunk/src/inode.c
trunk/src/ocfs_journal.h
Log:
* redo our truncate code. It's about a million times less embarrassing now,
though ocfs_do_truncate could probably be cleaned up a bit.
- we don't handle putting / removing truncating files in the orphan dir
yet, but the placeholders are all left there.
- truncate is now restartable, rolls forward, and can deal with a tree of
any depth :)
Modified: trunk/src/alloc.c
===================================================================
--- trunk/src/alloc.c 2004-10-05 22:33:26 UTC (rev 1548)
+++ trunk/src/alloc.c 2004-10-07 01:38:01 UTC (rev 1549)
@@ -53,11 +53,6 @@
/* Tracing */
#define OCFS_DEBUG_CONTEXT OCFS_DEBUG_CONTEXT_ALLOC
-static int ocfs_kill_this_tree(ocfs_super *osb,
- struct buffer_head *extent_grp_bh,
- ocfs_journal_handle *handle,
- struct inode *inode);
-
static int ocfs_create_new_meta_bhs(ocfs_super *osb,
ocfs_journal_handle *handle,
struct inode *inode,
@@ -83,26 +78,6 @@
struct inode *inode,
ocfs2_alloc_context *meta_ac);
-static int _squish_extent_entries(ocfs_super *osb,
- ocfs2_extent_rec *extarr,
- __u16 *freeExtent,
- ocfs_journal_handle *handle,
- u32 num_clusters, int flag,
- struct inode *inode);
-
-static int ocfs_fix_extent_block(ocfs_super *osb,
- struct buffer_head *eb_bh,
- struct inode *inode);
-
-static int ocfs_split_this_tree(ocfs_super * osb,
- struct buffer_head *eb_bh,
- ocfs_journal_handle *handle,
- ocfs2_dinode *fe,
- struct inode *inode);
-
-static int ocfs_update_last_eb_blk(ocfs_super *osb, ocfs2_dinode *fe,
- struct inode *inode);
-
static int ocfs_free_disk_bitmap (ocfs_super * osb, ocfs_free_rec *free_log);
static int ocfs_extent_contig(struct inode *inode, ocfs2_extent_rec *ext,
@@ -114,7 +89,27 @@
u32 min_bits,
u32 *bit_off,
u32 *num_bits);
+static int ocfs_free_clusters(ocfs_super *osb,
+ ocfs_journal_handle *handle,
+ struct inode *bitmap_inode,
+ struct buffer_head *bitmap_bh,
+ u64 start_blk,
+ unsigned int num_clusters);
+static int ocfs_find_new_last_ext_blk(ocfs_super *osb,
+ struct inode *inode,
+ ocfs2_dinode *fe,
+ unsigned int new_i_clusters,
+ struct buffer_head *old_last_eb,
+ struct buffer_head **new_last_eb);
+
+static int ocfs_do_truncate(ocfs_super *osb,
+ unsigned int clusters_to_del,
+ struct inode *inode,
+ struct buffer_head *fe_bh,
+ struct buffer_head *old_last_eb_bh,
+ ocfs2_truncate_context *tc);
+
static int ocfs_extent_contig(struct inode *inode, ocfs2_extent_rec *ext,
u64 blkno)
{
@@ -455,6 +450,65 @@
return status;
} /* ocfs_free_disk_bitmap */
+/*
+ * Free num_clusters clusters in osb->cluster_bitmap, beginning at the
+ * cluster that start_blk converts to, and subtract them from i_used in
+ * the bitmap dinode held by bitmap_bh (journalled against handle).
+ *
+ * Returns 0 on success, or the negative status of the call that failed.
+ */
+static int ocfs_free_clusters(ocfs_super *osb, ocfs_journal_handle *handle,
+			      struct inode *bitmap_inode,
+			      struct buffer_head *bitmap_bh,
+			      u64 start_blk, unsigned int num_clusters)
+{
+	ocfs_alloc_bm *bitmap = &osb->cluster_bitmap;
+	int status;
+	unsigned int start_cluster, bitmap_blocks, bitmap_start;
+	ocfs2_dinode *fe;
+
+	LOG_ENTRY();
+
+	start_cluster = ocfs_blocks_to_clusters(osb->sb, start_blk);
+	bitmap_blocks = ocfs_bitmap_blocks_affected(osb->sb, start_cluster,
+						    num_clusters,
+						    &bitmap_start);
+
+	LOG_TRACE_ARGS("want to free %u clusters starting at block %llu\n",
+		       num_clusters, start_blk);
+	LOG_TRACE_ARGS("bitmap_start = %u, bitmap_blkno = %llu, bitmap_blocks "
+		       "= %u\n", bitmap_start, osb->bitmap_blkno,
+		       bitmap_blocks);
+	status = ocfs_read_blocks(osb, (osb->bitmap_blkno + bitmap_start),
+				  bitmap_blocks, &bitmap->chunk[bitmap_start],
+				  OCFS_BH_CACHED, bitmap_inode);
+	if (status < 0) {
+		LOG_ERROR_STATUS(status);
+		goto bail;
+	}
+
+	status = ocfs_journal_access(handle, bitmap_bh,
+				     OCFS_JOURNAL_ACCESS_WRITE);
+	if (status < 0) {
+		LOG_ERROR_STATUS(status);
+		goto bail;
+	}
+	fe = (ocfs2_dinode *) bitmap_bh->b_data;
+
+	ocfs_clear_bits(osb->sb, handle, bitmap, start_cluster, num_clusters);
+	fe->id1.bitmap1.i_used -= num_clusters;
+
+	status = ocfs_journal_dirty(handle, bitmap_bh);
+	if (status < 0) {
+		LOG_ERROR_STATUS(status);
+		goto bail;
+	}
+	status = 0;
+bail:
+	LOG_EXIT_STATUS(status);
+	return status;
+}
+
/*
* How many free extents have we got before we need more meta data?
*/
@@ -486,7 +540,6 @@
OCFS_ASSERT(el->l_tree_depth == 0);
retval = el->l_count - el->l_next_free_rec;
-
bail:
if (eb_bh)
brelse(eb_bh);
@@ -1370,1014 +1423,6 @@
} /* ocfs_allocate_extent */
/*
- * _squish_extent_entries()
- * FileSize is the allocated size of the file after the truncate.
- * 'flag' seems to be an indicator that (if true) tells us that we already know
- * we're gonna have to clear out all of extarr.
- */
-static int _squish_extent_entries(ocfs_super *osb,
- ocfs2_extent_rec *extarr,
- __u16 *freeExtent,
- ocfs_journal_handle *handle,
- u32 num_clusters, int flag,
- struct inode *inode)
-{
- int status = 0;
- int FirstTime = 1;
- __u64 FileSize = (u64)num_clusters << osb->s_clustersize_bits; /* FIXME get rid of this and use num_clusters!! */
- ocfs2_extent_rec *ext;
- __u32 i, csize = osb->s_clustersize_bits,
- numBitsAllocated = 0, bitmapOffset = 0,
- firstfree = *freeExtent;
- __u64 bytes, foff, doff,
- diskOffsetTobeFreed, lengthTobeFreed = 0,
- actualSize = 0, origLength = 0;
-
- LOG_ENTRY_ARGS("(*freeExtent = %u, FileSize = %llu, flag = %d)\n",
- *freeExtent, FileSize, flag);
-
- firstfree = *freeExtent;
-
- /* loop through the used ocfs2_extent_recs */
- for (i = 0; i < firstfree; i++) {
- ext = &(extarr[i]);
- bytes = (u64)ext->e_clusters << osb->s_clustersize_bits;
- foff = (u64)ext->e_cpos << osb->s_clustersize_bits;
- doff = ext->e_blkno << osb->sb->s_blocksize_bits;
- actualSize = (bytes + foff);
- if (flag || actualSize > FileSize) {
- if (flag || foff >= FileSize) {
- if (!flag && FirstTime) {
- *freeExtent = i;
- FirstTime = 0;
- }
- numBitsAllocated = ext->e_clusters;
- bitmapOffset = (__u32) (doff >> csize);
- ext->e_clusters = ext->e_blkno =
- ext->e_cpos = 0;
- } else {
- if (FirstTime) {
- *freeExtent = i + 1;
- FirstTime = 0;
- }
- origLength = bytes;
- bytes = FileSize - foff;
- ext->e_clusters =
- (u32)(bytes >> osb->s_clustersize_bits);
- lengthTobeFreed = origLength - bytes;
- if (lengthTobeFreed == 0) {
- continue;
- }
- numBitsAllocated = (__u32) (lengthTobeFreed >> csize);
- diskOffsetTobeFreed = doff + bytes;
- bitmapOffset = (__u32) (diskOffsetTobeFreed >> csize);
- }
- status = ocfs_handle_add_commit_bits(handle,
- numBitsAllocated,
- bitmapOffset, -1, 0,
- DISK_ALLOC_VOLUME);
- if (status < 0) {
- LOG_ERROR_STATUS (status);
- break;
- }
- }
- }
-
- LOG_EXIT_STATUS (status);
- return status;
-} /* _squish_extent_entries */
-
-/* used by ocfs_kill_this_tree and ocfs_split_this_tree */
-/* This value needs to be removed in a future version and set to
- * tree_depth + 1, dynamically */
-#define OCFS_TREE_STACK_SIZE 8
-
-/*
- * ocfs_kill_this_tree
- *
- * Given an extent_group (can be a DAT or header), delete everything,
- * including itself, it's children, and any data blocks they point to.
- * Works fine with any tree_depth (up to 4, in which case we'd need
- * more stack space)
- *
- * extent_grp_bh will be unchanged, though it will be marked for
- * deletion in free_head.
- */
-
-/*
- * We can't recurse, so we keep a simple stack of ocfs2_extent_blocks.
- */
-static int ocfs_kill_this_tree(ocfs_super *osb,
- struct buffer_head *extent_grp_bh,
- ocfs_journal_handle *handle,
- struct inode *inode)
-{
- int status = -EFAIL;
- int i;
- __u32 victim;
- u32 num_clusters = 0;
- __u32 bitmap_offset = 0;
- ocfs2_extent_rec *ext;
- struct buffer_head *tmp_bh = NULL;
- char * stack[OCFS_TREE_STACK_SIZE];
- ocfs2_extent_block *cur_eb; /* convenience, points to TOS */
- ocfs2_extent_list *cur_el;
- int tos = 0;
-
- LOG_ENTRY();
-
- for (i =0; i < OCFS_TREE_STACK_SIZE; i++)
- stack[i] = NULL;
-
- stack[tos] = kmalloc(osb->sb->s_blocksize, GFP_KERNEL);
- memcpy(stack[tos], extent_grp_bh->b_data, osb->sb->s_blocksize);
-
- do {
- cur_eb = (ocfs2_extent_block *) stack[tos];
- if (!IS_VALID_EXTENT_BLOCK(cur_eb)) {
- LOG_ERROR_STR("Invalid extent block!");
- goto bail;
- }
-
- cur_el = &cur_eb->h_list;
-
- if (!cur_el->l_tree_depth) {
- LOG_TRACE_ARGS("found some data to free (%llu)\n", cur_eb->h_blkno);
- for(i = 0; i < cur_el->l_next_free_rec; i++) {
- /* Free the data associated with each header */
- ext = &cur_el->l_recs[i];
- num_clusters = ext->e_clusters;
- bitmap_offset =
- (u32)((ext->e_blkno << osb->sb->s_blocksize_bits) >> osb->s_clustersize_bits);
- status = ocfs_handle_add_commit_bits(handle, num_clusters, bitmap_offset, -1, 0, DISK_ALLOC_VOLUME);
- if (status < 0) {
- LOG_ERROR_STATUS (status);
- goto bail;
- }
- }
- } else {
- /* Ok, we're a header. */
-
- /* Did we already kill all his children, or
- * are they already dead? */
- if (cur_el->l_next_free_rec == 0) {
- LOG_TRACE_ARGS("Popping this header (%llu)\n", cur_eb->h_blkno);
- goto free_meta;
- }
-
- /* We're gonna read in our last used extent
- * and put him at the top of the stack. We
- * also update our l_next_free_rec so that next
- * time we read in the next to last one and so
- * on until we've finished all of them
- */
-
- victim = cur_el->l_next_free_rec - 1;
-
- /* should already be null, but we can do this
- * just in case. */
- stack[tos] = kmalloc(osb->sb->s_blocksize,
- GFP_KERNEL);
-
- status = ocfs_read_block(osb,
- cur_el->l_recs[victim].e_blkno,
- &tmp_bh,
- OCFS_BH_CACHED,
- inode);
- if (status < 0) {
- LOG_ERROR_STATUS (status);
- goto bail;
- }
-
- cur_el->l_next_free_rec--;
- cur_eb = NULL;
- cur_el = NULL;
- tos++;
-
- memcpy(stack[tos], tmp_bh->b_data,
- osb->sb->s_blocksize);
- brelse(tmp_bh);
- tmp_bh = NULL;
- /* We only want to free on our way back up the tree */
- continue;
- }
-
-free_meta:
- /* Free the metadata associated with this extent group */
- status = ocfs_handle_add_commit_bits(handle, 1,
- cur_eb->h_suballoc_bit,
- cur_eb->h_suballoc_node,
- cur_eb->h_blkno,
- DISK_ALLOC_EXTENT_NODE);
- if (status < 0) {
- LOG_ERROR_STATUS (status);
- goto bail;
- }
- /* Pop one off the stack */
- kfree(stack[tos]);
- stack[tos] = NULL;
- cur_eb = NULL;
- cur_el = NULL;
- tos--;
- } while (tos >= 0);
-
- status = 0;
-bail:
- for(i = 0; i < OCFS_TREE_STACK_SIZE; i++)
- if (stack[i])
- kfree(stack[i]);
-
- LOG_EXIT_STATUS (status);
- return(status);
-} /* ocfs_kill_this_tree */
-
-
-static int ocfs_fix_extent_block(ocfs_super *osb,
- struct buffer_head *eb_bh,
- struct inode *inode)
-{
- ocfs2_extent_block *eb;
- ocfs2_extent_list *el;
- int status = -EFAIL;
- int i;
-
- LOG_ENTRY();
-
- if (!eb_bh) {
- LOG_ERROR_STR("Invalid extent block bh (NULL)!");
- goto bail;
- }
-
- eb = (ocfs2_extent_block *) eb_bh->b_data;
-
- if (!IS_VALID_EXTENT_BLOCK(eb)) {
- LOG_ERROR_STR("Invalid extent block!");
- goto bail;
- }
-
- el = &eb->h_list;
-
- for(i = el->l_next_free_rec; i < el->l_count; i++) {
- el->l_recs[i].e_clusters = 0;
- el->l_recs[i].e_blkno = 0;
- el->l_recs[i].e_cpos = 0;
- }
-
- status = 0;
-bail:
- LOG_EXIT_STATUS (status);
- return(status);
-}
-
-
-/*
- * ocfs_split_this_tree
- *
- * Given an extent_group (DAT or HDR) takes the new alloc_size from fe
- * and splits this tree into two parts, one of which is deleted.
- *
- * TODO: This function can likely be combined with the above, we will try to
- * write it so that it can.
- * TODO: This function should be split up into a couple smaller ones.
- */
-static int ocfs_split_this_tree(ocfs_super *osb,
- struct buffer_head *eb_bh,
- ocfs_journal_handle *handle,
- ocfs2_dinode *fe,
- struct inode *inode)
-{
- int status = -EFAIL;
- ocfs2_extent_rec *rec;
- struct buffer_head * bh_stack[OCFS_TREE_STACK_SIZE];
- ocfs2_extent_block *alloc_eb = NULL; /* convenience, points to TOS */
- ocfs2_extent_block *eb;
- ocfs2_extent_list *el;
- struct buffer_head *tmp_bh = NULL, *tmp_bh2 = NULL;
- int tos = 0;
- int i, victim;
- __u64 bytes, doff, orig_bytes;
- __u64 total_bytes; /* FIXME needs to be clusters!!! */
- __u32 num_clusters, bitmap_offset;
- int done = 0;
- int depth = fe->id2.i_list.l_tree_depth;
- int needs_brelse = 0;
-
- LOG_ENTRY();
-
- /* This is a similar hack to the one below, untested for depth
- = 4 files because I can't recreate one. */
- if (depth == 4) {
- LOG_ERROR_STR("Truncating file with tree_depth 4, this is not tested and may be unsafe!");
- LOG_TRACE_STR("Found a tree_depth 4 tree, trimming it.\n");
-
- status = ocfs_journal_access(handle, eb_bh,
- OCFS_JOURNAL_ACCESS_WRITE);
- if (status < 0) {
- LOG_ERROR_STATUS(status);
- goto bail;
- }
- eb = (ocfs2_extent_block *) eb_bh->b_data;
- el = &eb->h_list;
- for (i = (el->l_next_free_rec - 1); i >= 0; i--) {
- rec = &el->l_recs[i];
-
- if (tmp_bh2)
- brelse(tmp_bh2);
- tmp_bh2 = NULL;
- status = ocfs_read_block(osb,
- rec->e_blkno,
- &tmp_bh2,
- OCFS_BH_CACHED, inode);
- if (status < 0) {
- eb = NULL;
- brelse(tmp_bh2);
- LOG_ERROR_STATUS (status);
- goto bail;
- }
-
- if (rec->e_cpos >= fe->i_clusters) {
- /* Trim this whole subtree */
- status = ocfs_kill_this_tree(osb,
- tmp_bh2,
- handle,
- inode);
- if (status < 0) {
- eb = NULL;
- el = NULL;
- brelse(tmp_bh2);
- LOG_ERROR_STATUS (status);
- goto bail;
- }
- rec->e_cpos = 0;
- rec->e_blkno = 0;
- rec->e_clusters = 0;
- el->l_next_free_rec = i;
- } else { /* This is the one we want to split. */
- rec->e_clusters =
- fe->i_clusters - rec->e_cpos;
- break;
- }
- }
-
- /* Write out our new top of the tree duder */
- eb = NULL;
- el = NULL;
-
- status = ocfs_journal_dirty(handle, eb_bh);
- if (status < 0) {
- LOG_ERROR_STATUS(status);
- goto bail;
- }
-
- /* Make our new TOS the header we want to split. */
- if (tmp_bh2 == NULL) {
- LOG_ERROR_STATUS(-EFAIL);
- goto bail;
- }
- eb_bh = tmp_bh2;
-
- /* We want to do the next bit of stuff too */
- depth = 3;
- needs_brelse = 1;
- }
-
- /* This is a hack, but i have little time to make this function right*/
- /* get rid of everything from the top level HDR that we can, then
- proceeed as if we're tree_depth 2 (which we know works) */
- if (depth == 3) {
- LOG_TRACE_STR("Found a tree_depth 3 tree, trimming it.\n");
-
- status = ocfs_journal_access(handle, eb_bh,
- OCFS_JOURNAL_ACCESS_WRITE);
- if (status < 0) {
- LOG_ERROR_STATUS(status);
- goto bail;
- }
-
- eb = (ocfs2_extent_block *) eb_bh->b_data;
- el = &eb->h_list;
- for(i = (el->l_next_free_rec - 1); i >= 0; i--) {
- rec = &el->l_recs[i];
-
- if (tmp_bh)
- brelse(tmp_bh);
- tmp_bh = NULL;
-
- status = ocfs_read_block(osb,
- rec->e_blkno,
- &tmp_bh,
- OCFS_BH_CACHED,
- inode);
- if (status < 0) {
- eb = NULL;
- el = NULL;
- brelse(tmp_bh);
- LOG_ERROR_STATUS (status);
- goto bail;
- }
-
- if (rec->e_cpos >= fe->i_clusters) {
- /* Trim this whole subtree */
- status = ocfs_kill_this_tree(osb,
- tmp_bh,
- handle,
- inode);
- if (status < 0) {
- eb = NULL;
- el = NULL;
- brelse(tmp_bh);
- LOG_ERROR_STATUS (status);
- goto bail;
- }
- rec->e_cpos = 0;
- rec->e_blkno = 0;
- rec->e_clusters = 0;
- el->l_next_free_rec = i;
- } else { /* This is the one we want to split. */
- rec->e_clusters =
- fe->i_clusters - rec->e_cpos;
- break;
- }
- }
- /* Write out our new top of the tree duder */
- eb = NULL;
- el = NULL;
-
- status = ocfs_journal_dirty(handle, eb_bh);
- if (status < 0) {
- LOG_ERROR_STATUS(status);
- goto bail;
- }
-
- /* Make our new TOS the header we want to split. */
- if (tmp_bh == NULL) {
- LOG_ERROR_STATUS(-EFAIL);
- goto bail;
- }
-
- eb_bh = tmp_bh;
-
- /* Right now, we don't use 'depth' below here, but just
- * in case */
- depth = 2;
- if (needs_brelse)
- brelse(tmp_bh2);
- needs_brelse = 1;
- }
-
- for (i =0; i < OCFS_TREE_STACK_SIZE; i++)
- bh_stack[i] = NULL;
-
- bh_stack[tos] = eb_bh;
-
- /* Ok, find the splitting point (can be a DAT or HDR) */
- do {
- /* it's perfectly legal to get_access a block but
- * never dirty it, so lets just pre-emptively do it
- * now. */
- status = ocfs_journal_access(handle, bh_stack[tos],
- OCFS_JOURNAL_ACCESS_WRITE);
- if (status < 0) {
- LOG_ERROR_STATUS(status);
- goto bail;
- }
-
- alloc_eb = (ocfs2_extent_block *) bh_stack[tos]->b_data;
- if (!IS_VALID_EXTENT_BLOCK(alloc_eb)) {
- LOG_ERROR_STR("Invalid extent block!");
- goto bail;
- }
-
- el = &alloc_eb->h_list;
-
- if (!el->l_tree_depth) {
- /* shall we just do away with him? */
- if (el->l_recs[0].e_cpos >= fe->i_clusters) {
- LOG_TRACE_ARGS("Killing this data extent (%llu)\n", alloc_eb->h_blkno);
- /* Boundary case - what if this guy is
- * the last DAT we should delete
- * (i.e., split no more ;) */
- alloc_eb = NULL;
- el = NULL;
- status = ocfs_kill_this_tree(osb,
- bh_stack[tos],
- handle,
- inode);
- if (status < 0) {
- LOG_ERROR_STATUS (status);
- goto bail;
- }
- /* silly, but what to do? */
- alloc_eb = (ocfs2_extent_block *) bh_stack[tos]->b_data;
- } else {
- /* Alright, we know for sure that
- * we're splitting in this guy. */
- LOG_TRACE_ARGS("Splitting this data extent (%llu)\n", alloc_eb->h_blkno);
- fe->i_last_eb_blk = alloc_eb->h_blkno;
- alloc_eb->h_next_leaf_blk = 0;
- /* total_bytes is used below to know
- * how much total we've whacked off
- * this extent*/
- total_bytes = 0;
-
- /* there is a chance the split is at a
- * header boundary. this will catch
- * it: */
- rec = &el->l_recs[el->l_next_free_rec - 1];
- if ((rec->e_cpos + rec->e_clusters) == fe->i_clusters) {
- LOG_TRACE_STR("Ok, hit that boundary in the DAT");
- goto fix_headers;
- }
-
- /* Either kill the data or resize it */
- for(i = (el->l_next_free_rec - 1); i >= 0; i--) {
- rec = &el->l_recs[i];
-
- /* changed this from > to >= */
- /* Do we delete it completely? */
- if (rec->e_cpos >= fe->i_clusters) {
- total_bytes += (u64)rec->e_clusters << osb->s_clustersize_bits;
-
- num_clusters = rec->e_clusters;
- bitmap_offset = (u32)(((rec->e_blkno << osb->sb->s_blocksize_bits)) >> osb->s_clustersize_bits);
- rec->e_cpos = 0;
- rec->e_clusters = 0;
- rec->e_blkno = 0;
- } else if ((rec->e_cpos + rec->e_clusters) > fe->i_clusters) {
- /* Do we shrink it? */
- /* FIXME
- * this is soooo a
- * hodge-podge of bytes
- * and clusters
- */
- orig_bytes = (u64)rec->e_clusters << osb->s_clustersize_bits;
- num_clusters = rec->e_clusters;
- doff = rec->e_blkno << osb->sb->s_blocksize_bits;
- rec->e_clusters = fe->i_clusters - rec->e_cpos;
- bytes = (u64)rec->e_clusters << osb->s_clustersize_bits;
- num_clusters -= rec->e_clusters;
- bitmap_offset = (u32)((doff + bytes) >> osb->s_clustersize_bits);
- /* we want to exit the
- * for loop now */
- total_bytes += (orig_bytes - bytes);
- done = 1;
- } else {
- /* if we get here,
- * then we don't want
- * to actually delete
- * *anything* from
- * this extent. */
- LOG_TRACE_ARGS("Not deleting extent %d, e_blkno = %llu, e_clusters = %u, e_cpos = %u\n", i, rec->e_blkno, rec->e_clusters, rec->e_cpos);
- done = 1;
- goto skip_bitmap_add;
- }
- status = ocfs_handle_add_commit_bits(handle, num_clusters, bitmap_offset, -1, 0, DISK_ALLOC_VOLUME);
- if (status < 0) {
- LOG_ERROR_STATUS (status);
- goto bail;
- }
-
-skip_bitmap_add:
- if (done) {
- el->l_next_free_rec =
- i + 1;
- break;
- }
- } /* For loop */
-
- /* Either way, we need to write this back out*/
- alloc_eb = NULL;
- el = NULL;
-
- status = ocfs_journal_dirty(handle,
- bh_stack[tos]);
- if (status < 0) {
- LOG_ERROR_STATUS(status);
- goto bail;
- }
-
- LOG_TRACE_ARGS("Fixing the headers above us! (tos=%d)\n", tos);
-fix_headers:
- /*And here we should fix the headers above us*/
- tos--;
- while (tos >= 0) {
- LOG_TRACE_ARGS("at top of loop, tos=%d\n", tos);
- status = ocfs_journal_access(handle, bh_stack[tos], OCFS_JOURNAL_ACCESS_WRITE);
- if (status < 0) {
- LOG_ERROR_STATUS(status);
- goto bail;
- }
-
- alloc_eb = (ocfs2_extent_block *) bh_stack[tos]->b_data;
- el = &alloc_eb->h_list;
- victim = el->l_next_free_rec;
- el->l_next_free_rec++;
- /* need to also update
- * numbytes on these guys */
- rec = &el->l_recs[victim];
- rec->e_clusters -=
- (u32)(total_bytes >> osb->s_clustersize_bits);
- alloc_eb = NULL;
- el = NULL;
- status = ocfs_fix_extent_block(osb,
- bh_stack[tos], inode);
- if (status < 0) {
- LOG_ERROR_STATUS(status);
- goto bail;
- }
- status = ocfs_journal_dirty(handle,
- bh_stack[tos]);
- if (status < 0) {
- LOG_ERROR_STATUS(status);
- goto bail;
- }
- tos--;
- }
- LOG_TRACE_STR("breaking to end function now!");
- /* Ok, done! */
- break;
- }
- } else { /* It's a header extent */
-
- /* Did we already kill all his children, or
- * are they already dead? */
- if (el->l_next_free_rec == 0) {
- /*Ok, we're done with this guy, pop the stack*/
- LOG_TRACE_ARGS("Popping this header (%llu)\n",
- alloc_eb->h_blkno);
-
- status = ocfs_handle_add_commit_bits(handle, 1, alloc_eb->h_suballoc_bit, alloc_eb->h_suballoc_node, alloc_eb->h_blkno, DISK_ALLOC_EXTENT_NODE);
- if (status < 0) {
- LOG_ERROR_STATUS (status);
- goto bail;
- }
- brelse(bh_stack[tos]);
- alloc_eb = NULL;
- el = NULL;
- bh_stack[tos] = NULL;
- tos--;
- continue;
- }
- /* changed this from > to >= */
- /* Do we just delete this whole part of the tree? */
- if (el->l_recs[0].e_cpos >= fe->i_clusters) {
- LOG_TRACE_ARGS("whacking this tree: (%llu)\n",
- alloc_eb->h_blkno);
-
- if (el->l_recs[0].e_cpos == fe->i_clusters)
- done = 1;
-
- alloc_eb = NULL;
- ocfs_kill_this_tree(osb, bh_stack[tos],
- handle, inode);
- brelse(bh_stack[tos]);
- alloc_eb = NULL;
- el = NULL;
- bh_stack[tos] = NULL;
- tos--;
- if (tos < 0) {
- LOG_ERROR_STR("End of stack reached.");
- goto bail;
- }
- /* I just have to fix my parent,
- * right? Yes, but only because our
- * max tree_depth is 3. if it were
- * more, we'd have to fix his
- * parents parent. */
- status = ocfs_journal_access(handle, bh_stack[tos], OCFS_JOURNAL_ACCESS_WRITE);
- if (status < 0) {
- LOG_ERROR_STATUS(status);
- goto bail;
- }
-
- alloc_eb = (ocfs2_extent_block *) bh_stack[tos]->b_data;
- el = &alloc_eb->h_list;
-
- victim = el->l_next_free_rec;
- el->l_recs[victim].e_cpos = 0;
- el->l_recs[victim].e_clusters = 0;
- el->l_recs[victim].e_blkno = 0;
- el->l_next_free_rec--;
- alloc_eb = NULL;
- el = NULL;
- /* Here's an interesting boundary
- * case. What if we're truncating on a
- * boundary between two headers and
- * this is the one we just deleted. In
- * that case we're done, but need to
- * write the parent out before we leave
- * again, this bit of code depends on
- * tree_depth of 3. */
- if (done) {
- LOG_TRACE_STR("Found a boundary " \
- "header, almost done " \
- " (gonna quit)");
- status = ocfs_fix_extent_block(osb,
- bh_stack[tos], inode);
- if (status < 0) {
- LOG_ERROR_STATUS(status);
- goto bail;
- }
-
- status = ocfs_journal_dirty(handle,
- bh_stack[tos]);
- if (status < 0) {
- LOG_ERROR_STATUS(status);
- goto bail;
- }
- /* decrement tos so we dont
- * trigger an error
- * condition */
- brelse(bh_stack[tos]);
- bh_stack[tos] = NULL;
- tos--;
- break;
- }
-
- status = ocfs_journal_dirty(handle,
- bh_stack[tos]);
- if (status < 0) {
- LOG_ERROR_STATUS(status);
- goto bail;
- }
-
-
- /* Ok, we're not a boundary case, continue */
- continue;
- }
-
- el->l_next_free_rec--;
- victim = el->l_next_free_rec;
- rec = &el->l_recs[victim];
- alloc_eb = NULL;
- el = NULL;
-
- status = ocfs_journal_dirty(handle,
- bh_stack[tos]);
- if (status < 0) {
- LOG_ERROR_STATUS (status);
- goto bail;
- }
-
- /* grow the stack. */
- tos++;
- /* should never be true. */
- if (bh_stack[tos] != NULL)
- LOG_ERROR_STR("uhoh, not brelsing a buffer " \
- "on our stack!\n");
-
- status = ocfs_read_block(osb,
- rec->e_blkno,
- &bh_stack[tos],
- OCFS_BH_CACHED,
- inode);
- if (status < 0) {
- LOG_ERROR_STATUS (status);
- goto bail;
- }
-
- /* We only want to free on our way up the tree */
- continue;
- }
- brelse(bh_stack[tos]);
- bh_stack[tos] = NULL;
- alloc_eb = NULL;
- el = NULL;
- tos--;
- } while (tos >= 0);
-
- if (tos >= 0)
- LOG_ERROR_ARGS("Quitting main loop while top of stack >= 0 " \
- " (tos=%d)\n", tos);
-
- status=0;
-bail:
- /* brelse the stack. We only brelse the bottom of the stack if
- * we know for sure that it wasn't passed from the caller */
- if (needs_brelse)
- brelse(bh_stack[0]);
- for(i = 1; i < OCFS_TREE_STACK_SIZE; i++)
- if (bh_stack[i])
- brelse(bh_stack[i]);
-
- LOG_EXIT_STATUS (status);
- return(status);
-} /* ocfs_split_this_tree */
-
-
-/*
- * ocfs_update_last_eb_blk
- *
- * Travel all the way to the rightmost DAT and set fe->i_last_eb_blk
- * to it.
- *
- * We do cached reads here because we ought to have already read the
- * various ext headers and dats off the system previously in the
- * truncate path.
- */
-static int ocfs_update_last_eb_blk(ocfs_super *osb,
- ocfs2_dinode *fe,
- struct inode *inode)
-{
- int status = -EFAIL;
- struct buffer_head *eb_bh = NULL;
- ocfs2_extent_block *eb = NULL;
- ocfs2_extent_list *el, *fel;
- u64 next_blk;
- int victim;
-
- LOG_ENTRY ();
-
- fel = &fe->id2.i_list;
- if (fel->l_next_free_rec == 0) {
- LOG_TRACE_STR("setting to zero as there isn't any used extents");
- fe->i_last_eb_blk = 0;
- status = 0;
- goto bail;
- }
-
- /* Can't be called with local extents */
- if (!fel->l_tree_depth)
- BUG();
-
- /* Ugly magic -1 */
- victim = fel->l_next_free_rec - 1;
- status = ocfs_read_block(osb,
- fel->l_recs[victim].e_blkno,
- &eb_bh,
- OCFS_BH_CACHED, inode);
- if (status < 0) {
- LOG_ERROR_STATUS(status);
- goto bail;
- }
-
- eb = (ocfs2_extent_block *) eb_bh->b_data;
-
- if (!IS_VALID_EXTENT_BLOCK(eb)) {
- LOG_ERROR_STR("Invalid extent block!");
- goto bail;
- }
-
- el = &eb->h_list;
-
- while (el->l_tree_depth) {
- if (!IS_VALID_EXTENT_BLOCK(eb)) {
- LOG_ERROR_STR("Invalid extent block!");
- goto bail;
- }
-
- el = &eb->h_list;
-
- next_blk = el->l_recs[el->l_next_free_rec - 1].e_blkno;
-
- brelse(eb_bh);
- eb = NULL;
- eb_bh = NULL;
-
- status = ocfs_read_block(osb,
- next_blk,
- &eb_bh,
- OCFS_BH_CACHED, inode);
- if (status < 0) {
- LOG_ERROR_STATUS(status);
- goto bail;
- }
- eb = (ocfs2_extent_block *) eb_bh->b_data;
- }
-
- fe->i_last_eb_blk = eb->h_blkno;
- status = 0;
-bail:
- if (eb_bh)
- brelse(eb_bh);
-
- LOG_EXIT_STATUS(status);
- return(status);
-} /* ocfs_update_last_eb_blk */
-
-/*
- * ocfs_free_extents_for_truncate()
- *
- * You know, it's funny -- you'd expect that we'd flush out the fe
- * before leaving this function, but that's pretty much up to the
- * caller!
- */
-int ocfs_free_extents_for_truncate(ocfs_super *osb,
- ocfs2_dinode *fe,
- ocfs_journal_handle *handle,
- struct inode *inode)
-{
- int status = 0;
- struct buffer_head *extent_bh = NULL;
- int i, j;
- ocfs2_extent_list *fel;
- int updated_leb; /* used to mark whether fe->i_last_eb_blk has
- * been updated */
-
- LOG_ENTRY ();
-
- fel = &fe->id2.i_list;
-
- /* local extents */
- if (!fel->l_tree_depth) {
- status = _squish_extent_entries(osb, fel->l_recs,
- &fel->l_next_free_rec,
- handle, fe->i_clusters,
- 0, inode);
- if (status < 0) {
- LOG_ERROR_STATUS (status);
- goto finally;
- }
- goto finally;
- }
-
- LOG_TRACE_ARGS("non-local extents. taking that code path, truncating to i_clusters of (%u)\n", fe->i_clusters);
- /* non-local extents */
-
- updated_leb = 0;
-
- /* Loop backwards through only the used free extent block here */
- for (i = (fel->l_next_free_rec - 1); i >= 0; i--) {
- LOG_TRACE_ARGS("at top of loop, i = %d\n", i);
- /* Go ahead and read that bit of the tree - we'll need it. */
- status = ocfs_read_block(osb,
- fel->l_recs[i].e_blkno,
- &extent_bh, OCFS_BH_CACHED,
- inode);
- if (status < 0) {
- LOG_ERROR_STATUS(status);
- goto finally;
- }
- /* Figure out, do we want to kill this whole tree? */
- if (fel->l_recs[i].e_cpos >= fe->i_clusters) {
- LOG_TRACE_ARGS("Found an entire tree to delete!\n");
-
- status = ocfs_kill_this_tree(osb, extent_bh,
- handle, inode);
- if (status < 0) {
- LOG_ERROR_STATUS(status);
- goto finally;
- }
- /* Ok, update the fe */
- fel->l_recs[i].e_cpos = 0;
- fel->l_recs[i].e_blkno = 0;
- fel->l_recs[i].e_clusters = 0;
- fel->l_next_free_rec = i;
- } else { /* Ok, we only want part of it. */
- LOG_TRACE_ARGS("Splitting this tree!\n");
- status = ocfs_split_this_tree(osb, extent_bh,
- handle, fe,
- inode);
- if (status < 0) {
- LOG_ERROR_STATUS(status);
- goto finally;
- }
-
- /* Ok, update the FileEntry */
- LOG_TRACE_ARGS("Alright. e_clusters = (%u), i_clusters = (%u) e_cpos = (%u)\n",
- fel->l_recs[i].e_clusters,
- fe->i_clusters,
- fel->l_recs[i].e_cpos);
- fel->l_recs[i].e_clusters = fe->i_clusters;
- for (j=0; j < i; j++)
- fel->l_recs[i].e_clusters +=
- fel->l_recs[j].e_clusters;
-
- fel->l_next_free_rec = i + 1;
- /* We're done - we can't split more than one
- * parts of the tree. */
- updated_leb = 1;
- break;
- }
- brelse(extent_bh);
- extent_bh = NULL;
- }
-
- /* Ok, trunc to zero is a special case, doofus */
- if (fe->i_clusters == 0) {
- fe->i_last_eb_blk = 0;
- fel->l_tree_depth = 0;
- updated_leb = 1;
- }
-
- if (!updated_leb) {
- status = ocfs_update_last_eb_blk(osb, fe, inode);
- if (status < 0) {
- LOG_ERROR_STATUS(status);
- goto finally;
- }
- }
-
-finally:
- if (extent_bh)
- brelse(extent_bh);
-
- LOG_EXIT_INT (status);
- return status;
-} /* ocfs_free_extents_for_truncate */
-
-
-/*
* ocfs_lookup_file_allocation()
*
* This routine looks up the existing mapping of VBO to LBO for a file.
@@ -2989,74 +2034,551 @@
return(status);
}
+/* This function will figure out whether the currently last extent
+ * block will be deleted, and if it will, what the new last extent
+ * block will be so we can update his h_next_leaf_blk field, as well
+ * as the dinode's i_last_eb_blk */
+static int ocfs_find_new_last_ext_blk(ocfs_super *osb,
+ struct inode *inode,
+ ocfs2_dinode *fe,
+ unsigned int new_i_clusters,
+ struct buffer_head *old_last_eb,
+ struct buffer_head **new_last_eb)
+{
+ int i, status = 0;
+ u64 block = 0;
+ ocfs2_extent_block *eb;
+ ocfs2_extent_list *el;
+ struct buffer_head *bh = NULL;
+
+ OCFS_ASSERT(IS_VALID_FILE_ENTRY(fe));
+
+ *new_last_eb = NULL;
+
+ /* we have no tree, so of course, no last_eb. */
+ if (!fe->id2.i_list.l_tree_depth)
+ goto bail;
+
+ /* trunc to zero special case - this makes tree_depth = 0
+ * regardless of what it is. */
+ if (!new_i_clusters)
+ goto bail;
+
+ eb = (ocfs2_extent_block *) old_last_eb->b_data;
+ el = &(eb->h_list);
+ OCFS_ASSERT(el->l_next_free_rec);
+
+ /* Make sure that this guy will actually be empty after we
+ * clear away the data. */
+ if (el->l_recs[0].e_cpos < new_i_clusters)
+ goto bail;
+
+ /* Ok, at this point, we know that last_eb will definitely
+ * change, so let's traverse the tree and find the second to
+ * last extent block. */
+ el = &(fe->id2.i_list);
+ /* go down the tree, */
+ do {
+ for(i = (el->l_next_free_rec - 1); i >= 0; i--) {
+ if (el->l_recs[i].e_cpos < new_i_clusters) {
+ block = el->l_recs[i].e_blkno;
+ break;
+ }
+ }
+ OCFS_ASSERT(i >= 0);
+
+ if (bh) {
+ brelse(bh);
+ bh = NULL;
+ }
+
+ status = ocfs_read_block(osb, block, &bh, OCFS_BH_CACHED,
+ inode);
+ if (status < 0) {
+ LOG_ERROR_STATUS(status);
+ goto bail;
+ }
+ eb = (ocfs2_extent_block *) bh->b_data;
+ el = &(eb->h_list);
+ OCFS_ASSERT(IS_VALID_EXTENT_BLOCK(eb));
+ } while (el->l_tree_depth);
+
+ *new_last_eb = bh;
+ get_bh(*new_last_eb);
+ LOG_TRACE_ARGS("returning block %llu\n", eb->h_blkno);
+bail:
+ if (bh)
+ brelse(bh);
+
+ return status;
+}
+
+static int ocfs_do_truncate(ocfs_super *osb,
+ unsigned int clusters_to_del,
+ struct inode *inode,
+ struct buffer_head *fe_bh,
+ struct buffer_head *old_last_eb_bh,
+ ocfs2_truncate_context *tc)
+{
+ int status, i, depth;
+ ocfs_journal_handle *handle;
+ ocfs2_dinode *fe;
+ ocfs2_extent_block *eb;
+ ocfs2_extent_block *last_eb = NULL;
+ ocfs2_extent_list *el;
+ struct buffer_head *eb_bh = NULL;
+ struct buffer_head *last_eb_bh = NULL;
+ u64 next_eb = 0;
+ u64 delete_blk = 0;
+
+ handle = tc->tc_handle;
+ fe = (ocfs2_dinode *) fe_bh->b_data;
+
+ status = ocfs_find_new_last_ext_blk(osb,
+ inode,
+ fe,
+ fe->i_clusters - clusters_to_del,
+ old_last_eb_bh,
+ &last_eb_bh);
+ if (status < 0) {
+ LOG_ERROR_STATUS(status);
+ goto bail;
+ }
+ if (last_eb_bh)
+ last_eb = (ocfs2_extent_block *) last_eb_bh->b_data;
+
+ status = ocfs_journal_access(handle, fe_bh, OCFS_JOURNAL_ACCESS_WRITE);
+ if (status < 0) {
+ LOG_ERROR_STATUS(status);
+ goto bail;
+ }
+ el = &(fe->id2.i_list);
+
+ down (&(OCFS_I(inode)->ip_sem));
+ OCFS_I(inode)->ip_alloc_size =
+ (u64)(fe->i_clusters - clusters_to_del) << osb->s_clustersize_bits;
+ up (&(OCFS_I(inode)->ip_sem));
+ fe->i_clusters -= clusters_to_del;
+ fe->i_mtime = OCFS_CURRENT_TIME;
+
+ i = el->l_next_free_rec - 1;
+
+ OCFS_ASSERT(el->l_recs[i].e_clusters >= clusters_to_del);
+ el->l_recs[i].e_clusters -= clusters_to_del;
+ /* tree depth zero, we can just delete the clusters, otherwise
+ * we need to record the offset of the next level extent block
+ * as we may overwrite it. */
+ if (!el->l_tree_depth)
+ delete_blk = el->l_recs[i].e_blkno + el->l_recs[i].e_clusters;
+ else
+ next_eb = el->l_recs[i].e_blkno;
+
+ if (!el->l_recs[i].e_clusters) {
+ /* if we deleted the whole extent record, then clear
+ * out the other fields and update the extent
+ * list. For depth > 0 trees, we've already recorded
+ * the extent block in 'next_eb' */
+ el->l_recs[i].e_cpos = 0;
+ el->l_recs[i].e_blkno = 0;
+ OCFS_ASSERT(el->l_next_free_rec);
+ el->l_next_free_rec--;
+ }
+
+ depth = el->l_tree_depth;
+ if (!fe->i_clusters) {
+ /* trunc to zero is a special case. */
+ el->l_tree_depth = 0;
+ fe->i_last_eb_blk = 0;
+ } else if (last_eb)
+ fe->i_last_eb_blk = last_eb->h_blkno;
+
+ status = ocfs_journal_dirty(handle, fe_bh);
+ if (status < 0) {
+ LOG_ERROR_STATUS(status);
+ goto bail;
+ }
+
+ if (last_eb) {
+ /* If there will be a new last extent block, then by
+ * definition, there cannot be any leaves to the right of
+ * him. */
+ status = ocfs_journal_access(handle, last_eb_bh,
+ OCFS_JOURNAL_ACCESS_WRITE);
+ if (status < 0) {
+ LOG_ERROR_STATUS(status);
+ goto bail;
+ }
+ last_eb->h_next_leaf_blk = 0;
+ status = ocfs_journal_dirty(handle, last_eb_bh);
+ if (status < 0) {
+ LOG_ERROR_STATUS(status);
+ goto bail;
+ }
+ }
+
+ /* if our tree depth > 0, update all the tree blocks below us. */
+ while(depth) {
+ LOG_TRACE_ARGS("traveling tree (depth = %d, next_eb = %llu)\n",
+ depth, next_eb);
+ status = ocfs_read_block(osb, next_eb, &eb_bh,
+ OCFS_BH_CACHED, inode);
+ if (status < 0) {
+ LOG_ERROR_STATUS(status);
+ goto bail;
+ }
+ eb = (ocfs2_extent_block *) eb_bh->b_data;
+ OCFS_ASSERT(IS_VALID_EXTENT_BLOCK(eb));
+ el = &(eb->h_list);
+
+ status = ocfs_journal_access(handle, eb_bh,
+ OCFS_JOURNAL_ACCESS_WRITE);
+ if (status < 0) {
+ LOG_ERROR_STATUS(status);
+ goto bail;
+ }
+
+ OCFS_ASSERT(el->l_next_free_rec);
+ OCFS_ASSERT(depth == (el->l_tree_depth + 1));
+
+ i = el->l_next_free_rec - 1;
+
+ LOG_TRACE_ARGS("extent block %llu, before: record %d: "
+ "(%u, %u, %llu), next = %u\n", eb->h_blkno, i,
+ el->l_recs[i].e_cpos, el->l_recs[i].e_clusters,
+ el->l_recs[i].e_blkno, el->l_next_free_rec);
+
+ OCFS_ASSERT(el->l_recs[i].e_clusters >= clusters_to_del);
+ el->l_recs[i].e_clusters -= clusters_to_del;
+
+ next_eb = el->l_recs[i].e_blkno;
+ /* bottom-most block requires us to delete data.*/
+ if (!el->l_tree_depth)
+ delete_blk = el->l_recs[i].e_blkno +
+ el->l_recs[i].e_clusters;
+ if (!el->l_recs[i].e_clusters) {
+ el->l_recs[i].e_cpos = 0;
+ el->l_recs[i].e_blkno = 0;
+ OCFS_ASSERT(el->l_next_free_rec);
+ el->l_next_free_rec--;
+ }
+ LOG_TRACE_ARGS("extent block %llu, after: record %d: "
+ "(%u, %u, %llu), next = %u\n", eb->h_blkno, i,
+ el->l_recs[i].e_cpos, el->l_recs[i].e_clusters,
+ el->l_recs[i].e_blkno, el->l_next_free_rec);
+
+ status = ocfs_journal_dirty(handle, eb_bh);
+ if (status < 0) {
+ LOG_ERROR_STATUS(status);
+ goto bail;
+ }
+
+ if (!el->l_next_free_rec) {
+ LOG_TRACE_ARGS("deleting this extent block.\n");
+ OCFS_ASSERT(!eb->h_suballoc_node);
+ OCFS_ASSERT(!el->l_recs[0].e_clusters);
+ OCFS_ASSERT(!el->l_recs[0].e_cpos);
+ OCFS_ASSERT(!el->l_recs[0].e_blkno);
+ status = ocfs_free_suballoc_bits(osb,
+ handle,
+ tc->tc_ext_alloc_inode,
+ tc->tc_ext_alloc_bh,
+ eb->h_suballoc_bit,
+ eb->h_blkno,
+ 1);
+ if (status < 0) {
+ LOG_ERROR_STATUS(status);
+ goto bail;
+ }
+ }
+ brelse(eb_bh);
+ eb_bh = NULL;
+ depth--;
+ }
+
+ OCFS_ASSERT(delete_blk);
+ status = ocfs_free_clusters(osb, handle, tc->tc_bitmap_inode,
+ tc->tc_bitmap_bh, delete_blk,
+ clusters_to_del);
+ if (status < 0) {
+ LOG_ERROR_STATUS(status);
+ goto bail;
+ }
+ status = 0;
+bail:
+ LOG_EXIT_STATUS(status);
+ return(status);
+}
+
/*
- * ocfs_free_file_extents()
+ * It is expected, that by the time you call this function,
+ * inode->i_size and fe->i_size have been adjusted.
*
+ * This will start, restart and commit your handle for you.
+ *
+ * WARNING: This will gobble the context's reference to last_eb_bh.
*/
-int ocfs_free_file_extents(ocfs_super *osb, struct buffer_head *fe_bh,
- ocfs_journal_handle *handle,
- struct inode *inode)
+int ocfs_commit_truncate(ocfs_super *osb,
+ struct inode *inode,
+ struct buffer_head *fe_bh,
+ ocfs2_truncate_context *tc)
{
- int status = 0;
- __u32 i;
- __u32 numBitsAllocated = 0, bitmapOffset = 0;
- ocfs2_extent_block *extent;
- struct buffer_head *extent_bh = NULL;
- ocfs2_extent_list *fel;
+ int status, i, credits;
+ unsigned int clusters_to_del, target_i_clusters;
+ u64 last_eb = 0;
ocfs2_dinode *fe;
+ ocfs2_extent_block *eb;
+ ocfs2_extent_list *el;
+ struct buffer_head *last_eb_bh;
+ ocfs_journal_handle *handle;
- LOG_ENTRY ();
+ LOG_ENTRY();
- fe = (ocfs2_dinode *) fe_bh->b_data;
+ last_eb_bh = tc->tc_last_eb_bh;
+ tc->tc_last_eb_bh = NULL;
+ handle = tc->tc_handle;
+ target_i_clusters = ocfs_clusters_for_bytes(osb->sb, inode->i_size);
fe = (ocfs2_dinode *) fe_bh->b_data;
- fel = &fe->id2.i_list;
- if (!fel->l_tree_depth) {
- for (i = 0; i < fel->l_next_free_rec; i++) {
- numBitsAllocated = fel->l_recs[i].e_clusters;
+ if (fe->id2.i_list.l_tree_depth) {
+ eb = (ocfs2_extent_block *) last_eb_bh->b_data;
+ el = &(eb->h_list);
+ } else
+ el = &(fe->id2.i_list);
+ last_eb = fe->i_last_eb_blk;
+start:
+ LOG_TRACE_ARGS("ocfs_commit_truncate: fe->i_clusters = %u, "
+ "last_eb = %llu, fe->i_last_eb_blk = %llu, "
+ "fe->id2.i_list.l_tree_depth = %u last_eb_bh = %p\n",
+ fe->i_clusters, last_eb, fe->i_last_eb_blk,
+ fe->id2.i_list.l_tree_depth, last_eb_bh);
- bitmapOffset =
- (__u32)(((fel->l_recs[i].e_blkno << osb->sb->s_blocksize_bits)) >>
- osb->s_clustersize_bits);
+ if (last_eb != fe->i_last_eb_blk) {
+ LOG_TRACE_ARGS("last_eb changed!\n");
+ OCFS_ASSERT(fe->id2.i_list.l_tree_depth);
+ last_eb = fe->i_last_eb_blk;
+ /* i_last_eb_blk may have changed, read it if
+ * necessary. We don't have to worry about the
+ * truncate to zero case here (where there becomes no
+ * last_eb) because we never loop back after our work
+ * is done. */
+ if (last_eb_bh) {
+ brelse(last_eb_bh);
+ last_eb_bh = NULL;
+ }
- ocfs_handle_add_commit_bits(handle, numBitsAllocated,
- bitmapOffset, -1, 0,
- DISK_ALLOC_VOLUME);
+ status = ocfs_read_block(osb, last_eb,
+ &last_eb_bh, OCFS_BH_CACHED,
+ inode);
+ if (status < 0) {
+ LOG_ERROR_STATUS(status);
+ goto bail;
}
+ eb = (ocfs2_extent_block *) last_eb_bh->b_data;
+ OCFS_ASSERT(IS_VALID_EXTENT_BLOCK(eb));
+ el = &(eb->h_list);
+ }
+
+ /* by now, el will point to the extent list on the bottom most
+ * portion of this tree. */
+ i = el->l_next_free_rec - 1;
+ if (el->l_recs[i].e_cpos >= target_i_clusters)
+ clusters_to_del = el->l_recs[i].e_clusters;
+ else
+ clusters_to_del = (el->l_recs[i].e_clusters
+ + el->l_recs[i].e_cpos) - target_i_clusters;
+
+ LOG_TRACE_ARGS("clusters_to_del = %u in this pass\n", clusters_to_del);
+ credits = ocfs_calc_tree_trunc_credits(osb->sb, clusters_to_del,
+ fe, el);
+ if (!ocfs_handle_started(handle)) {
+ handle = ocfs_start_trans(osb, handle, credits);
+ if (!handle) {
+ status = -ENOMEM;
+ LOG_ERROR_STATUS(status);
+ goto bail;
+ }
+ ocfs_handle_set_always_commits(handle, 1);
} else {
- for (i = 0; i < fel->l_next_free_rec; i++) {
- status = ocfs_read_block(osb,
- fel->l_recs[i].e_blkno,
- &extent_bh,
- OCFS_BH_CACHED,
- inode);
- if (status < 0) {
- LOG_ERROR_STATUS (status);
- goto leave;
- }
- extent = (ocfs2_extent_block *) extent_bh->b_data;
- if (!IS_VALID_EXTENT_BLOCK(extent)) {
- status = -EINVAL;
- LOG_ERROR_STATUS(status);
- goto leave;
- }
+ status = ocfs_extend_trans(handle, credits);
+ if (status < 0) {
+ LOG_ERROR_STATUS(status);
+ goto bail;
+ }
+ }
- status = ocfs_kill_this_tree(osb, extent_bh, handle, inode);
- if (status < 0) {
- LOG_ERROR_STATUS(status);
- goto leave;
- }
- brelse(extent_bh);
- extent_bh = NULL;
+ status = ocfs_do_truncate(osb, clusters_to_del, inode, fe_bh,
+ last_eb_bh, tc);
+ if (status < 0) {
+ LOG_ERROR_STATUS(status);
+ goto bail;
+ }
+
+ OCFS_ASSERT(fe->i_clusters >= target_i_clusters);
+ if (fe->i_clusters > target_i_clusters)
+ goto start;
+bail:
+ ocfs_commit_trans(handle);
+ tc->tc_handle = NULL;
+
+ if (last_eb_bh)
+ brelse(last_eb_bh);
+
+ LOG_EXIT_STATUS(status);
+ return status;
+}
+
+
+/*
+ * Expects the inode to already be locked. This will figure out which
+ * inodes need to be locked and will put them on the returned truncate
+ * context.
+ */
+int ocfs_prepare_truncate(ocfs_super *osb,
+ struct inode *inode,
+ struct buffer_head *fe_bh,
+ ocfs2_truncate_context **tc)
+{
+ int status, metadata_delete;
+ unsigned int new_i_clusters;
+ ocfs_journal_handle *handle = NULL;
+ ocfs2_dinode *fe;
+ ocfs2_extent_block *eb;
+ ocfs2_extent_list *el;
+ struct buffer_head *last_eb_bh = NULL;
+ struct inode *ext_alloc_inode = NULL;
+ struct buffer_head *ext_alloc_bh = NULL;
+ struct inode *data_alloc_inode = NULL;
+ struct buffer_head *data_alloc_bh = NULL;
+
+ LOG_ENTRY();
+
+ *tc = NULL;
+
+ new_i_clusters = ocfs_clusters_for_bytes(osb->sb, inode->i_size);
+ fe = (ocfs2_dinode *) fe_bh->b_data;
+
+ LOG_TRACE_ARGS("fe->i_clusters = %u, new_i_clusters = %u, fe->i_size ="
+ "%llu\n", fe->i_clusters, new_i_clusters, fe->i_size);
+
+ OCFS_ASSERT(fe->i_clusters > new_i_clusters);
+
+ *tc = kmalloc(sizeof(ocfs2_truncate_context), GFP_KERNEL);
+ if (!(*tc)) {
+ status = -ENOMEM;
+ LOG_ERROR_STATUS(status);
+ goto bail;
+ }
+ memset(*tc, 0, sizeof(ocfs2_truncate_context));
+
+ handle = ocfs_alloc_handle(osb);
+ if (handle == NULL) {
+ status = -ENOMEM;
+ LOG_ERROR_STATUS(status);
+ goto bail;
+ }
+
+ metadata_delete = 0;
+ if (fe->id2.i_list.l_tree_depth) {
+ /* If we have a tree, then the truncate may result in
+ * metadata deletes. Figure this out from the
+ * rightmost leaf block.*/
+ status = ocfs_read_block(osb, fe->i_last_eb_blk,
+ &last_eb_bh, OCFS_BH_CACHED, inode);
+ if (status < 0) {
+ LOG_ERROR_STATUS(status);
+ goto bail;
}
+ eb = (ocfs2_extent_block *) last_eb_bh->b_data;
+ OCFS_ASSERT(IS_VALID_EXTENT_BLOCK(eb));
+ el = &(eb->h_list);
+ if (el->l_recs[0].e_cpos >= new_i_clusters)
+ metadata_delete = 1;
}
-leave:
- if (extent_bh)
- brelse(extent_bh);
-
- LOG_EXIT_STATUS (status);
+ if (metadata_delete) {
+ LOG_TRACE_STR("Will have to delete metadata for this trunc. "
+ "locking allocator.\n");
+ ext_alloc_inode = ocfs_get_system_file_inode(osb, EXTENT_ALLOC_SYSTEM_INODE, 0);
+ if (!ext_alloc_inode) {
+ status = -ENOMEM;
+ LOG_ERROR_STATUS(status);
+ goto bail;
+ }
+
+ status = ocfs_acquire_lock(osb, OCFS_LKM_EXMODE, 0,
+ &ext_alloc_bh, ext_alloc_inode);
+ if (status < 0) {
+ if (status != -EINTR)
+ LOG_ERROR_STATUS (status);
+ goto bail;
+ }
+ ocfs_handle_add_lock(handle, OCFS_LKM_EXMODE,
+ 0, ext_alloc_inode);
+ ocfs_handle_add_inode(handle, ext_alloc_inode);
+ }
+
+ data_alloc_inode = ocfs_get_system_file_inode(osb, GLOBAL_BITMAP_SYSTEM_INODE, -1);
+ if (!data_alloc_inode) {
+ status = -EINVAL;
+ LOG_ERROR_STR("Could not get bitmap inode!");
+ goto bail;
+ }
+
+ status = ocfs_acquire_lock(osb, OCFS_LKM_EXMODE,
+ 0, &data_alloc_bh, data_alloc_inode);
+ if (status < 0) {
+ if (status != -EINTR)
+ LOG_ERROR_STATUS (status);
+ goto bail;
+ }
+ ocfs_handle_add_lock(handle, OCFS_LKM_EXMODE,
+ 0, data_alloc_inode);
+ ocfs_handle_add_inode(handle, data_alloc_inode);
+
+ (*tc)->tc_bitmap_inode = data_alloc_inode;
+ (*tc)->tc_bitmap_bh = data_alloc_bh;
+ (*tc)->tc_ext_alloc_inode = ext_alloc_inode;
+ (*tc)->tc_ext_alloc_bh = ext_alloc_bh;
+ (*tc)->tc_last_eb_bh = last_eb_bh;
+ (*tc)->tc_handle = handle;
+bail:
+ if (status < 0) {
+ if (handle)
+ ocfs_commit_trans(handle);
+ if (last_eb_bh)
+ brelse(last_eb_bh);
+ if (ext_alloc_inode)
+ iput(ext_alloc_inode);
+ if (data_alloc_inode)
+ iput(data_alloc_inode);
+ if (ext_alloc_bh)
+ brelse(ext_alloc_bh);
+ if (data_alloc_bh)
+ brelse(data_alloc_bh);
+ if (*tc)
+ ocfs_free_truncate_context(*tc);
+ *tc = NULL;
+ }
+ LOG_EXIT();
return status;
-} /* ocfs_free_file_extents */
+}
+void ocfs_free_truncate_context(ocfs2_truncate_context *tc)
+{
+ if (tc->tc_bitmap_inode)
+ iput(tc->tc_bitmap_inode);
+ if (tc->tc_bitmap_bh)
+ brelse(tc->tc_bitmap_bh);
+ if (tc->tc_ext_alloc_inode)
+ iput(tc->tc_ext_alloc_inode);
+ if (tc->tc_ext_alloc_bh)
+ brelse(tc->tc_ext_alloc_bh);
+ if (tc->tc_last_eb_bh)
+ brelse(tc->tc_last_eb_bh);
+ if (tc->tc_handle) {
+ OCFS_ASSERT(!ocfs_handle_started(tc->tc_handle));
+ ocfs_commit_trans(tc->tc_handle);
+ }
+ kfree(tc);
+}
Modified: trunk/src/alloc.h
===================================================================
--- trunk/src/alloc.h 2004-10-05 22:33:26 UTC (rev 1548)
+++ trunk/src/alloc.h 2004-10-07 01:38:01 UTC (rev 1549)
@@ -43,9 +43,6 @@
ocfs2_dinode *fe,
ocfs_journal_handle *handle,
struct inode *inode);
-int ocfs_free_file_extents(ocfs_super *osb, struct buffer_head *fe_bh,
- ocfs_journal_handle *handle,
- struct inode *inode);
int ocfs_get_leaf_extent(ocfs_super *osb, ocfs2_dinode *fe,
__s64 Vbo, struct buffer_head **data_extent_bh,
struct inode *inode);
@@ -114,4 +111,25 @@
u32 bits_wanted,
ocfs2_alloc_context *ac);
+typedef struct _ocfs2_truncate_context {
+ struct inode *tc_bitmap_inode;
+ struct buffer_head *tc_bitmap_bh;
+ struct inode *tc_ext_alloc_inode;
+ struct buffer_head *tc_ext_alloc_bh;
+ /* these get destroyed once it's passed to ocfs_commit_truncate. */
+ struct buffer_head *tc_last_eb_bh;
+ ocfs_journal_handle *tc_handle;
+} ocfs2_truncate_context;
+
+void ocfs_free_truncate_context(ocfs2_truncate_context *tc);
+
+int ocfs_prepare_truncate(ocfs_super *osb,
+ struct inode *inode,
+ struct buffer_head *fe_bh,
+ ocfs2_truncate_context **tc);
+int ocfs_commit_truncate(ocfs_super *osb,
+ struct inode *inode,
+ struct buffer_head *fe_bh,
+ ocfs2_truncate_context *tc);
+
#endif /* OCFS2_ALLOC_H */
Modified: trunk/src/file.c
===================================================================
--- trunk/src/file.c 2004-10-05 22:33:26 UTC (rev 1548)
+++ trunk/src/file.c 2004-10-07 01:38:01 UTC (rev 1549)
@@ -55,6 +55,10 @@
#define OCFS_DEBUG_CONTEXT OCFS_DEBUG_CONTEXT_FILE
static int ocfs2_zero_extend(struct inode *inode);
+static int ocfs_orphan_for_truncate(ocfs_super *osb,
+ struct inode *inode,
+ struct buffer_head *fe_bh,
+ u64 new_i_size);
static unsigned int ocfs_calc_overalloc_bits(ocfs_super *osb,
struct file *filp,
@@ -816,127 +820,176 @@
.ioctl = ocfs_ioctl
};
+int ocfs_set_inode_size(ocfs_journal_handle *handle,
+ struct inode *inode,
+ struct buffer_head *fe_bh,
+ u64 new_i_size)
+{
+ int status;
+ struct super_block *sb = inode->i_sb;
+ ocfs_inode_private *oip = OCFS_I(inode);
+
+ LOG_ENTRY();
+
+ inode->i_size = new_i_size;
+ OCFS_SET_INODE_TIME(inode, i_mtime, OCFS_CURRENT_TIME);
+ inode->i_blocks = (new_i_size + sb->s_blocksize - 1)
+ >> sb->s_blocksize_bits;
+ status = ocfs_mark_inode_dirty(handle, inode, fe_bh);
+ if (status < 0) {
+ LOG_ERROR_STATUS (status);
+ goto bail;
+ }
+
+ down(&oip->ip_sem);
+ oip->ip_mmu_private = inode->i_size;
+ ocfs_extent_map_destroy(&oip->ip_ext_map);
+ ocfs_extent_map_init (&oip->ip_ext_map);
+ up(&oip->ip_sem);
+bail:
+ LOG_EXIT_STATUS(status);
+ return status;
+}
+
+static int ocfs_orphan_for_truncate(ocfs_super *osb,
+ struct inode *inode,
+ struct buffer_head *fe_bh,
+ u64 new_i_size)
+{
+ int status;
+ ocfs_journal_handle *handle = NULL;
+
+ LOG_ENTRY();
+
+ /* TODO: This needs to actually orphan the inode in this
+ * transaction. */
+
+ handle = ocfs_start_trans(osb, handle, 1);
+ if (handle == NULL) {
+ LOG_ERROR_STATUS (status = -ENOMEM);
+ goto bail;
+ }
+ ocfs_handle_set_always_commits(handle, 1);
+
+ status = ocfs_set_inode_size(handle, inode, fe_bh, new_i_size);
+ if (status < 0) {
+ LOG_ERROR_STATUS (status);
+ goto bail;
+ }
+bail:
+ if (handle)
+ ocfs_commit_trans(handle);
+ LOG_EXIT_STATUS(status);
+ return status;
+}
+
/*
* ocfs_truncate_file()
*
*/
-static int ocfs_truncate_file(ocfs_super *osb, __u64 file_size,
+static int ocfs_truncate_file(ocfs_super *osb,
+ u64 new_i_size,
struct inode *inode)
{
int status = 0;
ocfs2_dinode *fe = NULL;
- __u64 new_alloc_size;
- struct buffer_head *bh = NULL;
+ struct buffer_head *fe_bh = NULL;
ocfs_journal_handle *handle = NULL;
+ ocfs2_truncate_context *tc = NULL;
- LOG_ENTRY_ARGS("(inode = %llu, file_size = %llu\n",
- OCFS_I(inode)->ip_blkno, file_size);
+ LOG_ENTRY_ARGS("(inode = %llu, new_i_size = %llu\n",
+ OCFS_I(inode)->ip_blkno, new_i_size);
- new_alloc_size = ocfs_align_bytes_to_clusters(osb->sb,
- file_size);
-
-#ifdef PURE_EVIL
- if (evil_filename_check(EVIL_INODE, inode)) {
- LOG_ERROR_ARGS("EVIL TRUNCATE: file_size=%llu, new_alloc=%llu, old=%llu\n",
- file_size, new_alloc_size, inode->i_size);
- }
-#endif
-
-
- LOG_TRACE_ARGS("new_alloc_size = %llu\n", new_alloc_size);
-
handle = ocfs_alloc_handle(osb);
if (handle == NULL) {
LOG_ERROR_STATUS (status = -ENOMEM);
- goto leave;
+ goto bail;
}
status = ocfs_acquire_lock (osb, OCFS_LKM_EXMODE,
FLAG_FILE_TRUNCATE|FLAG_FILE_UPDATE_OIN,
- &bh, inode);
+ &fe_bh, inode);
if (status < 0) {
if (status != -EINTR)
LOG_ERROR_STATUS (status);
- goto leave;
+ goto bail;
}
ocfs_handle_add_lock(handle, OCFS_LKM_EXMODE,
FLAG_FILE_TRUNCATE|FLAG_FILE_UPDATE_OIN, inode);
+ ocfs_handle_add_inode(handle, inode);
- fe = (ocfs2_dinode *) bh->b_data;
- if (!IS_VALID_FILE_ENTRY(fe)) {
- LOG_ERROR_ARGS("Invalid fe at blkno %llu",
- OCFS_I(inode)->ip_blkno);
- status = -EFAIL;
- goto leave;
- }
+ fe = (ocfs2_dinode *) fe_bh->b_data;
+ OCFS_ASSERT(IS_VALID_FILE_ENTRY(fe));
+ OCFS_ASSERT(fe->i_size == inode->i_size);
- handle = ocfs_start_trans(osb, handle, OCFS_FILE_TRUNCATE_CREDITS);
- if (handle == NULL) {
- LOG_ERROR_STATUS (status = -ENOMEM);
- goto leave;
- }
-
- /* add this fe to the journal transaction */
- status = ocfs_journal_access(handle, bh, OCFS_JOURNAL_ACCESS_WRITE);
- if (status < 0) {
- LOG_ERROR_STATUS(status);
- goto leave;
- }
-
- down(&OCFS_I(inode)->ip_sem);
-
- /* the file entry might have changed underneath us (while
- * waiting on the lock). make sure the size is still a valid
- * one. This really ought to check for other things too, like
- * a valid bit, etc. */
- if (file_size > fe->i_size) {
+ if (new_i_size > fe->i_size) {
LOG_TRACE_ARGS("asked to truncate file with size (%llu) "
"to size (%llu)!\n", fe->i_size,
- file_size);
- up(&OCFS_I(inode)->ip_sem);
+ new_i_size);
status = -EINVAL;
LOG_ERROR_STATUS(status);
- goto leave;
+ goto bail;
}
- fe->i_size = file_size;
- fe->i_clusters = (u32)(new_alloc_size >> osb->s_clustersize_bits);
+ LOG_TRACE_ARGS("inode %llu, i_size = %llu, new_i_size = %llu\n",
+ fe->i_blkno, fe->i_size, new_i_size);
- status = ocfs_free_extents_for_truncate (osb, fe, handle, inode);
+ /* let's handle the simple truncate cases before doing any more
+ * cluster locking. */
+ if (new_i_size == fe->i_size)
+ goto bail;
+
+ if (fe->i_clusters
+ == ocfs_clusters_for_bytes(osb->sb, new_i_size)) {
+ LOG_TRACE_ARGS("fe->i_clusters = %u, so we do a simple "
+ "truncate\n", fe->i_clusters);
+ /* No allocation change is required, so lets fast path
+ * this truncate. */
+ handle = ocfs_start_trans(osb, handle, 1);
+ if (handle == NULL) {
+ LOG_ERROR_STATUS (status = -ENOMEM);
+ goto bail;
+ }
+ ocfs_handle_set_always_commits(handle, 1);
+
+ status = ocfs_set_inode_size(handle, inode, fe_bh, new_i_size);
+ if (status < 0)
+ LOG_ERROR_STATUS (status);
+ goto bail;
+ }
+
+ /* alright, we're going to need to do a full blown alloc size
+ * change. Orphan the inode so that recovery can complete the
+ * truncate if necessary. This does the task of marking
+ * i_size. */
+ status = ocfs_orphan_for_truncate(osb, inode, fe_bh, new_i_size);
if (status < 0) {
- up(&OCFS_I(inode)->ip_sem);
LOG_ERROR_STATUS (status);
- goto leave;
+ goto bail;
}
- fe->i_mtime = OCFS_CURRENT_TIME;
+ status = ocfs_prepare_truncate(osb, inode, fe_bh, &tc);
+ if (status < 0) {
+ LOG_ERROR_STATUS(status);
+ goto bail;
+ }
- status = ocfs_journal_dirty(handle, bh);
+ status = ocfs_commit_truncate(osb, inode, fe_bh, tc);
if (status < 0) {
- up(&OCFS_I(inode)->ip_sem);
- LOG_ERROR_STATUS (status);
- goto leave;
+ LOG_ERROR_STATUS(status);
+ goto bail;
}
- /* if we updated correctly then we can update the alloc_size */
- OCFS_I(inode)->ip_alloc_size = new_alloc_size;
- OCFS_I(inode)->ip_mmu_private = fe->i_size;
- ocfs_extent_map_destroy(&OCFS_I(inode)->ip_ext_map);
- ocfs_extent_map_init (&OCFS_I(inode)->ip_ext_map);
- up (&(OCFS_I(inode)->ip_sem));
-
-leave:
- if (handle && (status == 0))
+ /* TODO: orphan dir cleanup here. */
+bail:
+ if (handle)
ocfs_commit_trans(handle);
- else if (handle)
- ocfs_abort_trans(handle);
- if (bh != NULL)
- brelse(bh);
+ if (fe_bh != NULL)
+ brelse(fe_bh);
- if (status < 0)
- if (status != -ENOSPC && status != -EINTR)
- LOG_ERROR_STATUS (status);
+ if (tc)
+ ocfs_free_truncate_context(tc);
LOG_EXIT_STATUS (status);
return status;
Modified: trunk/src/file.h
===================================================================
--- trunk/src/file.h 2004-10-05 22:33:26 UTC (rev 1548)
+++ trunk/src/file.h 2004-10-07 01:38:01 UTC (rev 1549)
@@ -58,6 +58,11 @@
struct kstat *stat);
#endif
+int ocfs_set_inode_size(ocfs_journal_handle *handle,
+ struct inode *inode,
+ struct buffer_head *fe_bh,
+ u64 new_i_size);
+
#ifdef PURE_EVIL
#define EVIL_FILENAME "libctx10.a"
#define EVIL_FILENAME_LEN 10
Modified: trunk/src/inode.c
===================================================================
--- trunk/src/inode.c 2004-10-05 22:33:26 UTC (rev 1548)
+++ trunk/src/inode.c 2004-10-07 01:38:01 UTC (rev 1549)
@@ -77,6 +77,9 @@
#else /* 2.4 kernel */
static int ocfs_find_inode (struct inode *inode, unsigned long ino, void *opaque);
#endif
+static int ocfs_truncate_for_delete(ocfs_super *osb,
+ struct inode *inode,
+ struct buffer_head *fe_bh);
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
/*
@@ -529,6 +532,61 @@
#endif
}
+static int ocfs_truncate_for_delete(ocfs_super *osb,
+ struct inode *inode,
+ struct buffer_head *fe_bh)
+{
+ int status = 0;
+ ocfs_journal_handle *handle = NULL;
+ ocfs2_truncate_context *tc = NULL;
+ ocfs2_dinode *fe;
+
+ LOG_ENTRY();
+
+ fe = (ocfs2_dinode *) fe_bh->b_data;
+
+ /* zero allocation, zero truncate :) */
+ if (!fe->i_clusters)
+ goto bail;
+
+ handle = ocfs_start_trans(osb, handle, 1);
+ if (handle == NULL) {
+ LOG_ERROR_STATUS (status = -ENOMEM);
+ goto bail;
+ }
+ ocfs_handle_set_always_commits(handle, 1);
+
+ status = ocfs_set_inode_size(handle, inode, fe_bh, 0ULL);
+ if (status < 0) {
+ LOG_ERROR_STATUS (status);
+ goto bail;
+ }
+
+ ocfs_commit_trans(handle);
+ handle = NULL;
+
+ status = ocfs_prepare_truncate(osb, inode, fe_bh, &tc);
+ if (status < 0) {
+ LOG_ERROR_STATUS(status);
+ goto bail;
+ }
+
+ status = ocfs_commit_truncate(osb, inode, fe_bh, tc);
+ if (status < 0) {
+ LOG_ERROR_STATUS(status);
+ goto bail;
+ }
+bail:
+ if (handle)
+ ocfs_commit_trans(handle);
+
+ if (tc)
+ ocfs_free_truncate_context(tc);
+
+ LOG_EXIT_STATUS (status);
+ return status;
+}
+
/*
* ocfs_delete_inode()
*
@@ -640,6 +698,16 @@
orphan_dir_inode);
ocfs_handle_add_inode(handle, orphan_dir_inode);
+ /* we do this while holding the orphan dir lock because we
+ * don't want recovery being run from another node to vote for
+ * an inode delete on us -- this will result in two nodes
+ * truncating the same file! */
+ status = ocfs_truncate_for_delete(osb, inode, fe_bh);
+ if (status < 0) {
+ LOG_ERROR_STATUS(status);
+ goto bail;
+ }
+
inode_alloc_inode = ocfs_get_system_file_inode(osb, INODE_ALLOC_SYSTEM_INODE, fe->i_suballoc_node);
if (!inode_alloc_inode) {
status = -EEXIST;
@@ -690,13 +758,6 @@
status = ocfs_free_suballoc_bits(osb, handle, inode_alloc_inode,
inode_alloc_bh, fe->i_suballoc_bit,
fe->i_blkno, 1);
- if (status < 0) {
- LOG_ERROR_STATUS(status);
- goto bail;
- }
-
- /* actually delete the data and the inode */
- status = ocfs_free_file_extents(osb, fe_bh, handle, inode);
if (status < 0)
LOG_ERROR_STATUS(status);
Modified: trunk/src/ocfs_journal.h
===================================================================
--- trunk/src/ocfs_journal.h 2004-10-05 22:33:26 UTC (rev 1548)
+++ trunk/src/ocfs_journal.h 2004-10-07 01:38:01 UTC (rev 1549)
@@ -223,6 +223,11 @@
* buffers to journal_access! */
#define OCFS_HANDLE_ALWAYS_COMMITS 4
+static inline int ocfs_handle_started(ocfs_journal_handle *handle)
+{
+ return handle->flags & OCFS_HANDLE_STARTED;
+}
+
static inline void ocfs_handle_free_all_copyout(ocfs_journal_handle *handle)
{
while (handle->num_co) {
@@ -466,6 +471,33 @@
return(blocks);
}
+static inline int ocfs_calc_tree_trunc_credits(struct super_block *sb,
+ unsigned int clusters_to_del,
+ ocfs2_dinode *fe,
+ ocfs2_extent_list *last_el)
+{
+ /* for file entry + all headers in this pass + update to next leaf */
+ int credits = 1 + fe->id2.i_list.l_tree_depth + 1;
+ int bitmap_blocks, i;
+
+ i = last_el->l_next_free_rec - 1;
+ OCFS_ASSERT(i >= 0);
+
+ /* We may be deleting metadata blocks, so metadata alloc dinode +
+ one desc. block for each possible delete. */
+ if (fe->id2.i_list.l_tree_depth
+ && (last_el->l_next_free_rec == 1)
+ && ((last_el->l_recs[i].e_clusters - clusters_to_del) == 0))
+ credits += 1 + fe->id2.i_list.l_tree_depth;
+
+ /* bitmap fe + bitmap blocks covered by this extent */
+ bitmap_blocks = 1 + ocfs_blocks_for_bits(sb,
+ clusters_to_del);
+ credits += bitmap_blocks;
+
+ return(credits);
+}
+
/* fe, anything along new 'edge' of tree + fuzz*/
#define OCFS_FILE_TRUNCATE_CREDITS (1 + 14 + OCFS_JOURNAL_FUZZ_CREDITS)
More information about the Ocfs2-commits
mailing list