[Ocfs2-commits] mfasheh commits r1543 - trunk/src
svn-commits at oss.oracle.com
svn-commits at oss.oracle.com
Fri Oct 1 20:08:09 CDT 2004
Author: mfasheh
Date: 2004-10-01 20:08:08 -0500 (Fri, 01 Oct 2004)
New Revision: 1543
Added:
trunk/src/localalloc.c
trunk/src/localalloc.h
trunk/src/suballoc.c
trunk/src/suballoc.h
Modified:
trunk/src/Makefile
trunk/src/alloc.c
trunk/src/alloc.h
trunk/src/aops.c
trunk/src/bitmap.c
trunk/src/bitmap.h
trunk/src/dcache.c
trunk/src/dir.c
trunk/src/dir.h
trunk/src/dlm.c
trunk/src/file.c
trunk/src/file.h
trunk/src/inode.c
trunk/src/inode.h
trunk/src/journal.c
trunk/src/namei.c
trunk/src/namei.h
trunk/src/ocfs.h
trunk/src/ocfs2_fs.h
trunk/src/ocfs_journal.h
trunk/src/ocfs_log.h
trunk/src/super.c
trunk/src/symlink.c
trunk/src/sysfile.c
trunk/src/sysfile.h
trunk/src/vote.c
Log:
* merge the dlm_changes branch back into trunk.
Modified: trunk/src/Makefile
===================================================================
--- trunk/src/Makefile 2004-10-02 01:01:24 UTC (rev 1542)
+++ trunk/src/Makefile 2004-10-02 01:08:08 UTC (rev 1543)
@@ -77,10 +77,12 @@
inode.c \
ioctl.c \
journal.c \
+ localalloc.c \
lockres.c \
namei.c \
nm.c \
proc.c \
+ suballoc.c \
super.c \
symlink.c \
sysfile.c \
@@ -109,10 +111,12 @@
inode.h \
ioctl.h \
journal.h \
+ localalloc.h \
lockres.h \
namei.h \
nm.h \
proc.h \
+ suballoc.h \
super.h \
symlink.h \
sysfile.h \
Modified: trunk/src/alloc.c
===================================================================
--- trunk/src/alloc.c 2004-10-02 01:01:24 UTC (rev 1542)
+++ trunk/src/alloc.c 2004-10-02 01:08:08 UTC (rev 1543)
@@ -41,7 +41,9 @@
#include "dlm.h"
#include "extmap.h"
#include "inode.h"
+#include "localalloc.h"
#include "util.h"
+#include "suballoc.h"
#include "sysfile.h"
#include "file.h"
@@ -55,6 +57,14 @@
struct buffer_head *extent_grp_bh,
ocfs_journal_handle *handle,
struct inode *inode);
+
+static int ocfs_create_new_meta_bhs(ocfs_super *osb,
+ ocfs_journal_handle *handle,
+ struct inode *inode,
+ int wanted,
+ ocfs2_alloc_context *meta_ac,
+ struct buffer_head *bhs[]);
+
static int ocfs_allocate_new_data_node(ocfs_super *osb,
ocfs2_dinode *fe,
u64 new_blkno,
@@ -62,13 +72,16 @@
struct buffer_head *eb_bh,
u64 *new_eb_blkno,
ocfs_journal_handle *handle,
- struct inode *inode);
+ struct inode *inode,
+ ocfs2_alloc_context *meta_ac);
static int ocfs_grow_extent_tree(ocfs_super *osb,
struct buffer_head *fe_bh,
ocfs_journal_handle *handle,
u64 blkno,
- u32 new_clusters, struct inode *inode);
+ u32 new_clusters,
+ struct inode *inode,
+ ocfs2_alloc_context *meta_ac);
static int _squish_extent_entries(ocfs_super *osb,
ocfs2_extent_rec *extarr,
@@ -90,34 +103,24 @@
static int ocfs_update_last_eb_blk(ocfs_super *osb, ocfs2_dinode *fe,
struct inode *inode);
-static int ocfs_free_vol_block (ocfs_super * osb, ocfs_journal_handle *handle,
- ocfs_free_rec * FreeLog, __u32 NodeNum,
- __u32 Type);
-
static int ocfs_free_disk_bitmap (ocfs_super * osb, ocfs_free_rec *free_log);
static inline int ocfs_free_main_bitmap(ocfs_super *osb,
ocfs_journal_handle *handle,
- ocfs_free_rec *freelog);
+ struct inode *bitmap_inode,
+ struct buffer_head *bh,
+ ocfs_free_rec *freelog);
-static int ocfs_alloc_new_window(ocfs_super *osb, struct buffer_head *lock_bh,
- struct inode *bm_inode,
- ocfs_journal_handle *handle);
-static int ocfs_sync_local_from_shutdown(ocfs_super *osb,
- ocfs_bitmap_free_head **f,
- struct buffer_head *local_alloc_bh,
- int in_recovery);
-static __u32 ocfs_alloc_count_bits(ocfs2_dinode *alloc);
-static void ocfs_clear_local_alloc(ocfs2_dinode *alloc);
-static int ocfs_find_space_from_local(ocfs_super *osb, __u32 bitswanted,
- u32 *bitoff, u32 *bitcount,
- ocfs_journal_handle *handle);
-static int ocfs_local_find_clear_bits(ocfs_super *osb,
- ocfs2_dinode *alloc,
- __u32 numbits);
static int ocfs_extent_contig(struct inode *inode, ocfs2_extent_rec *ext,
u64 blkno);
+static int ocfs_claim_main_bitmap_bits(ocfs_super *osb,
+ ocfs_journal_handle *handle,
+ ocfs2_alloc_context *ac,
+ u32 min_bits,
+ u32 *bit_off,
+ u32 *num_bits);
+
static int ocfs_extent_contig(struct inode *inode, ocfs2_extent_rec *ext,
u64 blkno)
{
@@ -154,29 +157,32 @@
int ocfs_add_to_bitmap_free_head(ocfs_super *osb,
ocfs_bitmap_free_head *f,
- __u32 len, __u32 fileoff,
- __u32 nodenum, __u32 type)
+ u32 len, u32 fileoff,
+ u32 nodenum, u64 blkno, u32 type)
{
int status = 0, n;
ocfs_free_rec *log;
ocfs_bitmap_update *fb;
- LOG_ENTRY_ARGS("(len = %u, fileoff = %u, nodenum = %u, "
- "type=%d (\"%s\")\n", len, fileoff, nodenum, type,
+ LOG_ENTRY_ARGS("(len = %u, fileoff = %u, nodenum = %u, blk = %llu"
+ "type=%d (\"%s\")\n", len, fileoff, nodenum,
+ blkno, type,
(type == DISK_ALLOC_VOLUME) ? "DISK_ALLOC_VOLUME" :
- ( (type == DISK_ALLOC_EXTENT_NODE) ?
- "DISK_ALLOC_EXTENT_NODE" : "DISK_ALLOC_INODE" ));
+ "DISK_ALLOC_EXTENT_NODE");
if (len == 0) {
printk("ocfs2: Zero length delete!\n");
printk("(len = %u, fileoff = %u, nodenum = %u, "
"type=%d (\"%s\")\n", len, fileoff, nodenum, type,
(type == DISK_ALLOC_VOLUME) ? "DISK_ALLOC_VOLUME" :
- ( (type == DISK_ALLOC_EXTENT_NODE) ?
- "DISK_ALLOC_EXTENT_NODE" : "DISK_ALLOC_INODE" ));
+ "DISK_ALLOC_EXTENT_NODE");
BUG();
}
+ /* right now we only support extent node records with nodenum 0. */
+ if ((type == DISK_ALLOC_EXTENT_NODE) && (nodenum != 0))
+ BUG();
+
log = f->tail;
/* need a new one? */
@@ -202,6 +208,7 @@
fb->file_off = fileoff;
fb->type = type;
fb->node_num = nodenum;
+ fb->blkno = blkno;
log->num_updates++;
done:
@@ -209,6 +216,21 @@
return(status);
}
+static inline void ocfs_copy_update(ocfs_free_rec *rec,
+ ocfs_bitmap_update *fb2)
+{
+ int idx = rec->num_updates;
+ ocfs_bitmap_update *fb1 = &(rec->update[idx]);
+
+ fb1->length = fb2->length;
+ fb1->file_off = fb2->file_off;
+ fb1->type = fb2->type;
+ fb1->node_num = fb2->node_num;
+ fb1->blkno = fb2->blkno;
+ rec->num_updates++;
+ return;
+}
+
/*
* ocfs_free_disk_bitmap()
*
@@ -216,336 +238,191 @@
static int ocfs_free_disk_bitmap (ocfs_super * osb, ocfs_free_rec *free_log)
{
int status = 0;
- __u32 num_upd;
- __u32 i;
- __u32 node_num;
- ocfs_free_rec **ext_alloc_free = NULL;
- ocfs_free_rec **inode_alloc_free = NULL;
- ocfs_free_rec *free_vol_bits = NULL;
- ocfs_free_rec *tmp_log;
- struct inode **ext_alloc_inode = NULL;
- struct inode **inode_alloc_inode = NULL;
+ int i;
+ ocfs_free_rec *ext_alloc_free = NULL;
+ ocfs_free_rec *vol_alloc_free = NULL;
+ struct inode *ext_alloc_inode = NULL;
struct inode *vol_inode = NULL;
- __u32 tmp_indx;
- struct buffer_head *globalbh = NULL;
- struct buffer_head *tmpbh = NULL;
+ struct buffer_head *vol_alloc_bh = NULL;
+ struct buffer_head *ext_alloc_bh = NULL;
ocfs_journal_handle *handle = NULL;
- int credits = 33; /* one for each potential sysfile fe. This
- * goes away when ocfs_ugly_hack goes
- * away. */
+ int credits = 0; /* sysfile fe's. */
+ ocfs_bitmap_update *tmp;
LOG_ENTRY_ARGS ("(0x%p, 0x%p)\n", osb, free_log);
-#define ALLOC_BLOCK(ptr, len, err) \
- do { \
- (ptr) = ocfs_malloc (len); \
- if (!(ptr)) { \
- LOG_ERROR_STATUS ((err) = -ENOMEM); \
- goto finally; \
- } \
- } while (0)
+ for (i = 0; i < free_log->num_updates; i++) {
+// OCFS_ASSERT(
+// (free_log->update[i].type == DISK_ALLOC_EXTENT_NODE)
+// || (free_log->update[i].type == DISK_ALLOC_VOLUME));
+ if ((free_log->update[i].type != DISK_ALLOC_EXTENT_NODE)
+ && (free_log->update[i].type != DISK_ALLOC_VOLUME)) {
+ printk("num_updates = %u, i=%d\n",
+ free_log->num_updates, i);
+ printk("length: %llu\n", free_log->update[i].length);
+ printk("file_off: %llu\n",
+ free_log->update[i].file_off);
+ printk("type: %u\n", free_log->update[i].type);
+ printk("node_num: %d\n", free_log->update[i].node_num);
+ printk("blkno: %llu\n", free_log->update[i].blkno);
+ BUG();
+ }
- ALLOC_BLOCK(inode_alloc_free,
- osb->max_nodes * sizeof (ocfs_free_rec *), status);
- ALLOC_BLOCK(inode_alloc_inode,
- osb->max_nodes * sizeof (struct inode *), status);
- ALLOC_BLOCK(ext_alloc_free,
- osb->max_nodes * sizeof (ocfs_free_rec *), status);
- ALLOC_BLOCK(ext_alloc_inode,
- osb->max_nodes * sizeof (struct inode *), status);
- /* init */
- for (i = 0; i < osb->max_nodes; i++) {
- ext_alloc_free[i] = NULL;
- ext_alloc_inode[i] = NULL;
- inode_alloc_free[i] = NULL;
- inode_alloc_inode[i] = NULL;
- }
-
- num_upd = free_log->num_updates;
- for (i = 0; i < num_upd; i++) {
- switch (free_log->update[i].type) {
- case DISK_ALLOC_INODE:
- node_num = free_log->update[i].node_num;
- if (inode_alloc_free[node_num] == NULL) {
- inode_alloc_free[node_num] =
+ if (free_log->update[i].type == DISK_ALLOC_EXTENT_NODE) {
+ if (!ext_alloc_free) {
+ ext_alloc_free =
+ ocfs_malloc(sizeof(ocfs_free_rec));
+ if (!ext_alloc_free) {
+ LOG_ERROR_STATUS(status = -ENOMEM);
+ goto finally;
+ }
+ ext_alloc_free->num_updates = 0;
+ credits++; /* for the fe updates */
+ }
+ credits++;
+ ocfs_copy_update(ext_alloc_free,
+ &(free_log->update[i]));
+ } else {
+ if (vol_alloc_free == NULL) {
+ vol_alloc_free =
ocfs_malloc (sizeof (ocfs_free_rec));
- if (inode_alloc_free[node_num] == NULL) {
- LOG_ERROR_STATUS (status = -ENOMEM);
- goto finally;
- }
- inode_alloc_free[node_num]->num_updates = 0;
- }
- tmp_log = inode_alloc_free[node_num];
-
- credits++;
- break;
-
- case DISK_ALLOC_EXTENT_NODE:
- node_num = free_log->update[i].node_num;
- if (ext_alloc_free[node_num] == NULL) {
- ext_alloc_free[node_num] =
- ocfs_malloc (sizeof (ocfs_free_rec));
- if (ext_alloc_free[node_num] == NULL) {
- LOG_ERROR_STATUS (status = -ENOMEM);
- goto finally;
- }
- ext_alloc_free[node_num]->num_updates = 0;
- }
- tmp_log = ext_alloc_free[node_num];
-
- credits++;
- break;
-
- case DISK_ALLOC_VOLUME:
- if (free_vol_bits == NULL) {
- free_vol_bits =
- ocfs_malloc (sizeof (ocfs_free_rec));
- if (free_vol_bits == NULL) {
- LOG_ERROR_STATUS (status = -ENOMEM);
- goto finally;
- }
- free_vol_bits->num_updates = 0;
- }
- tmp_log = free_vol_bits;
-
- credits += ocfs_blocks_for_bits(osb->sb,
- free_log->update[i].length);
- break;
-
- default:
- tmp_log = NULL;
- break;
+ if (vol_alloc_free == NULL) {
+ LOG_ERROR_STATUS (status = -ENOMEM);
+ goto finally;
+ }
+ vol_alloc_free->num_updates = 0;
+ credits++; /* for the fe updates */
+ }
+ credits += ocfs_blocks_for_bits(osb->sb,
+ free_log->update[i].length);
+ ocfs_copy_update(vol_alloc_free,
+ &(free_log->update[i]));
}
-
-
- if (tmp_log) {
- ocfs_bitmap_update *fb1, *fb2;
-
- tmp_indx = tmp_log->num_updates;
-
- fb1 = &(tmp_log->update[tmp_indx]);
- fb2 = &(free_log->update[i]);
-
- fb1->length = fb2->length;
- fb1->file_off = fb2->file_off;
- fb1->type = fb2->type;
- fb1->node_num = fb2->node_num;
-
- tmp_log->num_updates++;
- }
}
- /* start the transaction here to preserve ordering with the
- * bitmap io_sems... */
- handle = ocfs_start_trans(osb, NULL, credits);
+ handle = ocfs_alloc_handle(osb);
if (!handle) {
status = -ENOMEM;
LOG_ERROR_STATUS(status);
goto finally;
}
- /* Get all the locks we need. do global bitmap last to
- * preserve lock ordering with extend/create */
- for (i = 0; i < osb->max_nodes; i++) {
- if (inode_alloc_free[i] != NULL) {
- inode_alloc_inode[i] =
- ocfs_get_system_file_inode(osb, INODE_ALLOC_BITMAP_SYSTEM_INODE, i);
- if (!inode_alloc_inode[i]) {
- status = -EINVAL;
- LOG_ERROR_STATUS (status);
- goto abort;
- }
- ocfs_handle_add_inode(handle, inode_alloc_inode[i]);
-
- status = ocfs_acquire_lock (osb,
- OCFS_LKM_EXMODE,
- 0,
- &tmpbh,
- inode_alloc_inode[i]);
- if (tmpbh) {
- brelse(tmpbh);
- tmpbh = NULL;
- }
- if (status < 0) {
- iput(inode_alloc_inode[i]);
- inode_alloc_inode[i] = NULL;
- if (status != -EINTR)
- LOG_ERROR_STATUS (status);
- goto abort;
- }
- ocfs_handle_add_lock(handle, OCFS_LKM_EXMODE,
- 0,
- inode_alloc_inode[i]);
+ if (ext_alloc_free) {
+ ext_alloc_inode =
+ ocfs_get_system_file_inode(osb,
+ EXTENT_ALLOC_SYSTEM_INODE,
+ 0);
+ if (!ext_alloc_inode) {
+ status = -EINVAL;
+ LOG_ERROR_STATUS (status);
+ goto finally;
}
- }
- for (i = 0; i < osb->max_nodes; i++) {
- if (ext_alloc_free[i] != NULL) {
- ext_alloc_inode[i] =
- ocfs_get_system_file_inode(osb, EXTENT_ALLOC_BITMAP_SYSTEM_INODE, i);
- if (!ext_alloc_inode[i]) {
- status = -EINVAL;
+ status = ocfs_acquire_lock (osb, OCFS_LKM_EXMODE, 0,
+ &ext_alloc_bh, ext_alloc_inode);
+ if (status < 0) {
+ if (status != -EINTR)
LOG_ERROR_STATUS (status);
- goto abort;
- }
- ocfs_handle_add_inode(handle, ext_alloc_inode[i]);
-
- status = ocfs_acquire_lock (osb,
- OCFS_LKM_EXMODE,
- 0,
- &tmpbh,
- ext_alloc_inode[i]);
- if (tmpbh) {
- brelse(tmpbh);
- tmpbh = NULL;
- }
- if (status < 0) {
- iput(ext_alloc_inode[i]);
- ext_alloc_inode[i] = NULL;
- if (status != -EINTR)
- LOG_ERROR_STATUS (status);
- goto abort;
- }
- ocfs_handle_add_lock(handle, OCFS_LKM_EXMODE,
- 0,
- ext_alloc_inode[i]);
+ goto finally;
}
+ ocfs_handle_add_lock(handle, OCFS_LKM_EXMODE, 0,
+ ext_alloc_inode);
+ ocfs_handle_add_inode(handle, ext_alloc_inode);
}
- if (free_vol_bits != NULL) {
- vol_inode = ocfs_get_system_file_inode(osb, GLOBAL_BITMAP_SYSTEM_INODE, -1);
+ if (vol_alloc_free) {
+ vol_inode =
+ ocfs_get_system_file_inode(osb,
+ GLOBAL_BITMAP_SYSTEM_INODE,
+ -1);
if (!vol_inode) {
status = -EINVAL;
LOG_ERROR_STATUS (status);
- goto abort;
+ goto finally;
}
- ocfs_handle_add_inode(handle, vol_inode);
- status = ocfs_acquire_lock(osb, OCFS_LKM_EXMODE,
- 0,
- &globalbh, vol_inode);
+ status = ocfs_acquire_lock(osb, OCFS_LKM_EXMODE, 0,
+ &vol_alloc_bh, vol_inode);
if (status < 0) {
iput(vol_inode);
vol_inode = NULL;
if (status != -EINTR)
LOG_ERROR_STATUS (status);
- goto abort;
+ goto finally;
}
ocfs_handle_add_lock(handle, OCFS_LKM_EXMODE,
0, vol_inode);
+ ocfs_handle_add_inode(handle, vol_inode);
}
- /* free vol block */
- if (free_vol_bits != NULL)
- ocfs_free_vol_block(osb, handle, free_vol_bits, -1,
- DISK_ALLOC_VOLUME);
-
- for (i = 0; i < osb->max_nodes; i++) {
- if (inode_alloc_free[i] != NULL)
- ocfs_free_vol_block(osb, handle,
- inode_alloc_free[i], i,
- DISK_ALLOC_INODE);
- if (ext_alloc_free[i] != NULL)
- ocfs_free_vol_block(osb, handle,
- ext_alloc_free[i], i,
- DISK_ALLOC_EXTENT_NODE);
+ handle = ocfs_start_trans(osb, handle, credits);
+ if (!handle) {
+ status = -ENOMEM;
+ LOG_ERROR_STATUS(status);
+ goto finally;
}
+ ocfs_handle_set_always_commits(handle, 1);
- if (free_vol_bits) {
- ocfs2_dinode *bm_lock;
+ if (vol_alloc_free)
+ ocfs_free_main_bitmap(osb, handle, vol_inode,
+ vol_alloc_bh, vol_alloc_free);
- status = ocfs_journal_access(handle, globalbh,
- OCFS_JOURNAL_ACCESS_WRITE);
- if (status < 0) {
- LOG_ERROR_STATUS(status);
- goto abort;
+ if (ext_alloc_free)
+ for (i = 0; i < ext_alloc_free->num_updates; i++) {
+ tmp = &(ext_alloc_free->update[i]);
+ status = ocfs_free_suballoc_bits(osb,
+ handle,
+ ext_alloc_inode,
+ ext_alloc_bh,
+ (unsigned int)
+ tmp->file_off,
+ tmp->blkno,
+ 1);
+ if (status < 0) {
+ LOG_ERROR_STATUS (status);
+ goto finally;
+ }
}
- bm_lock = (ocfs2_dinode *) globalbh->b_data;
- bm_lock->id1.bitmap1.i_used =
- ocfs_count_bits(osb->sb, &osb->cluster_bitmap);
-
- status = ocfs_journal_dirty(handle, globalbh);
- if (status < 0) {
- LOG_ERROR_STATUS (status);
- goto abort;
- }
- }
-
- ocfs_commit_trans(handle);
-
- handle = NULL;
-
-abort:
+finally:
if (handle)
- ocfs_abort_trans(handle);
+ ocfs_commit_trans(handle);
-finally:
- if (inode_alloc_inode) {
- for (i = 0; i < osb->max_nodes; i++) {
- if (inode_alloc_inode[i])
- iput(inode_alloc_inode[i]);
- }
- kfree(inode_alloc_inode);
- }
-
- if (ext_alloc_inode) {
- for (i = 0; i < osb->max_nodes; i++) {
- if (ext_alloc_inode[i])
- iput(ext_alloc_inode[i]);
- }
- kfree(ext_alloc_inode);
- }
-
+ if (ext_alloc_inode)
+ iput(ext_alloc_inode);
if (vol_inode)
iput(vol_inode);
- if (globalbh)
- brelse(globalbh);
+ if (vol_alloc_bh)
+ brelse(vol_alloc_bh);
+ if (ext_alloc_bh)
+ brelse(ext_alloc_bh);
- if (ext_alloc_free) {
- for (i = 0; i < osb->max_nodes; i++) {
- if (ext_alloc_free[i])
- kfree(ext_alloc_free[i]);
- }
+ if (ext_alloc_free)
kfree(ext_alloc_free);
- }
+ if (vol_alloc_free)
+ kfree(vol_alloc_free);
- if (inode_alloc_free) {
- for (i = 0; i < osb->max_nodes; i++) {
- if (inode_alloc_free[i])
- kfree(inode_alloc_free[i]);
- }
- kfree(inode_alloc_free);
- }
-
- if (free_vol_bits)
- kfree(free_vol_bits);
-
LOG_EXIT_STATUS (status);
return status;
} /* ocfs_free_disk_bitmap */
static inline int ocfs_free_main_bitmap(ocfs_super *osb,
ocfs_journal_handle *handle,
+ struct inode *bitmap_inode,
+ struct buffer_head *bh,
ocfs_free_rec *freelog)
{
int i;
ocfs_alloc_bm *bitmap;
int status;
__u32 bitmapblocks; /* we only care about the valid blocks */
- struct inode *bitmap_inode = NULL;
+ ocfs2_dinode *bm_lock;
LOG_ENTRY();
- bitmap_inode = ocfs_get_system_file_inode(osb, GLOBAL_BITMAP_SYSTEM_INODE, -1);
- if (!bitmap_inode) {
- LOG_ERROR_STATUS (status = -EINVAL);
- goto bail;
- }
-
bitmap = &osb->cluster_bitmap;
bitmapblocks = ocfs_blocks_for_bits(osb->sb, bitmap->validbits);
@@ -558,120 +435,72 @@
goto bail;
}
- for (i = 0; i < freelog->num_updates; i++)
+ status = ocfs_journal_access(handle, bh,
+ OCFS_JOURNAL_ACCESS_WRITE);
+ if (status < 0) {
+ LOG_ERROR_STATUS(status);
+ goto bail;
+ }
+ bm_lock = (ocfs2_dinode *) bh->b_data;
+
+ for (i = 0; i < freelog->num_updates; i++) {
ocfs_clear_bits(osb->sb, handle, bitmap,
freelog->update[i].file_off,
freelog->update[i].length);
+ bm_lock->id1.bitmap1.i_used -= freelog->update[i].length;
+ }
+ status = ocfs_journal_dirty(handle, bh);
+ if (status < 0) {
+ LOG_ERROR_STATUS (status);
+ goto bail;
+ }
+
status = 0;
bail:
- if (bitmap_inode)
- iput(bitmap_inode);
LOG_EXIT_STATUS(status);
return(0);
}
/*
- * ocfs_free_vol_block()
- *
+ * How many free extents have we got before we need more meta data?
*/
-static int ocfs_free_vol_block(ocfs_super *osb,
- ocfs_journal_handle *handle,
- ocfs_free_rec *FreeLog, __u32 NodeNum,
- __u32 Type)
+int ocfs_num_free_extents(ocfs_super *osb,
+ struct inode *inode,
+ ocfs2_dinode *fe)
{
- int status = 0;
- __u64 fileSize = 0;
- __u64 allocSize = 0;
- __u32 foundBit = -1;
- __u32 blockSize = 0, blockSizeBits = 0;
- int file_type;
- __u32 bitmapblocks = 0;
- ocfs_alloc_bm AllocBitmap;
- ocfs_alloc_bm *tmpbitmap = NULL;
- __u32 i;
- struct inode *inode = NULL;
+ int retval;
+ ocfs2_extent_list *el;
+ ocfs2_extent_block *eb;
+ struct buffer_head *eb_bh = NULL;
- LOG_ENTRY ();
+ LOG_ENTRY();
- LOG_TRACE_ARGS("Free Log Details (type = %d):\n", Type);
- LOG_TRACE_ARGS("num_updates = %u\n", FreeLog->num_updates);
- for(i = 0; i < FreeLog->num_updates; i++)
- LOG_TRACE_ARGS("(upd=%u, length=%llu, file_off=%llu, type=%d, node_num=%d)\n",
- i, FreeLog->update[i].length, FreeLog->update[i].file_off,
- FreeLog->update[i].type, FreeLog->update[i].node_num);
+ OCFS_ASSERT(IS_VALID_FILE_ENTRY(fe));
- switch (Type) {
- case DISK_ALLOC_EXTENT_NODE:
- file_type = EXTENT_ALLOC_BITMAP_SYSTEM_INODE;
- blockSize = osb->sb->s_blocksize;
- blockSizeBits = osb->sb->s_blocksize_bits;
- break;
+ if (fe->i_last_eb_blk) {
+ retval = ocfs_read_bh(osb, fe->i_last_eb_blk << osb->sb->s_blocksize_bits, &eb_bh, OCFS_BH_CACHED, inode);
+ if (retval < 0) {
+ LOG_ERROR_STATUS(retval);
+ goto bail;
+ }
+ eb = (ocfs2_extent_block *) eb_bh->b_data;
+ el = &(eb->h_list);
+ } else
+ el = &(fe->id2.i_list);
- case DISK_ALLOC_INODE:
- file_type = INODE_ALLOC_BITMAP_SYSTEM_INODE;
- blockSize = osb->sb->s_blocksize;
- blockSizeBits = osb->sb->s_blocksize_bits;
- break;
+ OCFS_ASSERT(el->l_tree_depth == 0);
- case DISK_ALLOC_VOLUME:
- status = ocfs_free_main_bitmap(osb, handle, FreeLog);
- if (status < 0)
- LOG_ERROR_STATUS (status);
- goto leave;
- default:
- goto leave;
- }
+ retval = el->l_count - el->l_next_free_rec;
- if (NodeNum >= osb->max_nodes) {
- LOG_ERROR_STATUS(status = -EINVAL);
- goto leave;
- }
+bail:
+ if (eb_bh)
+ brelse(eb_bh);
- inode = ocfs_get_system_file_inode(osb, file_type, NodeNum);
- if (!inode) {
- LOG_ERROR_STATUS (status = -EINVAL);
- goto leave;
- }
- fileSize = inode->i_size;
- allocSize = OCFS_I(inode)->ip_alloc_size;
+ LOG_EXIT_STATUS(retval);
+ return(retval);
+}
- ocfs_initialize_bitmap(osb->sb, &AllocBitmap, fileSize * 8,
- allocSize * 8);
-
- tmpbitmap = &AllocBitmap;
- bitmapblocks = ocfs_blocks_for_bits(osb->sb,
- tmpbitmap->validbits);
-
- status = ocfs_read_system_file(osb, file_type, NodeNum,
- AllocBitmap.chunk,
- bitmapblocks << osb->sb->s_blocksize_bits);
- if (status < 0) {
- LOG_ERROR_STATUS (status);
- goto leave;
- }
-
- for (i = 0; i < FreeLog->num_updates; i++) {
- if (FreeLog->update[i].file_off == 0 && Type == 0) {
- LOG_ERROR_ARGS ("offset=0, type=%x, blksz=%d", Type,
- blockSize);
- }
-
- foundBit = (__u32) (FreeLog->update[i].file_off >> blockSizeBits);
- ocfs_clear_bits(osb->sb, handle, tmpbitmap, foundBit,
- (__u32) FreeLog->update[i].length);
- }
-
-leave:
- if (tmpbitmap)
- ocfs_uninitialize_bitmap(tmpbitmap);
- if (inode)
- iput(inode);
- LOG_EXIT_STATUS (status);
- return status;
-} /* ocfs_free_vol_block */
-
-
/* ocfs_allocate_new_data_node()
*
*/
@@ -682,26 +511,24 @@
struct buffer_head *eb_bh,
u64 *new_eb_blkno,
ocfs_journal_handle *handle,
- struct inode *inode)
+ struct inode *inode,
+ ocfs2_alloc_context *meta_ac)
{
int status = 0;
__u32 k, i;
__u32 depth;
- int allocSize;
u64 parent_blk;
- __u64 physicalOffset;
- u64 phys_blkno;
- __u64 fileOffset = 0;
int new_blocks = 0;
ocfs2_extent_block *eb = NULL;
ocfs2_extent_list *el1, *el2 = NULL;
struct buffer_head **eb_bhs = NULL;
struct buffer_head *bh = NULL;
- int bh_locked = 0;
int size;
LOG_ENTRY ();
-
+
+ OCFS_ASSERT(meta_ac);
+
if (eb_bh) {
status = ocfs_journal_access(handle, eb_bh,
OCFS_JOURNAL_ACCESS_WRITE);
@@ -713,7 +540,6 @@
eb = (ocfs2_extent_block *) eb_bh->b_data;
el1 = &eb->h_list;
- bh_locked = 1;
}
else
el1 = &fe->id2.i_list;
@@ -726,18 +552,7 @@
parent_blk = fe->i_blkno;
new_blocks = depth;
- allocSize = new_blocks << osb->sb->s_blocksize_bits;
- /* allocate contiguous blocks on disk */
- status = ocfs_alloc_node_block(osb, allocSize, &physicalOffset,
- &fileOffset, osb->node_num,
- DISK_ALLOC_EXTENT_NODE, handle);
- if (status < 0) {
- LOG_ERROR_STATUS (status);
- goto finally;
- }
- phys_blkno = physicalOffset >> osb->sb->s_blocksize_bits;
-
size = sizeof(struct buffer_head *) * new_blocks;
eb_bhs = kmalloc(size, GFP_KERNEL);
if (eb_bhs == NULL) {
@@ -747,57 +562,39 @@
}
memset(eb_bhs, 0, size);
- status = ocfs_read_bhs(osb,
- phys_blkno << osb->sb->s_blocksize_bits,
- (u64)new_blocks << osb->sb->s_blocksize_bits,
- eb_bhs, OCFS_BH_CACHED, inode);
+ status = ocfs_create_new_meta_bhs(osb, handle, inode, new_blocks, meta_ac, eb_bhs);
if (status < 0) {
LOG_ERROR_STATUS (status);
goto finally;
}
- /* zero them all out */
- for(i = 0; i < new_blocks; i++) {
- status = ocfs_journal_access(handle, eb_bhs[i],
- OCFS_JOURNAL_ACCESS_CREATE);
- if (status < 0) {
- LOG_ERROR_STATUS(status);
- goto finally;
- }
-
- memset(eb_bhs[i]->b_data, 0, osb->sb->s_blocksize);
- set_buffer_uptodate(eb_bhs[i]);
- }
-
k = el1->l_next_free_rec;
el1->l_recs[k].e_cpos = fe->i_clusters;
el1->l_recs[k].e_clusters = new_clusters;
- el1->l_recs[k].e_blkno = phys_blkno;
+ el1->l_recs[k].e_blkno =
+ ((ocfs2_extent_block *) eb_bhs[0]->b_data)->h_blkno;
el1->l_next_free_rec++;
+ OCFS_ASSERT(el1->l_next_free_rec <= el1->l_count);
+
/* Fill in all the headers and the leaf */
for (i = 0; i < depth; i++) {
- ocfs2_extent_block *eb;
+ ocfs2_extent_block *eb, *tmpeb;
eb = (ocfs2_extent_block *) eb_bhs[i]->b_data;
eb->h_parent_blk = parent_blk;
- eb->h_suballoc_blkno =
- (fileOffset >> osb->sb->s_blocksize_bits) + i;
- eb->h_suballoc_node = osb->node_num;
- eb->h_blkno = phys_blkno + i;
- strcpy(eb->h_signature, OCFS2_EXTENT_BLOCK_SIGNATURE);
el2 = &eb->h_list;
- el2->l_count = ocfs2_extent_recs_per_eb(osb->sb);
el2->l_next_free_rec = 1;
el2->l_recs[0].e_cpos = fe->i_clusters;
el2->l_recs[0].e_clusters = new_clusters;
el2->l_tree_depth = (depth - (i + 1));
if (el2->l_tree_depth) {
+ tmpeb = (ocfs2_extent_block *) eb_bhs[i+1]->b_data;
/* fill in each header */
- el2->l_recs[0].e_blkno = phys_blkno + (i + 1);
+ el2->l_recs[0].e_blkno = tmpeb->h_blkno;
} else {
/* fill in the leaf */
el2->l_recs[0].e_blkno = new_blkno;
@@ -821,7 +618,6 @@
u64 tmp_blk = eb->h_parent_blk;
int tree_depth = el1->l_tree_depth;
- bh_locked = 0;
eb = NULL;
el1 = &fe->id2.i_list;
@@ -894,6 +690,100 @@
return status;
} /* ocfs_allocate_new_data_node */
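+/* Expects the bhs array to already be allocated with room for
+ * 'wanted' entries, and NULL-filled (the error path walks every slot).
+ *
+ * Sets h_signature, h_blkno, h_suballoc_bit, h_suballoc_node, and
+ * l_count in each new extent block for you. On error, any buffers
+ * already placed in the array are released and their slots set to NULL.
+ */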
+static int ocfs_create_new_meta_bhs(ocfs_super *osb,
+ ocfs_journal_handle *handle,
+ struct inode *inode,
+ int wanted,
+ ocfs2_alloc_context *meta_ac,
+ struct buffer_head *bhs[])
+{
+ int count, status, i;
+ u16 suballoc_bit_start;
+ u32 num_got;
+ u64 first_blkno;
+ ocfs2_extent_block *eb;
+
+ LOG_ENTRY();
+
+ count = 0;
+ while (count < wanted) {
+ status = ocfs_claim_metadata(osb,
+ handle,
+ meta_ac,
+ wanted - count,
+ &suballoc_bit_start,
+ &num_got,
+ &first_blkno);
+ if (status < 0) {
+ LOG_ERROR_STATUS (status);
+ goto bail;
+ }
+
+ for(i = count; i < (num_got + count); i++) {
+ bhs[i] = sb_getblk(osb->sb, first_blkno);
+ if (bhs[i] == NULL) {
+ status = -EIO;
+ LOG_ERROR_STATUS(status);
+ goto bail;
+ }
+ set_buffer_uptodate(bhs[i]);
+ SET_BH_SEQNUM(inode, bhs[i]);
+
+ status = ocfs_journal_access(handle, bhs[i],
+ OCFS_JOURNAL_ACCESS_CREATE);
+ if (status < 0) {
+ LOG_ERROR_STATUS(status);
+ goto bail;
+ }
+
+ memset(bhs[i]->b_data, 0, osb->sb->s_blocksize);
+ eb = (ocfs2_extent_block *) bhs[i]->b_data;
+ /* Ok, setup the minimal stuff here. */
+ strcpy(eb->h_signature, OCFS2_EXTENT_BLOCK_SIGNATURE);
+ eb->h_blkno = first_blkno;
+
+#ifndef OCFS_USE_ALL_METADATA_SUBALLOCATORS
+ /* we always use node zeros suballocator */
+ eb->h_suballoc_node = 0;
+#else
+ eb->h_suballoc_node = osb->node_num;
+#endif
+ eb->h_suballoc_bit = suballoc_bit_start;
+ eb->h_list.l_count = ocfs2_extent_recs_per_eb(osb->sb);
+
+ suballoc_bit_start++;
+ first_blkno++;
+
+ /* We'll also be dirtied by the caller, so
+ * this isn't absolutely necessary. */
+ status = ocfs_journal_dirty(handle, bhs[i]);
+ if (status < 0) {
+ LOG_ERROR_STATUS(status);
+ goto bail;
+ }
+ }
+
+ count += num_got;
+ }
+
+ status = 0;
+bail:
+ if (status < 0) {
+ for(i = 0; i < wanted; i++) {
+ if (bhs[i])
+ brelse(bhs[i]);
+ bhs[i] = NULL;
+ }
+ }
+ LOG_EXIT_STATUS(status);
+ return(status);
+}
+
/* ocfs_grow_extent_tree()
*
*/
@@ -901,16 +791,15 @@
struct buffer_head *fe_bh,
ocfs_journal_handle *handle,
u64 blkno, u32 new_clusters,
- struct inode *inode)
+ struct inode *inode,
+ ocfs2_alloc_context *meta_ac)
{
int status = 0;
__s32 k, i;
ocfs2_extent_block *eb1 = NULL;
ocfs2_extent_block *eb2 = NULL;
ocfs2_extent_list *ebl, *fel;
- __u64 physicalOffset;
- __u64 fileOffset = 0;
- u64 phys_blkno, parent_blk, last_eb_blkno;
+ u64 parent_blk, last_eb_blkno;
u64 new_parent_blk = 0;
struct buffer_head **bhs = NULL;
int numbhs = 0;
@@ -919,6 +808,8 @@
LOG_ENTRY_ARGS("(0x%p, 0x%p, %llu, %u\n", osb, fe, blkno,
new_clusters);
+ OCFS_ASSERT(meta_ac);
+
fe = (ocfs2_dinode *) fe_bh->b_data;
fel = &fe->id2.i_list;
@@ -932,35 +823,12 @@
}
memset(bhs, 0, numbhs * sizeof(*bhs));
- /* Allocate the space from the Extent file. This function should */
- /* return contigous disk blocks requested. */
- status = ocfs_alloc_node_block(osb,
- numbhs << osb->sb->s_blocksize_bits,
- &physicalOffset, &fileOffset,
- osb->node_num,
- DISK_ALLOC_EXTENT_NODE, handle);
+ status = ocfs_create_new_meta_bhs(osb, handle, inode, numbhs, meta_ac, bhs);
if (status < 0) {
LOG_ERROR_STATUS (status);
goto finally;
}
- phys_blkno = physicalOffset >> osb->sb->s_blocksize_bits;
- for (i = 0; i < numbhs; i++) {
- bhs[i] = sb_getblk(osb->sb, phys_blkno + i);
- if (bhs[i] == NULL) {
- status = -EIO;
- LOG_ERROR_STATUS(status);
- goto finally;
- }
- memset(bhs[i]->b_data, 0, osb->sb->s_blocksize);
- set_buffer_uptodate(bhs[i]);
- }
-
- if (phys_blkno == 0) {
- LOG_ERROR_STATUS(status = -ENOMEM);
- goto finally;
- }
-
eb1 = (ocfs2_extent_block *) bhs[0]->b_data;
/* Copy the File Entry information in to the newly allocated sector */
ebl = &eb1->h_list;
@@ -971,56 +839,47 @@
}
last_eb_blkno = fe->i_last_eb_blk;
- eb1->h_blkno =
- physicalOffset >> osb->sb->s_blocksize_bits;
new_parent_blk = eb1->h_blkno;
- eb1->h_suballoc_blkno =
- fileOffset >> osb->sb->s_blocksize_bits;
- eb1->h_suballoc_node = osb->node_num;
eb1->h_next_leaf_blk = 0;
fel->l_tree_depth++;
LOG_TRACE_ARGS ("Tree depth is: %d\n", fel->l_tree_depth);
- /* If tree_depth is one now, the for loop will not execute. */
- /* First time a file is created, tree_depth = 0 */
-
parent_blk = fe->i_blkno;
+ /* If tree_depth is one now, the for loop will not execute.
+ * First time a file is created, tree_depth = 0 */
for (i = 0; i < (fel->l_tree_depth - 1); i++) {
+ ocfs2_extent_block *tmpeb;
+
eb2 = (ocfs2_extent_block *) bhs[i]->b_data;
ebl = &eb2->h_list;
ebl->l_tree_depth = (fel->l_tree_depth - 1) - i;
ebl->l_count = ocfs2_extent_recs_per_eb(osb->sb);
- strcpy(eb2->h_signature, OCFS2_EXTENT_BLOCK_SIGNATURE);
-
if (i == 0) {
- ebl->l_recs[fel->l_count].e_blkno =
- phys_blkno + 1;
+ tmpeb = (ocfs2_extent_block *) bhs[1]->b_data;
+ ebl->l_recs[fel->l_count].e_blkno =
+ tmpeb->h_blkno;
ebl->l_recs[fel->l_count].e_cpos =
fe->i_clusters;
ebl->l_recs[fel->l_count].e_clusters =
new_clusters;
ebl->l_next_free_rec = fel->l_count + 1;
- eb2->h_blkno =
- physicalOffset >> osb->sb->s_blocksize_bits;
+ OCFS_ASSERT(ebl->l_next_free_rec <= ebl->l_count);
+
eb2->h_parent_blk = parent_blk;
parent_blk = last_eb_blkno = eb2->h_blkno;
} else {
- ebl->l_recs[0].e_blkno = phys_blkno + (i + 1);
+ tmpeb = (ocfs2_extent_block *) bhs[i + 1]->b_data;
+ ebl->l_recs[0].e_blkno = tmpeb->h_blkno;
ebl->l_recs[0].e_cpos = fe->i_clusters;
ebl->l_recs[0].e_clusters = new_clusters;
ebl->l_next_free_rec = 1;
- eb2->h_suballoc_blkno =
- (fileOffset >> osb->sb->s_blocksize_bits) + i;
- eb2->h_suballoc_node = osb->node_num;
- eb2->h_blkno =
- (physicalOffset >> osb->sb->s_blocksize_bits) + i;
eb2->h_parent_blk = parent_blk;
parent_blk = last_eb_blkno = eb2->h_blkno;
@@ -1038,7 +897,6 @@
/* For the time being we are assuming that the newly allocated Extent */
/* will have one more entry to accommodate the latest allocation */
- strcpy(eb1->h_signature, OCFS2_EXTENT_BLOCK_SIGNATURE);
ebl->l_tree_depth = 0;
ebl->l_count = ocfs2_extent_recs_per_eb(osb->sb);
@@ -1046,24 +904,19 @@
ebl->l_recs[i].e_clusters = new_clusters;
ebl->l_recs[i].e_blkno = blkno;
ebl->l_next_free_rec = i + 1;
+ OCFS_ASSERT(ebl->l_next_free_rec <= ebl->l_count);
- eb1->h_suballoc_blkno =
- (fileOffset >> osb->sb->s_blocksize_bits) +
- numbhs - 1;
- eb1->h_suballoc_node = osb->node_num;
- eb1->h_blkno =
- (physicalOffset >> osb->sb->s_blocksize_bits) +
- numbhs - 1;
eb1->h_parent_blk = parent_blk;
eb1->h_next_leaf_blk = 0;
- parent_blk = last_eb_blkno = eb1->h_blkno;
+ last_eb_blkno = eb1->h_blkno;
- /* This needs to be a sync write OR journalled to be safe. */
- status = ocfs_write_bhs(osb, bhs, numbhs, inode);
- if (status < 0) {
- LOG_ERROR_STATUS (status);
- goto finally;
+ for(i = 0; i < numbhs; i++) {
+ status = ocfs_journal_dirty(handle, bhs[i]);
+ if (status < 0) {
+ LOG_ERROR_STATUS (status);
+ goto finally;
+ }
}
/* Update the Previous Last Data Extent with this new Data
@@ -1151,7 +1004,8 @@
LOG_TRACE_ARGS("fe->i_clusters = %u\n", fe->i_clusters);
fel->l_recs[0].e_cpos = 0; /* FIXME: not needed */
fel->l_recs[0].e_clusters = fe->i_clusters + new_clusters;
- fel->l_recs[0].e_blkno = phys_blkno;
+ eb1 = (ocfs2_extent_block *) bhs[0]->b_data;
+ fel->l_recs[0].e_blkno = eb1->h_blkno;
fel->l_next_free_rec = 1;
fe->i_last_eb_blk = last_eb_blkno;
@@ -1169,7 +1023,8 @@
int ocfs_allocate_extent(ocfs_super *osb, struct buffer_head *fe_bh,
ocfs_journal_handle *handle,
u64 blkno, u32 new_clusters,
- struct inode *inode)
+ struct inode *inode,
+ ocfs2_alloc_context *meta_ac)
{
int status = 0;
int IncreaseTreeDepth = 0;
@@ -1295,6 +1150,7 @@
el1->l_recs[k].e_clusters = new_clusters;
el1->l_recs[k].e_blkno = blkno;
el1->l_next_free_rec++;
+ OCFS_ASSERT(el1->l_next_free_rec <= el1->l_count);
UpdateParent = 1;
} else {
/* Read the last extent and keep traversing
@@ -1307,13 +1163,6 @@
parent_blk = 0;
for (i = 1; i < fel->l_tree_depth; i++) {
- /* if we loop back around */
- if (eb2) {
- brelse(eb2_bh);
- eb2 = NULL;
- el2 = NULL;
- eb2_bh =NULL;
- }
status = ocfs_read_bh(osb,
parent_blk << osb->sb->s_blocksize_bits,
&eb2_bh,
@@ -1341,8 +1190,12 @@
break;
parent_blk = eb2->h_parent_blk;
+ brelse(eb2_bh);
+ eb2 = NULL;
+ el2 = NULL;
+ eb2_bh = NULL;
} /* for (i = 1; i < fe->i_tree_depth; i++) */
-
+
if (eb2) {
eb2 = NULL;
el2 = NULL;
@@ -1357,6 +1210,7 @@
IncreaseTreeDepth = 1;
goto increase_depth;
}
+
/* ok, we need to add a branch. pass in NULL
* if we need a whole branch, otherwise the
* extent which needs the new leaf */
@@ -1364,7 +1218,7 @@
new_clusters,
eb2_bh,
&new_eb_blkno,
- handle, inode);
+ handle, inode, meta_ac);
if (status < 0) {
LOG_ERROR_STATUS(status);
goto finally;
@@ -1478,7 +1332,7 @@
status = ocfs_grow_extent_tree(osb, fe_bh, handle,
blkno, new_clusters,
- inode);
+ inode, meta_ac);
if (status < 0) {
LOG_ERROR_STATUS(status);
goto finally;
@@ -1588,7 +1442,7 @@
}
status = ocfs_handle_add_commit_bits(handle,
numBitsAllocated,
- bitmapOffset, -1,
+ bitmapOffset, -1, 0,
DISK_ALLOC_VOLUME);
if (status < 0) {
LOG_ERROR_STATUS (status);
@@ -1663,7 +1517,7 @@
num_clusters = ext->e_clusters;
bitmap_offset =
(u32)((ext->e_blkno << osb->sb->s_blocksize_bits) >> osb->s_clustersize_bits);
- status = ocfs_handle_add_commit_bits(handle, num_clusters, bitmap_offset, -1, DISK_ALLOC_VOLUME);
+ status = ocfs_handle_add_commit_bits(handle, num_clusters, bitmap_offset, -1, 0, DISK_ALLOC_VOLUME);
if (status < 0) {
LOG_ERROR_STATUS (status);
goto bail;
@@ -1719,8 +1573,9 @@
free_meta:
/* Free the metadata associated with this extent group */
status = ocfs_handle_add_commit_bits(handle, 1,
- cur_eb->h_suballoc_blkno << osb->sb->s_blocksize_bits,
+ cur_eb->h_suballoc_bit,
cur_eb->h_suballoc_node,
+ cur_eb->h_blkno,
DISK_ALLOC_EXTENT_NODE);
if (status < 0) {
LOG_ERROR_STATUS (status);
@@ -2084,7 +1939,7 @@
done = 1;
goto skip_bitmap_add;
}
- status = ocfs_handle_add_commit_bits(handle, num_clusters, bitmap_offset, -1, DISK_ALLOC_VOLUME);
+ status = ocfs_handle_add_commit_bits(handle, num_clusters, bitmap_offset, -1, 0, DISK_ALLOC_VOLUME);
if (status < 0) {
LOG_ERROR_STATUS (status);
goto bail;
@@ -2159,7 +2014,7 @@
LOG_TRACE_ARGS("Popping this header (%llu)\n",
alloc_eb->h_blkno);
- status = ocfs_handle_add_commit_bits(handle, 1, alloc_eb->h_suballoc_blkno << osb->sb->s_blocksize_bits, alloc_eb->h_suballoc_node, DISK_ALLOC_EXTENT_NODE);
+ status = ocfs_handle_add_commit_bits(handle, 1, alloc_eb->h_suballoc_bit, alloc_eb->h_suballoc_node, alloc_eb->h_blkno, DISK_ALLOC_EXTENT_NODE);
if (status < 0) {
LOG_ERROR_STATUS (status);
goto bail;
@@ -2862,466 +2717,283 @@
return (status);
} /* ocfs_get_leaf_extent */
+void ocfs_free_alloc_context(ocfs2_alloc_context *ac)
+{
+ if (ac->ac_inode)
+ iput(ac->ac_inode);
+ if (ac->ac_bh)
+ brelse(ac->ac_bh);
+ kfree(ac);
+}
+
/*
- * ocfs_find_contiguous_space_from_bitmap()
- *
- * This function looks for free space in the volume based on the bitmap.
- * It looks for contiguous space only and if it finds the space available
- * it returns a cluster bitmap offset. Each bit in Cluster bitmap represents
- * memory equal to cluster size (specified during format).
- *
- * TODO: The Bitmap stuff needs to be changed for handling more than 32 bits...
- * Although we can go upto 4k(clustersize) * 8 * 4M(max 32 bits for now...)
- *
- * Returns 0 on success, < 0 on error.
- *
- * Pass in 'lock_bh' and bitmap_inode only if you've already taken the
- * vol_alloc semaphore, and you've done the acquire_lock on the bitmap.
+ * min_bits - minimum contiguous chunk from this total allocation we
+ * can handle. set to what we asked for originally for a full
+ * contig. allocation, set to '1' to indicate we can deal with extents
+ * of any size.
*/
-static int ocfs_find_contiguous_space_from_bitmap(ocfs_super *osb,
- ocfs_journal_handle *handle,
- __u64 file_size,
- u32 *cluster_off,
- u32 *cluster_count,
- int sysfile,
- struct buffer_head *lock_bh,
- struct inode *bitmap_inode)
+int ocfs_claim_bits(ocfs_super *osb,
+ ocfs_journal_handle *handle,
+ ocfs2_alloc_context *ac,
+ u32 min_bits,
+ u32 *bit_off,
+ u32 *num_bits)
{
- int status = 0, startbh, numblocks;
- u32 bitoffset = 0, ClusterCount = 0;
- __u64 ByteCount = 0;
- __u32 LargeAlloc = 0;
- static __u32 LargeAllocOffset = 0;
- static __u32 SmallAllocOffset = 0;
- struct buffer_head *bh = NULL;
- ocfs2_dinode *bm_lock;
- __u32 bitmapblocks; /* we only care about the valid blocks */
- int local_lock = 0;
- int local_inode = 0;
- __u32 five_percent, free_bits;
+ int status;
- LOG_ENTRY ();
+ LOG_ENTRY();
- OCFS_ASSERT (osb);
+ OCFS_ASSERT(ac);
+ OCFS_ASSERT(ac->ac_bits_given < ac->ac_bits_wanted);
- if ((bitmap_inode && !lock_bh) || (lock_bh && !bitmap_inode))
- BUG();
+ OCFS_ASSERT(ac->ac_which == OCFS_AC_USE_LOCAL
+ || ac->ac_which == OCFS_AC_USE_MAIN);
+ OCFS_ASSERT(ac->ac_handle == handle);
- if (!bitmap_inode) {
- bitmap_inode = ocfs_get_system_file_inode(osb, GLOBAL_BITMAP_SYSTEM_INODE, -1);
- if (!bitmap_inode) {
- status = -EINVAL;
- LOG_ERROR_STR("Could not get bitmap inode!");
- goto leave;
- }
- local_inode = 1;
+ if (ac->ac_which == OCFS_AC_USE_LOCAL) {
+ status = ocfs_claim_local_alloc_bits(osb,
+ handle,
+ ac,
+ min_bits,
+ bit_off,
+ num_bits);
+ } else {
+ status = ocfs_claim_main_bitmap_bits(osb,
+ handle,
+ ac,
+ min_bits,
+ bit_off,
+ num_bits);
}
-
- if (lock_bh) {
- bh = lock_bh;
- } else { /* local lock */
- local_lock = 1;
-
- ocfs_handle_add_inode(handle, bitmap_inode);
-
- /* Get the allocation lock here */
- status = ocfs_acquire_lock(osb, OCFS_LKM_EXMODE,
- 0, &bh, bitmap_inode);
- if (status < 0) {
- if (status != -EINTR)
- LOG_ERROR_STATUS (status);
- goto leave;
- }
- ocfs_handle_add_lock(handle, OCFS_LKM_EXMODE,
- 0, bitmap_inode);
- }
-
- status = ocfs_journal_access(handle, bh, OCFS_JOURNAL_ACCESS_WRITE);
if (status < 0) {
- LOG_ERROR_STATUS(status);
- goto leave;
+ /* If you're not asking for contiguous blocks, then
+ * the reserve function should've given you enough,
+ * so ENOSPC here is a real error. */
+ if ((status != -ENOSPC) || (min_bits == 1))
+ LOG_ERROR_STATUS(status);
+ goto bail;
}
- bm_lock = (ocfs2_dinode *) bh->b_data;
+ ac->ac_bits_given += *num_bits;
- ClusterCount = (u32) ((u64) (file_size + (osb->s_clustersize-1)) >>
- osb->s_clustersize_bits);
- if (ClusterCount == 0) {
- LOG_ERROR_STR ("DISK_FULL?: ClusterCount==0");
- status = 0;
- goto leave;
- }
- ByteCount = (u64)ClusterCount << osb->s_clustersize_bits;
- if (ByteCount == 0) {
- LOG_ERROR_STR ("DISK_FULL?: Bytecount==0");
- status = 0;
- goto leave;
- }
+bail:
+ LOG_EXIT_STATUS(status);
+ return(status);
+}
- free_bits = le32_to_cpu(bm_lock->id1.bitmap1.i_total) -
- le32_to_cpu(bm_lock->id1.bitmap1.i_used);
- if ( (sysfile && ClusterCount > free_bits) ||
- (!sysfile && ClusterCount >
- (free_bits - ((8 * ONE_MEGA_BYTE) >> osb->s_clustersize_bits))) ){
- LOG_ERROR_ARGS("Disk Full: ClusterCount=%u, free_bits=%u, sysfile=%s\n",
- ClusterCount, free_bits, sysfile?"yes":"no");
- status = -ENOSPC;
- goto leave;
- }
-
- /* This function will check for clear bits in the Bitmap for
- * consecutive */
- /* clear bits equal to ClusterCount */
+static int ocfs_claim_main_bitmap_bits(ocfs_super *osb,
+ ocfs_journal_handle *handle,
+ ocfs2_alloc_context *ac,
+ u32 min_bits,
+ u32 *bit_off,
+ u32 *num_bits)
+{
+ int status, num_blocks;
+ u32 bits_wanted;
+ u32 best_fit_bits;
+ ocfs2_dinode *fe;
+ struct inode *bitmap_inode;
+ struct buffer_head *bh;
- /* If we create a chunk that is larger than 5% of the
- * disksize, then start */
- /* allocation at 5%, so that small files stay in the beginning
- * as much as possible */
+ LOG_ENTRY();
+ OCFS_ASSERT(ac->ac_which == OCFS_AC_USE_MAIN);
- five_percent = le32_to_cpu(bm_lock->id1.bitmap1.i_total) / 20;
- if (ClusterCount > five_percent) {
- LargeAlloc = 1;
- LargeAllocOffset = five_percent;
- }
+ bitmap_inode = ac->ac_inode;
+ bits_wanted = ac->ac_bits_wanted - ac->ac_bits_given;
+ bh = ac->ac_bh;
+ fe = (ocfs2_dinode *) bh->b_data;
- bitmapblocks =
- ocfs_blocks_for_bits(osb->sb,
- osb->cluster_bitmap.validbits);
-
+ num_blocks = ocfs_blocks_for_bits(osb->sb,
+ osb->cluster_bitmap.validbits);
/* Ok, somewhat lame, but we submit the whole bitmap for reading here*/
- if (ocfs_read_bhs(osb, osb->bitmap_blkno << osb->sb->s_blocksize_bits,
- bitmapblocks << osb->sb->s_blocksize_bits,
- osb->cluster_bitmap.chunk, OCFS_BH_CACHED, bitmap_inode)) {
- LOG_ERROR_STATUS(-EIO);
- goto leave;
+ status = ocfs_read_bhs(osb, osb->bitmap_blkno << osb->sb->s_blocksize_bits, num_blocks << osb->sb->s_blocksize_bits, osb->cluster_bitmap.chunk, OCFS_BH_CACHED, bitmap_inode);
+ if (status < 0) {
+ LOG_ERROR_STATUS(status);
+ goto bail;
}
-
- bitoffset = ocfs_find_clear_bits (osb, &osb->cluster_bitmap,
- ClusterCount,
- LargeAlloc ? LargeAllocOffset :
- SmallAllocOffset, sysfile ? 0 :
- ((8 * ONE_MEGA_BYTE) >>
- osb->s_clustersize_bits));
- /* if fails we should try again from the beginning of the disk. */
- /* in the end we pass # of bits we want to keep for system
- * file extention only. */
- /* Right now if we run out of diskspace, we still have 8mb
- * free for a systemfile */
-
- if (bitoffset == -1 && LargeAlloc) {
- LOG_TRACE_STR("Running low on diskspace.");
+ best_fit_bits = bits_wanted;
+ status = ocfs_find_clear_bits (osb, &osb->cluster_bitmap,
+ bits_wanted, bit_off,
+ &best_fit_bits);
+ if (status < 0) {
osb->cluster_bitmap.failed++;
- bitoffset = ocfs_find_clear_bits (osb, &osb->cluster_bitmap,
- ClusterCount, 0,
- sysfile ? 0 :
- ((8 * ONE_MEGA_BYTE) >>
- osb->s_clustersize_bits));
+ LOG_TRACE_STR("Running out of space!");
+ goto bail;
}
- /* It returns -1 on failure, otherwise bitoffset points at the */
- /* location inb bitmap from where there are ClusterCount no of bits */
- /* are free. */
-
- if (bitoffset == -1) {
- if (sysfile)
- LOG_ERROR_ARGS ("Cannot allocate %u contiguous clusters for system file\n",
- ClusterCount);
+ if (best_fit_bits < min_bits) {
+ LOG_TRACE_ARGS ("Cannot allocate %u contiguous clusters for "
+ "system file\n", min_bits);
status = -ENOSPC;
- goto leave;
+ goto bail;
}
- LOG_TRACE_ARGS ("setting %u bits at bit offset=%u\n", ClusterCount, bitoffset);
+ if (best_fit_bits != bits_wanted)
+ LOG_TRACE_ARGS("discontiguous allocation done: wanted = %u, "
+ "best_fit = %u, bit_off = %u!\n",
+ bits_wanted, best_fit_bits, *bit_off);
- ocfs_set_bits(osb->sb, handle, &osb->cluster_bitmap, bitoffset,
- ClusterCount);
+ /* when we do discontig. just change this line. */
+ *num_bits = best_fit_bits;
- /* Ok, write out the bitmap now. We optimize only by writing
- * out the bitmap blocks which have changed, and not all of
- * them like before. */
- numblocks = ocfs_bitmap_blocks_affected(osb->sb,
- bitoffset,
- ClusterCount,
- &startbh);
+ /* cool, we've got some. set them now. */
+ status = ocfs_journal_access(handle, bh, OCFS_JOURNAL_ACCESS_WRITE);
+ if (status < 0) {
+ LOG_ERROR_STATUS(status);
+ goto bail;
+ }
- LOG_TRACE_ARGS("bitoffset = %u, ClusterCount = %u, startbh = %u, numblocks = %u\n", bitoffset, ClusterCount, startbh, numblocks);
+ fe->id1.bitmap1.i_used += *num_bits;
- /* write the bitmap size info to the lock sector */
- bm_lock->id1.bitmap1.i_used =
- ocfs_count_bits(osb->sb, &osb->cluster_bitmap);
-
status = ocfs_journal_dirty(handle, bh);
if (status < 0) {
LOG_ERROR_STATUS (status);
- goto leave;
+ goto bail;
}
- *cluster_off = bitoffset;
- *cluster_count = ClusterCount;
+ ocfs_set_bits(osb->sb, handle, &osb->cluster_bitmap, *bit_off,
+ *num_bits);
+ atomic_inc(&osb->alloc_stats.bitmap_data);
+
+#warning "implement this"
+ /* At this point, we should see if local alloc was switched
+ * off, and turn it back on if we have enough free bits in the
+ * main bitmap.*/
status = 0;
+bail:
+ LOG_EXIT_STATUS(status);
+ return(status);
+}
-leave:
- if (local_lock && bh)
- brelse(bh);
-
- if (local_inode)
- iput(bitmap_inode);
-
- LOG_EXIT_STATUS (status);
- return status;
-} /* ocfs_find_contiguous_space_from_bitmap */
-
-/*
- * ocfs_alloc_node_block()
- *
- * You need to be holding node_alloc_sem!
- */
-int ocfs_alloc_node_block(ocfs_super *osb, __u64 bytes_wanted,
- __u64 *DiskOffset, __u64 *file_off,
- __u32 NodeNum, __u32 Type,
- ocfs_journal_handle *handle)
+/* caller is responsible for freeing 'ac' when he's done with it. */
+int ocfs_reserve_bits(ocfs_super *osb,
+ ocfs_journal_handle *handle,
+ u32 bits_wanted,
+ ocfs2_alloc_context **ac)
{
- int status = 0;
- int startbh, numblocks;
- __u64 fileSize = 0;
- __u64 numBytes = 0;
- __u64 allocSize = 0;
- __u64 prevFileSize = 0;
- __u64 extent;
- __u64 newFileSize;
- __u64 bitMapSize;
- ocfs_alloc_bm bitmap;
- __u32 numBits = 0;
- __u32 foundBit = -1;
- __u32 blockSize = 0, blockSizeBits = 0;
- int bm_file = 0;
- int alloc_file = 0;
- struct buffer_head *bh = NULL;
- struct buffer_head *alloc_bh = NULL;
- ocfs2_dinode *fe = NULL;
- ocfs2_dinode *alloc_fe = NULL;
- int needs_uninit = 0;
- int delay_lockrel = 0;
- struct inode *inode = NULL; /* alloc bitmap file inode */
- struct inode *alloc_inode = NULL; /* alloc file inode */
+ int status;
- LOG_ENTRY_ARGS("(bytes_wanted = (%llu), Type=%d)\n", bytes_wanted,Type);
+ LOG_ENTRY();
+ OCFS_ASSERT(handle);
- switch (Type) {
- case DISK_ALLOC_EXTENT_NODE:
- bm_file = EXTENT_ALLOC_BITMAP_SYSTEM_INODE;
- alloc_file = EXTENT_ALLOC_SYSTEM_INODE;
- blockSize = osb->sb->s_blocksize;
- blockSizeBits = osb->sb->s_blocksize_bits;
- atomic_inc(&osb->alloc_stats.ext_allocs);
- break;
- case DISK_ALLOC_INODE:
- bm_file = INODE_ALLOC_BITMAP_SYSTEM_INODE;
- alloc_file = INODE_ALLOC_SYSTEM_INODE;
- blockSize = osb->sb->s_blocksize;
- blockSizeBits = osb->sb->s_blocksize_bits;
- atomic_inc(&osb->alloc_stats.inode_allocs);
- break;
- default:
- status = -EINVAL;
- LOG_ERROR_STATUS(status);
- goto leave;
+ *ac = kmalloc(sizeof(ocfs2_alloc_context), GFP_KERNEL);
+ if (!(*ac)) {
+ status = -ENOMEM;
+ LOG_ERROR_STATUS(status);
+ goto bail;
}
+ memset(*ac, 0, sizeof(ocfs2_alloc_context));
+ (*ac)->ac_bits_wanted = bits_wanted;
+ (*ac)->ac_handle = handle;
- inode = ocfs_get_system_file_inode(osb, bm_file, NodeNum);
- if (!inode) {
- LOG_ERROR_STATUS(status=-EINVAL);
- goto leave;
+ status = -ENOSPC;
+ if (ocfs_alloc_should_use_local(osb, bits_wanted)) {
+ status = ocfs_reserve_local_alloc_bits(osb,
+ handle,
+ bits_wanted,
+ *ac);
+ if ((status < 0) && (status != -ENOSPC)) {
+ LOG_ERROR_STATUS(status);
+ goto bail;
+ } else if (status == -ENOSPC) {
+#warning "need to deal with disabling local alloc better"
+ /* ocfs_reserve_local_alloc_bits will return -ENOSPC with
+ * the local alloc inode still locked, so we
+ * can change this safely here. */
+ LOG_ERROR_STR("Disabling local alloc");
+ osb->have_local_alloc = 0;
+// ocfs_shutdown_local_alloc(osb);
+ } else
+ (*ac)->ac_which = OCFS_AC_USE_LOCAL;
}
- alloc_inode = ocfs_get_system_file_inode(osb, alloc_file, NodeNum);
- if (!alloc_inode) {
- LOG_ERROR_STATUS(status=-EINVAL);
- goto leave;
- }
- /* Allocate a block of size blocksize from the relevant file/bitmap */
- OCFS_ASSERT (blockSize);
-
- status = ocfs_acquire_lock (osb, OCFS_LKM_EXMODE,
- 0, &bh, inode);
- if (status < 0) {
- LOG_ERROR_STATUS (status);
- goto leave;
- }
-
- ocfs_handle_add_lock(handle, OCFS_LKM_EXMODE,
- 0, inode);
-
- numBits = ((bytes_wanted + (blockSize-1)) >> blockSizeBits);
- numBytes = (u64)numBits << blockSizeBits;
-
- /* Read in the bitmap file for the alloc and look for the
- * required space, if found */
- fe = (ocfs2_dinode *) bh->b_data;
- prevFileSize = fileSize = fe->i_size;
- allocSize = (u64)fe->i_clusters << osb->s_clustersize_bits;
-
- if ((fileSize != 0) && (allocSize != 0)) {
- ocfs_initialize_bitmap(osb->sb, &bitmap,
- (__u32)fileSize * 8,
- (__u32)allocSize * 8);
- needs_uninit = 1;
-
- status = ocfs_read_system_file(osb, bm_file, NodeNum,
- bitmap.chunk, allocSize);
+ if (status == -ENOSPC) {
+ status = ocfs_reserve_main_bitmap_bits(osb,
+ handle,
+ bits_wanted,
+ *ac);
if (status < 0) {
- LOG_ERROR_STATUS (status);
- goto leave;
+ LOG_ERROR_STATUS(status);
+ goto bail;
}
+ (*ac)->ac_which = OCFS_AC_USE_MAIN;
+ }
- foundBit = ocfs_find_clear_bits(osb, &bitmap, numBits, 0, 0);
+ status = 0;
+bail:
+ if ((status < 0) && *ac) {
+ ocfs_free_alloc_context(*ac);
+ *ac = NULL;
}
- /* It returns -1 on failure , otherwise ByteOffset points at the */
- /* location in bitmap from where there are ClusterCount no of bits */
- /* are free. */
+ LOG_EXIT_STATUS(status);
+ return(status);
+}
- if (foundBit == -1) {
- /* if not found add more allocation to the file and try again. */
- //extent = ONE_MEGA_BYTE;
- extent = ( ((numBits * blockSize) + (ONE_MEGA_BYTE-1)) >> 20 ) << 20;
+int ocfs_reserve_main_bitmap_bits(ocfs_super *osb,
+ ocfs_journal_handle *handle,
+ u32 bits_wanted,
+ ocfs2_alloc_context *ac)
+{
+ int status = 0;
+ struct inode *bitmap_inode;
+ struct buffer_head *bh = NULL;
+ ocfs2_dinode *fe;
+ u32 free_bits;
-#warning maybe take this out and put a verifyupdateinode in here
- status = ocfs_read_bh(osb,
- OCFS_I(alloc_inode)->ip_blkno << alloc_inode->i_sb->s_blocksize_bits,
- &alloc_bh, OCFS_BH_CACHED, alloc_inode);
- if (status < 0) {
- LOG_ERROR_STATUS (status);
- goto leave;
- }
+ LOG_ENTRY();
- alloc_fe = (ocfs2_dinode *) alloc_bh->b_data;
- newFileSize = alloc_fe->i_size;
- allocSize = (u64)alloc_fe->i_clusters << osb->s_clustersize_bits;
- if (newFileSize != alloc_inode->i_size ||
- allocSize != OCFS_I(alloc_inode)->ip_alloc_size) {
- LOG_ERROR_ARGS("aha! alloc inode was out of date! "
- "newFileSize=%llu, i_size=%llu, "
- "allocSize=%llu, ip_alloc_size=%llu\n",
- newFileSize, alloc_inode->i_size,
- allocSize, OCFS_I(alloc_inode)->ip_alloc_size);
- }
+ OCFS_ASSERT(!(handle->flags & OCFS_HANDLE_STARTED));
- //newFileSize = alloc_inode->i_size;
- //allocSize = OCFS_I(alloc_inode)->alloc_size;
+ bitmap_inode = ocfs_get_system_file_inode(osb,
+ GLOBAL_BITMAP_SYSTEM_INODE,
+ -1);
+ if (!bitmap_inode) {
+ status = -EINVAL;
+ LOG_ERROR_STR("Could not get bitmap inode!");
+ goto bail;
+ }
- /* This is for OUI optimzation to allocate more disk
- * space for directory allocations */
-
- if (allocSize > 0)
- extent *= 2;
-
- LOG_TRACE_ARGS("extending the alloc file to %llu\n",
- newFileSize + extent);
- status = ocfs_extend_file(osb, newFileSize + extent,
- handle, alloc_inode, NULL, 1,
- NULL);
- if (status < 0) {
+ status = ocfs_acquire_lock(osb, OCFS_LKM_EXMODE,
+ 0, &bh, bitmap_inode);
+ if (status < 0) {
+ if (status != -EINTR)
LOG_ERROR_STATUS (status);
- goto leave;
- }
-
- newFileSize += extent;
- bitMapSize = newFileSize >> (blockSizeBits+3);
-
- /* Does this need the buffer_head? if so, we need to
- * do a put_data first! */
- /* Calculate the new bitmap size */
-
- LOG_TRACE_ARGS("extending the bitmap file to %llu\n", bitMapSize);
- status = ocfs_extend_file(osb, bitMapSize,
- handle, inode, NULL, 1, bh);
- if (status < 0) {
- LOG_ERROR_STATUS (status);
- goto leave;
- }
- fe = (ocfs2_dinode *) bh->b_data;
- /* we wrote it back out in ocfs_extend_system_file so
- * we can trust the sizes here */
- fileSize = fe->i_size;
- allocSize = (u64)fe->i_clusters << osb->s_clustersize_bits;
- LOG_TRACE_ARGS("fileSize=%llu, allocSize=%llu\n",
- fileSize, allocSize);
-
- if (needs_uninit)
- ocfs_reinitialize_bitmap(osb->sb, &bitmap,
- fileSize * 8,
- allocSize * 8);
- else
- ocfs_initialize_bitmap(osb->sb, &bitmap,
- fileSize * 8,
- allocSize * 8);
- needs_uninit = 1;
-
- status = ocfs_read_system_file(osb, bm_file, NodeNum,
- bitmap.chunk, allocSize);
- if (status < 0) {
- LOG_ERROR_STATUS (status);
- goto leave;
- }
-
- foundBit = ocfs_find_clear_bits(osb, &bitmap, numBits, 0, 0);
-
- delay_lockrel = 1;
- if (Type == DISK_ALLOC_EXTENT_NODE)
- atomic_inc(&osb->alloc_stats.ext_extends);
- else if (Type == DISK_ALLOC_INODE)
- atomic_inc(&osb->alloc_stats.inode_extends);
+ goto bail;
}
+ ocfs_handle_add_lock(handle, OCFS_LKM_EXMODE,
+ 0, bitmap_inode);
+ ocfs_handle_add_inode(handle, bitmap_inode);
- LOG_TRACE_ARGS ("bit offset=%d, num=%d\n", foundBit, numBits);
+ fe = (ocfs2_dinode *) bh->b_data;
+ free_bits = le32_to_cpu(fe->id1.bitmap1.i_total) -
+ le32_to_cpu(fe->id1.bitmap1.i_used);
- ocfs_set_bits(osb->sb, handle, &bitmap, foundBit, numBits);
-
- /* only write out what has changed... */
- numblocks = ocfs_bitmap_blocks_affected(osb->sb,
- foundBit,
- numBits,
- &startbh);
-
- LOG_TRACE_ARGS ("offset=%u, type=%x, blksz=%u, foundbit=%u, fileid=%u\n",
- foundBit * blockSize, Type, blockSize, foundBit, alloc_file);
-
- status = ocfs_lookup_file_allocation(osb, foundBit * blockSize,
- DiskOffset, osb->sb->s_blocksize, NULL,
- alloc_inode, 1);
- if (status < 0 || *DiskOffset == 0) {
- if (!status)
- status = -EINVAL;
- LOG_ERROR_STATUS(status);
- goto leave;
+ if (bits_wanted > free_bits) {
+ LOG_ERROR_ARGS("Disk Full: wanted=%u, free_bits=%u\n",
+ bits_wanted, free_bits);
+ status = -ENOSPC;
+ goto bail;
}
- *file_off = (__u64) ((__u64) foundBit * (__u64) blockSize);
+ /* Ok, done - we've determined that there's enough space in
+ * the bitmap. Actually finding it is the job of the
+ * allocation function now. We keep things locked so that the
+ * bitmap can't change underneath us. */
+ ac->ac_inode = igrab(bitmap_inode);
+ get_bh(bh);
+ ac->ac_bh = bh;
- /* this can just fall through */
- if (*file_off == 0) {
- LOG_TRACE_ARGS ("offset=%llu, type=%x, blksz=%u, foundbit=%u\n",
- *file_off, Type, blockSize, foundBit);
- }
-
-leave:
- if (needs_uninit)
- ocfs_uninitialize_bitmap(&bitmap);
-
- if (inode)
- iput(inode);
- if (alloc_inode)
- iput(alloc_inode);
-
- if (bh != NULL)
+bail:
+ if (bitmap_inode)
+ iput(bitmap_inode);
+ if (bh)
brelse(bh);
- if (alloc_bh != NULL)
- brelse(alloc_bh);
- LOG_EXIT_STATUS (status);
- return status;
-} /* ocfs_alloc_node_block */
+ LOG_EXIT_STATUS(status);
+ return(status);
+}
/*
* ocfs_free_file_extents()
@@ -3334,7 +3006,7 @@
int status = 0;
__u32 i;
__u32 numBitsAllocated = 0, bitmapOffset = 0;
- ocfs2_extent_block *extent = NULL;
+ ocfs2_extent_block *extent;
struct buffer_head *extent_bh = NULL;
ocfs2_extent_list *fel;
ocfs2_dinode *fe;
@@ -3342,12 +3014,7 @@
LOG_ENTRY ();
fe = (ocfs2_dinode *) fe_bh->b_data;
- if (OCFS_I(inode)->ip_blkno != fe->i_blkno)
- BUG();
- if (inode)
- SET_BH_SEQNUM(inode, fe_bh);
-
fe = (ocfs2_dinode *) fe_bh->b_data;
fel = &fe->id2.i_list;
@@ -3360,7 +3027,7 @@
osb->s_clustersize_bits);
ocfs_handle_add_commit_bits(handle, numBitsAllocated,
- bitmapOffset, -1,
+ bitmapOffset, -1, 0,
DISK_ALLOC_VOLUME);
}
} else {
@@ -3380,7 +3047,7 @@
LOG_ERROR_STATUS(status);
goto leave;
}
- extent = NULL;
+
status = ocfs_kill_this_tree(osb, extent_bh, handle, inode);
if (status < 0) {
LOG_ERROR_STATUS(status);
@@ -3399,803 +3066,3 @@
return status;
} /* ocfs_free_file_extents */
-/* Some constants and functions that control how we allocate and use
- * local alloc bitmaps. These are intended to be easily
- * tunable. Possibly even remove them once we've found a good mix. */
-
-/* The largest cluster size where we even consider using local alloc. */
-#define OCFS_LOCAL_ALLOC_MAX_CSIZE (128 * 1024)
-/* The largest allocation to use the local bitmap for. */
-#define OCFS_LOCAL_ALLOC_MAX_ALLOC (2 * 1024 * 1024)
-
-/*
- * ocfs_local_alloc_window_bits
- *
- * Determine how large our local alloc window should be, in bits. This
- * is entirely changeable -- just replace this function. Right now as
- * a *testing* default, we have a function that takes cluster size
- * into account in the following manner:
- *
- * 4k -> 1024 bits, 8k -> 512 bits, 16k -> 256 bits,
- * 32/64/128k -> 64 bits
- */
-static inline int ocfs_local_alloc_window_bits(ocfs_super *osb)
-{
- int numbits;
-
- switch (osb->s_clustersize) {
- case (4*1024):
- numbits = 1024;
- break;
-
- case (8*1024):
- numbits = 512;
- break;
-
- case (16*1024):
- numbits = 256;
- break;
-
- default:
- numbits = 64;
- break;
- }
- return(numbits);
-} /* ocfs_local_alloc_window_bits */
-
-/*
- * ocfs_alloc_count_bits
- */
-static __u32 ocfs_alloc_count_bits(ocfs2_dinode *alloc)
-{
- int i;
- __u8 tmp;
- __u8 *buffer;
- __u32 count = 0;
-
- LOG_ENTRY();
-
- buffer = LOCAL_ALLOC(alloc)->la_bitmap;
- for (i = 0; i < LOCAL_ALLOC(alloc)->la_size; i++) {
- memcpy(&tmp, buffer, 1);
- count+= BITCOUNT(tmp);
- buffer++;
- }
-
- LOG_EXIT_ULONG ((unsigned long)count);
- return(count);
-} /* ocfs_alloc_count_bits */
-
-/*
- * ocfs_clear_local_alloc
- */
-static void ocfs_clear_local_alloc(ocfs2_dinode *alloc)
-{
- int i;
- LOG_ENTRY();
-
- LOCAL_ALLOC(alloc)->la_bm_bits = 0;
- LOCAL_ALLOC(alloc)->la_bits_set = 0;
- LOCAL_ALLOC(alloc)->la_bm_off = 0;
- for(i = 0; i < LOCAL_ALLOC(alloc)->la_size; i++)
- LOCAL_ALLOC(alloc)->la_bitmap[i] = 0;
-
- LOG_EXIT();
- return;
-} /* ocfs_clear_local_alloc */
-
-/*
- * ocfs_sync_local_to_main
- *
- * sync the local alloc to main bitmap.
- *
- * assumes you've already locked the main bitmap -- the bitmap inode
- * passed is used for caching.
- */
-static int ocfs_sync_local_to_main(ocfs_super *osb,
- ocfs_journal_handle *handle,
- ocfs2_dinode *alloc,
- struct inode *main_bm_inode)
-{
- int status = 0;
- int bit_off, left;
- void *bitmap;
- unsigned int start, numblocks, bitmapblocks;
-
- LOG_ENTRY_ARGS("alloc->la_bm_bits = %u, COUNT = %u, la_bits_set = %u\n",
- LOCAL_ALLOC(alloc)->la_bm_bits,
- ocfs_alloc_count_bits(alloc),
- LOCAL_ALLOC(alloc)->la_bits_set);
-
- if (LOCAL_ALLOC(alloc)->la_bm_bits == 0) {
- LOG_TRACE_STR("nothing to sync!");
- goto bail;
- }
-
- bitmapblocks =
- ocfs_blocks_for_bits(osb->sb,
- osb->cluster_bitmap.validbits);
-
- /* figure out which block in the bitmap to start on and the
- * maximum number of blocks we can span over -- we don't need
- * to read any more as that's the most we'll be touching... */
- numblocks = ocfs_bitmap_blocks_affected(osb->sb,
- LOCAL_ALLOC(alloc)->la_bm_off,
- LOCAL_ALLOC(alloc)->la_bits_set,
- &start);
-
- if ((start + numblocks) > bitmapblocks) {
- printk("uhoh, bitmap calculation is bad!\n");
- printk("alloc->la_bm_bits = %u, COUNT = %u, alloc->la_bits_set = %u"
- "start=%u, alloc->la_bm_off = %u, numblocks=%u, "
- "bitmapblocks = %u\n",
- LOCAL_ALLOC(alloc)->la_bm_bits, ocfs_alloc_count_bits(alloc),
- LOCAL_ALLOC(alloc)->la_bits_set, start, LOCAL_ALLOC(alloc)->la_bm_off, numblocks,
- bitmapblocks);
-
- BUG();
- }
-
- LOG_TRACE_ARGS("start=%u, alloc->la_bm_off = %u, numblocks=%u\n", start,
- LOCAL_ALLOC(alloc)->la_bm_off, numblocks);
- status = ocfs_read_bhs(osb,
- (osb->bitmap_blkno + start) << osb->sb->s_blocksize_bits,
- numblocks << osb->sb->s_blocksize_bits,
- &osb->cluster_bitmap.chunk[start], OCFS_BH_CACHED,
- main_bm_inode);
- if (status < 0) {
- LOG_ERROR_STATUS(status);
- goto bail;
- }
-
- bitmap = LOCAL_ALLOC(alloc)->la_bitmap;
- /* any unset bits in local alloc need to be unset in bitmap. */
- bit_off = 0;
- left = LOCAL_ALLOC(alloc)->la_bm_bits;
- while ((bit_off = find_next_zero_bit(bitmap, left, bit_off))
- != -1) {
- if (bit_off >= left) {
- /*LOG_TRACE_ARGS("bit_off (%d) >= left\n", bit_off);*/
- break;
- }
-
- LOG_TRACE_ARGS("Clearing bit %u in main bitmap\n",
- bit_off + LOCAL_ALLOC(alloc)->la_bm_off);
- ocfs_clear_bits(osb->sb, handle, &osb->cluster_bitmap,
- bit_off + LOCAL_ALLOC(alloc)->la_bm_off,
- 1);
- bit_off++;
- }
-
-bail:
-
- LOG_EXIT_STATUS(status);
- return(status);
-} /* ocfs_sync_local_to_main */
-
-/*
- * This essentially does the same thing as sync_local_to_main, but
- * without a journal handle -- used during shutdown and recovery.
- */
-static int ocfs_sync_local_from_shutdown(ocfs_super *osb,
- ocfs_bitmap_free_head **f,
- struct buffer_head *local_alloc_bh,
- int in_recovery)
-{
- int status = 0;
- int bit_off, left;
- ocfs2_dinode *alloc = NULL;
- void *bitmap;
-
- LOG_ENTRY();
-
- if (!local_alloc_bh)
- BUG();
-
- alloc = (ocfs2_dinode *) local_alloc_bh->b_data;
- if (LOCAL_ALLOC(alloc)->la_bm_bits == 0) {
- LOG_TRACE_STR("nothing to sync!");
- goto bail;
- }
-
- if (!(*f)) {
- *f = ocfs_alloc_bitmap_free_head();
- if (*f == NULL) {
- LOG_ERROR_STATUS(-ENOMEM);
- goto bail;
- }
- }
-
- alloc = (ocfs2_dinode *) local_alloc_bh->b_data;
-
- LOG_TRACE_ARGS("alloc->la_bm_bits = %u, COUNT = %u, la_bits_set = %u\n",
- LOCAL_ALLOC(alloc)->la_bm_bits,
- ocfs_alloc_count_bits(alloc),
- LOCAL_ALLOC(alloc)->la_bits_set);
-
- bitmap = LOCAL_ALLOC(alloc)->la_bitmap;
-
- /* any unset bits in local alloc need to be unset in bitmap. */
- bit_off = 0;
- left = LOCAL_ALLOC(alloc)->la_bm_bits;
- while ((bit_off = find_next_zero_bit(bitmap, left, bit_off))
- != -1) {
- if (bit_off >= left) {
- /*LOG_TRACE_ARGS("bit_off (%d) >= left\n", bit_off);*/
- break;
- }
- /* LOG_TRACE_ARGS("Clearing bit %u in main bitmap\n", bit_off);*/
- status = ocfs_add_to_bitmap_free_head(osb,
- *f, 1,
- bit_off + LOCAL_ALLOC(alloc)->la_bm_off,
- -1, DISK_ALLOC_VOLUME);
- if (status < 0) {
- ocfs_free_bitmap_free_head(*f);
- *f = NULL;
- }
- bit_off++;
- }
-
-bail:
- LOG_EXIT_STATUS(status);
- return(status);
-} /* ocfs_sync_local_from_shutdown */
-
-/*
- * ocfs_alloc_new_window
- *
- * pass it the bitmap lock in lock_bh if you have it.
- */
-static int ocfs_alloc_new_window(ocfs_super *osb, struct buffer_head *lock_bh,
- struct inode *bm_inode,
- ocfs_journal_handle *handle)
-{
- int status = 0;
- __u64 alloc_bytes;
- u32 cluster_off, cluster_count;
- ocfs2_dinode *alloc = NULL;
-
- LOG_ENTRY();
-
- alloc = (ocfs2_dinode *) osb->local_alloc_bh->b_data;
- if (LOCAL_ALLOC(alloc)->la_bm_bits != 0)
- LOG_TRACE_STR("asking me to alloc a new window over a"
- " non-empty one");
-
- /* we try to use find_contig_space_from_bitmap here for now. */
- alloc_bytes = (u64)ocfs_local_alloc_window_bits(osb) << osb->s_clustersize_bits;
- LOG_TRACE_ARGS("Allocating %llu bytes (%u clusters) for a "
- "new window.\n", alloc_bytes,
- ocfs_local_alloc_window_bits(osb));
-
- status = ocfs_find_contiguous_space_from_bitmap(osb, handle,
- alloc_bytes,
- &cluster_off,
- &cluster_count, 0,
- lock_bh, bm_inode);
- if (status < 0) {
- LOG_ERROR_STATUS(status);
- goto bail;
- }
- atomic_inc(&osb->alloc_stats.bitmap_data);
-
- alloc = (ocfs2_dinode *) osb->local_alloc_bh->b_data;
-
- LOCAL_ALLOC(alloc)->la_bm_off = cluster_off;
- LOCAL_ALLOC(alloc)->la_bm_bits = cluster_count;
- /* just in case... In the future when we find space ourselves,
- * we don't have to get all contiguous -- but we'll have to
- * set all previously used bits in bitmap and update
- * la_bits_set before setting the bits in the main bitmap. */
- LOCAL_ALLOC(alloc)->la_bits_set = 0;
- memset(LOCAL_ALLOC(alloc)->la_bitmap, 0,
- LOCAL_ALLOC(alloc)->la_size);
-
- LOG_TRACE_STR("New window allocated:");
- LOG_TRACE_ARGS("window la_bm_off = %u\n",
- LOCAL_ALLOC(alloc)->la_bm_off);
- LOG_TRACE_ARGS("window la_bm_bits = %u\n",
- LOCAL_ALLOC(alloc)->la_bm_bits);
-
-bail:
- LOG_EXIT_STATUS(status);
- return(status);
-} /* ocfs_alloc_new_window */
-
-/*
- * ocfs_local_find_clear_bits
- */
-static int ocfs_local_find_clear_bits(ocfs_super *osb,
- ocfs2_dinode *alloc,
- __u32 numbits)
-{
- int numfound, bitoff, left, startoff, lastzero;
- void *bitmap = NULL;
-
- LOG_ENTRY_ARGS("(numbits wanted = %u)\n", numbits);
-
- bitmap = LOCAL_ALLOC(alloc)->la_bitmap;
-
- numfound = bitoff = startoff = 0;
- lastzero = -1;
- left = LOCAL_ALLOC(alloc)->la_bm_bits;
- while ((bitoff = find_next_zero_bit(bitmap, left, startoff)) != -1) {
- if (bitoff == left) {
- /* LOG_TRACE_ARGS("bitoff (%d) == left", bitoff); */
- break;
- }
- /* LOG_TRACE_ARGS("Found a zero: bitoff = %d, startoff = %d, "
- "numfound = %d\n", bitoff, startoff, numfound);*/
-
- /* Ok, we found a zero bit... is it contig. or do we
- * start over?*/
- if (bitoff == startoff) {
- /* we found a zero */
- numfound++;
- startoff++;
- } else {
- /* got a zero after some ones */
- numfound = 1;
- startoff = bitoff+1;
- }
- /* we got everything we needed */
- if (numfound == numbits) {
- /* LOG_TRACE_STR("Found it all!"); */
- break;
- }
- }
-
- LOG_TRACE_ARGS("Exiting loop, bitoff = %d, numfound = %d\n", bitoff,
- numfound);
-
- if (numfound == numbits)
- bitoff = startoff - numfound;
- else
- bitoff = -1;
-
- LOG_EXIT_STATUS(bitoff);
- return(bitoff);
-} /* ocfs_local_find_clear_bits */
-
-/*
- * ocfs_find_space_from_local
- */
-static int ocfs_find_space_from_local(ocfs_super *osb, u32 bitswanted,
- u32 *bitoff, u32 *bitcount,
- ocfs_journal_handle *handle)
-{
- ocfs2_dinode *alloc;
- int status = 0, tmpstat;
- int startoff, tmpoff;
- __u32 tmpwanted;
- /* main bitmap variables. */
- struct buffer_head *main_bm_bh = NULL;
- struct inode *main_bm_inode = NULL;
- void *bitmap;
-
- LOG_ENTRY_ARGS("(bitswanted = %u)\n", bitswanted);
-
- if (!osb->have_local_alloc) {
- status = -ENOSPC;
- goto bail;
- }
-
- if (bitswanted > ocfs_local_alloc_window_bits(osb)) {
- LOG_TRACE_STR("Asking for more than my max window size!\n");
- status = -ENOSPC;
- goto bail;
- }
-
- status = ocfs_journal_access(handle, osb->local_alloc_bh,
- OCFS_JOURNAL_ACCESS_WRITE);
- if (status < 0) {
- LOG_ERROR_STATUS(status);
- goto bail;
- }
-
- alloc = (ocfs2_dinode *) osb->local_alloc_bh->b_data;
-tryagain:
- /* If we need to initialize a new window, do so now. */
- if (LOCAL_ALLOC(alloc)->la_bm_bits == 0) {
- LOG_TRACE_STR("Allocating a new window...");
-
- status = ocfs_alloc_new_window(osb, main_bm_bh, main_bm_inode,
- handle);
- if (status < 0) {
- if (status != -ENOSPC)
- LOG_ERROR_STATUS(status);
-
- /* it may not have been dirtied yet... */
- tmpstat = ocfs_journal_dirty(handle,
- osb->local_alloc_bh);
- if (tmpstat < 0)
- LOG_ERROR_STATUS(tmpstat);
-
- goto bail;
- }
- atomic_inc(&osb->alloc_stats.moves);
- }
-
- /* Alright, try to satisfy the request. */
- startoff = ocfs_local_find_clear_bits(osb, alloc, bitswanted);
- if (startoff == -1) {
- /* we couldn't get enough bits from the local
- * alloc. Lets sync what we've got to the main bitmap,
- * clear the local out and try again. */
- LOG_TRACE_STR("Could not find enough contiguous bits in local "
- "alloc bitmap, trying to move my window.");
-
- if (!main_bm_inode)
- main_bm_inode = ocfs_get_system_file_inode(osb, GLOBAL_BITMAP_SYSTEM_INODE, -1);
-
- if (!main_bm_inode) {
- status = -EINVAL;
- LOG_ERROR_STATUS (status);
- goto bail;
- }
-
- /* lock bitmap here */
- ocfs_handle_add_inode(handle, main_bm_inode);
-
- /* Get the allocation lock here */
- status = ocfs_acquire_lock(osb, OCFS_LKM_EXMODE,
- 0, &main_bm_bh, main_bm_inode);
- if (status < 0) {
- main_bm_bh = NULL;
- if (status != -EINTR)
- LOG_ERROR_STATUS (status);
- goto bail;
- }
-
- ocfs_handle_add_lock(handle, OCFS_LKM_EXMODE,
- 0, main_bm_inode);
-
- status = ocfs_sync_local_to_main(osb, handle, alloc,
- main_bm_inode);
- if (status < 0) {
- LOG_ERROR_STATUS(status);
- goto bail;
- }
-
- ocfs_clear_local_alloc(alloc);
-
- goto tryagain;
- }
-
- LOG_TRACE_ARGS("Found %u bits, starting at local alloc offset %d\n",
- bitswanted, startoff);
-
- /* Ok, if we've got this far then the search suceeded and we
- * can mark the bitmap. */
- bitmap = LOCAL_ALLOC(alloc)->la_bitmap;
- tmpoff = startoff;
- tmpwanted = bitswanted;
- while(tmpwanted--) {
- /* LOG_TRACE_ARGS("setting bit %d\n", tmpoff); */
- set_bit(tmpoff++, bitmap);
- }
- LOCAL_ALLOC(alloc)->la_bits_set += bitswanted;
-
- *bitoff = LOCAL_ALLOC(alloc)->la_bm_off + startoff;
- *bitcount = bitswanted;
-
- status = ocfs_journal_dirty(handle, osb->local_alloc_bh);
- if (status < 0) {
- LOG_ERROR_STATUS(status);
- goto bail;
- }
-bail:
- /* if we locked the main bitmap, cleanup after ourselves. */
- if (main_bm_bh)
- brelse(main_bm_bh);
-
- if (main_bm_inode)
- iput(main_bm_inode);
-
- LOG_EXIT_STATUS(status);
- return(status);
-} /* ocfs_find_space_from_local */
-
-
-/*
- * ocfs_find_space
- *
- * A drop-in replacement for
- * ocfs_find_contiguous_space_from_bitmap. We will in fact, call
- * ocfs_find_contiguous_space_from_bitmap if you don't give us a
- * journal handle, or if the local bitmap isn't loaded, or if the
- * allocation is simply to big to fit in the local one. otherwise,
- * we'll try to use our local alloc instead.
- *
- */
-int ocfs_find_space(ocfs_super *osb, __u64 file_size,
- u32 *cluster_off, u32 *cluster_count, int sysfile,
- ocfs_journal_handle *handle)
-{
- int status = 0;
- u32 bitswanted;
- int use_global = 1;
- struct inode *local_alloc_inode = NULL;
-
- LOG_ENTRY_ARGS("(file_size = (%llu), handle = 0x%p, sysfile = %s)\n",
- file_size, handle, sysfile ? "true" : "false");
-
- if (file_size == 0) {
- LOG_ERROR_STR ("asking for an allocation of zero bytes...");
- status = 0;
- goto bail;
- }
-
- /* need to calculate a couple of things for below... */
- bitswanted = ocfs_clusters_for_bytes(osb->sb, file_size);
-
- /* Ok, now decide if we can use local alloc bitmap.
- * We *always* use global bitmap for clustersize > 128k,
- * file_size > 2mb, so force it under these conditions. */
- if (handle
- && osb->have_local_alloc
- && file_size <= OCFS_LOCAL_ALLOC_MAX_ALLOC) {
- use_global = 0;
-
- local_alloc_inode =
- ocfs_get_system_file_inode(osb,
- LOCAL_ALLOC_SYSTEM_INODE,
- osb->node_num);
-
- if (!local_alloc_inode) {
- status = -EFAIL;
- LOG_ERROR_STATUS(status);
- goto bail;
- }
- ocfs_handle_add_inode(handle, local_alloc_inode);
-
- /* If the local alloc has been disabled while we were
- * waiting for another process to finish with it, then
- * find_space_from_local will return -ENOSPC and we'll
- * continue with the global. */
- status = ocfs_find_space_from_local(osb, bitswanted,
- cluster_off, cluster_count,
- handle);
- /* If we've run out of space for our local alloc, lets
- * try the global one just in case... */
- if (status == -ENOSPC)
- use_global = 1;
- else if (status < 0) {
- LOG_ERROR_STATUS(status);
- goto bail;
- }
- atomic_inc(&osb->alloc_stats.local_data);
- }
-
- if (use_global) {
- status = ocfs_find_contiguous_space_from_bitmap(osb, handle,
- file_size,
- cluster_off,
- cluster_count,
- sysfile, NULL,
- NULL);
- if (!status)
- atomic_inc(&osb->alloc_stats.bitmap_data);
-
- }
-
- if (status < 0)
- LOG_ERROR_STATUS(status);
-
- LOG_TRACE_ARGS("Returning *cluster_off = %u, *cluster_count"
- "= %u\n", *cluster_off, *cluster_count);
-bail:
- if (local_alloc_inode)
- iput(local_alloc_inode);
-
- LOG_EXIT_STATUS(status);
- return(status);
-} /* ocfs_find_space */
-
-/*
- * ocfs_load_local_alloc
- */
-int ocfs_load_local_alloc(ocfs_super *osb)
-{
- int status = 0;
- ocfs2_dinode *alloc = NULL;
- struct buffer_head *alloc_bh = NULL;
- __u32 num_used;
- struct inode *inode = NULL;
-
- LOG_ENTRY();
-
- /* we don't enable local alloc on cluster sizes >= 128k */
- if (osb->s_clustersize > OCFS_LOCAL_ALLOC_MAX_CSIZE)
- goto bail;
-
- /* read the alloc off disk */
- inode = ocfs_get_system_file_inode(osb, LOCAL_ALLOC_SYSTEM_INODE, osb->node_num);
- if (!inode) {
- LOG_ERROR_STATUS(status=-EINVAL);
- goto bail;
- }
- status = ocfs_read_bh(osb,
- OCFS_I(inode)->ip_blkno << inode->i_sb->s_blocksize_bits,
- &alloc_bh, 0, inode);
- if (status < 0) {
- LOG_ERROR_STATUS(status);
- goto bail;
- }
-
- alloc = (ocfs2_dinode *) alloc_bh->b_data;
-
- /* do a little verification. */
- num_used = ocfs_alloc_count_bits(alloc);
-
- /* nowadays the local alloc has always been recovered before
- * we load it so there should be no bits used from the main
- * bitmap. */
- if (num_used
- || LOCAL_ALLOC(alloc)->la_bits_set
- || LOCAL_ALLOC(alloc)->la_bm_bits
- || LOCAL_ALLOC(alloc)->la_bm_off) {
- LOG_ERROR_ARGS("Local alloc hasn't been recovered!\n"
- "found = %u, set = %u, taken = %u, off = %u\n",
- num_used,
- LOCAL_ALLOC(alloc)->la_bits_set,
- LOCAL_ALLOC(alloc)->la_bm_bits,
- LOCAL_ALLOC(alloc)->la_bm_off);
- status = -EFAIL;
- goto bail;
- }
-
- if (!LOCAL_ALLOC(alloc)->la_size ||
- (LOCAL_ALLOC(alloc)->la_size > ocfs2_local_alloc_size(inode->i_sb))) {
- LOG_ERROR_ARGS("Local alloc size is invalid (la_size = %u)\n",
- LOCAL_ALLOC(alloc)->la_size);
- status = -EINVAL;
- goto bail;
- }
-
- osb->local_alloc_bh = alloc_bh;
- osb->have_local_alloc = 1;
-
-bail:
- if (status < 0)
- if (alloc_bh)
- brelse(alloc_bh);
- if (inode)
- iput(inode);
-
- LOG_EXIT_STATUS(status);
- return(status);
-} /* ocfs_load_local_alloc */
-
-/*
- * ocfs_shutdown_local_alloc
- *
- * return any unused bits to the bitmap and write out a clean
- * local_alloc.
- *
- * local_alloc_bh is optional. If not passed, we will simply use the
- * one off osb. If you do pass it however, be warned that it *will* be
- * returned brelse'd and NULL'd out.*/
-void ocfs_shutdown_local_alloc(ocfs_super *osb)
-{
- int status;
- ocfs2_dinode *alloc = NULL;
- ocfs_bitmap_free_head *f = NULL;
- struct buffer_head *bh = NULL;
- ocfs_journal_handle *handle = NULL;
-
- LOG_ENTRY();
-
- if (!osb->have_local_alloc)
- goto bail;
-
- bh = osb->local_alloc_bh;
-
- status = ocfs_sync_local_from_shutdown(osb, &f, bh, 0);
- if (status < 0)
- LOG_ERROR_STATUS(status);
-
- handle = ocfs_start_trans(osb, NULL, 1);
- if (!handle) {
- LOG_ERROR_STATUS(-ENOMEM);
- goto bail;
- }
- ocfs_handle_set_always_commits(handle, 1);
-
- status = ocfs_journal_access(handle, bh, OCFS_JOURNAL_ACCESS_WRITE);
- if (status < 0) {
- LOG_ERROR_STATUS(status);
- goto bail;
- }
-
- alloc = (ocfs2_dinode *) bh->b_data;
- ocfs_clear_local_alloc(alloc);
-
- status = ocfs_journal_dirty(handle, bh);
- if (status < 0) {
- LOG_ERROR_STATUS(status);
- goto bail;
- }
-
- brelse(bh);
-
- ocfs_commit_trans(handle);
- handle = NULL;
-
- osb->local_alloc_bh = NULL;
- osb->have_local_alloc = 0;
-
- if (f)
- ocfs_process_bitmap_free_head(osb, f);
-
-bail:
- if (handle)
- ocfs_commit_trans(handle);
-
- if (f)
- ocfs_free_bitmap_free_head(f);
-
- LOG_EXIT();
- return;
-} /* ocfs_shutdown_local_alloc */
-
-/*
- * ocfs_recover_local_alloc
- *
- * We want to free the bitmap bits outside of any recovery context, so
- * it's allocated and passed back for you.
- */
-int ocfs_recover_local_alloc(ocfs_super *osb,
- int node_num,
- ocfs_bitmap_free_head **bits_to_free)
-{
- int status = 0;
- struct buffer_head *alloc_bh = NULL;
- struct inode *inode = NULL;
- ocfs2_dinode *alloc;
-
- LOG_ENTRY_ARGS("(node_num = %d)\n", node_num);
-
- inode = ocfs_get_system_file_inode(osb,
- LOCAL_ALLOC_SYSTEM_INODE,
- node_num);
- if (!inode) {
- LOG_ERROR_STATUS(status=-EINVAL);
- goto bail;
- }
-
- status = ocfs_read_bh(osb,
- OCFS_I(inode)->ip_blkno << inode->i_sb->s_blocksize_bits,
- &alloc_bh,
- 0, inode);
- if (status < 0) {
- LOG_ERROR_STATUS(status);
- goto bail;
- }
-
- status = ocfs_sync_local_from_shutdown(osb,
- bits_to_free,
- alloc_bh,
- 1);
- if (status < 0) {
- LOG_ERROR_STATUS(status);
- goto bail;
- }
-
- alloc = (ocfs2_dinode *) alloc_bh->b_data;
- ocfs_clear_local_alloc(alloc);
-
- status = ocfs_write_bh(osb, alloc_bh, inode);
- if (status < 0)
- LOG_ERROR_STATUS(status);
-
-bail:
- if (alloc_bh)
- brelse(alloc_bh);
-
- if (inode)
- iput(inode);
-
- LOG_EXIT_STATUS(status);
- return(status);
-} /* ocfs_recover_local_alloc */
Modified: trunk/src/alloc.h
===================================================================
--- trunk/src/alloc.h 2004-10-02 01:01:24 UTC (rev 1542)
+++ trunk/src/alloc.h 2004-10-02 01:08:08 UTC (rev 1543)
@@ -30,20 +30,15 @@
#define OCFS2_ALLOC_H
int ocfs_add_to_bitmap_free_head(ocfs_super *osb,
- ocfs_bitmap_free_head *f,
- __u32 len, __u32 fileoff,
- __u32 nodenum, __u32 type);
+ ocfs_bitmap_free_head *f,
+ u32 len, u32 fileoff,
+ u32 nodenum, u64 blkno, u32 type);
+struct _ocfs2_alloc_context;
int ocfs_allocate_extent(ocfs_super *osb, struct buffer_head *fe_bh,
ocfs_journal_handle *handle,
u64 blkno, u32 new_clusters,
- struct inode *inode);
-int ocfs_alloc_node_block(ocfs_super *osb, __u64 FileSize,
- __u64 *DiskOffset, __u64 *file_off,
- __u32 NodeNum, __u32 Type,
- ocfs_journal_handle *handle);
-int ocfs_find_space(ocfs_super *osb, __u64 file_size,
- u32 *cluster_off, u32 *cluster_count, int sysfile,
- ocfs_journal_handle *handle);
+ struct inode *inode,
+ struct _ocfs2_alloc_context *meta_ac);
int ocfs_free_extents_for_truncate(ocfs_super *osb,
ocfs2_dinode *fe,
ocfs_journal_handle *handle,
@@ -67,4 +62,56 @@
ocfs_bitmap_free_head **bits_to_free);
void ocfs_shutdown_local_alloc(ocfs_super *osb);
+/*
+ * Selector constants, originally declared next to ac_which below;
+ * presumably the values that field takes (TODO confirm against the
+ * users in suballoc.c / localalloc.c).
+ */
+#define OCFS_AC_USE_LOCAL 1
+#define OCFS_AC_USE_MAIN  2
+#define OCFS_AC_USE_INODE 3
+#define OCFS_AC_USE_META  4
+
+/*
+ * Bookkeeping for one allocation reservation: records how many bits
+ * were asked for and how many have been handed out so far.
+ */
+typedef struct _ocfs2_alloc_context {
+	struct inode *ac_inode;		/* which bitmap are we allocating from? */
+	struct buffer_head *ac_bh;	/* file entry bh */
+	u32 ac_bits_wanted;		/* size of the reservation, in bits */
+	u32 ac_bits_given;		/* bits already handed out of it */
+	u32 ac_which;			/* one of OCFS_AC_USE_* (see above) */
+	ocfs_journal_handle *ac_handle;	/* debugging mostly. */
+} ocfs2_alloc_context;
+
+void ocfs_free_alloc_context(ocfs2_alloc_context *ac);
+/*
+ * How many bits of this reservation have not been handed out yet:
+ * ac_bits_wanted minus ac_bits_given.
+ */
+static inline int ocfs_alloc_context_bits_left(ocfs2_alloc_context *ac)
+{
+	u32 remaining = ac->ac_bits_wanted - ac->ac_bits_given;
+
+	return remaining;
+}
+int ocfs_reserve_bits(ocfs_super *osb,
+ ocfs_journal_handle *handle,
+ u32 bits_wanted,
+ ocfs2_alloc_context **ac);
+int ocfs_claim_bits(ocfs_super *osb,
+ ocfs_journal_handle *handle,
+ ocfs2_alloc_context *ac,
+ u32 min_bits,
+ u32 *bit_off,
+ u32 *num_bits);
+int ocfs_num_free_extents(ocfs_super *osb,
+ struct inode *inode,
+ ocfs2_dinode *fe);
+/*
+ * Upper bound on the number of new metadata blocks a single extend of
+ * this dinode could require.
+ *
+ * Working out the exact figure would involve a ton of reads and locks,
+ * so we simply ask for the worst case, which is a tree depth shift:
+ * one block per existing level of the tree (the current l_tree_depth),
+ * one block for the new tree_depth==0 extent_block, and one block for
+ * the new top of the tree.
+ */
+static inline int ocfs2_extend_meta_needed(ocfs2_dinode *fe)
+{
+	return 2 + fe->id2.i_list.l_tree_depth;
+}
+
+/* This is for local alloc ONLY. Others should use the generic apis
+ * above. */
+int ocfs_reserve_main_bitmap_bits(ocfs_super *osb,
+ ocfs_journal_handle *handle,
+ u32 bits_wanted,
+ ocfs2_alloc_context *ac);
+
#endif /* OCFS2_ALLOC_H */
Modified: trunk/src/aops.c
===================================================================
--- trunk/src/aops.c 2004-10-02 01:01:24 UTC (rev 1542)
+++ trunk/src/aops.c 2004-10-02 01:08:08 UTC (rev 1543)
@@ -539,8 +539,7 @@
blocks for the file. */
if (create && vbo_max > OCFS_I(inode)->ip_alloc_size) {
/* WARNING: How much do we really want to extend the file? */
- status = ocfs_extend_file(osb, vbo_max,
- NULL, inode, NULL, 0, NULL);
+ status = ocfs_extend_file(osb, inode, vbo_max);
if (status < 0) {
status = -ENOSPC;
LOG_ERROR_STR("ocfs_direct_IO_get_blocks: failed to extend the file!");
Modified: trunk/src/bitmap.c
===================================================================
--- trunk/src/bitmap.c 2004-10-02 01:01:24 UTC (rev 1542)
+++ trunk/src/bitmap.c 2004-10-02 01:08:08 UTC (rev 1543)
@@ -165,27 +165,42 @@
/*
* ocfs_find_clear_bits()
*
- * sysonly is passed # bits in bitmap that are rserved for system file space
- * in case we have a disk full.
+ * if you don't pass in best_fit_bits, we only try contig and give you
+ * -ENOSPC otherwise.
*
+ * if you pass in best_fit_bits we return -ENOSPC only if we're
+ * completely full, otherwise we'll always set best_fit_bits to at max
+ * bits_wanted, and of course *bitoff to your starting offset.
*/
int ocfs_find_clear_bits(ocfs_super *osb, ocfs_alloc_bm * bitmap,
- __u32 numBits, __u32 offset, __u32 sysonly)
+ u32 bits_wanted, u32 *bitoff,
+ u32 *best_fit_bits)
{
+ int status = 0;
__u32 globalsize, globaloff, localstart, lastbh;
__u32 size = OCFS_BITS_IN_CHUNK(osb->sb);
- __u32 bitoff = 0, count = 0;
+ __u32 count = 0;
+ u32 largest_start, largest_size;
void *buffer;
int c;
struct buffer_head *currbh = NULL;
- LOG_ENTRY_ARGS ("(0x%p, 0x%p, %u, %u, %u)\n", osb, bitmap, numBits,
- offset, sysonly);
+ LOG_ENTRY_ARGS ("(0x%p, 0x%p, %u)\n", osb, bitmap, bits_wanted);
- globalsize = bitmap->validbits - sysonly;
+ /* We keep track of the biggest contiguous chunk we've seen so
+ * far in these two variables. If we never completely fulfill
+ * the request, we can pass them back for a best try. */
+ largest_start = 0;
+ largest_size = 0;
+
+ if (best_fit_bits)
+ *best_fit_bits = 0;
+
+ *bitoff = 0;
+ globalsize = bitmap->validbits;
lastbh = ocfs_blocks_for_bits(osb->sb, globalsize) - 1;
- globaloff = offset;
+ globaloff = 0;
ocfs_bitmap_block_for_off(osb->sb, globaloff, &c, &localstart);
if (lastbh == 0)
@@ -199,11 +214,11 @@
"localstart=%u\n", c, lastbh, size, localstart);*/
buffer = currbh->b_data;
- while ((bitoff = find_next_zero_bit(buffer,
+ while ((*bitoff = find_next_zero_bit(buffer,
OCFS_BITS_IN_CHUNK(osb->sb),
localstart)) != -1) {
- /*LOG_TRACE_ARGS("c=%u, globaloff=%u, bitoff=%u, "
- "localstart=%u\n", c, globaloff, bitoff,
+ /*LOG_TRACE_ARGS("c=%u, globaloff=%u, *bitoff=%u, "
+ "localstart=%u\n", c, globaloff, *bitoff,
localstart);*/
/* find_next_zero_bit returns:
@@ -211,10 +226,10 @@
some number < size: at the next zero bit
localstart: if the current one is a zero
*/
- if (bitoff >= size) {
+ if (*bitoff >= size) {
nextbh:
/* we've hit the end of our bh. */
- /*LOG_TRACE_ARGS("bitoff >= size (%u)\n", bitoff,c);*/
+ /*LOG_TRACE_ARGS("*bitoff >= size (%u)\n", *bitoff,c);*/
/* if it's the last bh, then quit the loop */
if (c == lastbh) {
@@ -224,7 +239,7 @@
}
/* otherwise, reset localstart and switch bhs
* and continue */
- localstart = bitoff = 0;
+ localstart = *bitoff = 0;
c++;
currbh = bitmap->chunk[c];
buffer = currbh->b_data;
@@ -236,37 +251,42 @@
continue;
}
- if (!ocfs_test_allocatable(bitoff, currbh)) {
+ if (!ocfs_test_allocatable(*bitoff, currbh)) {
/* We found a zero, but we can't use it as it
* hasn't been put to disk yet! */
count = 0;
- localstart = bitoff + 1;
+ localstart = *bitoff + 1;
/* In doing this, we might go over our current bh. */
if (localstart >= size)
goto nextbh;
globaloff =
ocfs_bitmap_off_for_block(osb->sb, c,
- bitoff) + 1;
- } else if (bitoff == localstart) {
- /*LOG_TRACE_ARGS("bitoff == localstart (%u)\n",
- bitoff);*/
+ *bitoff) + 1;
+ } else if (*bitoff == localstart) {
+ /*LOG_TRACE_ARGS("*bitoff == localstart (%u)\n",
+ *bitoff);*/
/* cool, we have another zero! */
count++;
localstart++;
globaloff++;
} else {
- /*LOG_TRACE_ARGS("bitoff (%u) != localstart (%u)\n",
- bitoff, localstart);*/
+ /*LOG_TRACE_ARGS("*bitoff (%u) != localstart (%u)\n",
+ *bitoff, localstart);*/
/* we had to skip over some ones */
count = 1;
globaloff =
ocfs_bitmap_off_for_block(osb->sb, c,
- bitoff) + 1;
- localstart = bitoff + 1;
+ *bitoff) + 1;
+ localstart = *bitoff + 1;
}
- if (count == numBits) {
+ if (count > largest_size) {
+ largest_size = count;
+ largest_start = globaloff - count;
+ }
+
+ if (count == bits_wanted) {
/* we've found everything we wanted. */
LOG_TRACE_ARGS("Found it all! (count=%u)\n", count);
buffer = NULL;
@@ -274,13 +294,19 @@
}
}
- if (count == numBits)
- bitoff = globaloff - count;
- else
- bitoff = -1;
+ if (count == bits_wanted) {
+ *bitoff = globaloff - count;
+ if (best_fit_bits)
+ *best_fit_bits = bits_wanted;
+ } else if (best_fit_bits && largest_size) {
+#warning "can we go off the end of the bitmap here?"
+ *best_fit_bits = largest_size;
+ *bitoff = largest_start;
+ } else
+ status = -ENOSPC;
- LOG_EXIT_ULONG ((unsigned long)bitoff);
- return bitoff;
+ LOG_EXIT_STATUS(status);
+ return(status);
} /* ocfs_find_clear_bits */
/*
Modified: trunk/src/bitmap.h
===================================================================
--- trunk/src/bitmap.h 2004-10-02 01:01:24 UTC (rev 1542)
+++ trunk/src/bitmap.h 2004-10-02 01:08:08 UTC (rev 1543)
@@ -33,8 +33,8 @@
void ocfs_clear_bits(struct super_block *sb,
ocfs_journal_handle *handle, ocfs_alloc_bm *bitmap,
__u32 start, __u32 num);
-int ocfs_find_clear_bits(ocfs_super *osb, ocfs_alloc_bm *bitmap,
- __u32 numBits, __u32 offset, __u32 sysonly);
+int ocfs_find_clear_bits(ocfs_super *osb, ocfs_alloc_bm * bitmap,
+ u32 numBits, u32 *bitoff, u32 *best_fit_bits);
void ocfs_initialize_bitmap(struct super_block *sb,
ocfs_alloc_bm *bitmap, __u32 validbits,
__u32 allocbits);
Modified: trunk/src/dcache.c
===================================================================
--- trunk/src/dcache.c 2004-10-02 01:01:24 UTC (rev 1542)
+++ trunk/src/dcache.c 2004-10-02 01:08:08 UTC (rev 1543)
@@ -38,9 +38,10 @@
#include "ocfs_log.h"
#include "ocfs.h"
+#include "alloc.h"
#include "dcache.h"
+#include "file.h"
#include "vote.h"
-#include "file.h"
#define OCFS_DEBUG_CONTEXT OCFS_DEBUG_CONTEXT_DCACHE
Modified: trunk/src/dir.c
===================================================================
--- trunk/src/dir.c 2004-10-02 01:01:24 UTC (rev 1542)
+++ trunk/src/dir.c 2004-10-02 01:08:08 UTC (rev 1543)
@@ -49,10 +49,14 @@
#include "ocfs_log.h"
#include "ocfs.h"
+#include "alloc.h"
#include "dir.h"
#include "dlm.h"
+#include "file.h"
#include "inode.h"
+#include "ocfs_journal.h"
#include "namei.h"
+#include "suballoc.h"
#include "util.h"
#include "buffer_head_io.h"
@@ -63,6 +67,10 @@
DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK
};
+static int ocfs_extend_dir(ocfs_super *osb,
+ struct inode *dir,
+ struct buffer_head *parent_fe_bh,
+ struct buffer_head **new_de_bh);
/*
* ocfs_readdir()
*
@@ -79,6 +87,7 @@
struct super_block * sb = inode->i_sb;
int have_disk_lock = 0;
ocfs_super *osb = OCFS_SB(sb);
+ int have_sem = 0;
LOG_SET_CONTEXT(READDIR);
@@ -87,11 +96,6 @@
stored = 0;
bh = NULL;
- /* NOTE: We only take a write lock here because of the
- * acquire_lock call. We should investigate whether taking a
- * read lock is such a bad idea in this case... */
- down_write(&OCFS_I(inode)->ip_io_sem);
-
error = ocfs_acquire_lock_ro(osb, inode);
if (error < 0) {
if (error != -EINTR)
@@ -101,12 +105,14 @@
goto bail;
}
have_disk_lock = 1;
+ down_read(&OCFS_I(inode)->ip_io_sem);
+ have_sem = 1;
offset = filp->f_pos & (sb->s_blocksize - 1);
while (!error && !stored && filp->f_pos < inode->i_size) {
blk = (filp->f_pos) >> sb->s_blocksize_bits;
- bh = ocfs_bread (NULL, inode, blk, 0, &err, 0);
+ bh = ocfs_bread (inode, blk, &err, 0);
if (!bh) {
LOG_ERROR_ARGS ("directory #%llu contains a hole at offset %lu\n",
OCFS_I(inode)->ip_blkno,
@@ -121,8 +127,9 @@
if (!offset) {
for (i = 16 >> (sb->s_blocksize_bits - 9), num = 0;
i > 0; i--) {
- tmp = ocfs_bread (NULL, inode, ++blk, 0, &err, 1);
- brelse (tmp);
+ tmp = ocfs_bread (inode, ++blk, &err, 1);
+ if (tmp)
+ brelse (tmp);
}
}
@@ -198,6 +205,8 @@
stored = 0;
bail:
+ if (have_sem)
+ up_read(&OCFS_I(inode)->ip_io_sem);
if (have_disk_lock) {
error = ocfs_release_lock_ro (osb, inode);
@@ -205,8 +214,6 @@
LOG_ERROR_STATUS (error);
}
- up_write(&OCFS_I(inode)->ip_io_sem);
-
LOG_EXIT_STATUS(stored);
LOG_CLEAR_CONTEXT();
return stored;
@@ -239,6 +246,7 @@
LOG_ERROR_STATUS (status);
goto leave;
}
+ down_read(&OCFS_I(inode)->ip_io_sem);
lock_acq = 1;
}
@@ -255,6 +263,7 @@
if (take_lock && lock_acq)
{
+ up_read(&OCFS_I(inode)->ip_io_sem);
tmpstat = ocfs_release_lock_ro (osb, inode);
if (tmpstat < 0) {
LOG_ERROR_STATUS (tmpstat);
@@ -278,7 +287,7 @@
/*
* routine to check that the specified directory is empty (for rmdir)
*/
-int empty_dir(struct inode *inode)
+int ocfs_empty_dir(struct inode *inode)
{
unsigned long offset;
struct buffer_head * bh;
@@ -289,7 +298,7 @@
sb = inode->i_sb;
if ((inode->i_size <
(OCFS2_DIR_REC_LEN(1) + OCFS2_DIR_REC_LEN(2))) ||
- !(bh = ocfs_bread (NULL, inode, 0, 0, &err, 0))) {
+ !(bh = ocfs_bread (inode, 0, &err, 0))) {
LOG_ERROR_ARGS ("bad directory (dir #%llu) - no data block\n",
OCFS_I(inode)->ip_blkno);
return 1;
@@ -312,8 +321,8 @@
while (offset < inode->i_size ) {
if (!bh || (void *) de >= (void *) (bh->b_data + sb->s_blocksize)) {
brelse (bh);
- bh = ocfs_bread(NULL, inode,
- offset >> sb->s_blocksize_bits, 0, &err, 0);
+ bh = ocfs_bread(inode,
+ offset >> sb->s_blocksize_bits, &err, 0);
if (!bh) {
LOG_ERROR_ARGS ("directory #%llu contains a hole at offset %lu\n",
OCFS_I(inode)->ip_blkno, offset);
@@ -337,3 +346,279 @@
brelse (bh);
return 1;
}
+
+/*
+ * ocfs_do_extend_dir
+ *
+ * Find (allocating first if necessary) the block that sits at offset
+ * i_size of the directory and return a buffer head for it in *new_bh.
+ *
+ * When i_size has caught up with ip_alloc_size the directory is grown
+ * by one cluster through ocfs_extend_allocation(), using the handle and
+ * the allocation contexts supplied by the caller. The offset is then
+ * mapped with ocfs_lookup_file_allocation() and the block is fetched
+ * with sb_getblk(); nothing here reads or initialises its contents.
+ *
+ * Returns 0 on success or a negative status; -EIO if sb_getblk() hands
+ * back NULL (in which case *new_bh is set to NULL).
+ */
+int ocfs_do_extend_dir(struct super_block *sb,
+		       ocfs_journal_handle *handle,
+		       struct inode *dir,
+		       struct buffer_head *parent_fe_bh,
+		       ocfs2_alloc_context *data_ac,
+		       ocfs2_alloc_context *meta_ac,
+		       struct buffer_head **new_bh)
+{
+	int status;
+	int need_alloc;
+	s64 virt_off;
+	s64 disk_off = 0;
+	struct buffer_head *blk_bh;
+
+	/* compare size against allocation while holding ip_sem */
+	down(&OCFS_I(dir)->ip_sem);
+	need_alloc = (dir->i_size == OCFS_I(dir)->ip_alloc_size);
+	up(&OCFS_I(dir)->ip_sem);
+
+	if (need_alloc) {
+		status = ocfs_extend_allocation(OCFS_SB(sb), dir, 1,
+						parent_fe_bh, handle,
+						data_ac, meta_ac, NULL);
+		OCFS_ASSERT(status != -EAGAIN);
+		if (status < 0) {
+			LOG_ERROR_STATUS(status);
+			goto bail;
+		}
+	}
+
+	/* the block we want is the one that starts at the current i_size */
+	virt_off = (s64) dir->i_size;
+
+	status = ocfs_lookup_file_allocation(OCFS_SB(sb), virt_off, &disk_off,
+					     sb->s_blocksize, NULL, dir, 1);
+	if (status < 0) {
+		LOG_ERROR_STATUS(status);
+		goto bail;
+	}
+
+	blk_bh = sb_getblk(sb, disk_off >> sb->s_blocksize_bits);
+	*new_bh = blk_bh;
+	if (blk_bh == NULL) {
+		status = -EIO;
+		LOG_ERROR_STATUS(status);
+		goto bail;
+	}
+
+	status = 0;
+bail:
+	LOG_EXIT_STATUS(status);
+	return(status);
+}
+
+/*
+ * ocfs_extend_dir
+ *
+ * Grow a directory by one block and hand that block back, initialised
+ * as a single unused directory entry whose rec_len spans the whole
+ * block.
+ *
+ * If i_size has caught up with ip_alloc_size, one data bit is reserved
+ * (plus metadata when ocfs_num_free_extents() reports none free) before
+ * the transaction is started. Otherwise the block comes out of space
+ * that is already allocated and only two journal credits are needed.
+ *
+ * Assumes you already have a cluster lock on the directory.
+ *
+ * On success returns 0 and stores the new block in *new_de_bh holding
+ * exactly one reference, which the caller must brelse(). On failure
+ * returns a negative status and does not touch *new_de_bh.
+ */
+static int ocfs_extend_dir(ocfs_super *osb,
+			   struct inode *dir,
+			   struct buffer_head *parent_fe_bh,
+			   struct buffer_head **new_de_bh)
+{
+	int status = 0;
+	ocfs2_dinode *fe = (ocfs2_dinode *) parent_fe_bh->b_data;
+	int credits, num_free_extents;
+	ocfs2_alloc_context *data_ac = NULL;
+	ocfs2_alloc_context *meta_ac = NULL;
+	ocfs_journal_handle *handle = NULL;
+	struct buffer_head *new_bh = NULL;
+	struct ocfs2_dir_entry * de;
+	struct super_block *sb = osb->sb;
+
+	LOG_ENTRY();
+
+	LOG_TRACE_ARGS("extending dir %llu (i_size = %llu)\n",
+		       OCFS_I(dir)->ip_blkno, dir->i_size);
+
+	handle = ocfs_alloc_handle(osb);
+	if (handle == NULL) {
+		LOG_ERROR_STATUS (status = -ENOMEM);
+		goto bail;
+	}
+
+	/* dir->i_size is always block aligned. */
+	down(&OCFS_I(dir)->ip_sem);
+	if (dir->i_size == OCFS_I(dir)->ip_alloc_size) {
+		up(&OCFS_I(dir)->ip_sem);
+		num_free_extents = ocfs_num_free_extents(osb, dir, fe);
+		if (num_free_extents < 0) {
+			status = num_free_extents;
+			LOG_ERROR_STATUS(status);
+			goto bail;
+		}
+
+		/* no room left in the extent list: reserve metadata too */
+		if (!num_free_extents) {
+			status = ocfs_reserve_new_metadata(osb, handle,
+							   fe, &meta_ac);
+			if (status < 0) {
+				LOG_ERROR_STATUS (status);
+				goto bail;
+			}
+		}
+
+		status = ocfs_reserve_bits(osb, handle, 1, &data_ac);
+		if (status < 0) {
+			LOG_ERROR_STATUS(status);
+			goto bail;
+		}
+
+		credits = ocfs_calc_extend_credits(sb, 1);
+	} else {
+		up(&OCFS_I(dir)->ip_sem);
+		/* one for the dinode, one for the new block. */
+		credits = 2;
+	}
+
+	handle = ocfs_start_trans(osb, handle, credits);
+	if (handle == NULL) {
+		LOG_ERROR_STATUS(status = -ENOMEM);
+		goto bail;
+	}
+
+	status = ocfs_do_extend_dir(osb->sb, handle, dir, parent_fe_bh,
+				    data_ac, meta_ac, &new_bh);
+	if (status < 0) {
+		LOG_ERROR_STATUS(status);
+		goto bail;
+	}
+
+	/* the block is brand new: nothing to read, we fill it in below */
+	set_buffer_uptodate(new_bh);
+	SET_BH_SEQNUM(dir, new_bh);
+	status = ocfs_journal_access(handle, new_bh,
+				     OCFS_JOURNAL_ACCESS_CREATE);
+	if (status < 0) {
+		LOG_ERROR_STATUS(status);
+		goto bail;
+	}
+	memset(new_bh->b_data, 0, sb->s_blocksize);
+	de = (struct ocfs2_dir_entry *) new_bh->b_data;
+	de->inode = 0;
+	/* rec_len is stored little-endian on disk: convert cpu -> le */
+	de->rec_len = cpu_to_le16(sb->s_blocksize);
+	status = ocfs_journal_dirty(handle, new_bh);
+	if (status < 0) {
+		LOG_ERROR_STATUS(status);
+		goto bail;
+	}
+
+	dir->i_size += dir->i_sb->s_blocksize;
+	dir->i_blocks += 1;
+	status = ocfs_mark_inode_dirty(handle, dir, parent_fe_bh);
+	if (status < 0) {
+		LOG_ERROR_STATUS(status);
+		goto bail;
+	}
+
+	/* the caller gets its own reference; ours is dropped at bail */
+	*new_de_bh = new_bh;
+	get_bh(*new_de_bh);
+bail:
+	if (handle) {
+		if (status < 0)
+			ocfs_abort_trans(handle);
+		else
+			ocfs_commit_trans(handle);
+	}
+	if (data_ac)
+		ocfs_free_alloc_context(data_ac);
+	if (meta_ac)
+		ocfs_free_alloc_context(meta_ac);
+
+	/*
+	 * Always drop the reference ocfs_do_extend_dir() gave us. On
+	 * success the caller holds the one taken by get_bh() above;
+	 * releasing only on failure left every new block with a surplus
+	 * reference.
+	 */
+	if (new_bh)
+		brelse(new_bh);
+
+	LOG_EXIT_STATUS (status);
+	return status;
+} /* ocfs_extend_dir */
+
+/*
+ * ocfs_prepare_dir_for_insert
+ *
+ * Walk the directory entry by entry looking for a record that can take
+ * a name of namelen bytes. A record qualifies when it is unused
+ * (inode == 0) and at least OCFS2_DIR_REC_LEN(namelen) long, or when it
+ * is long enough to hold both its current name and the new record. If
+ * the walk reaches i_size without a hit the directory is extended by
+ * one block and that block is returned instead.
+ *
+ * On success (0) *ret_de_bh holds a referenced buffer head for the
+ * block containing the usable record; the caller fills the entry in.
+ * Failures: -EINVAL for a zero-length name, -EEXIST when the name is
+ * already present, -ENOENT when ocfs_check_dir_entry() rejects an
+ * entry, otherwise whatever ocfs_bread() / ocfs_extend_dir() reported.
+ */
+int ocfs_prepare_dir_for_insert(ocfs_super *osb,
+				struct inode *dir,
+				struct buffer_head *parent_fe_bh,
+				const char *name,
+				int namelen,
+				struct buffer_head **ret_de_bh)
+{
+	int status;
+	unsigned long dir_off = 0;	/* byte offset of 'de' in the dir */
+	unsigned short needed;		/* record size the new name needs */
+	unsigned short this_len;	/* rec_len of the entry at 'de' */
+	struct super_block *sb;
+	struct buffer_head *blk_bh = NULL;
+	struct ocfs2_dir_entry *de;
+	ocfs2_dinode *fe;
+
+	LOG_ENTRY();
+
+	LOG_TRACE_ARGS("getting ready to insert namelen %d into dir %llu\n",
+		       namelen, OCFS_I(dir)->ip_blkno);
+
+	OCFS_ASSERT(S_ISDIR(dir->i_mode));
+	fe = (ocfs2_dinode *) parent_fe_bh->b_data;
+	OCFS_ASSERT(fe->i_size == dir->i_size);
+
+	sb = dir->i_sb;
+
+	if (namelen == 0) {
+		status = -EINVAL;
+		LOG_ERROR_STATUS(status);
+		goto bail;
+	}
+
+	blk_bh = ocfs_bread(dir, 0, &status, 0);
+	if (blk_bh == NULL) {
+		LOG_ERROR_STATUS(status);
+		goto bail;
+	}
+
+	needed = OCFS2_DIR_REC_LEN(namelen);
+	de = (struct ocfs2_dir_entry *) blk_bh->b_data;
+
+	for (;;) {
+		/* walked off the end of this block? */
+		if ((char *) de >= blk_bh->b_data + sb->s_blocksize) {
+			brelse(blk_bh);
+			blk_bh = NULL;
+
+			if (dir->i_size <= dir_off) {
+				/* nothing left to search, grow the dir */
+				status = ocfs_extend_dir(osb, dir,
+							 parent_fe_bh,
+							 &blk_bh);
+				if (status < 0) {
+					LOG_ERROR_STATUS(status);
+					goto bail;
+				}
+				OCFS_ASSERT(blk_bh);
+				*ret_de_bh = blk_bh;
+				get_bh(*ret_de_bh);
+				goto bail;
+			}
+
+			/* move to next block */
+			blk_bh = ocfs_bread(dir,
+					    dir_off >> sb->s_blocksize_bits,
+					    &status, 0);
+			if (blk_bh == NULL) {
+				LOG_ERROR_STATUS(status);
+				goto bail;
+			}
+			de = (struct ocfs2_dir_entry *) blk_bh->b_data;
+		}
+
+		if (!ocfs_check_dir_entry(dir, de, blk_bh, dir_off)) {
+			status = -ENOENT;
+			goto bail;
+		}
+		if (ocfs_match(namelen, name, de)) {
+			status = -EEXIST;
+			goto bail;
+		}
+
+		this_len = le16_to_cpu(de->rec_len);
+		if ((le64_to_cpu(de->inode) == 0 && this_len >= needed) ||
+		    this_len >= OCFS2_DIR_REC_LEN(de->name_len) + needed) {
+			/* Ok, we found a spot. Return this bh and let
+			 * the caller actually fill it in. */
+			*ret_de_bh = blk_bh;
+			get_bh(*ret_de_bh);
+			status = 0;
+			goto bail;
+		}
+
+		dir_off += this_len;
+		de = (struct ocfs2_dir_entry *) ((char *) de + this_len);
+	}
+
+bail:
+	/* drop our own reference; the caller keeps the get_bh() one */
+	if (blk_bh)
+		brelse(blk_bh);
+	LOG_EXIT_STATUS(status);
+	return(status);
+}
Modified: trunk/src/dir.h
===================================================================
--- trunk/src/dir.h 2004-10-02 01:01:24 UTC (rev 1542)
+++ trunk/src/dir.h 2004-10-02 01:08:08 UTC (rev 1543)
@@ -29,12 +29,24 @@
#ifndef OCFS2_DIR_H
#define OCFS2_DIR_H
-int empty_dir(struct inode *inode); /* FIXME: to namei.c */
+int ocfs_empty_dir(struct inode *inode); /* FIXME: to namei.c */
int ocfs_find_files_on_disk(ocfs_super *osb, const char *name,
int namelen, u64 *blkno,
struct inode *inode, int take_lock,
struct buffer_head **dirent_bh,
struct ocfs2_dir_entry **dirent);
int ocfs_readdir(struct file *filp, void *dirent, filldir_t filldir);
-
+int ocfs_prepare_dir_for_insert(ocfs_super *osb,
+ struct inode *dir,
+ struct buffer_head *parent_fe_bh,
+ const char *name,
+ int namelen,
+ struct buffer_head **ret_de_bh);
+int ocfs_do_extend_dir(struct super_block *sb,
+ ocfs_journal_handle *handle,
+ struct inode *dir,
+ struct buffer_head *parent_fe_bh,
+ ocfs2_alloc_context *data_ac,
+ ocfs2_alloc_context *meta_ac,
+ struct buffer_head **new_bh);
#endif /* OCFS2_DIR_H */
Modified: trunk/src/dlm.c
===================================================================
--- trunk/src/dlm.c 2004-10-02 01:01:24 UTC (rev 1542)
+++ trunk/src/dlm.c 2004-10-02 01:08:08 UTC (rev 1543)
@@ -218,6 +218,7 @@
OCFS_ASSERT(lock_type != OCFS_LKM_NLMODE);
OCFS_ASSERT(inode);
OCFS_ASSERT(bh);
+ OCFS_ASSERT(!journal_current_handle());
lock_id = OCFS_I(inode)->ip_blkno;
LOG_TRACE_ARGS("lock_id = %llu\n", lock_id);
@@ -232,23 +233,31 @@
}
updated = 0;
+again:
+ /* yay, lock ordering. at least we don't hold io sem across
+ * the whole thing now. */
+ down_read(&OCFS_I(inode)->ip_io_sem);
ocfs_acquire_lockres_write (inode);
-again:
LOG_TRACE_ARGS("attempting to get lock, pass: %d\n", ++k);
+ /* if updated = 1 then we've read a valid bh so skip the
+ * update_lockres if we can trust it. */
+ if (updated && (lockres->master_node_num != osb->node_num))
+ updated = 0;
+
if (!updated) {
status = ocfs_update_lockres(osb, *bh, inode, 1);
if (status < 0) {
+ up_read(&OCFS_I(inode)->ip_io_sem);
ocfs_release_lockres_write (inode);
LOG_ERROR_STATUS (status);
goto finally;
}
+ updated = 1;
}
+ up_read(&OCFS_I(inode)->ip_io_sem);
- /* alright, if we own it then no more updates are necessary. */
- if (lockres->master_node_num == osb->node_num)
- updated = 1;
reevaluate:
no_owner = (lockres->master_node_num == OCFS_INVALID_NODE_NUM);
@@ -388,11 +397,6 @@
LOG_ERROR_ARGS("Timed out acquiring lock for inode "
"%llu, retrying...\n", OCFS_I(inode)->ip_blkno);
ocfs_sleep (50);
- ocfs_acquire_lockres_write(inode);
- /* if we're going to jump back up, we want to update
- * if we're not the master... */
- if (lockres->master_node_num != osb->node_num)
- updated = 0;
goto again;
}
goto finally;
Modified: trunk/src/file.c
===================================================================
--- trunk/src/file.c 2004-10-02 01:01:24 UTC (rev 1542)
+++ trunk/src/file.c 2004-10-02 01:08:08 UTC (rev 1543)
@@ -46,6 +46,7 @@
#include "sysfile.h"
#include "inode.h"
#include "ioctl.h"
+#include "suballoc.h"
#include "util.h"
#include "ocfs_journal.h"
@@ -55,6 +56,11 @@
static int ocfs2_zero_extend(struct inode *inode);
+static unsigned int ocfs_calc_overalloc_bits(ocfs_super *osb,
+ struct file *filp,
+ ocfs2_dinode *fe,
+ u64 new_size);
+
static void ocfs_fe_set_attributes(ocfs2_dinode *fe, struct iattr *attr)
{
if (attr->ia_valid & ATTR_SIZE)
@@ -403,7 +409,7 @@
struct inode *inode)
{
int status = 0;
- ocfs2_dinode *fileEntry = NULL;
+ ocfs2_dinode *fe = NULL;
struct buffer_head *bh = NULL;
ocfs_journal_handle *handle = NULL;
@@ -427,9 +433,9 @@
LOG_ERROR_STATUS (status);
goto leave;
}
-
ocfs_handle_add_lock(handle, OCFS_LKM_EXMODE, FLAG_FILE_UPDATE_OIN,
inode);
+ ocfs_handle_add_inode(handle, inode);
/* Start a transaction - need a minimal amount of block credits (1) */
handle = ocfs_start_trans(osb, handle, 1);
@@ -437,10 +443,11 @@
LOG_ERROR_STATUS(status);
goto leave;
}
+ ocfs_handle_set_always_commits(handle, 1);
- fileEntry = (ocfs2_dinode *) bh->b_data;
+ fe = (ocfs2_dinode *) bh->b_data;
- if (!IS_VALID_FILE_ENTRY(fileEntry)) {
+ if (!IS_VALID_FILE_ENTRY(fe)) {
LOG_ERROR_ARGS("Invalid fe at blkno %llu",
OCFS_I(inode)->ip_blkno);
status = -EFAIL;
@@ -453,11 +460,11 @@
goto leave;
}
- fileEntry = (ocfs2_dinode *) bh->b_data;
+ fe = (ocfs2_dinode *) bh->b_data;
- fileEntry->i_mtime = OCFS_CURRENT_TIME;
+ fe->i_mtime = OCFS_CURRENT_TIME;
- ocfs_fe_set_attributes(fileEntry, attr);
+ ocfs_fe_set_attributes(fe, attr);
status = ocfs_journal_dirty(handle, bh);
if (status < 0) {
@@ -466,14 +473,9 @@
}
leave:
+ if (handle)
+ ocfs_commit_trans(handle);
- if (handle) {
- if (status < 0)
- ocfs_abort_trans(handle);
- else
- ocfs_commit_trans(handle);
- }
-
if (bh != NULL)
brelse(bh);
@@ -626,10 +628,7 @@
LOG_TRACE_ARGS
("Writing at EOF, will need more allocation: have=%llu, "
"need=%llu\n", OCFS_I(inode)->ip_alloc_size, newsize);
- down_write(&OCFS_I(inode)->ip_io_sem);
- status = ocfs_extend_file(osb, newsize,
- NULL, inode, NULL, 0, NULL);
- up_write(&OCFS_I(inode)->ip_io_sem);
+ status = ocfs_extend_file(osb, inode, newsize);
if (status < 0) {
if (status != -EINTR && status != -ENOSPC) {
LOG_ERROR_STATUS (status);
@@ -922,6 +921,8 @@
/* if we updated correctly then we can update the alloc_size */
OCFS_I(inode)->ip_alloc_size = new_alloc_size;
OCFS_I(inode)->ip_mmu_private = fe->i_size;
+ ocfs_extent_map_destroy(&OCFS_I(inode)->ip_ext_map);
+ ocfs_extent_map_init (&OCFS_I(inode)->ip_ext_map);
up (&(OCFS_I(inode)->ip_sem));
leave:
@@ -980,264 +981,348 @@
return res;
}
-
-/* ocfs_extend_file()
- *
+/*
+ * ocfs_extend_allocation()
+ *
+ * extend allocation only here.
+ * we'll update all the disk stuff, and oip->alloc_size
+ *
+ * expect stuff to be locked, a transaction started and enough data /
+ * metadata reservations in the contexts.
+ *
+ * clusters_to_add: number of clusters still wanted; must be non-zero.
+ * fe_bh: buffer holding the file entry (ocfs2_dinode) being extended.
+ * handle: journal handle whose remaining credits are checked each pass.
+ * data_ac: allocation context the clusters are claimed from.
+ * meta_ac: metadata allocation context, may be NULL if none reserved.
+ * reason: optional; on -EAGAIN it is set to say what must be redone.
+ *
+ * Returns 0 once every requested cluster has been added, a negative
+ * status on error, or -EAGAIN when the caller has to restart us:
+ * RESTART_META - no free extents and no / not enough reserved metadata
+ * RESTART_TRANS - not enough transaction credits left on the handle
+ * Clusters added before an -EAGAIN are already reflected in
+ * fe->i_clusters and ip_alloc_size.
*/
-int ocfs_extend_file(ocfs_super *osb, __u64 file_size,
- ocfs_journal_handle *passed_handle,
- struct inode *inode, struct iattr *attr,
- int system_file, struct buffer_head *fe_bh)
+int ocfs_extend_allocation(ocfs_super *osb,
+ struct inode *inode,
+ u32 clusters_to_add,
+ struct buffer_head *fe_bh,
+ ocfs_journal_handle *handle,
+ ocfs2_alloc_context *data_ac,
+ ocfs2_alloc_context *meta_ac,
+ enum ocfs2_alloc_restarted *reason)
{
int status = 0;
- ocfs2_dinode *fe;
- __u64 tempOffset = 0;
- __u64 current_alloc;
- __u64 alloc_size;
- u32 bitmapOffset = 0;
- u32 numClustersAlloc = 0;
- u64 block_off;
- u64 num_blocks;
- struct buffer_head *bh = NULL;
- ocfs_journal_handle *handle = NULL;
- int credits;
- struct inode *ext_alloc_inode = NULL;
+ int credits_needed, free_extents, multi_pass;
+ ocfs2_dinode *fe = (ocfs2_dinode *) fe_bh->b_data;
+ u32 bit_off, num_bits;
+ u64 block;
- LOG_ENTRY_ARGS("(file_size=%llu, system=%s)\n",
- file_size, system_file?"yes":"no");
+ OCFS_ASSERT(clusters_to_add);
- if (!inode)
- BUG();
-
-#ifdef PURE_EVIL
- if (evil_filename_check(EVIL_INODE, inode)) {
- LOG_ERROR_ARGS("EVIL EXTEND: file_size=%llu, oldsize=%llu\n",
- file_size, inode->i_size);
+ multi_pass = 0;
+again:
+ free_extents = ocfs_num_free_extents(osb, inode, fe);
+ if (free_extents < 0) {
+ status = free_extents;
+ LOG_ERROR_STATUS(status);
+ goto leave;
}
-#endif
- if (file_size == 0)
+ /* there are two cases which could cause us to EAGAIN in the
+ * we-need-more-metadata case:
+ * 1) we haven't reserved *any*
+ * 2) we are so fragmented, we've needed to add metadata too
+ * many times. */
+ if (!free_extents && !meta_ac) {
+ LOG_TRACE_STR("we haven't reserved any metadata!");
+ status = -EAGAIN;
+ if (reason)
+ *reason = RESTART_META;
goto leave;
+ } else if ((!free_extents)
+ && (ocfs_alloc_context_bits_left(meta_ac)
+ < ocfs2_extend_meta_needed(fe))) {
+ LOG_TRACE_STR("filesystem is really fragmented...");
+ status = -EAGAIN;
+ if (reason)
+ *reason = RESTART_META;
+ goto leave;
+ }
- if (passed_handle == NULL) {
- handle = ocfs_alloc_handle(osb);
- if (handle == NULL) {
- LOG_ERROR_STATUS(status = -ENOMEM);
- goto leave;
- }
+ /* do we have enough credits for another single extend, of
+ * what's left? */
+ /* fe + main bitmap fe + main bitmap bits */
+ if (!multi_pass)
+ credits_needed = 1 + 1 +
+ ocfs_blocks_for_bits(osb->sb, clusters_to_add);
+ else /* if we've already extended once, then we've already reserved. */
+ credits_needed = ocfs_blocks_for_bits(osb->sb, clusters_to_add);
+ if (!free_extents) {
+ /* will need to extend the file:
+ * metadata suballoc fe + metadata suballoc bitmap
+ * + actual metadata blocks. */
+ credits_needed += 1 + 1 + ocfs2_extend_meta_needed(fe);
+ }
+ multi_pass = 1;
- status = ocfs_acquire_lock (osb, OCFS_LKM_EXMODE,
- FLAG_FILE_EXTEND, &bh, inode);
- if (status < 0) {
- if (status != -EINTR)
- LOG_ERROR_STATUS (status);
- goto leave;
- }
- ocfs_handle_add_lock(handle,
- OCFS_LKM_EXMODE,
- FLAG_FILE_EXTEND|FLAG_FILE_UPDATE_OIN,
- inode);
- } else {
- handle = passed_handle;
- /* fe_bh is optional if you already have a transaction open. */
- if (fe_bh)
- bh = fe_bh;
- else {
- status = ocfs_read_bh(osb,
- OCFS_I(inode)->ip_blkno << inode->i_sb->s_blocksize_bits,
- &bh, OCFS_BH_CACHED, inode);
- if (status < 0) {
- LOG_ERROR_STATUS (status);
- goto leave;
- }
- }
+ if (credits_needed > (handle->max_buffs - handle->num_buffs)) {
+ LOG_TRACE_ARGS("Not enough credits for this extend: need %u, "
+ "have %u\n", credits_needed,
+ (handle->max_buffs - handle->num_buffs));
+ status = -EAGAIN;
+ if (reason)
+ *reason = RESTART_TRANS;
+ goto leave;
}
- fe = (ocfs2_dinode *) bh->b_data;
- OCFS_ASSERT(IS_VALID_FILE_ENTRY(fe));
+ status = ocfs_claim_bits(osb, handle, data_ac, 1, &bit_off, &num_bits);
+ if (status < 0) {
+ LOG_ERROR_STATUS(status);
+ goto leave;
+ }
- current_alloc = (u64)fe->i_clusters << osb->s_clustersize_bits;
- alloc_size = file_size - current_alloc;
- LOG_TRACE_ARGS("current_alloc=%llu, alloc_size=%llu\n",
- current_alloc, alloc_size);
+ OCFS_ASSERT(num_bits <= clusters_to_add);
- if (passed_handle == NULL) {
- credits = ocfs_calc_extend_credits(osb->sb,
- (__u32) alloc_size);
+ /* reserve our write early -- allocate_extent may update the inode */
+ status = ocfs_journal_access(handle, fe_bh, OCFS_JOURNAL_ACCESS_WRITE);
+ if (status < 0) {
+ LOG_ERROR_STATUS(status);
+ goto leave;
+ }
- handle = ocfs_start_trans(osb, handle, credits);
- if (handle == NULL) {
- LOG_ERROR_STATUS(status = -ENOMEM);
- goto leave;
- }
+ block = ocfs_clusters_to_blocks(osb->sb, bit_off);
+ /* log the converted block number, not the raw cluster offset, so
+ * the value matches what the message calls it. */
+ LOG_TRACE_ARGS("Allocating %u clusters at block %llu for inode %llu\n",
+ num_bits, block, OCFS_I(inode)->ip_blkno);
+ status = ocfs_allocate_extent(osb, fe_bh, handle, block, num_bits,
+ inode, meta_ac);
+ if (status < 0) {
+ LOG_ERROR_STATUS(status);
+ goto leave;
}
- status = ocfs_journal_access(handle, bh, OCFS_JOURNAL_ACCESS_WRITE);
+ fe->i_clusters += num_bits;
+ down (&(OCFS_I(inode)->ip_sem));
+ OCFS_I(inode)->ip_alloc_size =
+ (u64)fe->i_clusters << osb->s_clustersize_bits;
+ up (&(OCFS_I(inode)->ip_sem));
+
+ status = ocfs_journal_dirty(handle, fe_bh);
if (status < 0) {
LOG_ERROR_STATUS(status);
goto leave;
}
- if (file_size <= (__s64)current_alloc)
- goto no_alloc;
+ clusters_to_add -= num_bits;
- alloc_size = file_size - current_alloc;
+ /* claim_bits may hand back fewer clusters than wanted; go around
+ * again (re-checking extents and credits) for the remainder. */
+ if (clusters_to_add) {
+ LOG_TRACE_ARGS("need to alloc once more, clusters = %u, "
+ "wanted = %u\n", fe->i_clusters,
+ clusters_to_add);
+ goto again;
+ }
- /* TODO: We can add something here so that after 2-3 allocations,
- * we give a lot more disk space to the file than the alloc_size so
- * in order to try to use the Extents of File Entry only and ofcourse
- * the file will have more contigous disk space. */
+leave:
+ LOG_EXIT_STATUS(status);
+ return(status);
+}
- if (!system_file) {
- int one_percentish_bits = 7;
- __u64 tempSize = current_alloc;
+static unsigned int ocfs_calc_overalloc_bits(ocfs_super *osb,
+ struct file *filp,
+ ocfs2_dinode *fe,
+ u64 new_size)
+{
+#warning "finish this"
+ /* Placeholder. The plan is to keep a short history of allocations
+ * on the filp and derive a sensible amount of overallocation from
+ * it; until that exists, ask for no extra bits at all. */
+ return 0;
+}
- if (tempSize > ONE_MEGA_BYTE)
- tempSize = ONE_MEGA_BYTE;
- alloc_size += (tempSize * 2);
+/* ocfs_extend_file()
+ *
+ * Grow the allocation and on-disk i_size of inode up to new_i_size.
+ *
+ * Takes the EX lock on the inode, reserves data (and, when the file
+ * entry has no free extents, metadata) allocation, starts a
+ * transaction and calls ocfs_extend_allocation(). If that returns
+ * -EAGAIN with RESTART_TRANS the transaction is extended and the
+ * allocation retried; with RESTART_META the current work is committed
+ * and the whole function is run again from restart_all.
+ *
+ * Returns 0 on success (including the new_i_size == 0 no-op) or a
+ * negative status. On exit the handle is committed (status == 0) or
+ * aborted (status < 0), both alloc contexts are freed and the file
+ * entry buffer is released.
+ *
+ * Ok, this function is heavy on the goto's - we need to clean it up a
+ * bit.
+ */
+int ocfs_extend_file(ocfs_super *osb,
+ struct inode *inode,
+ u64 new_i_size)
+{
+ int status = 0;
+ int restart_func = 0;
+ int skip_overalloc = 0;
+ int credits, num_free_extents;
+ unsigned int overalloc_bits = 0;
+ u32 clusters_to_add;
+ struct buffer_head *bh = NULL;
+ ocfs2_dinode *fe;
+ ocfs_journal_handle *handle = NULL;
+ ocfs2_alloc_context *data_ac = NULL;
+ ocfs2_alloc_context *meta_ac = NULL;
+ enum ocfs2_alloc_restarted why;
- if (alloc_size <
- (current_alloc >> one_percentish_bits)) {
- alloc_size = current_alloc >> one_percentish_bits;
- tempSize = alloc_size;
- // avoid using 64 bit mod
- while (tempSize > (10*ONE_MEGA_BYTE))
- tempSize -= (10*ONE_MEGA_BYTE);
- tempSize = (10*ONE_MEGA_BYTE) - tempSize;
- alloc_size += tempSize;
- }
+ LOG_ENTRY_ARGS("(new_i_size=%llu)\n", new_i_size);
+
+ /* setattr sometimes calls us like this. */
+ if (new_i_size == 0)
+ goto leave;
+
+restart_all:
+ handle = ocfs_alloc_handle(osb);
+ if (handle == NULL) {
+ LOG_ERROR_STATUS(status = -ENOMEM);
+ goto leave;
}
- status = ocfs_find_space(osb, alloc_size, &bitmapOffset,
- &numClustersAlloc, system_file, handle);
- LOG_TRACE_ARGS("find_space: alloc_size=%llu, returned off=%u"
- ", num=%u\n", alloc_size, bitmapOffset,
- numClustersAlloc);
+ status = ocfs_acquire_lock(osb, OCFS_LKM_EXMODE, FLAG_FILE_EXTEND,
+ &bh, inode);
if (status < 0) {
- if (status != -ENOSPC && status != -EINTR)
+ if (status != -EINTR)
LOG_ERROR_STATUS (status);
goto leave;
}
+ ocfs_handle_add_lock(handle,
+ OCFS_LKM_EXMODE,
+ FLAG_FILE_EXTEND|FLAG_FILE_UPDATE_OIN,
+ inode);
+ ocfs_handle_add_inode(handle, inode);
- block_off = ocfs_clusters_to_blocks(osb->sb,
- bitmapOffset);
- num_blocks = ocfs_clusters_to_blocks(osb->sb,
- numClustersAlloc);
+ fe = (ocfs2_dinode *) bh->b_data;
+ OCFS_ASSERT(IS_VALID_FILE_ENTRY(fe));
+ OCFS_ASSERT(new_i_size >= fe->i_size);
-#define OCFS_MAX_ZERO_BLOCKS (4096)
+ if (fe->i_size == new_i_size) {
+ OCFS_ASSERT(inode->i_size == new_i_size);
+ goto leave;
+ }
- if (system_file) {
- struct buffer_head **bhs = NULL;
- int i;
- sector_t block;
- u64 this_last;
+ clusters_to_add = ocfs_clusters_for_bytes(osb->sb, new_i_size)
+ - fe->i_clusters;
-// printk("Extending system inode %llu (from %u, adding %u)\n", fe->i_blkno, fe->i_clusters, numClustersAlloc);
- LOG_TRACE_ARGS("zeroing %llu blocks from offset %llu\n",
- num_blocks, block_off);
- bhs = kmalloc(OCFS_MAX_ZERO_BLOCKS * sizeof(struct buffer_head *),
- GFP_KERNEL);
- if (!bhs) {
- status = -ENOMEM;
- LOG_ERROR_STATUS(status);
- goto leave;
- }
- memset(bhs, 0, OCFS_MAX_ZERO_BLOCKS *
- sizeof(struct buffer_head *));
+ LOG_TRACE_ARGS("extend inode %llu, new_i_size = %llu, i_size = %llu, "
+ "fe->i_clusters = %u, clusters_to_add = %u\n",
+ OCFS_I(inode)->ip_blkno, new_i_size, inode->i_size,
+ fe->i_clusters, clusters_to_add);
- block = block_off;
- while (block < (block_off + num_blocks)) {
- this_last = block + OCFS_MAX_ZERO_BLOCKS;
- if (this_last > (block_off + num_blocks))
- this_last = block_off + num_blocks;
+ /* already enough clusters allocated: only i_size needs updating,
+ * so skip the reservations. */
+ if (!clusters_to_add)
+ goto do_start_trans;
- //LOG_TRACE_ARGS("block = %llu, this_last = %llu\n",
- // (unsigned long long) block, this_last);
- i = 0;
- while (block < this_last) {
- bhs[i] = sb_getblk(osb->sb, block);
- if (!bhs[i]) {
- status = -ENOMEM;
- LOG_ERROR_STATUS(status);
- break;
- }
- memset(bhs[i]->b_data, 0,
- osb->sb->s_blocksize);
- set_buffer_uptodate(bhs[i]);
- i++;
- block++;
- }
- if (status)
- break;
+ /* overallocation is only attempted on the first pass through;
+ * skip_overalloc stays set across a restart_all. */
+ overalloc_bits = 0;
+ if (!skip_overalloc) {
+ overalloc_bits = ocfs_calc_overalloc_bits(osb,
+ NULL,
+ fe,
+ new_i_size);
+ clusters_to_add += overalloc_bits;
+ skip_overalloc = 1;
+ }
- //LOG_TRACE_ARGS("writing %d blocks\n", i);
- status = ocfs_write_bhs(osb, bhs, i, inode);
- if (status) {
- LOG_ERROR_STATUS(status);
- break;
- }
- for (i = 0; i < OCFS_MAX_ZERO_BLOCKS; i++)
- if (bhs[i])
- brelse(bhs[i]);
- memset(bhs, 0, OCFS_MAX_ZERO_BLOCKS *
- sizeof(struct buffer_head *));
+ num_free_extents = ocfs_num_free_extents(osb,
+ inode,
+ fe);
+ if (num_free_extents < 0) {
+ status = num_free_extents;
+ LOG_ERROR_STATUS(status);
+ goto leave;
+ }
+
+ if (!num_free_extents) {
+ status = ocfs_reserve_new_metadata(osb,
+ handle,
+ fe,
+ &meta_ac);
+ if (status < 0) {
+ LOG_ERROR_STATUS (status);
+ goto leave;
}
+ }
- for (i = 0; i < OCFS_MAX_ZERO_BLOCKS; i++)
- if (bhs[i])
- brelse(bhs[i]);
- kfree(bhs);
+ status = ocfs_reserve_bits(osb,
+ handle,
+ clusters_to_add,
+ &data_ac);
+ if (status < 0) {
+ if (status != -ENOSPC)
+ LOG_ERROR_STATUS(status);
+ goto leave;
}
- ext_alloc_inode = ocfs_get_system_file_inode(osb, EXTENT_ALLOC_BITMAP_SYSTEM_INODE, osb->node_num);
- if (!ext_alloc_inode) {
- status = -EFAIL;
- LOG_ERROR_STATUS(status);
+do_start_trans:
+ credits = ocfs_calc_extend_credits(osb->sb, clusters_to_add);
+ handle = ocfs_start_trans(osb, handle, credits);
+ if (handle == NULL) {
+ LOG_ERROR_STATUS(status = -ENOMEM);
goto leave;
}
- ocfs_handle_add_inode(handle, ext_alloc_inode);
- status = ocfs_allocate_extent(osb, bh, handle, block_off,
- numClustersAlloc, inode);
+restarted_transaction:
+ /* reserve a write to the file entry early on - that way if we
+ * run out of credits in the allocation path, we can still
+ * update i_size. */
+ status = ocfs_journal_access(handle, bh, OCFS_JOURNAL_ACCESS_WRITE);
if (status < 0) {
- LOG_ERROR_STATUS (status);
+ LOG_ERROR_STATUS(status);
goto leave;
}
- /* update the total allocation size here */
- fe->i_clusters += numClustersAlloc;
+ if (!clusters_to_add)
+ goto no_alloc;
- down (&(OCFS_I(inode)->ip_sem));
- OCFS_I(inode)->ip_alloc_size =
- (u64)fe->i_clusters << osb->s_clustersize_bits;
- up (&(OCFS_I(inode)->ip_sem));
+ status = ocfs_extend_allocation(osb,
+ inode,
+ clusters_to_add,
+ bh,
+ handle,
+ data_ac,
+ meta_ac,
+ &why);
+ if ((status < 0) && (status != -EAGAIN)) {
+ LOG_ERROR_STATUS(status);
+ goto leave;
+ }
- /* no need to do OCFS_SECTOR_ALIGN once the allocation size is
- * correct. */
+ /* widen i_clusters before shifting: the byte count of a large
+ * allocation does not fit in the width of i_clusters. */
+ if (status == -EAGAIN
+ && (new_i_size >
+ ((u64)fe->i_clusters << osb->s_clustersize_bits))) {
+
+ if (why == RESTART_META) {
+ LOG_TRACE_ARGS("restarting function.\n");
+ restart_func = 1;
+ } else {
+ OCFS_ASSERT(why == RESTART_TRANS);
+
+ /* update i_size in case we crash after the
+ * extend_trans */
+ fe->i_size = (u64)fe->i_clusters << osb->s_clustersize_bits;
+ fe->i_mtime = OCFS_CURRENT_TIME;
+
+ status = ocfs_journal_dirty(handle, bh);
+ if (status < 0) {
+ LOG_ERROR_STATUS (status);
+ goto leave;
+ }
+
+ clusters_to_add =
+ ocfs_clusters_for_bytes(osb->sb, new_i_size)
+ - fe->i_clusters + overalloc_bits;
+ LOG_TRACE_ARGS("restarting transaction.\n");
+ /* TODO: This can be more intelligent. */
+ credits = ocfs_calc_extend_credits(osb->sb,
+ clusters_to_add);
+ status = ocfs_extend_trans(handle, credits);
+ if (status < 0) {
+ /* handle still has to be committed /
+ * aborted at this point. */
+ LOG_ERROR_STATUS(status = -ENOMEM);
+ goto leave;
+ }
+ goto restarted_transaction;
+ }
+ }
+ status = 0;
+
no_alloc:
- /* Update tha file size and add the new one to old one. */
- fe->i_size = file_size;
+ /* this may not be the end of our allocation so only update
+ * i_size to what's appropriate. */
+ if (new_i_size > ((u64)fe->i_clusters << osb->s_clustersize_bits))
+ fe->i_size = (u64)fe->i_clusters << osb->s_clustersize_bits;
+ else
+ fe->i_size = new_i_size;
+#warning "is there a reason why we don't update i_blocks here?"
LOG_TRACE_ARGS("fe: i_clusters = %u, i_size=%llu\n",
fe->i_clusters, fe->i_size);
- /* NOTE: this is a bit of a hack; unlike regular files,
- * system files do not have another opportunity to update
- * the inode/i_private fields */
- if (system_file) {
- OCFS_I(inode)->ip_alloc_size = (u64)fe->i_clusters << osb->s_clustersize_bits;
- inode->i_size = fe->i_size;
- inode->i_blocks = (inode->i_size + osb->sb->s_blocksize - 1) >> osb->sb->s_blocksize_bits;
- }
LOG_TRACE_ARGS("inode: ip_alloc_size=%llu, i_size=%llu\n",
OCFS_I(inode)->ip_alloc_size, inode->i_size);
- if (attr)
- ocfs_fe_set_attributes(fe, attr);
-
fe->i_mtime = OCFS_CURRENT_TIME;
- tempOffset = fe->i_blkno << osb->sb->s_blocksize_bits;
-
status = ocfs_journal_dirty(handle, bh);
if (status < 0) {
LOG_ERROR_STATUS (status);
@@ -1245,22 +1330,29 @@
}
leave:
- if ((passed_handle == NULL) && handle) {
+ if (handle) {
if (status < 0)
ocfs_abort_trans(handle);
else
ocfs_commit_trans(handle);
+ handle = NULL;
}
-
- if (bh != NULL)
- LOG_TRACE_ARGS("bh->b_count = %d\n",
- atomic_read(&(bh->b_count)));
- if (bh != NULL && fe_bh == NULL)
+ if (data_ac) {
+ ocfs_free_alloc_context(data_ac);
+ data_ac = NULL;
+ }
+ if (meta_ac) {
+ ocfs_free_alloc_context(meta_ac);
+ meta_ac = NULL;
+ }
+ if (bh) {
brelse(bh);
-
- if (ext_alloc_inode)
- iput(ext_alloc_inode);
-
+ bh = NULL;
+ }
+ /* everything was reset to NULL above, so a metadata restart can
+ * safely run the whole sequence again. */
+ if ((!status) && restart_func) {
+ restart_func = 0;
+ goto restart_all;
+ }
LOG_EXIT_STATUS (status);
return status;
} /* ocfs_extend_file */
@@ -1277,7 +1369,6 @@
int status;
ocfs_super *osb = NULL;
struct super_block *sb = inode->i_sb;
- int extended = 0;
LOG_SET_CONTEXT(SETATTR);
@@ -1292,8 +1383,6 @@
}
#endif
- down_write(&OCFS_I(inode)->ip_io_sem);
-
if (!dentry->d_parent || !dentry->d_parent->d_inode) {
LOG_ERROR_STR ("bad inode or root inode");
goto bail;
@@ -1327,7 +1416,9 @@
if (attr->ia_valid & ATTR_SIZE) {
if (atomic_read(&OCFS_I(inode)->ip_needs_verification)) {
LOG_TRACE_STR ("OIN_NEEDS_VERIFICATION");
+ down_read(&OCFS_I(inode)->ip_io_sem);
status = ocfs_verify_update_inode (osb, inode);
+ up_read(&OCFS_I(inode)->ip_io_sem);
if (status < 0) {
LOG_ERROR_STATUS (status);
LOG_TRACE_STR ("TODO: disable volume");
@@ -1340,12 +1431,9 @@
ocfs_truncate_inode_pages(inode, newsize);
status = ocfs_truncate_file(osb, newsize,
inode);
+ } else {
+ status = ocfs_extend_file(osb, inode, newsize);
}
- else {
- status = ocfs_extend_file(osb, newsize, NULL,
- inode, attr, 0, NULL);
- extended = 1;
- }
if (status < 0) {
if (status != -EINTR)
LOG_ERROR_STATUS (status);
@@ -1354,10 +1442,6 @@
}
down (&(OCFS_I(inode)->ip_sem));
- if (inode->i_size > newsize) {
- ocfs_extent_map_destroy(&OCFS_I(inode)->ip_ext_map);
- ocfs_extent_map_init (&OCFS_I(inode)->ip_ext_map);
- }
inode->i_size = newsize;
inode->i_blocks = (newsize + sb->s_blocksize - 1) >> sb->s_blocksize_bits;
if (OCFS_I(inode)->ip_open_flags &
@@ -1366,9 +1450,7 @@
OCFS_I(inode)->ip_mmu_private = inode->i_size;
}
up (&(OCFS_I(inode)->ip_sem));
- up_write(&OCFS_I(inode)->ip_io_sem);
status = ocfs2_zero_extend(inode);
- down_write(&OCFS_I(inode)->ip_io_sem);
if (status < 0) {
LOG_ERROR_STATUS(status);
goto bail;
@@ -1376,20 +1458,16 @@
}
status = -EFAIL;
- if (!extended) {
- status = ocfs_change_file_attrib(osb, attr, inode);
- if (status < 0) {
- if (status != -EINTR)
- LOG_ERROR_STATUS (status);
- error = -EIO;
- goto bail;
- }
+ status = ocfs_change_file_attrib(osb, attr, inode);
+ if (status < 0) {
+ if (status != -EINTR)
+ LOG_ERROR_STATUS (status);
+ error = -EIO;
+ goto bail;
}
error = inode_setattr (inode, attr);
bail:
- up_write(&OCFS_I(inode)->ip_io_sem);
-
LOG_EXIT_INT (error);
LOG_CLEAR_CONTEXT();
Modified: trunk/src/file.h
===================================================================
--- trunk/src/file.h 2004-10-02 01:01:24 UTC (rev 1542)
+++ trunk/src/file.h 2004-10-02 01:08:08 UTC (rev 1543)
@@ -33,10 +33,21 @@
extern struct file_operations ocfs_dops;
extern struct inode_operations ocfs_file_iops;
-int ocfs_extend_file(ocfs_super * osb, __u64 file_size,
- ocfs_journal_handle *passed_handle,
- struct inode *inode, struct iattr *attr,
- int system_file, struct buffer_head *fe_bh);
+/* Why ocfs_extend_allocation() returned -EAGAIN. */
+enum ocfs2_alloc_restarted {
+ /* not enough transaction credits left on the handle */
+ RESTART_TRANS = 0,
+ /* no free extents and no / not enough reserved metadata */
+ RESTART_META
+};
+/* Add clusters_to_add clusters to the file entry in fe_bh inside an
+ * already started transaction. Returns 0, a negative status, or
+ * -EAGAIN with *reason (if non-NULL) set to one of the values above. */
+int ocfs_extend_allocation(ocfs_super *osb,
+ struct inode *inode,
+ u32 clusters_to_add,
+ struct buffer_head *fe_bh,
+ ocfs_journal_handle *handle,
+ ocfs2_alloc_context *data_ac,
+ ocfs2_alloc_context *meta_ac,
+ enum ocfs2_alloc_restarted *reason);
+/* Extend inode's allocation and on-disk size up to new_i_size; takes
+ * its own lock and transaction. Returns 0 or a negative status. */
+int ocfs_extend_file(ocfs_super *osb,
+ struct inode *inode,
+ u64 new_i_size);
int ocfs_inode_fill_ext_map(ocfs_super *osb, struct buffer_head *fe_bh,
struct inode *inode);
int ocfs_setattr(struct dentry *dentry, struct iattr *attr);
Modified: trunk/src/inode.c
===================================================================
--- trunk/src/inode.c 2004-10-02 01:01:24 UTC (rev 1542)
+++ trunk/src/inode.c 2004-10-02 01:08:08 UTC (rev 1543)
@@ -47,6 +47,7 @@
#include "inode.h"
#include "lockres.h"
#include "namei.h"
+#include "suballoc.h"
#include "super.h"
#include "symlink.h"
#include "sysfile.h"
@@ -537,10 +538,12 @@
void ocfs_delete_inode(struct inode *inode)
{
struct inode *orphan_dir_inode = NULL;
+ struct inode *inode_alloc_inode = NULL;
ocfs_journal_handle *handle = NULL;
ocfs_super *osb = OCFS_SB(inode->i_sb);
int status = 0;
struct buffer_head *orphan_dir_bh = NULL;
+ struct buffer_head *inode_alloc_bh = NULL;
struct buffer_head *fe_bh = NULL;
ocfs2_dinode *fe;
@@ -550,18 +553,18 @@
if (OCFS_I(inode)->ip_flags & OCFS_INODE_SYSTEM_FILE) {
LOG_TRACE_STR("Skipping system file delete.");
- goto clear_inode;
+ goto bail;
}
if (inode == osb->root_inode) {
LOG_TRACE_STR("Skipping root inode delete.");
- goto clear_inode;
+ goto bail;
}
if (OCFS_I(inode)->ip_flags & OCFS_INODE_SKIP_DELETE) {
LOG_TRACE_ARGS("Skipping delete of %lu because another node "
"has done this for us.\n", inode->i_ino);
- goto clear_inode;
+ goto bail;
}
/* If we're coming from process_vote we can't go into our own
@@ -572,17 +575,9 @@
if (osb->voting_ino == inode->i_ino) {
LOG_TRACE_ARGS("Skipping delete of %lu because we're currently"
"in process_vote\n", inode->i_ino);
- goto clear_inode;
+ goto bail;
}
- orphan_dir_inode = ocfs_get_system_file_inode(osb,
- ORPHAN_DIR_SYSTEM_INODE,
- -1);
- if (!orphan_dir_inode) {
- LOG_ERROR_STATUS(-EFAIL);
- goto clear_inode;
- }
-
/* acquire_lock and friends will igrab / iput this guy, so we
* take an extra ref. to avoid recursive calls to
* delete_inode. */
@@ -597,7 +592,7 @@
* about deleting it. */
if (status != -EBUSY)
LOG_ERROR_STATUS(status);
- goto clear_inode;
+ goto bail;
}
fe = (ocfs2_dinode *) fe_bh->b_data;
@@ -605,51 +600,84 @@
/* for lack of a better error? */
status = -EEXIST;
LOG_ERROR_STATUS(status);
- goto clear_inode;
+ goto bail;
}
/* has someone already deleted us?! baaad... */
if (fe->i_dtime) {
status = -EEXIST;
LOG_ERROR_STATUS(status);
- goto clear_inode;
+ goto bail;
}
if (fe->i_links_count) {
status = -EBUSY;
LOG_ERROR_STATUS(status);
- goto clear_inode;
+ goto bail;
}
/* Oop, lets be carefull of lock / trans ordering here... */
- handle = ocfs_start_trans(osb, NULL, OCFS_FILE_DELETE_CREDITS);
+ handle = ocfs_alloc_handle(osb);
if (handle == NULL) {
- unlock_kernel();
- LOG_ERROR_STATUS(-ENOMEM);
- goto clear_inode;
+ status = -ENOMEM;
+ LOG_ERROR_STATUS(status);
+ goto bail;
}
- ocfs_handle_add_inode(handle, orphan_dir_inode);
-
- lock_kernel();
-
+ orphan_dir_inode = ocfs_get_system_file_inode(osb,
+ ORPHAN_DIR_SYSTEM_INODE,
+ -1);
+ if (!orphan_dir_inode) {
+ status = -EEXIST;
+ LOG_ERROR_STATUS(status);
+ goto bail;
+ }
status = ocfs_acquire_lock(osb, OCFS_LKM_EXMODE, 0,
&orphan_dir_bh, orphan_dir_inode);
if (status < 0) {
LOG_ERROR_STATUS(status);
- goto bail_locked;
+ goto bail;
}
ocfs_handle_add_lock(handle, OCFS_LKM_EXMODE, 0,
orphan_dir_inode);
+ ocfs_handle_add_inode(handle, orphan_dir_inode);
+ inode_alloc_inode = ocfs_get_system_file_inode(osb, INODE_ALLOC_SYSTEM_INODE, fe->i_suballoc_node);
+ if (!inode_alloc_inode) {
+ status = -EEXIST;
+ LOG_ERROR_STATUS(status);
+ goto bail;
+ }
+ status = ocfs_acquire_lock(osb, OCFS_LKM_EXMODE, 0,
+ &inode_alloc_bh, inode_alloc_inode);
+ if (status < 0) {
+ LOG_ERROR_STATUS(status);
+ goto bail;
+ }
+ ocfs_handle_add_lock(handle, OCFS_LKM_EXMODE, 0,
+ inode_alloc_inode);
+ ocfs_handle_add_inode(handle, inode_alloc_inode);
+
+ handle = ocfs_start_trans(osb, handle, OCFS_FILE_DELETE_CREDITS);
+ if (handle == NULL) {
+ status = -ENOMEM;
+ LOG_ERROR_STATUS(status);
+ goto bail;
+ }
+ ocfs_handle_set_always_commits(handle, 1);
+
status = ocfs_orphan_del(osb, handle, orphan_dir_inode, inode,
orphan_dir_bh);
+ if (status < 0) {
+ LOG_ERROR_STATUS(status);
+ goto bail;
+ }
/* set the inodes dtime */
status = ocfs_journal_access(handle, fe_bh, OCFS_JOURNAL_ACCESS_WRITE);
if (status < 0) {
LOG_ERROR_STATUS(status);
- goto bail_locked;
+ goto bail;
}
fe->i_dtime = OCFS_CURRENT_TIME;
@@ -658,38 +686,35 @@
status = ocfs_journal_dirty(handle, fe_bh);
if (status < 0) {
LOG_ERROR_STATUS(status);
- goto bail_locked;
+ goto bail;
}
- /* actually delete the data and the inode */
- status = ocfs_free_file_extents(osb, fe_bh, handle, inode);
+ status = ocfs_free_suballoc_bits(osb, handle, inode_alloc_inode,
+ inode_alloc_bh, fe->i_suballoc_bit,
+ fe->i_blkno, 1);
if (status < 0) {
LOG_ERROR_STATUS(status);
- goto bail_locked;
+ goto bail;
}
- status = ocfs_handle_add_commit_bits(handle, 1,
- fe->i_suballoc_blkno << osb->sb->s_blocksize_bits,
- fe->i_suballoc_node,
- DISK_ALLOC_INODE);
+ /* actually delete the data and the inode */
+ status = ocfs_free_file_extents(osb, fe_bh, handle, inode);
if (status < 0)
LOG_ERROR_STATUS(status);
-bail_locked:
- if (handle && (status == 0))
+bail:
+ if (handle)
ocfs_commit_trans(handle);
- else if (handle)
- ocfs_abort_trans(handle);
-
- unlock_kernel();
-
-clear_inode:
if (orphan_dir_bh)
brelse(orphan_dir_bh);
+ if (inode_alloc_bh)
+ brelse(inode_alloc_bh);
if (fe_bh)
brelse(fe_bh);
if (orphan_dir_inode)
iput(orphan_dir_inode);
+ if (inode_alloc_inode)
+ iput(inode_alloc_inode);
/* we must clear inode. */
clear_inode(inode);
@@ -733,7 +758,7 @@
/* blkno == 0 if this inode is newly created and hasn't been
* filled in yet. */
if (OCFS_I(inode)->ip_blkno == 0) {
- LOG_ERROR_STR("uhm, blkno = 0!");
+ LOG_TRACE_STR("uhm, blkno = 0!");
goto bail;
}
@@ -767,11 +792,11 @@
* stuff in ocfs_get_block (that is, ocfs_get_block pretty much
* expects never to extend).
*/
-struct buffer_head *ocfs_bread(ocfs_journal_handle *handle, struct inode * inode,
- int block, int create, int *err, int reada)
+struct buffer_head *ocfs_bread(struct inode * inode,
+ int block, int *err, int reada)
{
struct buffer_head * bh = NULL;
- int fatal = 0, tmperr, new = 0;
+ int tmperr;
ocfs_super *osb;
__s64 vbo, lbo;
int readflags = OCFS_BH_CACHED;
@@ -779,79 +804,32 @@
osb = OCFS_SB(inode->i_sb);
vbo = (__s64) block << inode->i_sb->s_blocksize_bits;
- OCFS_ASSERT(!create || handle);
-
#warning only turn this on if we know we can deal with read_bh returning nothing
#if 0
if (reada)
readflags |= OCFS_BH_READAHEAD;
#endif
- if (vbo >= inode->i_size) {
- if (!create) {
- *err = -ENOSPC;
- return NULL;
- }
- new = 1;
- }
+ OCFS_ASSERT((vbo < inode->i_size) || reada);
+ if (vbo >= inode->i_size)
+ return(NULL);
- /* ???: do we need ip_sem? should have i_sem i think */
- if (vbo >= OCFS_I(inode)->ip_alloc_size) {
- int vbo_pad;
-
- vbo_pad = inode->i_sb->s_blocksize;
- vbo_pad -= vbo & (s64)(inode->i_sb->s_blocksize - 1);
-
- *err = ocfs_extend_file(osb,
- vbo + vbo_pad,
- handle, inode, NULL, 0, NULL);
- if (*err < 0) {
- *err = -ENOSPC;
- return NULL;
- }
- /*
- * fe->i_size will be vbo + padding to blocksize here,
- * and i_blocks will be whatever is actually allocated.
- * i_size will be changed by caller (ocfs_add_entry) if
- * we return !NULL.
- */
- }
-
/* do we need extend sem? no extend dlm message for dirs */
/*
* UGLY: last argument to lookup_file_allocation() (locked) is
* forced to '1' here, even though we don't have the lock. This
* is to force fast, unlocked operation. Get A Real DLM.
*/
- tmperr = ocfs_lookup_file_allocation(osb, vbo, &lbo, osb->sb->s_blocksize, NULL,
+ tmperr = ocfs_lookup_file_allocation(osb, vbo, &lbo,
+ osb->sb->s_blocksize, NULL,
inode, 1);
if (tmperr < 0)
goto fail;
- if (new) {
- bh = sb_getblk(osb->sb, lbo >> osb->sb->s_blocksize_bits);
- if (!bh) {
- tmperr = -EIO;
- goto fail;
- }
- set_buffer_uptodate(bh);
- SET_BH_SEQNUM(inode, bh);
+ tmperr = ocfs_read_bh(osb, lbo, &bh, readflags, inode);
+ if (tmperr < 0)
+ goto fail;
- fatal = ocfs_journal_access(handle, bh,
- OCFS_JOURNAL_ACCESS_CREATE);
- if (fatal)
- goto fail;
-
- memset(bh->b_data, 0, osb->sb->s_blocksize);
- fatal = ocfs_journal_dirty(handle, bh);
- if (fatal)
- goto fail;
- } else {
- tmperr = ocfs_read_bh(osb, lbo, &bh, readflags, inode);
- if (tmperr < 0)
- goto fail;
- }
-
tmperr = 0;
*err = 0;
@@ -936,6 +914,67 @@
} /* ocfs_inode_revalidate */
/*
+ * ocfs_mark_inode_dirty
+ *
+ * Updates a disk inode from a struct inode and dirties it in the
+ * journal. Only takes ip_sem.
+ *
+ * handle - journal handle handed to ocfs_journal_access/ocfs_journal_dirty
+ * inode  - in-memory inode whose fields are copied out
+ * bh     - buffer head whose b_data holds the ocfs2_dinode to update
+ *
+ * Returns 0 on success, or the negative status of the journal call
+ * that failed.
+ */
+int ocfs_mark_inode_dirty(ocfs_journal_handle *handle,
+			  struct inode *inode,
+			  struct buffer_head *bh)
+{
+	int status;
+	ocfs2_dinode *fe = (ocfs2_dinode *) bh->b_data;
+	ocfs_super *osb = OCFS_SB(inode->i_sb);
+
+#warning "need to check the casts and the endian-ness in this function"
+	LOG_ENTRY_ARGS("(inode %llu)\n", OCFS_I(inode)->ip_blkno);
+
+	status = ocfs_journal_access(handle, bh, OCFS_JOURNAL_ACCESS_WRITE);
+	if (status < 0) {
+		LOG_ERROR_STATUS(status);
+		goto leave;
+	}
+
+	/* ip_sem covers the fields read out of the ocfs private inode. */
+	down(&OCFS_I(inode)->ip_sem);
+	if (le32_to_cpu(fe->i_flags) & OCFS2_BITMAP_FL) {
+		fe->id1.bitmap1.i_used =
+			cpu_to_le32(OCFS_I(inode)->u.ip_bitinfo.used_bits);
+		fe->id1.bitmap1.i_total =
+			cpu_to_le32(OCFS_I(inode)->u.ip_bitinfo.total_bits);
+	}
+
+	/* Shift first, then narrow. A cast binds tighter than '>>', so
+	 * casting ip_alloc_size on its own would throw away its upper
+	 * bits before the byte count is converted to clusters. */
+	fe->i_clusters =
+		(u32)(OCFS_I(inode)->ip_alloc_size >> osb->s_clustersize_bits);
+	up(&OCFS_I(inode)->ip_sem);
+
+	fe->i_size = (u64)inode->i_size;
+	fe->i_links_count = inode->i_nlink;
+	fe->i_uid = inode->i_uid;
+	fe->i_gid = inode->i_gid;
+	fe->i_mode = inode->i_mode;
+	fe->i_atime = ocfs_get_seconds(inode->i_atime);
+	fe->i_ctime = ocfs_get_seconds(inode->i_ctime);
+	fe->i_mtime = ocfs_get_seconds(inode->i_mtime);
+#warning "do we want to update these here?"
+//	fe->i_dtime = ocfs_get_seconds(inode->i_dtime);
+//	fe->i_generation = inode->i_generation;
+
+	status = ocfs_journal_dirty(handle, bh);
+	if (status < 0) {
+		LOG_ERROR_STATUS(status);
+		goto leave;
+	}
+
+	status = 0;
+leave:
+
+	LOG_EXIT_STATUS(status);
+	return(status);
+} /* ocfs_mark_inode_dirty */
+
+/*
* ocfs_refresh_inode
*
* Updates a struct inode from a disk inode.
Modified: trunk/src/inode.h
===================================================================
--- trunk/src/inode.h 2004-10-02 01:01:24 UTC (rev 1542)
+++ trunk/src/inode.h 2004-10-02 01:08:08 UTC (rev 1543)
@@ -29,9 +29,8 @@
#ifndef OCFS2_INODE_H
#define OCFS2_INODE_H
-struct buffer_head *ocfs_bread(ocfs_journal_handle *handle,
- struct inode * inode, int block,
- int create, int *err, int reada);
+struct buffer_head *ocfs_bread(struct inode * inode, int block,
+ int *err, int reada);
void ocfs_clear_inode(struct inode *inode);
void ocfs_delete_inode(struct inode *inode);
struct inode *ocfs_iget(ocfs_super *osb, __u64 feoff);
@@ -47,7 +46,10 @@
void ocfs_sync_blockdev(struct super_block *sb);
int ocfs_verify_update_inode(ocfs_super *osb, struct inode *inode);
int ocfs_refresh_inode(struct inode *inode,
- ocfs2_dinode *fe);
+ ocfs2_dinode *fe);
+int ocfs_mark_inode_dirty(ocfs_journal_handle *handle,
+ struct inode *inode,
+ struct buffer_head *bh);
#ifdef AIO_ENABLED
int ocfs_kvec_read(struct file *file, kvec_cb_t cb, size_t size, loff_t pos);
int ocfs_kvec_write(struct file *file, kvec_cb_t cb, size_t size, loff_t pos);
Modified: trunk/src/journal.c
===================================================================
--- trunk/src/journal.c 2004-10-02 01:01:24 UTC (rev 1542)
+++ trunk/src/journal.c 2004-10-02 01:08:08 UTC (rev 1543)
@@ -41,6 +41,7 @@
#include "extmap.h"
#include "inode.h"
#include "journal.h"
+#include "localalloc.h"
#include "lockres.h"
#include "namei.h"
#include "nm.h"
@@ -426,6 +427,7 @@
BUG();
OCFS_ASSERT(osb->journal->state != OCFS_JOURNAL_FREE);
+ OCFS_ASSERT(max_buffs > 0);
/* JBD might support this, but our journalling code doesn't yet. */
if (journal_current_handle()) {
@@ -492,29 +494,15 @@
{
OCFS_ASSERT(handle);
OCFS_ASSERT(inode);
- OCFS_ASSERT((handle->flags & OCFS_HANDLE_STARTED));
- if (OCFS_I(inode)->ip_handle == handle) {
- /* sanity check */
- if (list_empty(&OCFS_I(inode)->ip_handle_list))
- BUG();
-
- /* I think this can happen to the main bitmap inode if
- * we extend a regular file and also have to extend a
- * system file in the same transaction */
- LOG_TRACE_ARGS("Inode %lu already added to transaction!\n",
- inode->i_ino);
- return;
- }
-
atomic_inc(&inode->i_count);
/* we're obviously changing it... */
down_write(&OCFS_I(inode)->ip_io_sem);
/* sanity check */
- if (OCFS_I(inode)->ip_handle)
- BUG();
+ OCFS_ASSERT(!OCFS_I(inode)->ip_handle);
+ OCFS_ASSERT(list_empty(&OCFS_I(inode)->ip_handle_list));
OCFS_I(inode)->ip_handle = handle;
list_del(&(OCFS_I(inode)->ip_handle_list));
@@ -534,8 +522,7 @@
inode = ip->ip_inode;
OCFS_I(inode)->ip_handle = NULL;
- list_del(&OCFS_I(inode)->ip_handle_list);
- INIT_LIST_HEAD(&OCFS_I(inode)->ip_handle_list);
+ list_del_init(&OCFS_I(inode)->ip_handle_list);
up_write(&OCFS_I(inode)->ip_io_sem);
iput(inode);
@@ -571,9 +558,10 @@
OCFS_ASSERT(!handle->num_co);
OCFS_ASSERT(!handle->num_buffs);
- osb = handle->osb;
+ ocfs_handle_unlock_inodes(handle);
/* You are allowed to add journal locks before the transaction
* has started. */
+ osb = handle->osb;
ocfs_handle_move_locks(osb->journal, handle);
spin_lock(&osb->journal->cmt_lock);
osb->needs_flush = 1;
@@ -639,9 +627,11 @@
handle->k_handle = NULL; /* it's been free'd in journal_stop */
- for(i = 0; i < handle->num_buffs; i++) {
- brelse(handle->buffs[i]);
- handle->buffs[i] = NULL;
+ if (!(handle->flags & OCFS_HANDLE_ALWAYS_COMMITS)) {
+ for(i = 0; i < handle->num_buffs; i++) {
+ brelse(handle->buffs[i]);
+ handle->buffs[i] = NULL;
+ }
}
handle->num_buffs = 0;
if (handle->buffs) {
@@ -801,6 +791,115 @@
return;
} /* ocfs_abort_trans */
+/*
+ * 'nblocks' is what you want to add to the current
+ * transaction. extend_trans will either extend the current handle by
+ * nblocks, or commit it and start a new one with nblocks credits.
+ *
+ * WARNING: This will not release any semaphores or disk locks taken
+ * during the transaction, so make sure they were taken *before*
+ * start_trans or we'll have ordering deadlocks.
+ *
+ * This function would be a lot simpler if we didn't have to worry
+ * about abort.
+ *
+ * Returns 0 on success, the negative status from journal_extend /
+ * journal_restart on failure, or -ENOMEM when the handle's buffer
+ * tracking arrays cannot be reallocated.
+ */
+int ocfs_extend_trans(ocfs_journal_handle *handle, int nblocks)
+{
+	int status, new_max_buffs, new_num_co, new_num_buffs, i;
+	int restarted = 0;
+	struct buffer_head **new_buffs = NULL;
+	ocfs_journal_copyout *new_co_buffs = NULL;
+
+	OCFS_ASSERT(handle);
+	OCFS_ASSERT(handle->flags & OCFS_HANDLE_STARTED);
+	OCFS_ASSERT(nblocks);
+
+	LOG_ENTRY();
+
+	printk("Trying to extend transaction by %d blocks\n", nblocks);
+
+	status = journal_extend(handle->k_handle, nblocks);
+	if (status < 0) {
+		LOG_ERROR_STATUS(status);
+		goto bail;
+	}
+
+	if (status > 0) {
+		/* No room to grow in place: commit what we have and
+		 * continue in a fresh transaction of nblocks credits. */
+		printk("journal_extend failed, trying journal_restart\n");
+		status = journal_restart(handle->k_handle, nblocks);
+		if (status < 0) {
+#warning we need to handle this better
+			handle->k_handle = NULL;
+			LOG_ERROR_STATUS(status);
+			goto bail;
+		}
+
+		restarted = 1;
+		new_num_co = 0;
+		new_num_buffs = 0;
+		new_max_buffs = nblocks;
+	} else {
+		new_num_co = handle->num_co;
+		new_num_buffs = handle->num_buffs;
+		new_max_buffs = handle->max_buffs + nblocks;
+	}
+
+	new_buffs = ocfs_malloc(sizeof(struct buffer_head *) * new_max_buffs);
+	if (!new_buffs) {
+		/* status is 0 here from the successful journal call, so
+		 * it must be set or the caller is told we succeeded. */
+		status = -ENOMEM;
+		LOG_ERROR_STR("Failed to allocate memory for journal buffs!");
+		goto bail;
+	}
+	memset(new_buffs, 0, sizeof(struct buffer_head *) * new_max_buffs);
+
+	new_co_buffs = ocfs_malloc(sizeof(ocfs_journal_copyout)*new_max_buffs);
+	if (!new_co_buffs) {
+		status = -ENOMEM;
+		kfree(new_buffs);
+		LOG_ERROR_STR("Failed to allocate memory for co_buffs!");
+		goto bail;
+	}
+	memset(new_co_buffs, 0, sizeof(ocfs_journal_copyout) * new_max_buffs);
+
+	if (restarted) {
+		/* only brelse and free copyout buffers if we restarted. */
+		if (!(handle->flags & OCFS_HANDLE_ALWAYS_COMMITS)) {
+			for(i = 0; i < handle->num_buffs; i++) {
+				brelse(handle->buffs[i]);
+				handle->buffs[i] = NULL;
+			}
+		}
+
+		if (handle->buffs) {
+			kfree(handle->buffs);
+			handle->buffs = NULL;
+		}
+
+		ocfs_handle_free_all_copyout(handle);
+	} else {
+		/* Same transaction: carry the tracked buffers over into
+		 * the larger arrays before dropping the old ones. */
+		if (handle->num_buffs)
+			memcpy(new_buffs, handle->buffs,
+			       sizeof(*new_buffs) * handle->num_buffs);
+		if (handle->num_co)
+			memcpy(new_co_buffs, handle->co_buffs,
+			       sizeof(*new_co_buffs) * handle->num_co);
+
+		kfree(handle->buffs);
+		kfree(handle->co_buffs);
+	}
+
+	handle->buffs = new_buffs;
+	handle->num_buffs = new_num_buffs;
+	handle->max_buffs = new_max_buffs;
+	handle->co_buffs = new_co_buffs;
+	handle->num_co = new_num_co;
+	status = 0;
+bail:
+
+	LOG_EXIT_STATUS(status);
+	return(status);
+}
+
/*
* ocfs_journal_access
*/
@@ -927,21 +1026,16 @@
*/
int ocfs_journal_dirty(ocfs_journal_handle *handle, struct buffer_head *bh)
{
- int status = -1;
- int i;
+ int status = -EINVAL;
+ int i = 0;
OCFS_ASSERT((handle->flags & OCFS_HANDLE_STARTED));
LOG_ENTRY_ARGS("(bh->b_blocknr=%llu)\n",
(unsigned long long)bh->b_blocknr);
- if (handle->num_buffs >= handle->max_buffs) {
- LOG_ERROR_ARGS("Cannot add buffer to full transaction! "
- "num_buffs=%d, max_buffs=%d, block=%llu\n",
- handle->num_buffs, handle->max_buffs,
- (unsigned long long)bh->b_blocknr);
- goto done;
- }
+ if (handle->flags & OCFS_HANDLE_ALWAYS_COMMITS)
+ goto call_jbd;
/* First, make sure we aren't already in the list. If we've
* already been added, then that's OK as JBD knows how to
@@ -956,6 +1050,8 @@
}
}
+ OCFS_ASSERT(handle->num_buffs < handle->max_buffs);
+
i = handle->num_buffs;
/* Increase the ref count on this buffer. We
* do this because we still want to keep them
@@ -970,10 +1066,6 @@
LOG_ERROR_ARGS("Could not dirty metadata buffer. "
"(bh->b_blocknr=%llu)\n",
(unsigned long long)bh->b_blocknr);
- LOG_TRACE_ARGS("Setting handle->buffs[%d] = NULL\n", i);
- brelse(bh);
- handle->buffs[i] = NULL;
- handle->num_buffs--;
goto done;
}
@@ -1097,15 +1189,11 @@
goto done;
}
- down_write(&OCFS_I(inode)->ip_io_sem);
-
SET_INODE_JOURNAL(inode);
status = ocfs_acquire_lock (osb, OCFS_LKM_EXMODE,
0, &bh, inode);
if (status < 0) {
- up_write(&OCFS_I(inode)->ip_io_sem);
-
if (status != -EINTR)
LOG_ERROR_STR("Could not get lock on journal!");
goto done;
@@ -1118,9 +1206,6 @@
"You must run tuneocfs to add a journal for this node.\n",
fe->i_size, OCFS_JOURNAL_DEFAULT_SIZE);
status = -EINVAL;
- fe = NULL;
- up_write(&OCFS_I(inode)->ip_io_sem);
-
goto done;
}
@@ -1141,8 +1226,6 @@
OCFS_I(inode)->ip_open_cnt++;
- up_write(&OCFS_I(inode)->ip_io_sem);
-
/* call the kernels journal init function now */
k_journal = journal_init_inode(inode);
if (k_journal == NULL) {
@@ -1525,8 +1608,6 @@
goto done;
}
- down_write(&OCFS_I(inode)->ip_io_sem);
-
SET_INODE_JOURNAL(inode);
/* Should not ever be called to recover ourselves -- in that
@@ -1537,7 +1618,6 @@
status = ocfs_acquire_lock (osb, OCFS_LKM_EXMODE, FLAG_FILE_RECOVERY,
&bh, inode);
- up_write(&OCFS_I(inode)->ip_io_sem);
if (status < 0) {
LOG_TRACE_ARGS("status returned from acquire_lock=%d\n",
status);
@@ -1684,7 +1764,6 @@
goto bail;
}
- down_write(&OCFS_I(orphan_dir_inode)->ip_io_sem);
status = ocfs_acquire_lock_ro(osb, orphan_dir_inode);
if (status < 0) {
LOG_ERROR_STATUS(status);
@@ -1694,13 +1773,15 @@
offset = 0;
iter = NULL;
+ down_read(&OCFS_I(orphan_dir_inode)->ip_io_sem);
while(offset < orphan_dir_inode->i_size) {
blk = offset >> sb->s_blocksize_bits;
- bh = ocfs_bread(NULL, orphan_dir_inode, blk, 0, &status, 0);
+ bh = ocfs_bread(orphan_dir_inode, blk, &status, 0);
if (!bh)
status = -EINVAL;
if (status < 0) {
+ up_read(&OCFS_I(orphan_dir_inode)->ip_io_sem);
if (bh)
brelse(bh);
LOG_ERROR_STATUS(status);
@@ -1714,6 +1795,7 @@
if (!ocfs_check_dir_entry(orphan_dir_inode,
de, bh, local)) {
+ up_read(&OCFS_I(orphan_dir_inode)->ip_io_sem);
status = -EINVAL;
LOG_ERROR_STATUS(status);
brelse (bh);
@@ -1754,6 +1836,7 @@
}
brelse(bh);
}
+ up_read(&OCFS_I(orphan_dir_inode)->ip_io_sem);
status = ocfs_release_lock_ro(osb, orphan_dir_inode);
have_disk_lock = 0;
@@ -1762,7 +1845,6 @@
goto bail;
}
- up_write(&OCFS_I(orphan_dir_inode)->ip_io_sem);
iput(orphan_dir_inode);
orphan_dir_inode = NULL;
@@ -1781,10 +1863,9 @@
LOG_ERROR_STATUS(tmpstat);
}
- if (orphan_dir_inode) {
- up_write(&OCFS_I(orphan_dir_inode)->ip_io_sem);
+ if (orphan_dir_inode)
iput(orphan_dir_inode);
- }
+
return(status);
}
Added: trunk/src/localalloc.c
===================================================================
--- trunk/src/localalloc.c 2004-10-02 01:01:24 UTC (rev 1542)
+++ trunk/src/localalloc.c 2004-10-02 01:08:08 UTC (rev 1543)
@@ -0,0 +1,1013 @@
+/* -*- mode: c; c-basic-offset: 8; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
+ * localalloc.c
+ *
+ * Node local data allocation
+ *
+ * Copyright (C) 2002, 2004 Oracle. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ *
+ * Authors: Kurt Hackel, Mark Fasheh, Sunil Mushran, Wim Coekaerts,
+ * Manish Singh, Neeraj Goyal, Suchit Kaura
+ */
+
+#define OCFS_DEBUG_CONTEXT OCFS_DEBUG_CONTEXT_LOCALALLOC
+
+#include "ocfs_compat.h"
+
+#include <linux/fs.h>
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/highmem.h>
+
+#include "ocfs_log.h"
+#include "ocfs.h"
+
+#include "alloc.h"
+#include "bitmap.h"
+#include "dlm.h"
+#include "extmap.h"
+#include "inode.h"
+#include "localalloc.h"
+#include "util.h"
+#include "suballoc.h"
+#include "sysfile.h"
+#include "file.h"
+
+#include "ocfs_journal.h"
+#include "buffer_head_io.h"
+
+/* The largest cluster size where we even consider using local alloc. */
+#define OCFS_LOCAL_ALLOC_MAX_CSIZE (128 * 1024)
+
+/* The largest allocation to use the local bitmap for. */
+#define OCFS_LOCAL_ALLOC_MAX_ALLOC (2 * 1024 * 1024)
+
+static inline int ocfs_local_alloc_window_bits(ocfs_super *osb);
+
+static __u32 ocfs_local_alloc_count_bits(ocfs2_dinode *alloc);
+
+static int ocfs_local_alloc_find_clear_bits(ocfs_super *osb,
+ ocfs2_dinode *alloc,
+ __u32 numbits);
+
+static void ocfs_clear_local_alloc(ocfs2_dinode *alloc);
+
+static int ocfs_sync_local_from_shutdown(ocfs_super *osb,
+ ocfs_bitmap_free_head **f,
+ struct buffer_head *local_alloc_bh,
+ int in_recovery);
+
+static int ocfs_sync_local_to_main(ocfs_super *osb,
+ ocfs_journal_handle *handle,
+ ocfs2_dinode *alloc,
+ struct inode *main_bm_inode,
+ struct buffer_head *main_bm_bh);
+
+static int ocfs_local_alloc_reserve_for_window(ocfs_super *osb,
+ ocfs_journal_handle *handle,
+ ocfs2_alloc_context **ac,
+ struct inode **bitmap_inode,
+ struct buffer_head **bitmap_bh);
+
+static int ocfs_local_alloc_new_window(ocfs_super *osb,
+ ocfs_journal_handle *handle,
+ ocfs2_alloc_context *ac);
+
+static int ocfs_local_alloc_slide_window(ocfs_super *osb,
+ struct inode * local_alloc_inode);
+
+/*
+ * ocfs_local_alloc_window_bits
+ *
+ * Size of the local alloc window, in bits, for this volume's cluster
+ * size. This is a *testing* default and is meant to be replaceable --
+ * just swap out this function. Current mapping:
+ *
+ * 4k -> 1024 bits, 8k -> 512 bits, 16k -> 256 bits,
+ * 32k -> 128 bits, anything else -> 64 bits
+ */
+static inline int ocfs_local_alloc_window_bits(ocfs_super *osb)
+{
+	switch (osb->s_clustersize) {
+	case (4*1024):
+		return(1024);
+	case (8*1024):
+		return(512);
+	case (16*1024):
+		return(256);
+	case (32*1024):
+		return(128);
+	default:
+		return(64);
+	}
+} /* ocfs_local_alloc_window_bits */
+
+/*
+ * Returns 1 when a request for 'bits' clusters should be served from
+ * the local alloc, 0 otherwise. The request qualifies only if local
+ * alloc is enabled, its byte size is within
+ * OCFS_LOCAL_ALLOC_MAX_ALLOC, and it fits inside one window.
+ */
+int ocfs_alloc_should_use_local(ocfs_super *osb, u64 bits)
+{
+	if (!osb->have_local_alloc)
+		return(0);
+
+	if ((bits<<osb->s_clustersize_bits) > OCFS_LOCAL_ALLOC_MAX_ALLOC)
+		return(0);
+
+	if (bits > ocfs_local_alloc_window_bits(osb))
+		return(0);
+
+	return(1);
+}
+
+/*
+ * ocfs_load_local_alloc
+ *
+ * Read this node's local alloc inode off disk, check that it is clean
+ * and sanely sized, and cache its buffer on osb.
+ *
+ * On success osb->local_alloc_bh holds the buffer and
+ * osb->have_local_alloc is set. If the cluster size is larger than
+ * OCFS_LOCAL_ALLOC_MAX_CSIZE nothing is loaded and 0 is returned.
+ *
+ * Returns 0 on success, negative status on error (the buffer is
+ * released in that case).
+ */
+int ocfs_load_local_alloc(ocfs_super *osb)
+{
+	int status = 0;
+	ocfs2_dinode *alloc = NULL;
+	struct buffer_head *alloc_bh = NULL;
+	__u32 num_used;
+	struct inode *inode = NULL;
+
+	LOG_ENTRY();
+
+	/* we don't enable local alloc on cluster sizes > 128k */
+	if (osb->s_clustersize > OCFS_LOCAL_ALLOC_MAX_CSIZE)
+		goto bail;
+
+	/* read the alloc off disk */
+	inode = ocfs_get_system_file_inode(osb, LOCAL_ALLOC_SYSTEM_INODE,
+					   osb->node_num);
+	if (!inode) {
+		/* Assign outside the macro so the returned code does not
+		 * depend on how LOG_ERROR_STATUS treats its argument. */
+		status = -EINVAL;
+		LOG_ERROR_STATUS(status);
+		goto bail;
+	}
+	status = ocfs_read_bh(osb,
+			      OCFS_I(inode)->ip_blkno << inode->i_sb->s_blocksize_bits,
+			      &alloc_bh, 0, inode);
+	if (status < 0) {
+		LOG_ERROR_STATUS(status);
+		goto bail;
+	}
+
+	alloc = (ocfs2_dinode *) alloc_bh->b_data;
+
+	/* do a little verification. */
+	num_used = ocfs_local_alloc_count_bits(alloc);
+
+	/* nowadays the local alloc has always been recovered before
+	 * we load it so there should be no bits used from the main
+	 * bitmap. */
+	if (num_used
+	    || LOCAL_ALLOC(alloc)->la_bits_set
+	    || LOCAL_ALLOC(alloc)->la_bm_bits
+	    || LOCAL_ALLOC(alloc)->la_bm_off) {
+		LOG_ERROR_ARGS("Local alloc hasn't been recovered!\n"
+			       "found = %u, set = %u, taken = %u, off = %u\n",
+			       num_used,
+			       LOCAL_ALLOC(alloc)->la_bits_set,
+			       LOCAL_ALLOC(alloc)->la_bm_bits,
+			       LOCAL_ALLOC(alloc)->la_bm_off);
+		status = -EFAIL;
+		goto bail;
+	}
+
+	if (!LOCAL_ALLOC(alloc)->la_size ||
+	    (LOCAL_ALLOC(alloc)->la_size > ocfs2_local_alloc_size(inode->i_sb))) {
+		LOG_ERROR_ARGS("Local alloc size is invalid (la_size = %u)\n",
+			       LOCAL_ALLOC(alloc)->la_size);
+		status = -EINVAL;
+		goto bail;
+	}
+
+	/* The buffer reference is handed over to osb on success. */
+	osb->local_alloc_bh = alloc_bh;
+	osb->have_local_alloc = 1;
+
+bail:
+	if ((status < 0) && alloc_bh)
+		brelse(alloc_bh);
+	if (inode)
+		iput(inode);
+
+	LOG_EXIT_STATUS(status);
+	return(status);
+} /* ocfs_load_local_alloc */
+
+/*
+ * ocfs_shutdown_local_alloc
+ *
+ * Return any unused bits to the bitmap and write out a clean
+ * local_alloc.
+ *
+ * The local alloc buffer is taken from osb->local_alloc_bh. On the
+ * success path it is brelse'd and osb->local_alloc_bh is set to NULL.
+ * Does nothing if osb->have_local_alloc is already clear.
+ */
+void ocfs_shutdown_local_alloc(ocfs_super *osb)
+{
+ int status;
+ ocfs2_dinode *alloc = NULL;
+ ocfs_bitmap_free_head *f = NULL;
+ struct buffer_head *bh = NULL;
+ ocfs_journal_handle *handle = NULL;
+ struct inode *local_alloc_inode = NULL;
+ ocfs_inode_private *oip;
+
+ LOG_ENTRY();
+
+ local_alloc_inode =
+ ocfs_get_system_file_inode(osb,
+ LOCAL_ALLOC_SYSTEM_INODE,
+ osb->node_num);
+ if (!local_alloc_inode) {
+ status = -ENOENT;
+ LOG_ERROR_STATUS(status);
+ goto bail;
+ }
+ oip = OCFS_I(local_alloc_inode);
+
+ /* Take io_sem here to turn off local alloc before another guy
+ * can come in and start using him. */
+ down_write(&oip->ip_io_sem);
+ if (!osb->have_local_alloc) {
+ up_write(&oip->ip_io_sem);
+ goto bail;
+ }
+ osb->have_local_alloc = 0;
+ up_write(&oip->ip_io_sem);
+
+ bh = osb->local_alloc_bh;
+
+ /* Collect the still-clear window bits into f. A failure here is
+ * only logged; the local alloc is still cleared below. */
+ status = ocfs_sync_local_from_shutdown(osb, &f, bh, 0);
+ if (status < 0)
+ LOG_ERROR_STATUS(status);
+
+ handle = ocfs_start_trans(osb, NULL, 1);
+ if (!handle) {
+ LOG_ERROR_STATUS(-ENOMEM);
+ goto bail;
+ }
+ ocfs_handle_set_always_commits(handle, 1);
+
+ /* NOTE(review): the error exits below skip brelse(bh) and leave
+ * osb->local_alloc_bh set -- confirm that is acceptable here. */
+ status = ocfs_journal_access(handle, bh, OCFS_JOURNAL_ACCESS_WRITE);
+ if (status < 0) {
+ LOG_ERROR_STATUS(status);
+ goto bail;
+ }
+
+ alloc = (ocfs2_dinode *) bh->b_data;
+ ocfs_clear_local_alloc(alloc);
+
+ status = ocfs_journal_dirty(handle, bh);
+ if (status < 0) {
+ LOG_ERROR_STATUS(status);
+ goto bail;
+ }
+
+ brelse(bh);
+
+ ocfs_commit_trans(handle);
+ handle = NULL;
+
+ osb->local_alloc_bh = NULL;
+ osb->have_local_alloc = 0;
+
+ /* f is not cleared after processing, so the fall-through into
+ * bail below also frees it. */
+ if (f)
+ ocfs_process_bitmap_free_head(osb, f);
+
+bail:
+ if (handle)
+ ocfs_commit_trans(handle);
+
+ if (f)
+ ocfs_free_bitmap_free_head(f);
+
+ if (local_alloc_inode)
+ iput(local_alloc_inode);
+
+ LOG_EXIT();
+ return;
+} /* ocfs_shutdown_local_alloc */
+
+/*
+ * ocfs_recover_local_alloc
+ *
+ * Recover the local alloc file belonging to node 'node_num': gather
+ * its unused window bits, clear it, and write it back to disk.
+ *
+ * We want to free the bitmap bits outside of any recovery context, so
+ * the free list is allocated for you and passed back through
+ * 'bits_to_free'.
+ *
+ * Returns 0 on success, negative status on error.
+ */
+int ocfs_recover_local_alloc(ocfs_super *osb,
+			     int node_num,
+			     ocfs_bitmap_free_head **bits_to_free)
+{
+	int status = 0;
+	struct buffer_head *alloc_bh = NULL;
+	struct inode *inode = NULL;
+	ocfs2_dinode *alloc;
+
+	LOG_ENTRY_ARGS("(node_num = %d)\n", node_num);
+
+	inode = ocfs_get_system_file_inode(osb,
+					   LOCAL_ALLOC_SYSTEM_INODE,
+					   node_num);
+	if (!inode) {
+		/* Assign outside the macro so the returned code does not
+		 * depend on how LOG_ERROR_STATUS treats its argument. */
+		status = -EINVAL;
+		LOG_ERROR_STATUS(status);
+		goto bail;
+	}
+
+	status = ocfs_read_bh(osb,
+			      OCFS_I(inode)->ip_blkno << inode->i_sb->s_blocksize_bits,
+			      &alloc_bh,
+			      0, inode);
+	if (status < 0) {
+		LOG_ERROR_STATUS(status);
+		goto bail;
+	}
+
+	status = ocfs_sync_local_from_shutdown(osb,
+					       bits_to_free,
+					       alloc_bh,
+					       1);
+	if (status < 0) {
+		LOG_ERROR_STATUS(status);
+		goto bail;
+	}
+
+	/* Only wipe the on-disk window once its bits are safely queued. */
+	alloc = (ocfs2_dinode *) alloc_bh->b_data;
+	ocfs_clear_local_alloc(alloc);
+
+	status = ocfs_write_bh(osb, alloc_bh, inode);
+	if (status < 0)
+		LOG_ERROR_STATUS(status);
+
+bail:
+	if (alloc_bh)
+		brelse(alloc_bh);
+
+	if (inode)
+		iput(inode);
+
+	LOG_EXIT_STATUS(status);
+	return(status);
+} /* ocfs_recover_local_alloc */
+
+/*
+ * ocfs_reserve_local_alloc_bits
+ *
+ * make sure we've got at least bits_wanted contiguous bits in the
+ * local alloc. You lose them when you drop ip_io_sem.
+ *
+ * We will add ourselves to the transaction passed in, but may start
+ * our own in order to shift windows.
+ *
+ * When we stop being lame and support multiple chunks of
+ * discontiguous space, this turns into a really simple check of
+ * ->la_bits_set
+ *
+ * passed_handle must not have been started yet (asserted below).
+ * On success ac->ac_inode and ac->ac_bh are filled in with references
+ * to the local alloc inode and its buffer.
+ *
+ * Returns 0 on success, -ENOSPC when the local alloc cannot serve the
+ * request, or another negative status on error.
+ */
+int ocfs_reserve_local_alloc_bits(ocfs_super *osb,
+ ocfs_journal_handle *passed_handle,
+ u32 bits_wanted,
+ ocfs2_alloc_context *ac)
+{
+ int status;
+ struct inode *local_alloc_inode = NULL;
+ int startoff;
+ ocfs2_dinode *alloc;
+
+ LOG_ENTRY();
+
+ OCFS_ASSERT(passed_handle);
+ OCFS_ASSERT(ac);
+ OCFS_ASSERT(!(passed_handle->flags & OCFS_HANDLE_STARTED));
+
+ local_alloc_inode =
+ ocfs_get_system_file_inode(osb,
+ LOCAL_ALLOC_SYSTEM_INODE,
+ osb->node_num);
+ if (!local_alloc_inode) {
+ status = -ENOENT;
+ LOG_ERROR_STATUS(status);
+ goto bail;
+ }
+ /* Attach the inode to the handle before have_local_alloc is
+ * tested. NOTE(review): presumably this is what takes ip_io_sem
+ * for the caller -- confirm against ocfs_handle_add_inode. */
+ ocfs_handle_add_inode(passed_handle, local_alloc_inode);
+
+ if (!osb->have_local_alloc) {
+ status = -ENOSPC;
+ goto bail;
+ }
+
+#warning "isn't it about time we turned this check off?"
+ if (bits_wanted > ocfs_clusters_for_bytes(osb->sb,
+ OCFS_LOCAL_ALLOC_MAX_ALLOC)) {
+ LOG_TRACE_STR("Asking for more than max local alloction!\n");
+ status = -ENOSPC;
+ goto bail;
+ }
+
+ if (bits_wanted > ocfs_local_alloc_window_bits(osb)) {
+ LOG_TRACE_STR("Asking for more than my max window size!\n");
+ status = -ENOSPC;
+ goto bail;
+ }
+
+ alloc = (ocfs2_dinode *) osb->local_alloc_bh->b_data;
+ startoff = ocfs_local_alloc_find_clear_bits(osb, alloc, bits_wanted);
+ if (startoff == -1) {
+ /* uhoh, window change time. */
+ status =
+ ocfs_local_alloc_slide_window(osb, local_alloc_inode);
+ if (status < 0) {
+ if (status != -ENOSPC)
+ LOG_ERROR_STATUS(status);
+ goto bail;
+ }
+ }
+
+ /* The context gets its own inode and buffer references; the
+ * reference taken at the top of this function is dropped at bail. */
+ ac->ac_inode = igrab(local_alloc_inode);
+ get_bh(osb->local_alloc_bh);
+ ac->ac_bh = osb->local_alloc_bh;
+
+ status = 0;
+bail:
+ if (local_alloc_inode)
+ iput(local_alloc_inode);
+
+ LOG_EXIT_STATUS(status);
+ return(status);
+}
+
+/*
+ * ocfs_claim_local_alloc_bits
+ *
+ * Claim the bits still outstanding on 'ac' (ac_bits_wanted minus
+ * ac_bits_given) as one contiguous run in the local alloc window and
+ * journal the change under 'handle'.
+ *
+ * ac must be a local alloc context (ac_which == OCFS_AC_USE_LOCAL).
+ * min_bits is accepted for interface compatibility but is not
+ * consulted: the request is satisfied in full or not at all.
+ *
+ * On success *bit_off is the offset of the first claimed bit (window
+ * offset la_bm_off plus the position inside the window) and *num_bits
+ * is the number of bits claimed.
+ *
+ * Returns 0 on success, -ENOSPC if no run is free, or the negative
+ * status of a failing journal call.
+ */
+int ocfs_claim_local_alloc_bits(ocfs_super *osb,
+				ocfs_journal_handle *handle,
+				ocfs2_alloc_context *ac,
+				u32 min_bits,
+				u32 *bit_off,
+				u32 *num_bits)
+{
+	int status, start;
+	u32 bits_wanted;
+	void *bitmap;
+	ocfs2_dinode *alloc;
+
+	LOG_ENTRY();
+	OCFS_ASSERT(ac->ac_which == OCFS_AC_USE_LOCAL);
+
+	bits_wanted = ac->ac_bits_wanted - ac->ac_bits_given;
+	alloc = (ocfs2_dinode *) osb->local_alloc_bh->b_data;
+
+	start = ocfs_local_alloc_find_clear_bits(osb, alloc, bits_wanted);
+	if (start == -1) {
+		/* TODO: Shouldn't we just BUG here? */
+		status = -ENOSPC;
+		LOG_ERROR_STATUS(status);
+		goto bail;
+	}
+
+	bitmap = LOCAL_ALLOC(alloc)->la_bitmap;
+	*bit_off = LOCAL_ALLOC(alloc)->la_bm_off + start;
+	/* local alloc is always contiguous by nature -- we never
+	 * delete bits from it! */
+	*num_bits = bits_wanted;
+
+	status = ocfs_journal_access(handle, osb->local_alloc_bh,
+				     OCFS_JOURNAL_ACCESS_WRITE);
+	if (status < 0) {
+		LOG_ERROR_STATUS(status);
+		goto bail;
+	}
+
+	/* Mark the run as used, one bit at a time. */
+	while(bits_wanted--)
+		set_bit(start++, bitmap);
+
+	status = ocfs_journal_dirty(handle, osb->local_alloc_bh);
+	if (status < 0) {
+		LOG_ERROR_STATUS(status);
+		goto bail;
+	}
+
+	status = 0;
+bail:
+	LOG_EXIT_STATUS(status);
+	return(status);
+}
+
+/*
+ * ocfs_local_alloc_count_bits
+ *
+ * Walk the la_size bytes of the local alloc bitmap and return how
+ * many bits are set in total.
+ */
+static __u32 ocfs_local_alloc_count_bits(ocfs2_dinode *alloc)
+{
+	__u8 *cur;
+	__u8 byte;
+	__u32 total = 0;
+	int idx = 0;
+
+	LOG_ENTRY();
+
+	cur = LOCAL_ALLOC(alloc)->la_bitmap;
+	while (idx < LOCAL_ALLOC(alloc)->la_size) {
+		byte = *cur;
+		total += BITCOUNT(byte);
+		cur++;
+		idx++;
+	}
+
+	LOG_EXIT_ULONG ((unsigned long)total);
+	return(total);
+} /* ocfs_local_alloc_count_bits */
+
+/*
+ * ocfs_local_alloc_find_clear_bits
+ *
+ * Search the first la_bm_bits bits of the local alloc bitmap for a
+ * run of 'numbits' contiguous clear bits.
+ *
+ * Returns the bit offset of the start of the run, or -1 if the window
+ * is empty (la_bm_bits == 0) or holds no such run. osb is not used.
+ */
+static int ocfs_local_alloc_find_clear_bits(ocfs_super *osb,
+ ocfs2_dinode *alloc,
+ __u32 numbits)
+{
+ int numfound, bitoff, left, startoff, lastzero;
+ void *bitmap = NULL;
+
+ LOG_ENTRY_ARGS("(numbits wanted = %u)\n", numbits);
+
+ if (LOCAL_ALLOC(alloc)->la_bm_bits == 0) {
+ LOG_TRACE_STR("No bits in my window!");
+ bitoff = -1;
+ goto bail;
+ }
+
+ bitmap = LOCAL_ALLOC(alloc)->la_bitmap;
+
+ /* numfound: length of the current run of zeros.
+ * startoff: where the next search starts, i.e. one past the last
+ * zero seen so far. lastzero is set but never read. */
+ numfound = bitoff = startoff = 0;
+ lastzero = -1;
+ left = LOCAL_ALLOC(alloc)->la_bm_bits;
+ /* NOTE(review): find_next_zero_bit presumably returns 'left' rather
+ * than -1 once no zero bit remains, so the 'bitoff == left' test
+ * below is what actually ends a failed scan -- confirm. */
+ while ((bitoff = find_next_zero_bit(bitmap, left, startoff)) != -1) {
+ if (bitoff == left) {
+ /* LOG_TRACE_ARGS("bitoff (%d) == left", bitoff); */
+ break;
+ }
+ /* LOG_TRACE_ARGS("Found a zero: bitoff = %d, startoff = %d, "
+ "numfound = %d\n", bitoff, startoff, numfound);*/
+
+ /* Ok, we found a zero bit... is it contig. or do we
+ * start over?*/
+ if (bitoff == startoff) {
+ /* we found a zero */
+ numfound++;
+ startoff++;
+ } else {
+ /* got a zero after some ones */
+ numfound = 1;
+ startoff = bitoff+1;
+ }
+ /* we got everything we needed */
+ if (numfound == numbits) {
+ /* LOG_TRACE_STR("Found it all!"); */
+ break;
+ }
+ }
+
+ LOG_TRACE_ARGS("Exiting loop, bitoff = %d, numfound = %d\n", bitoff,
+ numfound);
+
+ /* startoff is one past the end of the run, so step back over it. */
+ if (numfound == numbits)
+ bitoff = startoff - numfound;
+ else
+ bitoff = -1;
+
+bail:
+ LOG_EXIT_STATUS(bitoff);
+ return(bitoff);
+} /* ocfs_local_alloc_find_clear_bits */
+
+/*
+ * ocfs_clear_local_alloc
+ *
+ * Reset a local alloc to the empty state: no window offset, no window
+ * bits, no bits set, and all la_size bytes of the bitmap zeroed.
+ * Only touches the in-memory dinode; the caller writes it out.
+ */
+static void ocfs_clear_local_alloc(ocfs2_dinode *alloc)
+{
+	int byte = 0;
+
+	LOG_ENTRY();
+
+	LOCAL_ALLOC(alloc)->la_bm_off = 0;
+	LOCAL_ALLOC(alloc)->la_bits_set = 0;
+	LOCAL_ALLOC(alloc)->la_bm_bits = 0;
+
+	while (byte < LOCAL_ALLOC(alloc)->la_size) {
+		LOCAL_ALLOC(alloc)->la_bitmap[byte] = 0;
+		byte++;
+	}
+
+	LOG_EXIT();
+	return;
+} /* ocfs_clear_local_alloc */
+
+/*
+ * This essentially does the same thing as sync_local_to_main, but
+ * without a journal handle -- used during shutdown and recovery.
+ *
+ * Every clear bit inside the local alloc window is queued on the
+ * bitmap free head *f. If *f is NULL on entry and there is something
+ * to sync, a free head is allocated and handed back through f.
+ *
+ * Returns 0 on success (including "nothing to sync"), -ENOMEM if the
+ * free head cannot be allocated, or the negative status from
+ * ocfs_add_to_bitmap_free_head. In the last case *f has been freed
+ * and set to NULL. in_recovery is not consulted.
+ */
+static int ocfs_sync_local_from_shutdown(ocfs_super *osb,
+					 ocfs_bitmap_free_head **f,
+					 struct buffer_head *local_alloc_bh,
+					 int in_recovery)
+{
+	int status = 0;
+	int bit_off, left;
+	ocfs2_dinode *alloc = NULL;
+	void *bitmap;
+
+	LOG_ENTRY();
+
+	if (!local_alloc_bh)
+		BUG();
+
+	alloc = (ocfs2_dinode *) local_alloc_bh->b_data;
+	if (LOCAL_ALLOC(alloc)->la_bm_bits == 0) {
+		LOG_TRACE_STR("nothing to sync!");
+		goto bail;
+	}
+
+	if (!(*f)) {
+		*f = ocfs_alloc_bitmap_free_head();
+		if (*f == NULL) {
+			/* Report the failure: returning 0 here would let
+			 * the caller clear the window and lose its bits. */
+			status = -ENOMEM;
+			LOG_ERROR_STATUS(status);
+			goto bail;
+		}
+	}
+
+	LOG_TRACE_ARGS("alloc->la_bm_bits = %u, COUNT = %u, la_bits_set = %u\n",
+		       LOCAL_ALLOC(alloc)->la_bm_bits,
+		       ocfs_local_alloc_count_bits(alloc),
+		       LOCAL_ALLOC(alloc)->la_bits_set);
+
+	bitmap = LOCAL_ALLOC(alloc)->la_bitmap;
+
+	/* any unset bits in local alloc need to be unset in bitmap. */
+	bit_off = 0;
+	left = LOCAL_ALLOC(alloc)->la_bm_bits;
+	while ((bit_off = find_next_zero_bit(bitmap, left, bit_off))
+	       != -1) {
+		if (bit_off >= left) {
+			/*LOG_TRACE_ARGS("bit_off (%d) >= left\n", bit_off);*/
+			break;
+		}
+		/* LOG_TRACE_ARGS("Clearing bit %u in main bitmap\n", bit_off);*/
+		status = ocfs_add_to_bitmap_free_head(osb,
+						      *f, 1,
+						      bit_off + LOCAL_ALLOC(alloc)->la_bm_off,
+						      -1, 0, DISK_ALLOC_VOLUME);
+		if (status < 0) {
+			/* Stop here: *f is gone, so another pass would
+			 * hand a NULL free head to the call above and
+			 * overwrite the error we need to return. */
+			ocfs_free_bitmap_free_head(*f);
+			*f = NULL;
+			break;
+		}
+		bit_off++;
+	}
+
+bail:
+	LOG_EXIT_STATUS(status);
+	return(status);
+} /* ocfs_sync_local_from_shutdown */
+
+/*
+ * ocfs_sync_local_to_main
+ *
+ * sync the local alloc to main bitmap: every bit that is still clear
+ * inside the local window is cleared in the main cluster bitmap and
+ * the bitmap inode's i_used count is decremented, all under 'handle'.
+ *
+ * assumes you've already locked the main bitmap -- the bitmap inode
+ * passed is used for caching.
+ *
+ * Returns 0 on success (including an empty window) or the negative
+ * status of a failing journal or read call.
+ */
+static int ocfs_sync_local_to_main(ocfs_super *osb,
+ ocfs_journal_handle *handle,
+ ocfs2_dinode *alloc,
+ struct inode *main_bm_inode,
+ struct buffer_head *main_bm_bh)
+{
+ int status = 0;
+ int bit_off, left;
+ void *bitmap;
+ unsigned int start, numblocks, bitmapblocks;
+ ocfs2_dinode *bm_fe;
+
+ LOG_ENTRY_ARGS("alloc->la_bm_bits = %u, COUNT = %u, la_bits_set = %u\n",
+ LOCAL_ALLOC(alloc)->la_bm_bits,
+ ocfs_local_alloc_count_bits(alloc),
+ LOCAL_ALLOC(alloc)->la_bits_set);
+
+ if (LOCAL_ALLOC(alloc)->la_bm_bits == 0) {
+ LOG_TRACE_STR("nothing to sync!");
+ goto bail;
+ }
+
+ /* Total number of blocks backing the main cluster bitmap. */
+ bitmapblocks =
+ ocfs_blocks_for_bits(osb->sb,
+ osb->cluster_bitmap.validbits);
+
+ /* figure out which block in the bitmap to start on and the
+ * maximum number of blocks we can span over -- we don't need
+ * to read any more as that's the most we'll be touching... */
+ /* NOTE(review): the span is sized from la_bits_set, while the loop
+ * further down walks la_bm_bits -- confirm these always agree. */
+ numblocks = ocfs_bitmap_blocks_affected(osb->sb,
+ LOCAL_ALLOC(alloc)->la_bm_off,
+ LOCAL_ALLOC(alloc)->la_bits_set,
+ &start);
+
+ /* A span running past the end of the bitmap is treated as fatal. */
+ if ((start + numblocks) > bitmapblocks) {
+ printk("uhoh, bitmap calculation is bad!\n");
+ printk("alloc->la_bm_bits = %u, COUNT = %u, alloc->la_bits_set = %u"
+ "start=%u, alloc->la_bm_off = %u, numblocks=%u, "
+ "bitmapblocks = %u\n",
+ LOCAL_ALLOC(alloc)->la_bm_bits, ocfs_local_alloc_count_bits(alloc),
+ LOCAL_ALLOC(alloc)->la_bits_set, start, LOCAL_ALLOC(alloc)->la_bm_off, numblocks,
+ bitmapblocks);
+
+ BUG();
+ }
+
+ bm_fe = (ocfs2_dinode *) main_bm_bh->b_data;
+ status = ocfs_journal_access(handle, main_bm_bh,
+ OCFS_JOURNAL_ACCESS_WRITE);
+ if (status < 0) {
+ LOG_ERROR_STATUS(status);
+ goto bail;
+ }
+
+ LOG_TRACE_ARGS("start=%u, alloc->la_bm_off = %u, numblocks=%u\n", start,
+ LOCAL_ALLOC(alloc)->la_bm_off, numblocks);
+ /* Read the affected bitmap blocks into cluster_bitmap.chunk[],
+ * starting at index 'start'. */
+ status = ocfs_read_bhs(osb,
+ (osb->bitmap_blkno + start) << osb->sb->s_blocksize_bits,
+ numblocks << osb->sb->s_blocksize_bits,
+ &osb->cluster_bitmap.chunk[start], OCFS_BH_CACHED,
+ main_bm_inode);
+ if (status < 0) {
+ LOG_ERROR_STATUS(status);
+ goto bail;
+ }
+
+ bitmap = LOCAL_ALLOC(alloc)->la_bitmap;
+ /* any unset bits in local alloc need to be unset in bitmap. */
+ bit_off = 0;
+ left = LOCAL_ALLOC(alloc)->la_bm_bits;
+ while ((bit_off = find_next_zero_bit(bitmap, left, bit_off))
+ != -1) {
+ if (bit_off >= left) {
+ /*LOG_TRACE_ARGS("bit_off (%d) >= left\n", bit_off);*/
+ break;
+ }
+
+ LOG_TRACE_ARGS("Clearing bit %u in main bitmap\n",
+ bit_off + LOCAL_ALLOC(alloc)->la_bm_off);
+ ocfs_clear_bits(osb->sb, handle, &osb->cluster_bitmap,
+ bit_off + LOCAL_ALLOC(alloc)->la_bm_off,
+ 1);
+ /* NOTE(review): i_used is decremented with no endian
+ * conversion -- confirm against the on-disk format. */
+ bm_fe->id1.bitmap1.i_used--;
+ bit_off++;
+ }
+
+ status = ocfs_journal_dirty(handle, main_bm_bh);
+ if (status < 0) {
+ LOG_ERROR_STATUS (status);
+ goto bail;
+ }
+
+bail:
+
+ LOG_EXIT_STATUS(status);
+ return(status);
+} /* ocfs_sync_local_to_main */
+
+/*
+ * ocfs_local_alloc_reserve_for_window
+ *
+ * Allocate an alloc context and reserve one full local alloc window's
+ * worth of bits from the main bitmap with it.
+ *
+ * On success *ac is the new context (ac_which == OCFS_AC_USE_MAIN),
+ * and *bitmap_inode / *bitmap_bh are the context's inode and buffer,
+ * each with an extra reference taken for the caller. On failure the
+ * context is freed and *ac is NULL.
+ *
+ * Returns 0 on success, -ENOMEM if the context cannot be allocated,
+ * or the negative status from ocfs_reserve_main_bitmap_bits.
+ */
+static int ocfs_local_alloc_reserve_for_window(ocfs_super *osb,
+					       ocfs_journal_handle *handle,
+					       ocfs2_alloc_context **ac,
+					       struct inode **bitmap_inode,
+					       struct buffer_head **bitmap_bh)
+{
+	int status;
+	ocfs2_alloc_context *ctx;
+
+	ctx = kmalloc(sizeof(ocfs2_alloc_context), GFP_KERNEL);
+	*ac = ctx;
+	if (ctx == NULL) {
+		status = -ENOMEM;
+		LOG_ERROR_STATUS(status);
+		goto bail;
+	}
+
+	memset(ctx, 0, sizeof(ocfs2_alloc_context));
+	ctx->ac_handle = handle;
+	ctx->ac_bits_wanted = ocfs_local_alloc_window_bits(osb);
+
+	status = ocfs_reserve_main_bitmap_bits(osb,
+					       handle,
+					       ctx->ac_bits_wanted,
+					       ctx);
+	if (status < 0) {
+		LOG_ERROR_STATUS(status);
+		goto bail;
+	}
+	ctx->ac_which = OCFS_AC_USE_MAIN;
+
+	/* Hand the caller its own references to the bitmap inode and bh. */
+	*bitmap_inode = ctx->ac_inode;
+	igrab(*bitmap_inode);
+	*bitmap_bh = ctx->ac_bh;
+	get_bh(*bitmap_bh);
+	status = 0;
+bail:
+	if ((status < 0) && *ac) {
+		ocfs_free_alloc_context(*ac);
+		*ac = NULL;
+	}
+
+	LOG_EXIT_STATUS(status);
+	return(status);
+}
+
+/*
+ * ocfs_local_alloc_new_window
+ *
+ * Claim a window's worth of clusters through 'ac' and make the local
+ * alloc inode describe that window with every bit still free.
+ * Returns 0 on success, or the negative status from ocfs_claim_bits.
+ */
+static int ocfs_local_alloc_new_window(ocfs_super *osb,
+				       ocfs_journal_handle *handle,
+				       ocfs2_alloc_context *ac)
+{
+	ocfs2_dinode *alloc = NULL;
+	u32 win_start, win_len;
+	int status = 0;
+
+	LOG_ENTRY();
+
+	/* a leftover window is only traced, not treated as an error */
+	alloc = (ocfs2_dinode *) osb->local_alloc_bh->b_data;
+	if (LOCAL_ALLOC(alloc)->la_bm_bits != 0)
+		LOG_TRACE_STR("asking me to alloc a new window over a"
+			      " non-empty one");
+
+	LOG_TRACE_ARGS("Allocating %u clusters for a new window.\n",
+		       ocfs_local_alloc_window_bits(osb));
+
+	/* The reservation came from the main-bitmap-specific function, but
+	 * the context is fully set up, so the generic claim works on it. */
+	status = ocfs_claim_bits(osb, handle, ac,
+				 ocfs_local_alloc_window_bits(osb),
+				 &win_start, &win_len);
+	if (status < 0 && status != -ENOSPC)
+		LOG_ERROR_STATUS(status);
+	if (status < 0)
+		goto bail;
+
+	atomic_inc(&osb->alloc_stats.bitmap_data);
+
+	alloc = (ocfs2_dinode *) osb->local_alloc_bh->b_data;
+
+	/* Start from an empty window. In the future, when we find space
+	 * ourselves, it need not be contiguous -- but then previously used
+	 * bits must be set here and la_bits_set updated before the bits are
+	 * set in the main bitmap. */
+	LOCAL_ALLOC(alloc)->la_bits_set = 0;
+	memset(LOCAL_ALLOC(alloc)->la_bitmap, 0,
+	       LOCAL_ALLOC(alloc)->la_size);
+	LOCAL_ALLOC(alloc)->la_bm_off = win_start;
+	LOCAL_ALLOC(alloc)->la_bm_bits = win_len;
+
+	LOG_TRACE_STR("New window allocated:");
+	LOG_TRACE_ARGS("window la_bm_off = %u\n",
+		       LOCAL_ALLOC(alloc)->la_bm_off);
+	LOG_TRACE_ARGS("window la_bm_bits = %u\n",
+		       LOCAL_ALLOC(alloc)->la_bm_bits);
+
+bail:
+	LOG_EXIT_STATUS(status);
+	return(status);
+} /* ocfs_local_alloc_new_window */
+
+/* Note that we do *NOT* lock the local alloc inode here as
+ * it's been locked already for us (local_alloc_inode is not used below).
+ *
+ * Slides the window: reserve main bitmap space, empty the local alloc,
+ * return the old window's unused bits to the main bitmap from a private
+ * copy, then claim a new window. Returns 0 or a negative error code;
+ * the transaction is committed on every exit path that still has a handle. */
+static int ocfs_local_alloc_slide_window(ocfs_super *osb,
+					 struct inode *local_alloc_inode)
+{
+	int status = 0;
+	struct buffer_head *main_bm_bh = NULL;
+	struct inode *main_bm_inode = NULL;
+	ocfs_journal_handle *handle = NULL;
+	ocfs2_dinode *alloc;
+	ocfs2_dinode *alloc_copy = NULL;
+	ocfs2_alloc_context *ac = NULL;
+
+	LOG_ENTRY();
+
+	handle = ocfs_alloc_handle(osb);
+	if (!handle) {
+		status = -ENOMEM;
+		LOG_ERROR_STATUS (status);
+		goto bail;
+	}
+
+	/* This will lock the main bitmap for us. It also hands back the
+	 * only references we hold on the bitmap inode and buffer: looking
+	 * the inode up ourselves first would leak that reference, because
+	 * the pointer is overwritten here. */
+	status = ocfs_local_alloc_reserve_for_window(osb,
+						     handle,
+						     &ac,
+						     &main_bm_inode,
+						     &main_bm_bh);
+	if (status < 0) {
+		if (status != -EINTR)
+			LOG_ERROR_STATUS (status);
+		goto bail;
+	}
+
+	handle = ocfs_start_trans(osb, handle, OCFS_WINDOW_MOVE_CREDITS);
+	if (!handle) {
+		status = -ENOMEM;
+		LOG_ERROR_STATUS (status);
+		goto bail;
+	}
+	ocfs_handle_set_always_commits(handle, 1);
+
+	alloc = (ocfs2_dinode *) osb->local_alloc_bh->b_data;
+
+	/* We want to clear the local alloc before doing anything
+	 * else, so that if we error later during this operation,
+	 * local alloc shutdown won't try to double free main bitmap
+	 * bits. Make a copy so the sync function knows which bits to
+	 * free. */
+	alloc_copy = kmalloc(osb->local_alloc_bh->b_size, GFP_KERNEL);
+	if (!alloc_copy) {
+		status = -ENOMEM;
+		LOG_ERROR_STATUS(status);
+		goto bail;
+	}
+	memcpy(alloc_copy, alloc, osb->local_alloc_bh->b_size);
+
+	status = ocfs_journal_access(handle, osb->local_alloc_bh,
+				     OCFS_JOURNAL_ACCESS_WRITE);
+	if (status < 0) {
+		LOG_ERROR_STATUS(status);
+		goto bail;
+	}
+
+	ocfs_clear_local_alloc(alloc);
+
+	status = ocfs_journal_dirty(handle, osb->local_alloc_bh);
+	if (status < 0) {
+		LOG_ERROR_STATUS(status);
+		goto bail;
+	}
+
+	status = ocfs_sync_local_to_main(osb, handle, alloc_copy,
+					 main_bm_inode, main_bm_bh);
+	if (status < 0) {
+		LOG_ERROR_STATUS(status);
+		goto bail;
+	}
+
+	status = ocfs_local_alloc_new_window(osb, handle, ac);
+	if (status < 0) {
+		if (status != -ENOSPC)
+			LOG_ERROR_STATUS(status);
+		goto bail;
+	}
+
+	atomic_inc(&osb->alloc_stats.moves);
+
+	status = 0;
+bail:
+	if (handle)
+		ocfs_commit_trans(handle);
+
+	if (main_bm_bh)
+		brelse(main_bm_bh);
+
+	if (main_bm_inode)
+		iput(main_bm_inode);
+
+	if (alloc_copy)
+		kfree(alloc_copy);
+
+	if (ac)
+		ocfs_free_alloc_context(ac);
+
+	LOG_EXIT_STATUS(status);
+	return(status);
+}
+
Added: trunk/src/localalloc.h
===================================================================
--- trunk/src/localalloc.h 2004-10-02 01:01:24 UTC (rev 1542)
+++ trunk/src/localalloc.h 2004-10-02 01:08:08 UTC (rev 1543)
@@ -0,0 +1,55 @@
+/* -*- mode: c; c-basic-offset: 8; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
+ * localalloc.h
+ *
+ * Function prototypes
+ *
+ * Copyright (C) 2002, 2004 Oracle. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ *
+ * Authors: Kurt Hackel, Mark Fasheh, Sunil Mushran, Wim Coekaerts,
+ * Manish Singh, Neeraj Goyal, Suchit Kaura
+ */
+
+#ifndef OCFS2_LOCALALLOC_H
+#define OCFS2_LOCALALLOC_H
+/* Not self-contained: includers must first pull in the ocfs_super & co. types. */
+int ocfs_load_local_alloc(ocfs_super *osb);
+
+void ocfs_shutdown_local_alloc(ocfs_super *osb);
+
+int ocfs_recover_local_alloc(ocfs_super *osb,
+ int node_num,
+ ocfs_bitmap_free_head **bits_to_free);
+
+int ocfs_alloc_should_use_local(ocfs_super *osb,
+ u64 bits);
+/* NOTE(review): reserve then claim appear to be a pair sharing 'ac' -- confirm. */
+int ocfs_reserve_local_alloc_bits(ocfs_super *osb,
+ ocfs_journal_handle *passed_handle,
+ u32 bits_wanted,
+ ocfs2_alloc_context *ac);
+
+int ocfs_claim_local_alloc_bits(ocfs_super *osb,
+ ocfs_journal_handle *handle,
+ ocfs2_alloc_context *ac,
+ u32 min_bits,
+ u32 *bit_off,
+ u32 *num_bits);
+
+#endif /* OCFS2_LOCALALLOC_H */
Modified: trunk/src/namei.c
===================================================================
--- trunk/src/namei.c 2004-10-02 01:01:24 UTC (rev 1542)
+++ trunk/src/namei.c 2004-10-02 01:08:08 UTC (rev 1543)
@@ -58,6 +58,7 @@
#include "inode.h"
#include "lockres.h"
#include "namei.h"
+#include "suballoc.h"
#include "util.h"
#include "vote.h"
@@ -71,26 +72,34 @@
const char *name, int namelen,
unsigned long offset,
struct ocfs2_dir_entry **res_dir);
+
static int ocfs_delete_entry(ocfs_journal_handle *handle,
struct inode *dir,
struct ocfs2_dir_entry *de_del,
struct buffer_head *bh);
-static int __ocfs_add_entry(ocfs_journal_handle *handle,
- struct inode *dir,
- const char *name, int namelen,
- struct inode *inode, u64 blkno,
- struct buffer_head *parent_fe_bh);
-static inline int ocfs_match(int len, const char *const name,
- struct ocfs2_dir_entry *de);
+static int __ocfs_add_entry (ocfs_journal_handle *handle, struct inode *dir,
+ const char *name, int namelen,
+ struct inode *inode, u64 blkno,
+ struct buffer_head *parent_fe_bh,
+ struct buffer_head *insert_bh);
+
static int ocfs_mknod_locked(ocfs_super *osb, struct inode *dir,
struct dentry *dentry, int mode,
dev_t dev,
struct buffer_head **new_fe_bh,
struct buffer_head *parent_fe_bh,
ocfs_journal_handle *handle,
- struct inode *inode);
+ struct inode *inode,
+ ocfs2_alloc_context *inode_ac);
+static int ocfs_fill_new_dir(ocfs_super *osb,
+ ocfs_journal_handle *handle,
+ struct inode *parent,
+ struct inode *inode,
+ struct buffer_head *fe_bh,
+ ocfs2_alloc_context *data_ac);
+
static int ocfs_double_lock(ocfs_super *osb,
ocfs_journal_handle *handle,
__u32 type1, __u32 flags1,
@@ -100,8 +109,15 @@
struct buffer_head **bh2,
struct inode *inode2);
+static int ocfs_prepare_orphan_dir(ocfs_super *osb,
+ ocfs_journal_handle *handle,
+ struct inode *inode,
+ char **ret_name,
+ struct buffer_head **de_bh);
+
static int ocfs_orphan_add(ocfs_super *osb, ocfs_journal_handle *handle,
- struct inode *inode, ocfs2_dinode *fe);
+ struct inode *inode, ocfs2_dinode *fe,
+ char *name, struct buffer_head *de_bh);
static int ocfs_create_symlink_data(ocfs_super *osb,
ocfs_journal_handle *handle,
@@ -111,11 +127,12 @@
static inline int ocfs_add_entry(ocfs_journal_handle *handle,
struct dentry *dentry,
struct inode *inode, u64 blkno,
- struct buffer_head *parent_fe_bh)
+ struct buffer_head *parent_fe_bh,
+ struct buffer_head *insert_bh)
{
return(__ocfs_add_entry(handle, dentry->d_parent->d_inode,
dentry->d_name.name, dentry->d_name.len,
- inode, blkno, parent_fe_bh));
+ inode, blkno, parent_fe_bh, insert_bh));
}
/*
@@ -148,11 +165,9 @@
LOG_TRACE_ARGS("about to call find_files_on_disk with inode=%p\n",
dir);
- down_write(&OCFS_I(dir)->ip_io_sem);
status = ocfs_find_files_on_disk(osb, dentry->d_name.name,
dentry->d_name.len, &blkno,
dir, 1, &dirent_bh, &dirent);
- up_write(&OCFS_I(dir)->ip_io_sem);
if (status < 0)
goto bail_add;
@@ -171,13 +186,82 @@
bail:
if (dirent_bh)
brelse(dirent_bh);
-
+
LOG_EXIT_PTR (ret);
LOG_CLEAR_CONTEXT();
return ret;
} /* ocfs_lookup */
+/* Populate a new directory's first block with its "." and ".." entries. */
+static int ocfs_fill_new_dir(ocfs_super *osb,
+			     ocfs_journal_handle *handle,
+			     struct inode *parent,
+			     struct inode *inode,
+			     struct buffer_head *fe_bh,
+			     ocfs2_alloc_context *data_ac)
+{
+	struct ocfs2_dir_entry *dot, *dotdot;
+	struct buffer_head *bh = NULL;
+	int status;
+
+	LOG_ENTRY();
+
+	status = ocfs_do_extend_dir(osb->sb, handle, inode, fe_bh,
+				    data_ac, NULL, &bh);
+	if (status < 0) {
+		LOG_ERROR_STATUS(status);
+		goto bail;
+	}
+
+	set_buffer_uptodate(bh);
+	SET_BH_SEQNUM(inode, bh);
+	status = ocfs_journal_access(handle, bh, OCFS_JOURNAL_ACCESS_CREATE);
+	if (status < 0) {
+		LOG_ERROR_STATUS(status);
+		goto bail;
+	}
+	memset(bh->b_data, 0, osb->sb->s_blocksize);
+
+	/* "." points at the new directory itself and gets a minimal record */
+	dot = (struct ocfs2_dir_entry *) bh->b_data;
+	dot->inode = cpu_to_le64(OCFS_I(inode)->ip_blkno);
+	dot->name_len = 1;
+	dot->rec_len = cpu_to_le16(OCFS2_DIR_REC_LEN(dot->name_len));
+	strcpy (dot->name, ".");
+	ocfs_set_de_type(dot, S_IFDIR);
+	/* ".." points at the parent and owns the remainder of the block */
+	dotdot = (struct ocfs2_dir_entry *) ((char *) dot + le16_to_cpu(dot->rec_len));
+	dotdot->inode = cpu_to_le64(OCFS_I(parent)->ip_blkno);
+	dotdot->name_len = 2;
+	dotdot->rec_len = cpu_to_le16(inode->i_sb->s_blocksize -
+				      OCFS2_DIR_REC_LEN(1));
+	strcpy (dotdot->name, "..");
+	ocfs_set_de_type(dotdot, S_IFDIR);
+
+	status = ocfs_journal_dirty(handle, bh);
+	if (status < 0) {
+		LOG_ERROR_STATUS(status);
+		goto bail;
+	}
+
+	inode->i_size = inode->i_sb->s_blocksize;
+	inode->i_nlink = 2;
+	inode->i_blocks = 1;
+	status = ocfs_mark_inode_dirty(handle, inode, fe_bh);
+	if (status < 0) {
+		LOG_ERROR_STATUS(status);
+		goto bail;
+	}
+	status = 0;
+bail:
+	if (bh)
+		brelse(bh);
+
+	LOG_EXIT_STATUS(status);
+	return(status);
+}
+
static int ocfs_mknod(struct inode *dir, struct dentry *dentry,
int mode, dev_t dev)
{
@@ -189,7 +273,10 @@
ocfs2_dinode *fe = NULL;
ocfs2_dinode *dirfe;
struct buffer_head *new_fe_bh = NULL;
+ struct buffer_head *de_bh = NULL;
struct inode *inode = NULL;
+ ocfs2_alloc_context *inode_ac = NULL;
+ ocfs2_alloc_context *data_ac = NULL;
LOG_SET_CONTEXT(MKNOD);
@@ -204,8 +291,6 @@
}
#endif
- down_write(&OCFS_I(dir)->ip_io_sem);
-
/* get our super block */
osb = OCFS_SB(dir->i_sb);
if (osb->osb_flags & OCFS_OSB_FLAGS_SHUTDOWN) {
@@ -245,9 +330,8 @@
LOG_ERROR_STATUS (status);
goto leave;
}
-
- /* Ok, we got the lock -- we'd better add it to our transaction */
ocfs_handle_add_lock(handle, OCFS_LKM_EXMODE, 0, dir);
+ ocfs_handle_add_inode(handle, dir);
dirfe = (ocfs2_dinode *) parent_fe_bh->b_data;
if (!dirfe->i_links_count) {
@@ -256,15 +340,43 @@
goto leave;
}
+ /* get a spot inside the dir. */
+ status = ocfs_prepare_dir_for_insert(osb, dir, parent_fe_bh,
+ dentry->d_name.name,
+ dentry->d_name.len, &de_bh);
+ if (status < 0) {
+ LOG_ERROR_STATUS (status);
+ goto leave;
+ }
+
+ /* reserve an inode spot */
+ status = ocfs_reserve_new_inode(osb, handle, &inode_ac);
+ if (status < 0) {
+ LOG_ERROR_STATUS (status);
+ goto leave;
+ }
+
+ /* are we making a directory? If so, reserve a cluster for his
+ * 1st extent. */
+ if (S_ISDIR(mode)) {
+ status = ocfs_reserve_bits(osb, handle, 1, &data_ac);
+ if (status < 0) {
+ LOG_ERROR_STATUS(status);
+ goto leave;
+ }
+ }
+
handle = ocfs_start_trans(osb, handle, OCFS_MKNOD_CREDITS);
if (handle == NULL) {
LOG_ERROR_STATUS (status = -ENOMEM);
goto leave;
}
+ ocfs_handle_set_always_commits(handle, 1);
/* do the real work now. */
status = ocfs_mknod_locked(osb, dir, dentry, mode, dev,
- &new_fe_bh, parent_fe_bh, handle, inode);
+ &new_fe_bh, parent_fe_bh, handle,
+ inode, inode_ac);
if (status < 0) {
if (status != -EINTR)
LOG_ERROR_STATUS(status);
@@ -288,53 +400,20 @@
status = ocfs_update_lockres(osb, new_fe_bh, inode, 0);
if (S_ISDIR (mode)) {
- struct buffer_head *newdirbh = NULL;
- int retval = 0;
- struct ocfs2_dir_entry *de = NULL;
-
- newdirbh = ocfs_bread (handle, inode, 0, 1, &retval, 0);
- if (!newdirbh) {
- LOG_ERROR_STATUS(status = retval);
- goto leave;
- }
- status = ocfs_journal_access(handle, newdirbh, OCFS_JOURNAL_ACCESS_WRITE);
+ status = ocfs_fill_new_dir(osb, handle, dir, inode,
+ new_fe_bh, data_ac);
if (status < 0) {
- brelse(newdirbh);
LOG_ERROR_STATUS(status);
goto leave;
}
- de = (struct ocfs2_dir_entry *) newdirbh->b_data;
- de->inode = cpu_to_le64(fe->i_blkno);
- fe = NULL;
- de->name_len = 1;
- de->rec_len =
- cpu_to_le16(OCFS2_DIR_REC_LEN(de->name_len));
- strcpy (de->name, ".");
- ocfs_set_de_type(de, S_IFDIR);
- de = (struct ocfs2_dir_entry *) ((char *) de + le16_to_cpu(de->rec_len));
- de->inode = cpu_to_le64(OCFS_I(dir)->ip_blkno);
- de->rec_len = cpu_to_le16(inode->i_sb->s_blocksize -
- OCFS2_DIR_REC_LEN(1));
- de->name_len = 2;
- strcpy (de->name, "..");
- ocfs_set_de_type(de, S_IFDIR);
- inode->i_nlink = 2;
- status = ocfs_journal_dirty(handle, newdirbh);
- brelse (newdirbh);
+
+ status = ocfs_journal_access(handle, parent_fe_bh,
+ OCFS_JOURNAL_ACCESS_WRITE);
if (status < 0) {
LOG_ERROR_STATUS(status);
goto leave;
}
- inode->i_size = inode->i_sb->s_blocksize;
-
- status = ocfs_journal_access(handle, parent_fe_bh, OCFS_JOURNAL_ACCESS_WRITE);
- if (status < 0) {
- LOG_ERROR_STATUS(status);
- goto leave;
- }
- fe = (ocfs2_dinode *) parent_fe_bh->b_data;
- fe->i_links_count++;
- fe = NULL;
+ dirfe->i_links_count++;
status = ocfs_journal_dirty(handle, parent_fe_bh);
if (status < 0) {
LOG_ERROR_STATUS(status);
@@ -343,17 +422,21 @@
dir->i_nlink++;
}
+ status = ocfs_add_entry(handle, dentry, inode, fe->i_blkno,
+ parent_fe_bh, de_bh);
+ if (status < 0) {
+ LOG_ERROR_STATUS (status);
+ goto leave;
+ }
+
insert_inode_hash (inode);
d_instantiate (dentry, inode);
- ocfs_commit_trans(handle);
status = 0;
leave:
- if ((status < 0) && handle)
- ocfs_abort_trans(handle);
+ if (handle)
+ ocfs_commit_trans(handle);
- up_write(&OCFS_I(dir)->ip_io_sem);
-
if (status == -ENOSPC)
LOG_TRACE_STR ("Disk is full");
else if (status < 0 && status != -EINTR)
@@ -362,12 +445,21 @@
if (new_fe_bh)
brelse(new_fe_bh);
+ if (de_bh)
+ brelse(de_bh);
+
if (parent_fe_bh != NULL)
brelse(parent_fe_bh);
if ((status < 0) && inode)
iput(inode);
+ if (inode_ac)
+ ocfs_free_alloc_context(inode_ac);
+
+ if (data_ac)
+ ocfs_free_alloc_context(data_ac);
+
LOG_EXIT_STATUS(status);
LOG_CLEAR_CONTEXT();
@@ -384,39 +476,27 @@
struct buffer_head **new_fe_bh,
struct buffer_head *parent_fe_bh,
ocfs_journal_handle *handle,
- struct inode *inode)
+ struct inode *inode,
+ ocfs2_alloc_context *inode_ac)
{
int status = 0;
ocfs2_dinode *fe = NULL;
ocfs2_extent_list *fel;
- __u64 disk_off = 0;
u64 fe_blkno = 0;
- __u64 fileOffset = 0;
- struct inode *inode_alloc_inode = NULL;
+ u16 suballoc_bit;
LOG_ENTRY_ARGS ("(0x%p, 0x%p, %d, %lu, '%*s')\n", dir, dentry, mode,
(unsigned long)dev, dentry->d_name.len, dentry->d_name.name);
OCFS_ASSERT(new_fe_bh);
*new_fe_bh = NULL;
-
- inode_alloc_inode = ocfs_get_system_file_inode(osb, INODE_ALLOC_BITMAP_SYSTEM_INODE, osb->node_num);
- if (!inode_alloc_inode) {
- status = -EFAIL;
- LOG_ERROR_STATUS(status);
- goto leave;
- }
- ocfs_handle_add_inode(handle, inode_alloc_inode);
- status = ocfs_alloc_node_block(osb, osb->sb->s_blocksize,
- &disk_off, &fileOffset,
- osb->node_num, DISK_ALLOC_INODE,
- handle);
+ status = ocfs_claim_new_inode(osb, handle, inode_ac, &suballoc_bit,
+ &fe_blkno);
if (status < 0) {
LOG_ERROR_STATUS (status);
goto leave;
}
- fe_blkno = disk_off >> osb->sb->s_blocksize_bits;
*new_fe_bh = sb_getblk(osb->sb, fe_blkno);
if (!*new_fe_bh) {
@@ -449,7 +529,7 @@
spin_unlock(&osb->s_next_gen_lock);
fe->i_generation = cpu_to_le32(inode->i_generation);
fe->i_blkno = fe_blkno;
- fe->i_suballoc_blkno = fileOffset >> osb->sb->s_blocksize_bits;
+ fe->i_suballoc_bit = suballoc_bit;
fe->i_suballoc_node = osb->node_num;
fe->i_uid = current->fsuid;
if (dir->i_mode & S_ISGID) {
@@ -494,22 +574,12 @@
/* Inode is not yet fully populated, but we need some fields
* for add_entry. */
inode->i_mode = mode;
-
- status = ocfs_add_entry(handle, dentry, inode, fe_blkno,
- parent_fe_bh);
- if (status < 0) {
- LOG_ERROR_STATUS (status);
- goto leave;
- }
-
SET_BH_SEQNUM(inode, *new_fe_bh);
leave:
if (status < 0 && *new_fe_bh) {
brelse(*new_fe_bh);
*new_fe_bh = NULL;
}
- if (inode_alloc_inode)
- iput(inode_alloc_inode);
LOG_EXIT_STATUS (status);
return status;
@@ -563,9 +633,9 @@
ocfs_journal_handle *handle = NULL;
struct inode *inode = old_dentry->d_inode;
int err;
- int drop_dir_sem = 0, drop_inode_sem = 0;
struct buffer_head *fe_bh = NULL;
struct buffer_head *parent_fe_bh = NULL;
+ struct buffer_head *de_bh = NULL;
ocfs2_dinode *fe = NULL;
ocfs_super *osb = OCFS_SB(dir->i_sb);
@@ -592,8 +662,6 @@
goto bail;
}
- down_write(&OCFS_I(dir)->ip_io_sem);
- drop_dir_sem = 1;
/* lock the parent directory */
err = ocfs_acquire_lock (osb, OCFS_LKM_EXMODE, 0,
&parent_fe_bh, dir);
@@ -603,9 +671,16 @@
goto bail;
}
ocfs_handle_add_lock(handle, OCFS_LKM_EXMODE, 0, dir);
+ ocfs_handle_add_inode(handle, dir);
- down_write(&OCFS_I(inode)->ip_io_sem);
- drop_inode_sem = 1;
+ err = ocfs_prepare_dir_for_insert(osb, dir, parent_fe_bh,
+ dentry->d_name.name,
+ dentry->d_name.len, &de_bh);
+ if (err < 0) {
+ LOG_ERROR_STATUS (err);
+ goto bail;
+ }
+
err = ocfs_acquire_lock (osb, OCFS_LKM_EXMODE, 0, &fe_bh, inode);
if (err < 0) {
if (err != -EINTR)
@@ -614,6 +689,7 @@
}
ocfs_handle_add_lock(handle, OCFS_LKM_EXMODE, FLAG_FILE_UPDATE_OIN,
inode);
+ ocfs_handle_add_inode(handle, inode);
fe = (ocfs2_dinode *) fe_bh->b_data;
if (fe->i_links_count >= OCFS2_LINK_MAX) {
@@ -648,8 +724,8 @@
goto bail;
}
- err = ocfs_add_entry(handle, dentry, inode,
- OCFS_I(inode)->ip_blkno, parent_fe_bh);
+ err = ocfs_add_entry(handle, dentry, inode, OCFS_I(inode)->ip_blkno,
+ parent_fe_bh, de_bh);
if (err) {
fe->i_links_count--;
inode->i_nlink--;
@@ -662,12 +738,8 @@
bail:
if (handle)
ocfs_commit_trans(handle);
-
- if (drop_dir_sem)
- up_write(&OCFS_I(dir)->ip_io_sem);
- if (drop_inode_sem)
- up_write(&OCFS_I(inode)->ip_io_sem);
-
+ if (de_bh)
+ brelse(de_bh);
if (fe_bh)
brelse(fe_bh);
if (parent_fe_bh)
@@ -690,19 +762,22 @@
int retval = -EBUSY;
ocfs_super *osb = OCFS_SB(dir->i_sb);
u64 blkno;
- struct inode *parentInode = dentry->d_parent->d_inode;
ocfs2_dinode *fe = NULL;
struct buffer_head *fe_bh = NULL;
struct buffer_head *parent_node_bh = NULL; /* parent locknode */
ocfs_journal_handle *handle = NULL;
struct ocfs2_dir_entry *dirent = NULL;
struct buffer_head *dirent_bh = NULL;
+ char *orphan_name = NULL; /* exit path tests and kfree()s it even if never allocated */
+ struct buffer_head *orphan_entry_bh = NULL;
LOG_SET_CONTEXT(UNLINK);
LOG_ENTRY_ARGS ("(0x%p, 0x%p, '%*s')\n", dir, dentry,
dentry->d_name.len, dentry->d_name.name);
+ OCFS_ASSERT((dentry->d_parent->d_inode == dir));
+
LOG_TRACE_ARGS("ino = %llu\n", OCFS_I(inode)->ip_blkno);
status = -EBUSY;
@@ -710,31 +785,29 @@
if (inode == osb->root_inode) {
LOG_TRACE_STR ("Cannot delete the root directory");
status = -EPERM;
- goto bail;
+ goto leave;
}
handle = ocfs_alloc_handle(osb);
if (handle == NULL) {
LOG_ERROR_STATUS (status = -ENOMEM);
- goto bail;
+ goto leave;
}
- down_write(&OCFS_I(dir)->ip_io_sem);
- down_write(&OCFS_I(inode)->ip_io_sem);
-
status = ocfs_acquire_lock(osb, OCFS_LKM_EXMODE, 0,
- &parent_node_bh, parentInode);
+ &parent_node_bh, dir);
if (status < 0) {
LOG_ERROR_STATUS(status);
goto leave;
}
- ocfs_handle_add_lock(handle, OCFS_LKM_EXMODE, 0, parentInode);
+ ocfs_handle_add_lock(handle, OCFS_LKM_EXMODE, 0, dir);
+ ocfs_handle_add_inode(handle, dir);
/* this will re-read the directory now with the EXCLUSIVE */
/* lock already held; it will also return the blkno to us */
status = ocfs_find_files_on_disk(osb, dentry->d_name.name,
dentry->d_name.len, &blkno,
- parentInode, 0, &dirent_bh,
+ dir, 0, &dirent_bh,
&dirent);
if (status < 0) {
LOG_ERROR_STATUS(status);
@@ -753,9 +826,10 @@
}
ocfs_handle_add_lock(handle, OCFS_LKM_EXMODE, FLAG_RELEASE_DENTRY,
inode);
+ ocfs_handle_add_inode(handle, inode);
if (S_ISDIR (inode->i_mode)) {
- if (!empty_dir(inode)) {
+ if (!ocfs_empty_dir(inode)) {
status = -ENOTEMPTY;
goto leave;
} else if (inode->i_nlink != 2) {
@@ -764,11 +838,22 @@
}
}
+ if (S_ISDIR(inode->i_mode) || (inode->i_nlink == 1)) {
+ status = ocfs_prepare_orphan_dir(osb, handle, inode,
+ &orphan_name,
+ &orphan_entry_bh);
+ if (status < 0) {
+ LOG_ERROR_STATUS (status);
+ goto leave;
+ }
+ }
+
handle = ocfs_start_trans(osb, handle, OCFS_FILE_DELETE_CREDITS);
if (handle == NULL) {
LOG_ERROR_STATUS (status = -ENOMEM);
goto leave;
}
+ ocfs_handle_set_always_commits(handle, 1);
status = ocfs_journal_access(handle, fe_bh, OCFS_JOURNAL_ACCESS_WRITE);
if (status < 0) {
@@ -784,26 +869,28 @@
inode->i_nlink = fe->i_links_count;
}
- if (S_ISDIR (inode->i_mode))
- fe->i_links_count = 0;
- else
- fe->i_links_count--;
- if (!fe->i_links_count) {
- status = ocfs_orphan_add(osb, handle, inode, fe);
+ if (S_ISDIR(inode->i_mode) || (fe->i_links_count == 1)) {
+ status = ocfs_orphan_add(osb, handle, inode, fe, orphan_name,
+ orphan_entry_bh);
if (status < 0) {
LOG_ERROR_STATUS(status);
goto leave;
}
}
- status = ocfs_journal_dirty(handle, fe_bh);
+ /* delete the name from the parent dir */
+ status = ocfs_delete_entry (handle, dir, dirent, dirent_bh);
if (status < 0) {
LOG_ERROR_STATUS(status);
goto leave;
}
- /* delete the name from the parent dir */
- status = ocfs_delete_entry (handle, parentInode, dirent, dirent_bh);
+ if (S_ISDIR (inode->i_mode))
+ fe->i_links_count = 0;
+ else
+ fe->i_links_count--;
+
+ status = ocfs_journal_dirty(handle, fe_bh);
if (status < 0) {
LOG_ERROR_STATUS(status);
goto leave;
@@ -828,22 +915,15 @@
leave:
if (handle) {
- if (status < 0)
- ocfs_abort_trans(handle);
- else {
- ocfs_commit_trans(handle);
- // already checked to make sure dir has nlink==2
- if (S_ISDIR (inode->i_mode)) {
- inode->i_nlink = 0;
- dir->i_nlink--;
- } else
- inode->i_nlink--;
- }
+ ocfs_commit_trans(handle);
+ /* on failure nothing was unlinked: leave the in-memory link counts alone */
+ if (status >= 0 && S_ISDIR (inode->i_mode)) {
+ inode->i_nlink = 0; /* already checked to make sure dir has nlink==2 */
+ dir->i_nlink--;
+ } else if (status >= 0)
+ inode->i_nlink--;
}
- up_write(&OCFS_I(inode)->ip_io_sem);
- up_write(&OCFS_I(dir)->ip_io_sem);
-bail:
if (status < 0 && status != -ENOTEMPTY &&
status != -EPERM && status != -EBUSY && status != -EINTR) {
LOG_ERROR_STATUS(status);
@@ -860,6 +940,12 @@
if (parent_node_bh)
brelse(parent_node_bh);
+ if (orphan_entry_bh)
+ brelse(orphan_entry_bh);
+
+ if (orphan_name)
+ kfree(orphan_name);
+
LOG_EXIT_INT (retval);
LOG_CLEAR_CONTEXT();
@@ -933,6 +1019,7 @@
goto bail;
}
ocfs_handle_add_lock(handle, type2, flags2, inode2);
+ ocfs_handle_add_inode(handle, inode2);
}
/* lock id1 */
status = ocfs_acquire_lock(osb, type1, flags1,
@@ -942,48 +1029,31 @@
goto bail;
}
ocfs_handle_add_lock(handle, type1, flags1, inode1);
-
+ ocfs_handle_add_inode(handle, inode1);
bail:
-
LOG_EXIT_STATUS(status);
return(status);
} /* ocfs_double_lock */
-static inline void double_down_write(struct rw_semaphore *s1,
- struct rw_semaphore *s2)
-{
- if (s1 != s2) {
- if ((unsigned long) s1 < (unsigned long) s2) {
- struct rw_semaphore *tmp = s2;
- s2 = s1; s1 = tmp;
- }
- down_write(s1);
- }
- down_write(s2);
-}
-
-static inline void double_up_write(struct rw_semaphore *s1,
- struct rw_semaphore *s2)
-{
- up_write(s1);
- if (s1 != s2)
- up_write(s2);
-}
-
#define PARENT_INO(buffer) \
((struct ocfs2_dir_entry *) ((char *) buffer + \
le16_to_cpu(((struct ocfs2_dir_entry *) buffer)->rec_len)))->inode
+
/*
* ocfs_rename()
*
*/
static int ocfs_rename (struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry)
{
+#warning "this needs to be split up into seperate functions."
int status = 0;
struct inode *old_inode = old_dentry->d_inode;
struct inode *new_inode = new_dentry->d_inode;
ocfs2_dinode *newfe = NULL;
+ char *orphan_name = NULL; /* bail: tests and kfree()s it even if never allocated */
+ struct buffer_head *orphan_entry_bh = NULL;
struct buffer_head *newfe_bh = NULL;
+ struct buffer_head *insert_entry_bh = NULL;
ocfs_super *osb = NULL;
u64 newfe_blkno;
ocfs_journal_handle *handle = NULL;
@@ -1006,14 +1076,9 @@
osb = OCFS_SB(old_dir->i_sb);
- double_down_write(&OCFS_I(old_dir)->ip_io_sem,
- &OCFS_I(new_dir)->ip_io_sem);
- down_write(&OCFS_I(old_inode)->ip_io_sem);
-
if (new_inode) {
if (ocfs_inc_icount(new_inode) < 0)
BUG();
- down_write(&OCFS_I(new_inode)->ip_io_sem);
}
if (atomic_read (&old_dentry->d_count) > 2) {
@@ -1024,13 +1089,6 @@
}
}
- if (new_inode && S_ISDIR (old_inode->i_mode) &&
- !empty_dir (new_inode)) {
- status = -ENOTEMPTY;
- LOG_TRACE_STR ("New (directory) dentry NOT empty!");
- goto bail;
- }
-
handle = ocfs_alloc_handle(osb);
if (handle == NULL) {
LOG_ERROR_STATUS(status = -ENOMEM);
@@ -1047,7 +1105,7 @@
&new_dir_bh, new_dir);
if (status < 0) {
LOG_ERROR_STATUS(status);
- goto finally;
+ goto bail;
}
/* make sure both dirs have bhs
@@ -1059,7 +1117,7 @@
} else {
LOG_ERROR_STR("no old_dir_bh!");
status = -EIO;
- goto finally;
+ goto bail;
}
}
@@ -1076,42 +1134,42 @@
}
if (status < 0) {
LOG_ERROR_STATUS(status);
- goto finally;
+ goto bail;
}
-
ocfs_handle_add_lock(handle, OCFS_LKM_EXMODE,
FLAG_RELEASE_DENTRY|FLAG_FILE_RENAME,
old_inode);
+ ocfs_handle_add_inode(handle, old_inode);
status = -EIO;
- old_inode_de_bh = ocfs_bread (handle, old_inode, 0, 0, &status, 0);
+ old_inode_de_bh = ocfs_bread (old_inode, 0, &status, 0);
if (!old_inode_de_bh)
- goto finally;
+ goto bail;
status = -EIO;
if (le64_to_cpu(PARENT_INO(old_inode_de_bh->b_data)) !=
OCFS_I(old_dir)->ip_blkno)
- goto finally;
+ goto bail;
status = -EMLINK;
if (!new_inode && new_dir!=old_dir &&
new_dir->i_nlink >= OCFS2_LINK_MAX)
- goto finally;
+ goto bail;
} else {
/* Ah, the simple case - we're a file so just send a
* message. */
status = ocfs_notify_on_rename(osb, old_inode);
if (status < 0) {
LOG_ERROR_STATUS(status);
- goto finally;
+ goto bail;
}
}
-
+
status = -ENOENT;
old_de_bh = ocfs_find_entry(old_dentry->d_name.name,
old_dentry->d_name.len,
old_dir, &old_de);
if (!old_de_bh)
- goto finally;
+ goto bail;
/*
* Check for inode number is _not_ due to possible IO errors.
@@ -1120,7 +1178,7 @@
* same name. Goodbye sticky bit ;-<
*/
if (le64_to_cpu(old_de->inode) != OCFS_I(old_inode)->ip_blkno)
- goto finally;
+ goto bail;
/* check if the target already exists (in which case we need
* to delete it */
@@ -1134,7 +1192,7 @@
/* If we cannot find the file specified we should just */
/* return the error... */
LOG_ERROR_STATUS (status);
- goto finally;
+ goto bail;
}
if (!new_de && new_inode)
@@ -1155,23 +1213,39 @@
new_inode);
if (status < 0) {
LOG_ERROR_STATUS(status);
- goto finally;
+ goto bail;
}
-
ocfs_handle_add_lock(handle, OCFS_LKM_EXMODE,
FLAG_RELEASE_DENTRY, new_inode);
+ ocfs_handle_add_inode(handle, new_inode);
newfe = (ocfs2_dinode *) newfe_bh->b_data;
- /* if our caching is working right, then after the
- * verify_update_inode, newfe->i_nlink ==
- * new_inode->i_nlink */
- status = ocfs_refresh_inode (new_inode, newfe);
-
LOG_TRACE_ARGS("aha rename over existing... new_de=%p "
"new_blkno=%llu newfebh=%p bhblocknr=%llu\n",
new_de, newfe_blkno, newfe_bh, newfe_bh ?
(unsigned long long)newfe_bh->b_blocknr : 0ULL);
+
+ if (S_ISDIR(new_inode->i_mode) || (new_inode->i_nlink == 1)) {
+ status = ocfs_prepare_orphan_dir(osb, handle,
+ new_inode,
+ &orphan_name,
+ &orphan_entry_bh);
+ if (status < 0) {
+ LOG_ERROR_STATUS (status);
+ goto bail;
+ }
+ }
+ } else {
+ OCFS_ASSERT(new_dentry->d_parent->d_inode == new_dir);
+ status = ocfs_prepare_dir_for_insert(osb, new_dir, new_dir_bh,
+ new_dentry->d_name.name,
+ new_dentry->d_name.len,
+ &insert_entry_bh);
+ if (status < 0) {
+ LOG_ERROR_STATUS (status);
+ goto bail;
+ }
}
handle = ocfs_start_trans(osb, handle, OCFS_FILE_RENAME_CREDITS);
@@ -1179,48 +1253,38 @@
LOG_ERROR_STATUS(status = -ENOMEM);
goto bail;
}
+ ocfs_handle_set_always_commits(handle, 1);
if (new_de) {
+ if (S_ISDIR (new_inode->i_mode)) {
+ if (!ocfs_empty_dir(new_inode) || new_inode->i_nlink != 2) {
+ status = -ENOTEMPTY;
+ goto bail;
+ }
+ }
status = ocfs_journal_access(handle, newfe_bh,
OCFS_JOURNAL_ACCESS_WRITE);
if (status < 0) {
LOG_ERROR_STATUS (status);
- goto finally;
+ goto bail;
}
- if (S_ISDIR (new_inode->i_mode)) {
- if (!empty_dir(new_inode) || new_inode->i_nlink != 2) {
- status = -ENOTEMPTY;
- goto finally;
- }
- }
-
- if (S_ISDIR (new_inode->i_mode))
- newfe->i_links_count = 0;
- else
- newfe->i_links_count--;
-
- if (!newfe->i_links_count) {
+ if (S_ISDIR(new_inode->i_mode) || (newfe->i_links_count == 1)){
status = ocfs_orphan_add(osb, handle, new_inode,
- newfe);
+ newfe, orphan_name,
+ orphan_entry_bh);
if (status < 0) {
LOG_ERROR_STATUS(status);
- goto finally;
+ goto bail;
}
}
- status = ocfs_journal_dirty(handle, newfe_bh);
- if (status < 0) {
- LOG_ERROR_STATUS (status);
- goto finally;
- }
-
/* change the dirent to point to the correct inode */
status = ocfs_journal_access(handle, new_de_bh,
OCFS_JOURNAL_ACCESS_WRITE);
if (status < 0) {
LOG_ERROR_STATUS (status);
- goto finally;
+ goto bail;
}
new_de->inode =
le64_to_cpu(OCFS_I(old_inode)->ip_blkno);
@@ -1229,21 +1293,26 @@
status = ocfs_journal_dirty(handle, new_de_bh);
if (status < 0) {
LOG_ERROR_STATUS(status);
- goto finally;
+ goto bail;
}
+
+ if (S_ISDIR (new_inode->i_mode))
+ newfe->i_links_count = 0;
+ else
+ newfe->i_links_count--;
+
+ status = ocfs_journal_dirty(handle, newfe_bh);
+ if (status < 0) {
+ LOG_ERROR_STATUS (status);
+ goto bail;
+ }
} else {
/* if the name was not found in new_dir, add it now */
status = ocfs_add_entry (handle, new_dentry, old_inode,
OCFS_I(old_inode)->ip_blkno,
- new_dir_bh);
+ new_dir_bh, insert_entry_bh);
}
-finally:
- if (status < 0) {
- ocfs_abort_trans(handle);
- goto bail;
- }
-
old_inode->i_ctime = CURRENT_TIME;
mark_inode_dirty(old_inode);
@@ -1262,19 +1331,9 @@
if (old_inode_de_bh) {
status = ocfs_journal_access(handle, old_inode_de_bh,
OCFS_JOURNAL_ACCESS_WRITE);
- if (status < 0) {
- LOG_ERROR_STATUS (status);
- // BAD
- goto bail;
- }
PARENT_INO(old_inode_de_bh->b_data) =
le64_to_cpu(OCFS_I(new_dir)->ip_blkno);
status = ocfs_journal_dirty(handle, old_inode_de_bh);
- if (status < 0) {
- LOG_ERROR_STATUS(status);
- // BAD
- goto bail;
- }
old_dir->i_nlink--;
if (new_inode) {
new_inode->i_nlink--;
@@ -1316,25 +1375,16 @@
}
}
- /* commit_trans */
- ocfs_commit_trans(handle);
+ status = 0;
+bail:
+ if (handle)
+ ocfs_commit_trans(handle);
if (new_inode)
sync_mapping_buffers(old_inode->i_mapping);
- status = 0;
-
-bail:
-
- double_up_write(&OCFS_I(old_dir)->ip_io_sem,
- &OCFS_I(new_dir)->ip_io_sem);
- up_write(&OCFS_I(old_inode)->ip_io_sem);
-
- if (new_inode) {
- up_write(&OCFS_I(new_inode)->ip_io_sem);
+ if (new_inode)
iput(new_inode);
- }
-
if (newfe_bh)
brelse(newfe_bh);
if (old_dir_bh)
@@ -1347,7 +1397,14 @@
brelse(old_de_bh);
if (old_inode_de_bh)
brelse(old_inode_de_bh);
+ if (orphan_entry_bh)
+ brelse(orphan_entry_bh);
+ if (insert_entry_bh)
+ brelse(insert_entry_bh);
+ if (orphan_name)
+ kfree(orphan_name);
+
LOG_EXIT_STATUS(status);
LOG_CLEAR_CONTEXT();
@@ -1479,11 +1536,14 @@
struct super_block *sb;
int l;
struct buffer_head *new_fe_bh = NULL;
+ struct buffer_head *de_bh = NULL;
struct buffer_head *parent_fe_bh = NULL;
ocfs2_dinode *fe = NULL;
ocfs2_dinode *dirfe;
ocfs_journal_handle *handle = NULL;
int credits;
+ ocfs2_alloc_context *inode_ac = NULL;
+ ocfs2_alloc_context *data_ac = NULL;
LOG_SET_CONTEXT(SYMLINK);
@@ -1494,8 +1554,6 @@
sb = dir->i_sb;
osb = OCFS_SB(sb);
- down_write(&OCFS_I(dir)->ip_io_sem);
-
inode = new_inode (sb);
if (IS_ERR (inode)) {
status = PTR_ERR(inode);
@@ -1514,7 +1572,7 @@
l = strlen (symname) + 1;
newsize = l - 1;
- credits = ocfs_calc_symlink_credits(sb, newsize);
+ credits = ocfs_calc_symlink_credits(sb);
handle = ocfs_alloc_handle(osb);
if (handle == NULL) {
@@ -1528,35 +1586,56 @@
if (status < 0) {
if (status != -EINTR)
LOG_ERROR_STATUS (status);
- goto abort_trans;
+ goto bail;
}
-
ocfs_handle_add_lock(handle, OCFS_LKM_EXMODE, 0, dir);
+ ocfs_handle_add_inode(handle, dir);
dirfe = (ocfs2_dinode *) parent_fe_bh->b_data;
if (!dirfe->i_links_count) {
/* can't make a file in a deleted directory. */
status = -ENOENT;
- goto abort_trans;
+ goto bail;
}
+ /* Everything below is reserved before ocfs_start_trans() is called:
+ * the directory slot for the new name, then the inode and data
+ * allocation contexts. On failure we leave with the status the
+ * helper returned. */
+ status = ocfs_prepare_dir_for_insert(osb, dir, parent_fe_bh,
+ dentry->d_name.name,
+ dentry->d_name.len, &de_bh);
+ if (status < 0) {
+ /* keep the helper's error; this used to be forced to -ENOMEM */
+ LOG_ERROR_STATUS (status);
+ goto bail;
+ }
+
+ status = ocfs_reserve_new_inode(osb, handle, &inode_ac);
+ if (status < 0) {
+ /* keep the helper's error; this used to be forced to -ENOMEM */
+ LOG_ERROR_STATUS (status);
+ goto bail;
+ }
+
+ status = ocfs_reserve_bits(osb, handle, 1, &data_ac);
+ if (status < 0) {
+ /* keep the helper's error; this used to be forced to -ENOMEM */
+ LOG_ERROR_STATUS (status);
+ goto bail;
+ }
+
handle = ocfs_start_trans(osb, handle, credits);
if (handle == NULL) {
LOG_ERROR_STATUS (status = -ENOMEM);
goto bail;
}
+ ocfs_handle_set_always_commits(handle, 1);
status = ocfs_mknod_locked(osb, dir, dentry,
S_IFLNK | S_IRWXUGO, 0,
&new_fe_bh, parent_fe_bh, handle,
- inode);
+ inode, inode_ac);
if (status < 0) {
LOG_ERROR_STATUS(status);
- goto abort_trans;
+ goto bail;
}
fe = (ocfs2_dinode *) new_fe_bh->b_data;
-
+
if (ocfs_populate_inode (inode, fe, 1) < 0) {
LOG_ERROR_ARGS("populate inode failed! bh->b_blocknr=%llu, "
"i_blkno=%llu, i_ino=%lu\n",
@@ -1571,47 +1650,58 @@
if (status < 0)
LOG_ERROR_STATUS(status);
- status = ocfs_extend_file(osb, newsize, handle, inode, NULL, 0,
- new_fe_bh);
+ status = ocfs_extend_allocation(osb, inode, 1, new_fe_bh, handle,
+ data_ac, NULL, NULL);
if (status < 0) {
if (status != -ENOSPC && status != -EINTR) {
LOG_ERROR_ARGS ("Failed to extend file to %llu", newsize);
LOG_ERROR_STATUS(status);
status = -ENOSPC;
}
- goto abort_trans;
+ goto bail;
}
inode->i_rdev = 0;
inode->i_size = newsize;
inode->i_blocks = (newsize + sb->s_blocksize - 1) >> sb->s_blocksize_bits;
+ status = ocfs_mark_inode_dirty(handle, inode, new_fe_bh);
+ if (status < 0) {
+ LOG_ERROR_STATUS(status);
+ goto bail;
+ }
+
status = ocfs_inode_fill_ext_map(osb, new_fe_bh, inode);
if (status < 0) {
LOG_ERROR_STATUS(status);
- goto abort_trans;
+ goto bail;
}
status = ocfs_create_symlink_data(osb, handle, inode, symname);
- if (status < 0)
- LOG_ERROR_STATUS(status);
+ if (status < 0) {
+ LOG_ERROR_STATUS (status);
+ goto bail;
+ }
-abort_trans:
+ status = ocfs_add_entry(handle, dentry, inode, fe->i_blkno,
+ parent_fe_bh, de_bh);
if (status < 0) {
- ocfs_abort_trans(handle);
- } else {
- insert_inode_hash (inode);
- d_instantiate (dentry, inode);
-
- ocfs_commit_trans(handle);
+ LOG_ERROR_STATUS (status);
+ goto bail;
}
+ insert_inode_hash (inode);
+ d_instantiate (dentry, inode);
bail:
- up_write(&OCFS_I(dir)->ip_io_sem);
- if (new_fe_bh) {
+ if (handle)
+ ocfs_commit_trans(handle);
+ if (new_fe_bh)
brelse(new_fe_bh);
- }
if (parent_fe_bh)
brelse(parent_fe_bh);
+ if (de_bh)
+ brelse(de_bh);
+ if (inode_ac)
+ ocfs_free_alloc_context(inode_ac);
+ /* data_ac is filled in by ocfs_reserve_bits() earlier in this
+ * function and was not released on any exit path; drop it the same
+ * way as inode_ac.
+ * NOTE(review): assumes ocfs_extend_allocation() does not free the
+ * context it is handed -- confirm against suballoc.c. */
+ if (data_ac)
+ ocfs_free_alloc_context(data_ac);
LOG_EXIT_STATUS (status);
@@ -1644,75 +1734,41 @@
return error_msg == NULL ? 1 : 0;
}
-static inline int ocfs_match (int len, const char * const name, struct ocfs2_dir_entry * de)
-{
- if (len != de->name_len)
- return 0;
- if (!de->inode)
- return 0;
- return !memcmp(name, de->name, len);
-}
-
/* we don't always have a dentry for what we want to add, so people
- * like orphan dir can call this instead. */
+ * like orphan dir can call this instead.
+ *
+ * If you pass me insert_bh, I'll skip the search of the other dir
+ * blocks and put the record in there.
+*/
static int __ocfs_add_entry (ocfs_journal_handle *handle, struct inode *dir,
const char *name, int namelen,
struct inode *inode, u64 blkno,
- struct buffer_head *parent_fe_bh)
+ struct buffer_head *parent_fe_bh,
+ struct buffer_head *insert_bh)
{
unsigned long offset;
unsigned short rec_len;
- struct buffer_head * bh;
struct ocfs2_dir_entry * de, * de1;
struct super_block * sb;
int retval, status;
- ocfs2_dinode *fe = NULL;
LOG_ENTRY();
+ OCFS_ASSERT(insert_bh);
sb = dir->i_sb;
if (!namelen)
return -EINVAL;
- bh = ocfs_bread (handle, dir, 0, 0, &retval, 0);
- if (!bh)
- return retval;
+
rec_len = OCFS2_DIR_REC_LEN(namelen);
offset = 0;
- de = (struct ocfs2_dir_entry *) bh->b_data;
+ de = (struct ocfs2_dir_entry *) insert_bh->b_data;
while (1) {
- if ((char *)de >= sb->s_blocksize + bh->b_data) {
- brelse (bh);
- bh = NULL;
- bh = ocfs_bread (handle, dir, offset >> sb->s_blocksize_bits, 1, &retval, 0);
- if (!bh)
- goto bail;
- if (dir->i_size <= offset) {
- if (dir->i_size == 0) {
- retval = -ENOENT;
- goto bail;
- }
-
- /* create next block */
- status = ocfs_journal_access(handle, bh, OCFS_JOURNAL_ACCESS_WRITE);
- de = (struct ocfs2_dir_entry *) bh->b_data;
- de->inode = 0;
- de->rec_len = le16_to_cpu(sb->s_blocksize);
- dir->i_size = offset + sb->s_blocksize;
- status = ocfs_journal_dirty(handle, bh);
-
- /* update the parent file entry file size */
- status = ocfs_journal_access(handle, parent_fe_bh, OCFS_JOURNAL_ACCESS_WRITE);
- fe = (ocfs2_dinode *) parent_fe_bh->b_data;
- fe->i_size = dir->i_size;
- status = ocfs_journal_dirty(handle, parent_fe_bh);
- } else {
- /* move to next block */
- de = (struct ocfs2_dir_entry *) bh->b_data;
- }
-
- }
- if (!ocfs_check_dir_entry (dir, de, bh, offset)) {
+ OCFS_ASSERT((char *)de < sb->s_blocksize + insert_bh->b_data);
+ /* These checks should've already been passed by the
+ * prepare function, but I guess we can leave them
+ * here anyway. */
+ if (!ocfs_check_dir_entry (dir, de, insert_bh, offset)) {
retval = -ENOENT;
goto bail;
}
@@ -1724,7 +1780,8 @@
(le16_to_cpu(de->rec_len) >= rec_len)) ||
(le16_to_cpu(de->rec_len) >=
(OCFS2_DIR_REC_LEN(de->name_len) + rec_len))) {
- status = ocfs_journal_access(handle, bh, OCFS_JOURNAL_ACCESS_WRITE);
+ status = ocfs_journal_access(handle, insert_bh,
+ OCFS_JOURNAL_ACCESS_WRITE);
/* By now the buffer is marked for journaling */
offset += le16_to_cpu(de->rec_len);
if (le64_to_cpu(de->inode)) {
@@ -1747,7 +1804,7 @@
dir->i_mtime = dir->i_ctime = CURRENT_TIME;
dir->i_version++;
- status = ocfs_journal_dirty(handle, bh);
+ status = ocfs_journal_dirty(handle, insert_bh);
retval = 0;
goto bail;
}
@@ -1755,9 +1812,10 @@
de = (struct ocfs2_dir_entry *) ((char *) de + le16_to_cpu(de->rec_len));
}
+ /* when you think about it, the assert above should prevent us
+ * from ever getting here. */
retval = -ENOSPC;
bail:
- brelse (bh);
LOG_EXIT_STATUS(retval);
return retval;
@@ -1904,7 +1962,7 @@
num++;
#warning questionable readahead stuff here
- bh = ocfs_bread(NULL, dir, b++, 0, &err, 1);
+ bh = ocfs_bread(dir, b++, &err, 1);
bh_use[ra_max] = bh;
#if 0 // ???
if (bh)
@@ -1956,44 +2014,62 @@
return ret;
}
-/*
- * ocfs_orphan_add()
- *
- */
-static int ocfs_orphan_add(ocfs_super *osb, ocfs_journal_handle *handle,
- struct inode *inode, ocfs2_dinode *fe)
+static int ocfs_blkno_stringify(u64 blkno, char **retval)
{
- struct inode *orphan_dir_inode = NULL;
- struct buffer_head *orphan_dir_bh = NULL;
- int status = 0;
char *name = NULL;
int namelen;
- ocfs2_dinode *orphan_fe;
- LOG_ENTRY_ARGS("(inode->i_ino = %lu)\n", inode->i_ino);
+ LOG_ENTRY();
- /* create a unique name here. */
+ *retval = NULL;
name = kmalloc(OCFS2_MAX_FILENAME_LENGTH+1, GFP_KERNEL);
if (!name) {
- status = -EFAIL;
- LOG_ERROR_STATUS(status);
- goto leave;
+ namelen = -ENOMEM;
+ LOG_ERROR_STATUS(namelen);
+ goto bail;
}
namelen = snprintf(name, OCFS2_MAX_FILENAME_LENGTH+1, "%llu",
- OCFS_I(inode)->ip_blkno);
+ blkno);
if (namelen <= 0) {
- if (namelen)
- status = namelen;
- else
- status = -EFAIL;
+ kfree(name);
+ if (!namelen)
+ namelen = -EFAULT;
+ LOG_ERROR_STATUS(namelen);
+ goto bail;
+ }
+
+ LOG_TRACE_ARGS("built filename '%s' for orphan dir (len=%d)\n", name,
+ namelen);
+
+ *retval = name;
+bail:
+ LOG_EXIT_STATUS(namelen);
+ return(namelen);
+}
+
+static int ocfs_prepare_orphan_dir(ocfs_super *osb,
+ ocfs_journal_handle *handle,
+ struct inode *inode,
+ char **ret_name,
+ struct buffer_head **de_bh)
+{
+ struct inode *orphan_dir_inode = NULL;
+ struct buffer_head *orphan_dir_bh = NULL;
+ int status = 0;
+ char *name = NULL;
+ int namelen;
+
+ *ret_name = NULL;
+
+ /* create a unique name here. */
+ namelen = ocfs_blkno_stringify(OCFS_I(inode)->ip_blkno, &name);
+ if (namelen < 0) {
+ status = namelen;
LOG_ERROR_STATUS(status);
goto leave;
}
- LOG_TRACE_ARGS("adding filename '%s' to orphan dir (len=%d)\n", name,
- namelen);
-
orphan_dir_inode = ocfs_get_system_file_inode(osb,
ORPHAN_DIR_SYSTEM_INODE,
-1);
@@ -2002,7 +2078,6 @@
LOG_ERROR_STATUS(status);
goto leave;
}
- ocfs_handle_add_inode(handle, orphan_dir_inode);
/* disk lock orphan dir here. */
status = ocfs_acquire_lock(osb, OCFS_LKM_EXMODE, 0,
@@ -2013,15 +2088,64 @@
}
ocfs_handle_add_lock(handle, OCFS_LKM_EXMODE, 0,
orphan_dir_inode);
+ ocfs_handle_add_inode(handle, orphan_dir_inode);
- status = __ocfs_add_entry(handle, orphan_dir_inode, name, namelen,
- inode, OCFS_I(inode)->ip_blkno,
- orphan_dir_bh);
+ status = ocfs_prepare_dir_for_insert(osb, orphan_dir_inode,
+ orphan_dir_bh, name, namelen,
+ de_bh);
if (status < 0) {
LOG_ERROR_STATUS(status);
goto leave;
}
+ *ret_name = name;
+leave:
+ if (orphan_dir_inode)
+ iput(orphan_dir_inode);
+
+ if ((status < 0) && name)
+ kfree(name);
+
+ if (orphan_dir_bh)
+ brelse(orphan_dir_bh);
+
+ LOG_EXIT_STATUS(status);
+ return(status);
+}
+
+/*
+ * ocfs_orphan_add()
+ *
+ */
+static int ocfs_orphan_add(ocfs_super *osb, ocfs_journal_handle *handle,
+ struct inode *inode, ocfs2_dinode *fe,
+ char *name, struct buffer_head *de_bh)
+{
+ struct inode *orphan_dir_inode = NULL;
+ struct buffer_head *orphan_dir_bh = NULL;
+ int status = 0;
+ int namelen;
+ ocfs2_dinode *orphan_fe;
+
+ LOG_ENTRY_ARGS("(inode->i_ino = %lu)\n", inode->i_ino);
+
+ namelen = strlen(name);
+
+ orphan_dir_inode = ocfs_get_system_file_inode(osb,
+ ORPHAN_DIR_SYSTEM_INODE,
+ -1);
+ if (!orphan_dir_inode) {
+ status = -EFAIL;
+ LOG_ERROR_STATUS(status);
+ goto leave;
+ }
+
+ status = ocfs_read_bh(osb, OCFS_I(orphan_dir_inode)->ip_blkno << osb->sb->s_blocksize_bits, &orphan_dir_bh, OCFS_BH_CACHED, orphan_dir_inode);
+ if (status < 0) {
+ LOG_ERROR_STATUS(status);
+ goto leave;
+ }
+
status = ocfs_journal_access(handle, orphan_dir_bh,
OCFS_JOURNAL_ACCESS_WRITE);
if (status < 0) {
@@ -2042,16 +2166,19 @@
goto leave;
}
+ status = __ocfs_add_entry(handle, orphan_dir_inode, name, namelen,
+ inode, OCFS_I(inode)->ip_blkno,
+ orphan_dir_bh, de_bh);
+ if (status < 0) {
+ LOG_ERROR_STATUS(status);
+ goto leave;
+ }
fe->i_flags |= OCFS2_ORPHANED_FL;
-
leave:
if (orphan_dir_inode)
iput(orphan_dir_inode);
- if (name)
- kfree(name);
-
if (orphan_dir_bh)
brelse(orphan_dir_bh);
Modified: trunk/src/namei.h
===================================================================
--- trunk/src/namei.h 2004-10-02 01:01:24 UTC (rev 1542)
+++ trunk/src/namei.h 2004-10-02 01:08:08 UTC (rev 1543)
@@ -40,5 +40,13 @@
int ocfs_orphan_del(ocfs_super *osb, ocfs_journal_handle *handle,
struct inode *orphan_dir_inode, struct inode *inode,
struct buffer_head *orphan_dir_bh);
+/*
+ * Return nonzero when directory entry 'de' is in use (its inode field is
+ * non-zero) and its name is exactly the 'len' bytes found at 'name';
+ * return 0 otherwise.
+ */
+static inline int ocfs_match (int len, const char * const name, struct ocfs2_dir_entry * de)
+{
+ int same_len = (len == de->name_len);
+ int in_use = (de->inode != 0);
+
+ /* memcmp is only reached when the length matches and the entry is live */
+ return same_len && in_use && memcmp(name, de->name, len) == 0;
+}
#endif /* OCFS2_NAMEI_H */
Modified: trunk/src/ocfs.h
===================================================================
--- trunk/src/ocfs.h 2004-10-02 01:01:24 UTC (rev 1542)
+++ trunk/src/ocfs.h 2004-10-02 01:08:08 UTC (rev 1543)
@@ -98,6 +98,13 @@
/* convenience macro */
#define OCFS_ASSERT(x) do { if (!(x)) BUG(); } while (0)
+/* Variant of OCFS_ASSERT that prints a KERN_ERR message before calling
+ * BUG(). The message says the filesystem should be remounted read-only;
+ * as written the macro only logs that and then BUG()s. */
+#define OCFS_ASSERT_RO(x) \
+ do { \
+ if (!(x)) { \
+ printk(KERN_ERR "This should make the filesystem remount RO\n"); \
+ BUG(); \
+ } \
+ } while (0)
#define BITCOUNT(x) (((BX_(x)+(BX_(x)>>4)) & 0x0F0F0F0F) % 255)
@@ -266,6 +273,8 @@
(!strcmp((ptr)->i_signature, OCFS2_INODE_SIGNATURE))
#define IS_VALID_EXTENT_BLOCK(ptr) \
(!strcmp((ptr)->h_signature, OCFS2_EXTENT_BLOCK_SIGNATURE))
+#define IS_VALID_GROUP_DESC(ptr) \
+ (!strcmp((ptr)->bg_signature, OCFS2_GROUP_DESC_SIGNATURE))
/*
** Macros
@@ -360,7 +369,8 @@
__u32 uncommitted_holders;
__u8 lock_type;
struct rw_semaphore lock;
- unsigned long readonly_state;
+#warning readonly_state is an int, *_bit requires unsigned long
+ int readonly_state;
ocfs_node_map readonly_map;
};
@@ -815,15 +825,15 @@
/* these three used as 'type' in ocfs_bitmap_update */
#define DISK_ALLOC_EXTENT_NODE 2
#define DISK_ALLOC_VOLUME 3
-#define DISK_ALLOC_INODE 4
/* a bitmap update, currently used for freeing bits */
typedef struct ocfs_bitmap_update
{
- __u64 length;
- __u64 file_off;
- __u32 type;
- __s16 node_num;
+ u64 length;
+ u64 file_off;
+ u32 type;
+ s16 node_num;
+ u64 blkno;
}
ocfs_bitmap_update;
Modified: trunk/src/ocfs2_fs.h
===================================================================
--- trunk/src/ocfs2_fs.h 2004-10-02 01:01:24 UTC (rev 1542)
+++ trunk/src/ocfs2_fs.h 2004-10-02 01:08:08 UTC (rev 1543)
@@ -56,6 +56,7 @@
#define OCFS2_SUPER_BLOCK_SIGNATURE "OCFSV2"
#define OCFS2_INODE_SIGNATURE "INODE01"
#define OCFS2_EXTENT_BLOCK_SIGNATURE "EXBLK01"
+#define OCFS2_GROUP_DESC_SIGNATURE "GROUP01"
/* Compatibility flags */
#define OCFS2_HAS_COMPAT_FEATURE(sb,mask) \
@@ -96,7 +97,7 @@
#define OCFS2_BITMAP_FL (0x00000080) /* Allocation bitmap */
#define OCFS2_JOURNAL_FL (0x00000100) /* Node journal */
#define OCFS2_DLM_FL (0x00000200) /* DLM area */
-
+#define OCFS2_CHAIN_FL (0x00000400) /* Chain allocator */
/* Limit of space in ocfs2_dir_entry */
#define OCFS2_MAX_FILENAME_LENGTH 255
@@ -117,16 +118,13 @@
enum {
BAD_BLOCK_SYSTEM_INODE = 0,
GLOBAL_INODE_ALLOC_SYSTEM_INODE,
- GLOBAL_INODE_ALLOC_BITMAP_SYSTEM_INODE,
DLM_SYSTEM_INODE,
#define OCFS2_FIRST_ONLINE_SYSTEM_INODE DLM_SYSTEM_INODE
GLOBAL_BITMAP_SYSTEM_INODE,
ORPHAN_DIR_SYSTEM_INODE,
#define OCFS2_LAST_GLOBAL_SYSTEM_INODE ORPHAN_DIR_SYSTEM_INODE
EXTENT_ALLOC_SYSTEM_INODE,
- EXTENT_ALLOC_BITMAP_SYSTEM_INODE,
INODE_ALLOC_SYSTEM_INODE,
- INODE_ALLOC_BITMAP_SYSTEM_INODE,
JOURNAL_SYSTEM_INODE,
LOCAL_ALLOC_SYSTEM_INODE,
NUM_SYSTEM_INODES
@@ -134,10 +132,9 @@
static char *ocfs2_system_inode_names[NUM_SYSTEM_INODES] = {
/* Global system inodes (single copy) */
- /* The first three are only used from userspace mfks/tunefs */
+ /* The first two are only used from userspace mkfs/tunefs */
[BAD_BLOCK_SYSTEM_INODE] "bad_blocks",
[GLOBAL_INODE_ALLOC_SYSTEM_INODE] "global_inode_alloc",
- [GLOBAL_INODE_ALLOC_BITMAP_SYSTEM_INODE] "global_inode_alloc_bitmap",
/* These are used by the running filesystem */
[DLM_SYSTEM_INODE] "dlm",
@@ -146,9 +143,7 @@
/* Node-specific system inodes (one copy per node) */
[EXTENT_ALLOC_SYSTEM_INODE] "extent_alloc:%04d",
- [EXTENT_ALLOC_BITMAP_SYSTEM_INODE] "extent_alloc_bitmap:%04d",
[INODE_ALLOC_SYSTEM_INODE] "inode_alloc:%04d",
- [INODE_ALLOC_BITMAP_SYSTEM_INODE] "inode_alloc_bitmap:%04d",
[JOURNAL_SYSTEM_INODE] "journal:%04d",
[LOCAL_ALLOC_SYSTEM_INODE] "local_alloc:%04d"
};
@@ -225,6 +220,12 @@
/*10*/
} ocfs2_extent_rec;
+/*
+ * On disk chain record for OCFS2. These are the elements of
+ * ocfs2_chain_list.cl_recs; each one carries the bit counts for a chain
+ * and the block number of the first group in it.
+ */
+typedef struct _ocfs2_chain_rec {
+ __u32 c_free; /* Number of free bits in this chain. */
+ __u32 c_total; /* Number of total bits in this chain */
+ __u64 c_blkno; /* Physical disk offset (blocks) of 1st group */
+} ocfs2_chain_rec;
+
/*
* On disk extent list for OCFS2 (node in the tree). Note that this
* is contained inside ocfs2_dinode or ocfs2_extent_block, so the
@@ -245,16 +246,30 @@
} ocfs2_extent_list;
/*
+ * On disk allocation chain list for OCFS2. Note that this is
+ * contained inside ocfs2_dinode, so the offsets are relative to
+ * ocfs2_dinode.id2.i_chain.
+ */
+typedef struct _ocfs2_chain_list {
+/*00*/ __u16 cl_cpg; /* Clusters per Block Group */
+ __u16 cl_bpc; /* Bits per cluster */
+ __u16 cl_count; /* Total chains in this list */
+ __u16 cl_next_free_rec; /* Next unused chain slot */
+ __u64 cl_reserved1;
+/*10*/ ocfs2_chain_rec cl_recs[0]; /* Chain records */
+} ocfs2_chain_list;
+
+/*
* On disk extent block (indirect block) for OCFS2
*/
typedef struct _ocfs2_extent_block
{
/*00*/ __u8 h_signature[8]; /* Signature for verification */
- __u64 h_suballoc_blkno; /* Node suballocator offset,
- in blocks */
+ __u64 h_reserved1;
/*10*/ __s16 h_suballoc_node; /* Node suballocator this
extent_header belongs to */
- __u16 h_reserved1;
+ __u16 h_suballoc_bit; /* Bit offset in suballocater
+ block group */
__u32 h_reserved2;
__u64 h_blkno; /* Offset on disk, in blocks */
/*20*/ __u64 h_parent_blk; /* Offset on disk, in blocks,
@@ -273,12 +288,9 @@
typedef struct _ocfs2_disk_lock
{
/*00*/ __s16 dl_master; /* Node number of current master */
- __u16 dl_reserved1;
__u8 dl_level; /* Lock level */
- __u8 dl_reserved2[3]; /* Pad to u64 */
- __u64 dl_reserved3; /* was dl_seq_num */
-/*10*/ __u32 dl_reserved4[8]; /* was dl_node_map */
-/*30*/
+ __u8 dl_reserved1;
+/*04*/
} ocfs2_disk_lock;
/*
@@ -338,45 +350,43 @@
typedef struct _ocfs2_dinode {
/*00*/ __u8 i_signature[8]; /* Signature for validation */
__u32 i_generation; /* Generation number */
- __u16 i_reserved1;
__s16 i_suballoc_node; /* Node suballocater this inode
belongs to */
-/*10*/ __u64 i_suballoc_blkno; /* Node suballocator offset,
- in blocks */
-/*18*/ ocfs2_disk_lock i_disk_lock; /* Lock structure */
-/*48*/ __u32 i_uid; /* Owner UID */
+ __u16 i_suballoc_bit; /* Bit offset in suballocater
+ block group */
+/*10*/ ocfs2_disk_lock i_disk_lock; /* Lock structure */
+/*14*/ __u32 i_clusters; /* Cluster count */
+/*18*/ __u32 i_uid; /* Owner UID */
__u32 i_gid; /* Owning GID */
-/*50*/ __u64 i_size; /* Size in bytes */
+/*20*/ __u64 i_size; /* Size in bytes */
__u16 i_mode; /* File mode */
__u16 i_links_count; /* Links count */
__u32 i_flags; /* File flags */
-/*60*/ __u64 i_atime; /* Access time */
+/*30*/ __u64 i_atime; /* Access time */
__u64 i_ctime; /* Creation time */
-/*70*/ __u64 i_mtime; /* Modification time */
+/*40*/ __u64 i_mtime; /* Modification time */
__u64 i_dtime; /* Deletion time */
-/*80*/ __u64 i_blkno; /* Offset on disk, in blocks */
- __u32 i_clusters; /* Cluster count */
- __u32 i_reserved2;
-/*90*/ __u64 i_last_eb_blk; /* Pointer to last extent
+/*50*/ __u64 i_blkno; /* Offset on disk, in blocks */
+ __u64 i_last_eb_blk; /* Pointer to last extent
block */
- __u64 i_reserved3;
-/*A0*/ __u64 i_reserved4;
- __u64 i_reserved5;
-/*B0*/ __u64 i_reserved6;
- union {
- __u64 i_pad1; /* Generic way to refer to this 64bit
- union */
+/*60*/ __u64 i_reserved1[11];
+/*B8*/ union {
+ __u64 i_pad1; /* Generic way to refer to this
+ 64bit union */
struct {
__u64 i_rdev; /* Device number */
} dev1;
- struct { /* Info for bitmap system inodes */
+ struct { /* Info for bitmap system
+ inodes */
__u32 i_used; /* Bits (ie, clusters) used */
- __u32 i_total; /* Total bits (clusters) available */
+ __u32 i_total; /* Total bits (clusters)
+ available */
} bitmap1;
} id1; /* Inode type dependant 1 */
/*C0*/ union {
ocfs2_super_block i_super;
- ocfs2_local_alloc i_lab;
+ ocfs2_local_alloc i_lab;
+ ocfs2_chain_list i_chain;
ocfs2_extent_list i_list;
} id2;
/* Actual on-disk size is one block */
@@ -394,8 +404,29 @@
/* Actual on-disk length specified by rec_len */
};
+/*
+ * On disk allocator group structure for OCFS2
+ */
+typedef struct _ocfs2_group_desc
+{
+/*00*/ __u8 bg_signature[8]; /* Signature for validation */
+ __u16 bg_size; /* Size of included bitmap in
+ bytes. */
+ __u16 bg_bits; /* Bits represented by this
+ group. */
+ __u16 bg_free_bits_count; /* Free bits count */
+ __u16 bg_chain; /* What chain I am in. */
+/*10*/ __u32 bg_generation;
+ __u32 bg_reserved1;
+ __u64 bg_next_group; /* Next group in my list, in
+ blocks */
+/*20*/ __u64 bg_parent_dinode; /* dinode which owns me, in
+ blocks */
+ __u64 bg_blkno; /* Offset on disk, in blocks */
+/*30*/ __u64 bg_reserved2[2];
+/*40*/ __u8 bg_bitmap[0];
+} ocfs2_group_desc;
-
#ifdef __KERNEL__
static inline int ocfs2_extent_recs_per_inode(struct super_block *sb)
{
@@ -407,6 +438,16 @@
return size / sizeof(struct _ocfs2_extent_rec);
}
+/*
+ * Number of ocfs2_chain_rec entries that fit between id2.i_chain.cl_recs
+ * and the end of a dinode block of sb->s_blocksize bytes.
+ */
+static inline int ocfs2_chain_recs_per_inode(struct super_block *sb)
+{
+ int avail = sb->s_blocksize -
+ offsetof(struct _ocfs2_dinode, id2.i_chain.cl_recs);
+
+ return avail / sizeof(struct _ocfs2_chain_rec);
+}
+
static inline int ocfs2_extent_recs_per_eb(struct super_block *sb)
{
int size;
@@ -426,6 +467,16 @@
return size;
}
+
+/*
+ * Bytes left in a block of sb->s_blocksize bytes once the fields that
+ * precede bg_bitmap in ocfs2_group_desc are accounted for.
+ */
+static inline int ocfs2_group_bitmap_size(struct super_block *sb)
+{
+ int remaining = sb->s_blocksize -
+ offsetof(struct _ocfs2_group_desc, bg_bitmap);
+
+ return remaining;
+}
#else
static inline int ocfs2_extent_recs_per_inode(int blocksize)
{
@@ -437,6 +488,16 @@
return size / sizeof(struct _ocfs2_extent_rec);
}
+/*
+ * Userspace (!__KERNEL__) variant: number of ocfs2_chain_rec entries that
+ * fit between id2.i_chain.cl_recs and the end of a dinode block of
+ * 'blocksize' bytes.
+ */
+static inline int ocfs2_chain_recs_per_inode(int blocksize)
+{
+ int avail = blocksize -
+ offsetof(struct _ocfs2_dinode, id2.i_chain.cl_recs);
+
+ return avail / sizeof(struct _ocfs2_chain_rec);
+}
+
static inline int ocfs2_extent_recs_per_eb(int blocksize)
{
int size;
@@ -456,6 +517,16 @@
return size;
}
+
+/*
+ * Userspace (!__KERNEL__) variant: bytes left in a block of 'blocksize'
+ * bytes once the fields that precede bg_bitmap in ocfs2_group_desc are
+ * accounted for.
+ */
+static inline int ocfs2_group_bitmap_size(int blocksize)
+{
+ int remaining = blocksize -
+ offsetof(struct _ocfs2_group_desc, bg_bitmap);
+
+ return remaining;
+}
#endif /* __KERNEL__ */
Modified: trunk/src/ocfs_journal.h
===================================================================
--- trunk/src/ocfs_journal.h 2004-10-02 01:01:24 UTC (rev 1542)
+++ trunk/src/ocfs_journal.h 2004-10-02 01:08:08 UTC (rev 1543)
@@ -185,7 +185,10 @@
/* We know how many buffers (max) we'll have for this
* transaction so we can just allocate an array of pointers at
- * the same time as the creation of this handle. */
+ * the same time as the creation of this handle.
+ *
+ * NOTE: 'num_buffs' will always be zero if your transaction
+ * is roll-forward only. */
int num_buffs;
struct buffer_head **buffs;
@@ -216,8 +219,8 @@
#define OCFS_HANDLE_SYNC 2
/* This is really the right way to do things, but until we fix all the
* code, it's a performance improvement for a handle which never
- * aborts. Should be set before passing any buffers to
- * journal_access! */
+ * aborts (always roll-forward). Should be set before passing any
+ * buffers to journal_access! */
#define OCFS_HANDLE_ALWAYS_COMMITS 4
static inline void ocfs_handle_free_all_copyout(ocfs_journal_handle *handle)
@@ -249,8 +252,9 @@
}
static inline int ocfs_handle_add_commit_bits(ocfs_journal_handle *handle,
- __u32 len, __u32 fileoff,
- __u32 nodenum, __u32 type)
+ u32 len, u32 fileoff,
+ u32 nodenum, u64 blkno,
+ u32 type)
{
int ret = 0;
if (!handle->commit_bits)
@@ -262,7 +266,7 @@
ret = ocfs_add_to_bitmap_free_head(handle->osb,
handle->commit_bits,
len, fileoff,
- nodenum, type);
+ nodenum, blkno, type);
return ret;
}
@@ -301,6 +305,9 @@
* this handle.
* ocfs_commit_trans - Complete a handle.
* ocfs_abort_trans - Abort a handle.
+ * ocfs_extend_trans - Extend a handle by nblocks credits. This may
+ * commit the handle to disk in the process, but will
+ * not release any locks taken during the transaction.
* ocfs_journal_access - Notify the handle that we want to journal this
* buffer. Will have to call ocfs_journal_dirty once
* we've actually dirtied it. Type is one of . or .
@@ -321,6 +328,9 @@
int max_buffs);
void ocfs_commit_trans(ocfs_journal_handle *handle);
void ocfs_abort_trans(ocfs_journal_handle *handle);
+int ocfs_extend_trans(ocfs_journal_handle *handle,
+ int nblocks);
+
/*
* Create access is for when we get a newly created buffer and we're
* not gonna read it off disk, but rather fill it ourselves. If it's
@@ -395,6 +405,9 @@
(OCFS_SINGLE_FILE_EXTEND_CREDITS * 4) + \
OCFS_JOURNAL_FUZZ_CREDITS)
+/* local alloc metadata change + main bitmap updates */
+#define OCFS_WINDOW_MOVE_CREDITS (1 + 8 + OCFS_JOURNAL_FUZZ_CREDITS)
+
/* single file metadata updates * 3 because we might have to extend
* the file alloc and file alloc bitmap files + possible update to
* local bitmap. + 2 blocks for bits to set in the metadata alloc
@@ -402,18 +415,15 @@
#define OCFS_FILE_EXTEND_CREDITS (OCFS_SINGLE_FILE_EXTEND_CREDITS * 3 \
+ 1 + 2 + 8 + OCFS_JOURNAL_FUZZ_CREDITS)
-
/* Now that we journal bitmap writes, this might get a bit more
* complicated, use this function to determine how many credits are
 * needed for an extend. The caller passes the number of bits (clusters)
 * wanted rather than a byte count.
*/
static inline int ocfs_calc_extend_credits(struct super_block *sb,
- __u32 bytes_wanted)
+ __u32 bits_wanted)
{
int bitmap_blocks, sysfile_bitmap_blocks;
- unsigned int bits_wanted;
- bits_wanted = ocfs_clusters_for_bytes(sb, bytes_wanted);
/* take advantage of the fact that we always allocate in one
* large chunk. */
bitmap_blocks = ocfs_blocks_for_bits(sb, bits_wanted) + 1;
@@ -434,16 +444,25 @@
return (bitmap_blocks + sysfile_bitmap_blocks + OCFS_FILE_EXTEND_CREDITS);
}
-static inline int ocfs_calc_symlink_credits(struct super_block *sb,
- int size)
+static inline int ocfs_calc_symlink_credits(struct super_block *sb)
{
/* get our fuzz from mknod and extend credits. */
int blocks = OCFS_MKNOD_CREDITS + 1;
- blocks += ocfs_calc_extend_credits(sb, size);
+ blocks += ocfs_calc_extend_credits(sb, 1);
+ blocks += ocfs_clusters_to_blocks(sb, 1);
- blocks += (size + sb->s_blocksize - 1) >> sb->s_blocksize_bits;
+ return(blocks);
+}
+static inline int ocfs_calc_group_alloc_credits(struct super_block *sb,
+ unsigned int cpg)
+{
+ int blocks;
+ int bitmap_blocks = ocfs_blocks_for_bits(sb, cpg) + 1;
+ /* parent inode update + new block group header + bitmap inode update
+ + bitmap blocks affected */
+ blocks = 1 + 1 + 1 + bitmap_blocks + OCFS_JOURNAL_FUZZ_CREDITS;
return(blocks);
}
@@ -452,7 +471,7 @@
/* the file entry + the locknode + possibily the parent dirnode + fuzz */
/* ok, these credits are messed up and need to be re calculated. */
-#define OCFS_FILE_DELETE_CREDITS (1 + 1 + 1 + OCFS_JOURNAL_FUZZ_CREDITS)
+#define OCFS_FILE_DELETE_CREDITS (2 + 1 + 1 + 1 + OCFS_JOURNAL_FUZZ_CREDITS)
/* fe change, locknode change, dirnode head, times two plus a possible
* delete, plus a possible dirnode addition in insert_file, and fuzz */
Modified: trunk/src/ocfs_log.h
===================================================================
--- trunk/src/ocfs_log.h 2004-10-02 01:01:24 UTC (rev 1542)
+++ trunk/src/ocfs_log.h 2004-10-02 01:08:08 UTC (rev 1543)
@@ -112,8 +112,8 @@
#define OCFS_DEBUG_CONTEXT_FILE 0x00000200 /* file.c */
#define OCFS_DEBUG_CONTEXT_INODE 0x00000400 /* inode.c */
#define OCFS_DEBUG_CONTEXT_JOURNAL 0x00000800 /* journal.c */
-#define OCFS_DEBUG_CONTEXT_UNUSED1 0x00001000 /* */
-#define OCFS_DEBUG_CONTEXT_UNUSED2 0x00002000 /* */
+#define OCFS_DEBUG_CONTEXT_CHAINALLOC 0x00001000 /* */
+#define OCFS_DEBUG_CONTEXT_LOCALALLOC 0x00002000 /* */
#define OCFS_DEBUG_CONTEXT_SYSFILE 0x00004000 /* sysfile.c */
#define OCFS_DEBUG_CONTEXT_VOLCFG 0x00008000 /* volcfg.c */
#define OCFS_DEBUG_CONTEXT_DCACHE 0x00010000 /* dcache.c */
Added: trunk/src/suballoc.c
===================================================================
--- trunk/src/suballoc.c 2004-10-02 01:01:24 UTC (rev 1542)
+++ trunk/src/suballoc.c 2004-10-02 01:08:08 UTC (rev 1543)
@@ -0,0 +1,1091 @@
+/* -*- mode: c; c-basic-offset: 8; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
+ * suballoc.c
+ *
+ * metadata alloc and free
+ * Inspired by ext3 block groups.
+ *
+ * Copyright (C) 2002, 2004 Oracle. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ *
+ * Authors: Mark Fasheh, Kurt Hackel, Joel Becker, Sunil Mushran,
+ * Wim Coekaerts, Manish Singh
+ */
+
+#include "ocfs_compat.h"
+
+#include <linux/fs.h>
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/highmem.h>
+
+#include "ocfs_log.h"
+#include "ocfs.h"
+
+#include "alloc.h"
+#include "dlm.h"
+#include "util.h"
+#include "suballoc.h"
+#include "sysfile.h"
+
+#include "ocfs_journal.h"
+#include "buffer_head_io.h"
+
+#define OCFS_DEBUG_CONTEXT OCFS_DEBUG_CONTEXT_CHAINALLOC
+
+static inline void debug_bg(ocfs2_group_desc *bg);
+static inline void debug_suballoc_inode(ocfs2_dinode *fe);
+static inline u16 ocfs2_find_victim_chain(ocfs2_chain_list *cl);
+static int ocfs2_block_group_fill(ocfs_journal_handle *handle,
+ struct inode *alloc_inode,
+ struct buffer_head *bg_bh,
+ u64 group_blkno,
+ u16 my_chain,
+ ocfs2_chain_list *cl);
+static int ocfs_block_group_alloc(ocfs_super *osb,
+ struct inode *alloc_inode,
+ struct buffer_head *bh);
+static int ocfs_reserve_suballoc_bits(ocfs_super *osb,
+ ocfs_journal_handle *handle,
+ ocfs2_alloc_context *ac);
+static int ocfs_claim_suballoc_bits(ocfs_super *osb,
+ ocfs_journal_handle *handle,
+ ocfs2_alloc_context *ac,
+ u32 bits_wanted,
+ u16 *bit_off,
+ unsigned int *num_bits,
+ u64 *bg_blkno);
+static int ocfs_block_group_find_clear_bits(ocfs_super *osb,
+ ocfs2_group_desc *bg,
+ unsigned int bits_wanted,
+ u16 *bit_off,
+ u16 *bits_found);
+static inline int ocfs_block_group_set_bits(ocfs_journal_handle *handle,
+ ocfs2_group_desc *bg,
+ struct buffer_head *group_bh,
+ unsigned int bit_off,
+ unsigned int num_bits);
+static int ocfs_relink_block_group(ocfs_journal_handle *handle,
+ struct buffer_head *fe_bh,
+ struct buffer_head *bg_bh,
+ struct buffer_head *prev_bg_bh,
+ u16 chain);
+static inline int ocfs_block_group_reasonably_empty(ocfs2_group_desc *bg);
+static inline u64 ocfs_which_suballoc_group(u64 block, unsigned int bit);
+
+/*
+ * Initialize the descriptor block of a brand new block group.
+ *
+ * handle:      running transaction the descriptor is journaled under
+ * alloc_inode: the suballocator inode that will own the group
+ * bg_bh:       buffer for the descriptor; must be block 'group_blkno'
+ * group_blkno: block number of the descriptor
+ * my_chain:    index of the chain record the group is being added to
+ * cl:          chain list of the allocator (read only here)
+ *
+ * The descriptor's bg_next_group is set to the current head of chain
+ * 'my_chain'; this function does not modify 'cl' itself.
+ *
+ * Returns 0 on success or the negative status of the failing journal
+ * call.
+ */
+static int ocfs2_block_group_fill(ocfs_journal_handle *handle,
+ struct inode *alloc_inode,
+ struct buffer_head *bg_bh,
+ u64 group_blkno,
+ u16 my_chain,
+ ocfs2_chain_list *cl)
+{
+ int status = 0;
+ ocfs2_group_desc *bg = (ocfs2_group_desc *) bg_bh->b_data;
+ struct super_block * sb = alloc_inode->i_sb;
+
+ LOG_ENTRY();
+
+ OCFS_ASSERT(((unsigned long long) bg_bh->b_blocknr) == group_blkno);
+
+ /* the buffer is never read from disk; its whole content is
+ * written below, so mark it up to date before journaling it. */
+ set_buffer_uptodate(bg_bh);
+ SET_BH_SEQNUM(alloc_inode, bg_bh);
+ status = ocfs_journal_access(handle,
+ bg_bh,
+ OCFS_JOURNAL_ACCESS_CREATE);
+ if (status < 0) {
+ LOG_ERROR_STATUS(status);
+ goto bail;
+ }
+
+ memset(bg, 0, sb->s_blocksize);
+ strcpy (bg->bg_signature, OCFS2_GROUP_DESC_SIGNATURE);
+ bg->bg_generation = cpu_to_le32(alloc_inode->i_generation);
+ bg->bg_size = ocfs2_group_bitmap_size(sb);
+ /* total bits in the group = clusters per group * bits per cluster */
+ bg->bg_bits = (u32) cl->cl_cpg * (u32) cl->cl_bpc;
+ bg->bg_chain = my_chain;
+ /* link in front of whatever group currently heads the chain */
+ bg->bg_next_group = cl->cl_recs[my_chain].c_blkno;
+ bg->bg_parent_dinode = OCFS_I(alloc_inode)->ip_blkno;
+ bg->bg_blkno = group_blkno;
+ /* set the 1st bit in the bitmap to account for the descriptor block */
+ set_bit(0, bg->bg_bitmap);
+ bg->bg_free_bits_count = bg->bg_bits - 1;
+
+ status = ocfs_journal_dirty(handle, bg_bh);
+ if (status < 0)
+ LOG_ERROR_STATUS(status);
+#warning "we need to zero out the other blocks in the group! (only inode alloc?)"
+bail:
+ LOG_EXIT_STATUS(status);
+ return status;
+}
+
+/*
+ * Return the index of the chain record with the smallest c_total among
+ * the first cl_count records.  Ties go to the lowest index.  The block
+ * group allocator uses this to choose which chain a new group joins.
+ */
+static inline u16 ocfs2_find_smallest_chain(ocfs2_chain_list *cl)
+{
+	u16 smallest = 0;
+	u16 idx;
+
+	for (idx = 0; idx < cl->cl_count; idx++) {
+		if (cl->cl_recs[smallest].c_total > cl->cl_recs[idx].c_total)
+			smallest = idx;
+	}
+
+	return smallest;
+}
+#ifdef OCFS_BG_ZERO
+/*
+ * Start writing zeros to every block of a newly claimed block group
+ * except the first one, which holds the group descriptor.
+ *
+ * osb:      the mounted volume
+ * bit_off:  cluster offset at which the group starts
+ * clusters: number of clusters in the group
+ *
+ * Returns a kmalloc'd array with one locked, submitted buffer head per
+ * zeroed block.  The caller must hand it to
+ * ocfs_block_group_zero_wait(), which waits for the writes, releases
+ * the buffers and frees the array.  Returns NULL on failure; in that
+ * case no buffer references are left behind.
+ */
+static struct buffer_head **ocfs_block_group_zero_start(ocfs_super *osb,
+							u32 bit_off,
+							u16 clusters)
+{
+	struct buffer_head **bhs = NULL;
+	unsigned int blocks;
+	unsigned int i;
+	u64 blkno;
+
+	LOG_ENTRY();
+
+	/* every block in the group but the descriptor */
+	blocks = ocfs_clusters_to_blocks(osb->sb, (u32) clusters) - (u64) 1;
+	bhs = kmalloc(blocks * sizeof(struct buffer_head *), GFP_KERNEL);
+	if (!bhs) {
+		LOG_ERROR_STATUS(-ENOMEM);
+		goto bail;
+	}
+	memset(bhs, 0, blocks * sizeof(struct buffer_head *));
+
+	/* first block after the descriptor */
+	blkno = ocfs_clusters_to_blocks(osb->sb, bit_off) + (u64) 1;
+	for (i = 0; i < blocks; i++) {
+		bhs[i] = sb_getblk(osb->sb, ((u64) i + blkno));
+		if (!bhs[i]) {
+			LOG_ERROR_STATUS(-EIO);
+			/*
+			 * Buffers 0..i-1 are already submitted: wait for
+			 * their I/O and drop our references before the
+			 * array that tracks them goes away.
+			 */
+			while (i--) {
+				wait_on_buffer(bhs[i]);
+				brelse(bhs[i]);
+			}
+			kfree(bhs);
+			bhs = NULL;
+			goto bail;
+		}
+		lock_buffer(bhs[i]);
+		OCFS_ASSERT(!buffer_jbd(bhs[i]));
+
+		memset(bhs[i]->b_data, 0, osb->sb->s_blocksize);
+		set_buffer_uptodate(bhs[i]);
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
+		clear_buffer_dirty(bhs[i]);
+#else
+		mark_buffer_clean(bhs[i]);
+#endif
+
+		bhs[i]->b_end_io = ocfs_end_buffer_io_sync;
+		submit_bh(WRITE, bhs[i]);
+	}
+bail:
+	LOG_EXIT();
+	return bhs;
+}
+
+/*
+ * Finish the zeroing begun by ocfs_block_group_zero_start(): wait for
+ * each submitted buffer, release it, then free the array.
+ *
+ * osb:      the mounted volume
+ * bhs:      array returned by ocfs_block_group_zero_start()
+ * clusters: the same cluster count that was passed to the start call;
+ *           it is used to recompute the number of array entries
+ */
+static void ocfs_block_group_zero_wait(ocfs_super *osb,
+				       struct buffer_head **bhs,
+				       u16 clusters)
+{
+	unsigned int blocks =
+		ocfs_clusters_to_blocks(osb->sb, (u32) clusters) - (u64) 1;
+
+	/* walk the array back to front; does nothing when blocks == 0 */
+	while (blocks--) {
+		wait_on_buffer(bhs[blocks]);
+		brelse(bhs[blocks]);
+	}
+
+	kfree(bhs);
+}
+#endif
+/*
+ * Grow a chain allocator by one block group.
+ *
+ * osb:         the mounted volume
+ * alloc_inode: the suballocator inode being grown
+ * bh:          buffer holding alloc_inode's on-disk dinode
+ *
+ * Reserves and claims cl_cpg clusters, writes a fresh group descriptor
+ * into the first block of that space, links the group at the head of
+ * the chain with the fewest total bits and updates the dinode and the
+ * in-memory inode to match.  All of this runs in its own transaction.
+ *
+ * We expect the block group allocator to already be locked.
+ *
+ * Returns 0 on success or a negative error code.
+ */
+static int ocfs_block_group_alloc(ocfs_super *osb,
+				  struct inode *alloc_inode,
+				  struct buffer_head *bh)
+{
+	int status, credits;
+	ocfs2_dinode *fe = (ocfs2_dinode *) bh->b_data;
+	ocfs2_chain_list *cl;
+	ocfs2_alloc_context *ac = NULL;
+	ocfs_journal_handle *handle = NULL;
+	u32 bit_off, num_bits;
+	u16 alloc_rec;
+	u64 bg_blkno;
+	struct buffer_head *bg_bh = NULL;
+	ocfs2_group_desc *bg;
+#ifdef OCFS_BG_ZERO
+	struct buffer_head **zero_bhs = NULL;
+#endif
+
+	LOG_ENTRY();
+
+	handle = ocfs_alloc_handle(osb);
+	if (!handle) {
+		status = -ENOMEM;
+		LOG_ERROR_STATUS(status);
+		goto bail;
+	}
+
+	cl = &fe->id2.i_chain;
+	status = ocfs_reserve_bits(osb,
+				   handle,
+				   cl->cl_cpg,
+				   &ac);
+	if (status < 0) {
+		LOG_ERROR_STATUS(status);
+		goto bail;
+	}
+
+	credits = ocfs_calc_group_alloc_credits(osb->sb, cl->cl_cpg);
+	handle = ocfs_start_trans(osb, handle, credits);
+	if (!handle) {
+		status = -ENOMEM;
+		LOG_ERROR_STATUS(status);
+		goto bail;
+	}
+	ocfs_handle_set_always_commits(handle, 1);
+
+	status = ocfs_claim_bits(osb,
+				 handle,
+				 ac,
+				 cl->cl_cpg,
+				 &bit_off,
+				 &num_bits);
+	if (status < 0) {
+		LOG_ERROR_STATUS(status);
+		goto bail;
+	}
+
+#ifdef OCFS_BG_ZERO
+	/*
+	 * The helper returns the buffer array itself (NULL on failure)
+	 * and has already logged the precise cause, so only a generic
+	 * status is available here.
+	 */
+	zero_bhs = ocfs_block_group_zero_start(osb, bit_off, cl->cl_cpg);
+	if (!zero_bhs) {
+		status = -ENOMEM;
+		LOG_ERROR_STATUS(status);
+		goto bail;
+	}
+#endif
+	alloc_rec = ocfs2_find_smallest_chain(cl);
+
+	/* setup the group */
+	bg_blkno = ocfs_clusters_to_blocks(osb->sb, bit_off);
+	LOG_TRACE_ARGS("new descriptor, record %u, at block %llu\n",
+		       alloc_rec, bg_blkno);
+
+	bg_bh = sb_getblk(osb->sb, bg_blkno);
+	if (!bg_bh) {
+		status = -EIO;
+		LOG_ERROR_STATUS(status);
+		goto bail;
+	}
+
+	status = ocfs2_block_group_fill(handle,
+					alloc_inode,
+					bg_bh,
+					bg_blkno,
+					alloc_rec,
+					cl);
+	if (status < 0) {
+		LOG_ERROR_STATUS(status);
+		goto bail;
+	}
+
+	bg = (ocfs2_group_desc *) bg_bh->b_data;
+
+	status = ocfs_journal_access(handle, bh, OCFS_JOURNAL_ACCESS_WRITE);
+	if (status < 0) {
+		LOG_ERROR_STATUS(status);
+		goto bail;
+	}
+
+	/* the new group becomes the head of its chain */
+	cl->cl_recs[alloc_rec].c_free += bg->bg_free_bits_count;
+	cl->cl_recs[alloc_rec].c_total += bg->bg_bits;
+	cl->cl_recs[alloc_rec].c_blkno = bg_blkno;
+	if (cl->cl_next_free_rec < cl->cl_count)
+		cl->cl_next_free_rec++;
+
+	fe->id1.bitmap1.i_used += (bg->bg_bits - bg->bg_free_bits_count);
+	fe->id1.bitmap1.i_total += bg->bg_bits;
+	fe->i_clusters += cl->cl_cpg;
+
+	status = ocfs_journal_dirty(handle, bh);
+	if (status < 0) {
+		LOG_ERROR_STATUS(status);
+		goto bail;
+	}
+
+	/* mirror the new on-disk sizes into the in-memory inode */
+	down (&(OCFS_I(alloc_inode)->ip_sem));
+	OCFS_I(alloc_inode)->ip_alloc_size =
+		(u64)fe->i_clusters << osb->s_clustersize_bits;
+	fe->i_size = OCFS_I(alloc_inode)->ip_alloc_size;
+	OCFS_I(alloc_inode)->u.ip_bitinfo.used_bits = fe->id1.bitmap1.i_used;
+	OCFS_I(alloc_inode)->u.ip_bitinfo.total_bits = fe->id1.bitmap1.i_total;
+	up (&(OCFS_I(alloc_inode)->ip_sem));
+	alloc_inode->i_size = fe->i_size;
+	alloc_inode->i_blocks = (alloc_inode->i_size + osb->sb->s_blocksize - 1) >> osb->sb->s_blocksize_bits;
+
+	status = 0;
+bail:
+#ifdef OCFS_BG_ZERO
+	if (zero_bhs)
+		ocfs_block_group_zero_wait(osb, zero_bhs, cl->cl_cpg);
+#endif
+	if (handle)
+		ocfs_commit_trans(handle);
+
+	if (ac)
+		ocfs_free_alloc_context(ac);
+
+	if (bg_bh)
+		brelse(bg_bh);
+
+	LOG_EXIT_STATUS(status);
+	return status;
+}
+
+/*
+ * Prepare an allocation context for claiming bits from the chain
+ * allocator ac->ac_inode.
+ *
+ * Takes an exclusive lock on the allocator inode and registers both
+ * the lock and the inode with 'handle'.  If the allocator has fewer
+ * free bits than ac->ac_bits_wanted it is grown by one block group.
+ * On success the allocator's dinode buffer is stored in ac->ac_bh
+ * with its own reference.
+ *
+ * Must be called before the transaction on 'handle' has been started
+ * (asserted below).
+ *
+ * Returns 0 on success or a negative error code.
+ */
+static int ocfs_reserve_suballoc_bits(ocfs_super *osb,
+ ocfs_journal_handle *handle,
+ ocfs2_alloc_context *ac)
+{
+ int status;
+ u32 bits_wanted = ac->ac_bits_wanted;
+ struct inode *alloc_inode = ac->ac_inode;
+ struct buffer_head *bh = NULL;
+ ocfs2_dinode *fe;
+
+ LOG_ENTRY();
+
+ OCFS_ASSERT(!(handle->flags & OCFS_HANDLE_STARTED));
+
+ status = ocfs_acquire_lock(osb, OCFS_LKM_EXMODE,
+ 0, &bh, alloc_inode);
+ if (status < 0) {
+ /* an interrupted lock request is not worth logging */
+ if (status != -EINTR)
+ LOG_ERROR_STATUS (status);
+ goto bail;
+ }
+ ocfs_handle_add_lock(handle, OCFS_LKM_EXMODE,
+ 0, alloc_inode);
+ ocfs_handle_add_inode(handle, alloc_inode);
+
+ fe = (ocfs2_dinode *) bh->b_data;
+ OCFS_ASSERT_RO(IS_VALID_FILE_ENTRY(fe));
+ OCFS_ASSERT_RO(fe->i_flags & OCFS2_CHAIN_FL);
+
+ /* not enough free bits left: add one more block group */
+ if (bits_wanted > (le32_to_cpu(fe->id1.bitmap1.i_total) -
+ le32_to_cpu(fe->id1.bitmap1.i_used))) {
+ status = ocfs_block_group_alloc(osb, alloc_inode, bh);
+ if (status < 0) {
+ LOG_ERROR_STATUS(status);
+ goto bail;
+ }
+ /* You should never ask for this much metadata */
+ OCFS_ASSERT(bits_wanted <=
+ (le32_to_cpu(fe->id1.bitmap1.i_total)
+ - le32_to_cpu(fe->id1.bitmap1.i_used)));
+ }
+
+ /* extra reference for ac->ac_bh; the brelse() below drops ours */
+ get_bh(bh);
+ ac->ac_bh = bh;
+ status = 0;
+bail:
+ if (bh)
+ brelse(bh);
+
+ LOG_EXIT_STATUS(status);
+ return status;
+}
+
+/*
+ * Allocate an allocation context for extent metadata and reserve space
+ * in the extent suballocator.
+ *
+ * osb:    the mounted volume
+ * handle: journal handle the later claim will run under; its
+ *         transaction must not have been started yet
+ * fe:     dinode about to be extended, used to size the reservation
+ * ac:     out: the new context on success, NULL on failure
+ *
+ * Returns 0 on success or a negative error code.  On success the
+ * caller owns *ac and releases it with ocfs_free_alloc_context().
+ */
+int ocfs_reserve_new_metadata(ocfs_super *osb,
+			      ocfs_journal_handle *handle,
+			      ocfs2_dinode *fe,
+			      ocfs2_alloc_context **ac)
+{
+	int status;
+	struct inode *alloc_inode = NULL;
+
+	/* pairs with the LOG_EXIT_STATUS() at the end of the function */
+	LOG_ENTRY();
+
+	*ac = kmalloc(sizeof(ocfs2_alloc_context), GFP_KERNEL);
+	if (!(*ac)) {
+		status = -ENOMEM;
+		LOG_ERROR_STATUS(status);
+		goto bail;
+	}
+	memset(*ac, 0, sizeof(ocfs2_alloc_context));
+	/* Our file data alloc path is such a mess that I really feel
+	 * comfortable just always over-reserving here. */
+	(*ac)->ac_bits_wanted = 2 * ocfs2_extend_meta_needed(fe);
+	(*ac)->ac_handle = handle;
+	(*ac)->ac_which = OCFS_AC_USE_META;
+
+#ifndef OCFS_USE_ALL_METADATA_SUBALLOCATORS
+	alloc_inode = ocfs_get_system_file_inode(osb, EXTENT_ALLOC_SYSTEM_INODE, 0);
+#else
+	alloc_inode = ocfs_get_system_file_inode(osb, EXTENT_ALLOC_SYSTEM_INODE, osb->node_num);
+#endif
+	if (!alloc_inode) {
+		status = -ENOMEM;
+		LOG_ERROR_STATUS(status);
+		goto bail;
+	}
+
+	/* the context keeps its own inode reference; ours is dropped below */
+	(*ac)->ac_inode = igrab(alloc_inode);
+
+	status = ocfs_reserve_suballoc_bits(osb, handle, (*ac));
+	if (status < 0) {
+		LOG_ERROR_STATUS(status);
+		goto bail;
+	}
+
+	status = 0;
+bail:
+	if ((status < 0) && *ac) {
+		ocfs_free_alloc_context(*ac);
+		*ac = NULL;
+	}
+
+	if (alloc_inode)
+		iput(alloc_inode);
+
+	LOG_EXIT_STATUS(status);
+	return status;
+}
+
+/*
+ * Allocate an allocation context for a single new inode and reserve
+ * one bit in this node's inode suballocator.
+ *
+ * osb:    the mounted volume
+ * handle: journal handle the later claim will run under; its
+ *         transaction must not have been started yet
+ * ac:     out: the new context on success, NULL on failure
+ *
+ * Returns 0 on success or a negative error code.  On success the
+ * caller owns *ac and releases it with ocfs_free_alloc_context().
+ */
+int ocfs_reserve_new_inode(ocfs_super *osb,
+			   ocfs_journal_handle *handle,
+			   ocfs2_alloc_context **ac)
+{
+	int status;
+	struct inode *alloc_inode = NULL;
+
+	/* pairs with the LOG_EXIT_STATUS() at the end of the function */
+	LOG_ENTRY();
+
+	*ac = kmalloc(sizeof(ocfs2_alloc_context), GFP_KERNEL);
+	if (!(*ac)) {
+		status = -ENOMEM;
+		LOG_ERROR_STATUS(status);
+		goto bail;
+	}
+	memset(*ac, 0, sizeof(ocfs2_alloc_context));
+	(*ac)->ac_bits_wanted = 1;
+	(*ac)->ac_handle = handle;
+	(*ac)->ac_which = OCFS_AC_USE_INODE;
+
+	alloc_inode = ocfs_get_system_file_inode(osb, INODE_ALLOC_SYSTEM_INODE, osb->node_num);
+	if (!alloc_inode) {
+		status = -ENOMEM;
+		LOG_ERROR_STATUS(status);
+		goto bail;
+	}
+
+	/* the context keeps its own inode reference; ours is dropped below */
+	(*ac)->ac_inode = igrab(alloc_inode);
+
+	status = ocfs_reserve_suballoc_bits(osb, handle, *ac);
+	if (status < 0) {
+		LOG_ERROR_STATUS(status);
+		goto bail;
+	}
+
+	status = 0;
+bail:
+	if ((status < 0) && *ac) {
+		ocfs_free_alloc_context(*ac);
+		*ac = NULL;
+	}
+
+	if (alloc_inode)
+		iput(alloc_inode);
+
+	LOG_EXIT_STATUS(status);
+	return status;
+}
+
+/*
+ * Search a block group's bitmap for a run of clear bits.
+ *
+ * osb:         the mounted volume (unused here)
+ * bg:          group descriptor whose bitmap is searched
+ * bits_wanted: length of the run the caller would like
+ * bit_off:     out: first bit of the run that was found
+ * bits_found:  out: length of that run
+ *
+ * The scan stops at the first run of exactly bits_wanted clear bits.
+ * If no such run exists, the longest run seen is returned instead, so
+ * *bits_found may be smaller than bits_wanted.
+ *
+ * Returns 0 when at least one clear bit was found, -ENOSPC otherwise.
+ */
+static int ocfs_block_group_find_clear_bits(ocfs_super *osb,
+					    ocfs2_group_desc *bg,
+					    unsigned int bits_wanted,
+					    u16 *bit_off,
+					    u16 *bits_found)
+{
+	void *bitmap;
+	u16 best_offset, best_size;
+	int offset, start, found, status = 0;
+
+	OCFS_ASSERT_RO(IS_VALID_GROUP_DESC(bg));
+
+	found = start = best_offset = best_size = 0;
+	bitmap = bg->bg_bitmap;
+
+	while ((offset = find_next_zero_bit(bitmap,
+					    bg->bg_bits,
+					    start)) != -1) {
+		/* no clear bit at or after 'start' */
+		if (offset == bg->bg_bits)
+			break;
+
+		if (offset == start) {
+			/* we found a zero */
+			found++;
+			start++;
+		} else {
+			/* got a zero after some ones */
+			found = 1;
+			start = offset + 1;
+		}
+		if (found > best_size) {
+			best_size = found;
+			best_offset = start - found;
+		}
+		/* we got everything we needed */
+		if (found == bits_wanted) {
+			/* LOG_TRACE_STR("Found it all!"); */
+			break;
+		}
+	}
+
+	if (found == bits_wanted) {
+		*bit_off = start - found;
+		*bits_found = found;
+	} else if (best_size) {
+		/*
+		 * Test the run length, not its offset: a best run that
+		 * begins at bit 0 is a perfectly valid result.
+		 */
+		*bit_off = best_offset;
+		*bits_found = best_size;
+	} else {
+		status = -ENOSPC;
+		LOG_ERROR_STATUS(status);
+	}
+
+	return status;
+}
+
+/*
+ * Mark num_bits bits starting at bit_off as in use in a block group,
+ * under the given journal handle.
+ *
+ * The group must have at least num_bits free bits (asserted).  Returns
+ * the status of the last journal call: negative on failure.
+ */
+static inline int ocfs_block_group_set_bits(ocfs_journal_handle *handle,
+					    ocfs2_group_desc *bg,
+					    struct buffer_head *group_bh,
+					    unsigned int bit_off,
+					    unsigned int num_bits)
+{
+	int status;
+	void *map = bg->bg_bitmap;
+	unsigned int bit, end;
+
+	LOG_ENTRY();
+
+	OCFS_ASSERT_RO(IS_VALID_GROUP_DESC(bg));
+	OCFS_ASSERT(bg->bg_free_bits_count >= num_bits);
+
+	LOG_TRACE_ARGS("block_group_set_bits: off = %u, num = %u\n", bit_off,
+		       num_bits);
+
+	status = ocfs_journal_access(handle, group_bh,
+				     OCFS_JOURNAL_ACCESS_WRITE);
+	if (status < 0) {
+		LOG_ERROR_STATUS(status);
+		goto bail;
+	}
+
+	bg->bg_free_bits_count -= num_bits;
+
+	end = bit_off + num_bits;
+	for (bit = bit_off; bit != end; bit++)
+		set_bit(bit, map);
+
+	status = ocfs_journal_dirty(handle, group_bh);
+	if (status < 0)
+		LOG_ERROR_STATUS(status);
+
+bail:
+	LOG_EXIT_STATUS(status);
+	return status;
+}
+
+/*
+ * Return the index of the in-use chain record (below cl_next_free_rec)
+ * with the largest c_free.  Ties go to the lowest index.  At least one
+ * record must be in use.
+ */
+static inline u16 ocfs2_find_victim_chain(ocfs2_chain_list *cl)
+{
+	u16 victim = 0;
+	u16 idx;
+
+	OCFS_ASSERT(cl->cl_next_free_rec);
+
+	for (idx = 0; idx < cl->cl_next_free_rec; idx++) {
+		if (cl->cl_recs[idx].c_free > cl->cl_recs[victim].c_free)
+			victim = idx;
+	}
+
+	OCFS_ASSERT(victim < cl->cl_next_free_rec);
+	return victim;
+}
+
+/*
+ * Move block group 'bg_bh' to the head of chain 'chain'.
+ *
+ * handle:     running transaction
+ * fe_bh:      buffer of the allocator dinode that holds the chain list
+ * bg_bh:      buffer of the group descriptor being moved
+ * prev_bg_bh: buffer of the descriptor that currently points at bg_bh
+ * chain:      index of the chain record being relinked
+ *
+ * Three pointers change, each under its own journal access/dirty pair:
+ * prev skips over bg, bg points at the old chain head, and the chain
+ * record points at bg.  If any journal call fails, all three in-memory
+ * pointers are put back to the values saved on entry.
+ *
+ * Returns 0 on success or the negative status of the failing call.
+ */
+static int ocfs_relink_block_group(ocfs_journal_handle *handle,
+ struct buffer_head *fe_bh,
+ struct buffer_head *bg_bh,
+ struct buffer_head *prev_bg_bh,
+ u16 chain)
+{
+ int status;
+ /* there is a really tiny chance the journal calls could fail,
+ * but we wouldn't want inconsistent blocks in *any* case. */
+ u64 fe_ptr, bg_ptr, prev_bg_ptr;
+ ocfs2_dinode *fe = (ocfs2_dinode *) fe_bh->b_data;
+ ocfs2_group_desc *bg = (ocfs2_group_desc *) bg_bh->b_data;
+ ocfs2_group_desc *prev_bg = (ocfs2_group_desc *) prev_bg_bh->b_data;
+
+ OCFS_ASSERT_RO(IS_VALID_FILE_ENTRY(fe));
+ OCFS_ASSERT_RO(IS_VALID_GROUP_DESC(bg));
+ OCFS_ASSERT_RO(IS_VALID_GROUP_DESC(prev_bg));
+
+ printk("In suballoc %llu, chain %u, move group %llu to top, "
+ "prev = %llu\n", fe->i_blkno, chain, bg->bg_blkno,
+ prev_bg->bg_blkno);
+
+ /* remember the current links so the bail path can restore them */
+ fe_ptr = fe->id2.i_chain.cl_recs[chain].c_blkno;
+ bg_ptr = bg->bg_next_group;
+ prev_bg_ptr = prev_bg->bg_next_group;
+
+ status = ocfs_journal_access(handle, prev_bg_bh,
+ OCFS_JOURNAL_ACCESS_WRITE);
+ if (status < 0) {
+ LOG_ERROR_STATUS(status);
+ goto bail;
+ }
+
+ /* step 1: unlink bg from its current position */
+ prev_bg->bg_next_group = bg->bg_next_group;
+
+ status = ocfs_journal_dirty(handle, prev_bg_bh);
+ if (status < 0) {
+ LOG_ERROR_STATUS(status);
+ goto bail;
+ }
+
+ status = ocfs_journal_access(handle, bg_bh, OCFS_JOURNAL_ACCESS_WRITE);
+ if (status < 0) {
+ LOG_ERROR_STATUS(status);
+ goto bail;
+ }
+
+ /* step 2: bg now points at the old head of the chain */
+ bg->bg_next_group = fe->id2.i_chain.cl_recs[chain].c_blkno;
+
+ status = ocfs_journal_dirty(handle, bg_bh);
+ if (status < 0) {
+ LOG_ERROR_STATUS(status);
+ goto bail;
+ }
+
+ status = ocfs_journal_access(handle, fe_bh, OCFS_JOURNAL_ACCESS_WRITE);
+ if (status < 0) {
+ LOG_ERROR_STATUS(status);
+ goto bail;
+ }
+
+ /* step 3: the chain record now starts at bg */
+ fe->id2.i_chain.cl_recs[chain].c_blkno = bg->bg_blkno;
+
+ status = ocfs_journal_dirty(handle, fe_bh);
+ if (status < 0) {
+ LOG_ERROR_STATUS(status);
+ goto bail;
+ }
+
+ status = 0;
+bail:
+ if (status < 0) {
+ /* undo whatever subset of the three steps was applied */
+ fe->id2.i_chain.cl_recs[chain].c_blkno = fe_ptr;
+ bg->bg_next_group = bg_ptr;
+ prev_bg->bg_next_group = prev_bg_ptr;
+ }
+
+ LOG_EXIT_STATUS(status);
+ return status;
+}
+
+/*
+ * Non-zero when at least half of the group's bits are free (integer
+ * division of bg_bits by two).
+ */
+static inline int ocfs_block_group_reasonably_empty(ocfs2_group_desc *bg)
+{
+	if (bg->bg_free_bits_count >= (bg->bg_bits / 2))
+		return 1;
+
+	return 0;
+}
+
+/*
+ * Claim bits from the chain allocator described by 'ac'.
+ *
+ * will give out up to bits_wanted contiguous bits.
+ *
+ * osb:         the mounted volume
+ * handle:      running transaction; must be the handle stored in ac
+ * ac:          context set up by a reserve call (ac->ac_bh must be set)
+ * bits_wanted: upper bound on the number of bits to claim
+ * bit_off:     out: first claimed bit within the chosen group
+ * num_bits:    out: number of bits actually claimed
+ * bg_blkno:    out: block number of the chosen group's descriptor
+ *
+ * The chain with the most free bits is picked and walked until a group
+ * with free bits is found.  The dinode, the chain record and the group
+ * bitmap are then updated under 'handle'.  ac->ac_bits_given is left
+ * for the caller to update.
+ *
+ * Returns 0 on success or a negative error code.
+ */
+static int ocfs_claim_suballoc_bits(ocfs_super *osb,
+ ocfs_journal_handle *handle,
+ ocfs2_alloc_context *ac,
+ u32 bits_wanted,
+ u16 *bit_off,
+ unsigned int *num_bits,
+ u64 *bg_blkno)
+{
+ int status, groups_read;
+ struct inode *alloc_inode = ac->ac_inode;
+ struct buffer_head *group_bh = NULL;
+ struct buffer_head *prev_group_bh = NULL;
+ ocfs2_chain_list *cl;
+ ocfs2_dinode *fe;
+ ocfs2_group_desc *bg;
+ u16 chain, tmp_bits;
+ u64 next_group;
+
+ LOG_ENTRY();
+
+ OCFS_ASSERT(ac->ac_bits_given < ac->ac_bits_wanted);
+ OCFS_ASSERT(ac->ac_handle == handle);
+ OCFS_ASSERT(bits_wanted <= (ac->ac_bits_wanted - ac->ac_bits_given));
+ OCFS_ASSERT(ac->ac_bh);
+
+ fe = (ocfs2_dinode *) ac->ac_bh->b_data;
+ OCFS_ASSERT_RO(IS_VALID_FILE_ENTRY(fe));
+ OCFS_ASSERT_RO(fe->id1.bitmap1.i_used < fe->id1.bitmap1.i_total);
+
+ cl = (ocfs2_chain_list *) &fe->id2.i_chain;
+
+ chain = ocfs2_find_victim_chain(cl);
+
+ LOG_TRACE_ARGS("trying to alloc %u bits from chain %u, inode %llu\n",
+ bits_wanted, chain, OCFS_I(alloc_inode)->ip_blkno);
+
+ /* start with the group at the head of the chosen chain */
+ status = ocfs_read_bh(osb,
+ cl->cl_recs[chain].c_blkno << osb->sb->s_blocksize_bits,
+ &group_bh,
+ OCFS_BH_CACHED,
+ alloc_inode);
+ if (status < 0) {
+ LOG_ERROR_STATUS(status);
+ goto bail;
+ }
+
+ bg = (ocfs2_group_desc *) group_bh->b_data;
+ OCFS_ASSERT_RO(IS_VALID_GROUP_DESC(bg));
+
+ /* for now, the chain search is a bit simplistic. We just use
+ * the 1st group with any empty bits. */
+ groups_read = 1;
+ while (!bg->bg_free_bits_count) {
+ /*
+ * This means we've walked off the end of a chain that
+ * we thought had bits, but didn't. While this
+ * _could_ be a code error, it is more likely to be
+ * corruption on disk.
+ */
+ OCFS_ASSERT_RO(bg->bg_next_group);
+
+ /* only the immediate predecessor is kept for relinking */
+ if (prev_group_bh) {
+ brelse(prev_group_bh);
+ prev_group_bh = NULL;
+ }
+ next_group = bg->bg_next_group;
+ prev_group_bh = group_bh;
+ group_bh = NULL;
+ status = ocfs_read_bh(osb,
+ next_group << osb->sb->s_blocksize_bits,
+ &group_bh,
+ OCFS_BH_CACHED,
+ alloc_inode);
+ if (status < 0) {
+ LOG_ERROR_STATUS(status);
+ goto bail;
+ }
+ groups_read++;
+ bg = (ocfs2_group_desc *) group_bh->b_data;
+ OCFS_ASSERT_RO(IS_VALID_GROUP_DESC(bg));
+ }
+
+#define OCFS2_BG_RELINK_TRIGGER 1
+ /*
+ * Keep track of previous block descriptor read. When
+ * we find a target, if we have read more than X
+ * number of descriptors, and the target is reasonably
+ * empty, relink him to top of his chain.
+ *
+ * prev_bg->bg_next_group = bg->bg_next_group;
+ * bg->bg_next_group = fe->id2.i_chain.cl_recs[chain].c_blkno;
+ * fe->id2.i_chain.cl_recs[chain].c_blkno = bg->bg_blkno;
+ *
+ * We've read 0 extra blocks and only send one more to
+ * the transaction, yet the next guy to search has a
+ * much easier time.
+ */
+ if ((prev_group_bh)
+ && (groups_read > OCFS2_BG_RELINK_TRIGGER)
+ && (ocfs_block_group_reasonably_empty(bg))) {
+ status = ocfs_relink_block_group(handle, ac->ac_bh, group_bh,
+ prev_group_bh, chain);
+ if (status < 0) {
+ LOG_ERROR_STATUS(status);
+ goto bail;
+ }
+ }
+
+ /* the bitmap helper reports its result as u16; widen it below */
+ status = ocfs_block_group_find_clear_bits(osb,
+ bg,
+ bits_wanted,
+ bit_off,
+ &tmp_bits);
+ if (status < 0) {
+ LOG_ERROR_STATUS(status);
+ goto bail;
+ }
+ *num_bits = tmp_bits;
+
+ OCFS_ASSERT(*num_bits);
+
+ /* we found some. set the info on dinode, chainlist and then
+ * the group */
+ status = ocfs_journal_access(handle,
+ ac->ac_bh,
+ OCFS_JOURNAL_ACCESS_WRITE);
+ if (status < 0) {
+ LOG_ERROR_STATUS(status);
+ goto bail;
+ }
+
+ fe->id1.bitmap1.i_used += *num_bits;
+ cl->cl_recs[chain].c_free -= *num_bits;
+
+ status = ocfs_journal_dirty(handle,
+ ac->ac_bh);
+ if (status < 0) {
+ LOG_ERROR_STATUS(status);
+ goto bail;
+ }
+
+ status = ocfs_block_group_set_bits(handle,
+ bg,
+ group_bh,
+ *bit_off,
+ *num_bits);
+ if (status < 0) {
+ LOG_ERROR_STATUS(status);
+ goto bail;
+ }
+
+ LOG_TRACE_ARGS("Allocated %u bits from suballocator %llu\n",
+ *num_bits, fe->i_blkno);
+
+ /* keep the in-memory usage count in step with the dinode */
+ down (&(OCFS_I(alloc_inode)->ip_sem));
+ OCFS_I(alloc_inode)->u.ip_bitinfo.used_bits = fe->id1.bitmap1.i_used;
+ up (&(OCFS_I(alloc_inode)->ip_sem));
+ *bg_blkno = bg->bg_blkno;
+bail:
+ if (group_bh)
+ brelse(group_bh);
+ if (prev_group_bh)
+ brelse(prev_group_bh);
+
+ LOG_EXIT_STATUS(status);
+ return status;
+}
+
+/*
+ * Claim up to bits_wanted contiguous metadata blocks from a context
+ * obtained with ocfs_reserve_new_metadata().
+ *
+ * osb:                the mounted volume
+ * handle:             running transaction; must match ac->ac_handle
+ * ac:                 metadata allocation context
+ * bits_wanted:        most blocks to claim in this call
+ * suballoc_bit_start: out: first claimed bit within its block group
+ * num_bits:           out: number of blocks claimed, may be fewer
+ *                     than bits_wanted
+ * blkno_start:        out: block number of the first claimed block
+ *
+ * Returns 0 on success or a negative error code.
+ */
+int ocfs_claim_metadata(ocfs_super *osb,
+			ocfs_journal_handle *handle,
+			ocfs2_alloc_context *ac,
+			u32 bits_wanted,
+			u16 *suballoc_bit_start,
+			unsigned int *num_bits,
+			u64 *blkno_start)
+{
+	int status;
+	u64 bg_blkno;
+
+	/* pairs with the LOG_EXIT_STATUS() at the end of the function */
+	LOG_ENTRY();
+
+	OCFS_ASSERT(ac);
+	OCFS_ASSERT(ac->ac_bits_wanted >= (ac->ac_bits_given + bits_wanted));
+	OCFS_ASSERT(ac->ac_which == OCFS_AC_USE_META);
+	OCFS_ASSERT(ac->ac_handle == handle);
+
+	status = ocfs_claim_suballoc_bits(osb,
+					  handle,
+					  ac,
+					  bits_wanted,
+					  suballoc_bit_start,
+					  num_bits,
+					  &bg_blkno);
+	if (status < 0) {
+		LOG_ERROR_STATUS(status);
+		goto bail;
+	}
+
+	/* bit N of a group is the block N past the group descriptor */
+	*blkno_start = bg_blkno + (u64) *suballoc_bit_start;
+	ac->ac_bits_given += (*num_bits);
+	status = 0;
+bail:
+	LOG_EXIT_STATUS(status);
+	return status;
+}
+
+/*
+ * Claim the single inode block reserved by ocfs_reserve_new_inode().
+ *
+ * osb:          the mounted volume
+ * handle:       running transaction; must match ac->ac_handle
+ * ac:           inode allocation context with nothing claimed yet
+ * suballoc_bit: out: bit claimed within its block group
+ * fe_blkno:     out: block number of the new inode
+ *
+ * Returns 0 on success or a negative error code.
+ */
+int ocfs_claim_new_inode(ocfs_super *osb,
+ ocfs_journal_handle *handle,
+ ocfs2_alloc_context *ac,
+ u16 *suballoc_bit,
+ u64 *fe_blkno)
+{
+ int status;
+ unsigned int num_bits;
+ u64 bg_blkno;
+
+ LOG_ENTRY();
+
+ OCFS_ASSERT(ac);
+ OCFS_ASSERT(ac->ac_bits_given == 0);
+ OCFS_ASSERT(ac->ac_bits_wanted == 1);
+ OCFS_ASSERT(ac->ac_which == OCFS_AC_USE_INODE);
+ OCFS_ASSERT(ac->ac_handle == handle);
+
+ status = ocfs_claim_suballoc_bits(osb,
+ handle,
+ ac,
+ 1,
+ suballoc_bit,
+ &num_bits,
+ &bg_blkno);
+ if (status < 0) {
+ LOG_ERROR_STATUS(status);
+ goto bail;
+ }
+
+ OCFS_ASSERT(num_bits == 1);
+
+ /* the inode's block is the group descriptor block plus the bit */
+ *fe_blkno = bg_blkno + (u64) (*suballoc_bit);
+ ac->ac_bits_given++;
+ status = 0;
+bail:
+ LOG_EXIT_STATUS(status);
+ return status;
+}
+
+/*
+ * Given a suballocated block and its bit number within its group,
+ * return the block number of the group descriptor (block minus bit).
+ */
+static inline u64 ocfs_which_suballoc_group(u64 block, unsigned int bit)
+{
+	u64 group_blkno = block - (u64) bit;
+
+	return group_blkno;
+}
+
+/*
+ * Return 'count' bits, starting at 'start_bit', to the block group
+ * they were claimed from.
+ *
+ * osb:         the mounted volume
+ * handle:      running transaction
+ * alloc_inode: the suballocator inode
+ * alloc_bh:    buffer holding alloc_inode's dinode
+ * start_bit:   first bit to free within its group
+ * start_block: block number that corresponds to start_bit
+ * count:       number of bits to free
+ *
+ * The group descriptor is located by subtracting start_bit from
+ * start_block.  The group bitmap, the chain record and the dinode
+ * counters are updated under 'handle'.
+ *
+ * expects the suballoc inode to already be locked.
+ *
+ * Returns 0 on success or a negative error code.
+ */
+int ocfs_free_suballoc_bits(ocfs_super *osb,
+ ocfs_journal_handle *handle,
+ struct inode *alloc_inode,
+ struct buffer_head *alloc_bh,
+ unsigned int start_bit,
+ u64 start_block,
+ unsigned int count)
+{
+ int status = 0;
+ ocfs2_dinode *fe = (ocfs2_dinode *) alloc_bh->b_data;
+ ocfs2_chain_list *cl = &fe->id2.i_chain;
+ u64 bg_blkno;
+ struct buffer_head *group_bh = NULL;
+ ocfs2_group_desc *group;
+ unsigned int tmp;
+
+ LOG_ENTRY();
+
+ OCFS_ASSERT_RO(IS_VALID_FILE_ENTRY(fe));
+ /* the range being freed must fit inside a single group */
+ OCFS_ASSERT((count + start_bit)
+ <= ((u32) cl->cl_cpg * (u32) cl->cl_bpc));
+
+ bg_blkno = ocfs_which_suballoc_group(start_block, start_bit);
+ LOG_TRACE_ARGS("freeing %u bits from group %llu, starting at %u "
+ "(1st block = %llu, suballocator %llu)\n",
+ count, bg_blkno, start_bit, start_block,
+ OCFS_I(alloc_inode)->ip_blkno);
+
+ status = ocfs_read_bh(osb,
+ bg_blkno << osb->sb->s_blocksize_bits,
+ &group_bh,
+ OCFS_BH_CACHED,
+ alloc_inode);
+ if (status < 0) {
+ LOG_ERROR_STATUS(status);
+ goto bail;
+ }
+
+ group = (ocfs2_group_desc *) group_bh->b_data;
+ OCFS_ASSERT_RO(IS_VALID_GROUP_DESC(group));
+
+ status = ocfs_journal_access(handle, group_bh,
+ OCFS_JOURNAL_ACCESS_WRITE);
+ if (status < 0) {
+ LOG_ERROR_STATUS(status);
+ goto bail;
+ }
+
+ /* clear the bits from the highest down to start_bit */
+ tmp = count;
+ while(tmp--)
+ clear_bit((start_bit + tmp), group->bg_bitmap);
+ group->bg_free_bits_count += count;
+
+ status = ocfs_journal_dirty(handle, group_bh);
+ if (status < 0) {
+ LOG_ERROR_STATUS(status);
+ goto bail;
+ }
+
+ status = ocfs_journal_access(handle, alloc_bh,
+ OCFS_JOURNAL_ACCESS_WRITE);
+ if (status < 0) {
+ LOG_ERROR_STATUS(status);
+ goto bail;
+ }
+
+ /* the group descriptor records which chain it belongs to */
+ cl->cl_recs[group->bg_chain].c_free += count;
+ fe->id1.bitmap1.i_used -= count;
+
+ status = ocfs_journal_dirty(handle, alloc_bh);
+ if (status < 0) {
+ LOG_ERROR_STATUS(status);
+ goto bail;
+ }
+
+ /* keep the in-memory usage count in step with the dinode */
+ down (&(OCFS_I(alloc_inode)->ip_sem));
+ OCFS_I(alloc_inode)->u.ip_bitinfo.used_bits = fe->id1.bitmap1.i_used;
+ up (&(OCFS_I(alloc_inode)->ip_sem));
+bail:
+ if (group_bh)
+ brelse(group_bh);
+
+ LOG_EXIT_STATUS(status);
+ return status;
+}
+
+/*
+ * Debugging aid: dump every header field of a block group descriptor
+ * with printk, one field per line.
+ */
+static inline void debug_bg(ocfs2_group_desc *bg)
+{
+	printk("Block Group:\n");
+
+	/* identity */
+	printk("bg_signature: %s\n", bg->bg_signature);
+
+	/* bitmap geometry and usage */
+	printk("bg_size: %u\n", bg->bg_size);
+	printk("bg_bits: %u\n", bg->bg_bits);
+	printk("bg_free_bits_count: %u\n", bg->bg_free_bits_count);
+
+	/* chain membership and linkage */
+	printk("bg_chain: %u\n", bg->bg_chain);
+	printk("bg_generation: %u\n", bg->bg_generation);
+	printk("bg_next_group: %llu\n", bg->bg_next_group);
+	printk("bg_parent_dinode: %llu\n", bg->bg_parent_dinode);
+	printk("bg_blkno: %llu\n", bg->bg_blkno);
+}
+
+/*
+ * Debugging aid: dump a suballocator dinode with printk -- its size
+ * and bitmap counters, the chain list header, and every chain record
+ * below cl_next_free_rec.
+ */
+static inline void debug_suballoc_inode(ocfs2_dinode *fe)
+{
+	ocfs2_chain_list *cl = &fe->id2.i_chain;
+	int rec;
+
+	printk("Suballoc Inode %llu:\n", fe->i_blkno);
+	printk("i_signature: %s\n", fe->i_signature);
+	printk("i_size: %llu\n", fe->i_size);
+	printk("i_clusters: %u\n", fe->i_clusters);
+	printk("i_generation: %u\n", fe->i_generation);
+	printk("id1.bitmap1.i_used: %u\n", fe->id1.bitmap1.i_used);
+	printk("id1.bitmap1.i_total: %u\n", fe->id1.bitmap1.i_total);
+	printk("id2.i_chain.cl_cpg: %u\n", cl->cl_cpg);
+	printk("id2.i_chain.cl_bpc: %u\n", cl->cl_bpc);
+	printk("id2.i_chain.cl_count: %u\n", cl->cl_count);
+	printk("id2.i_chain.cl_next_free_rec: %u\n",
+	       cl->cl_next_free_rec);
+
+	for (rec = 0; rec < cl->cl_next_free_rec; rec++) {
+		printk("fe->id2.i_chain.cl_recs[%d].c_free: %u\n", rec, cl->cl_recs[rec].c_free);
+		printk("fe->id2.i_chain.cl_recs[%d].c_total: %u\n", rec, cl->cl_recs[rec].c_total);
+		printk("fe->id2.i_chain.cl_recs[%d].c_blkno: %llu\n", rec, cl->cl_recs[rec].c_blkno);
+	}
+}
Added: trunk/src/suballoc.h
===================================================================
--- trunk/src/suballoc.h 2004-10-02 01:01:24 UTC (rev 1542)
+++ trunk/src/suballoc.h 2004-10-02 01:08:08 UTC (rev 1543)
@@ -0,0 +1,58 @@
+/* -*- mode: c; c-basic-offset: 8; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
+ * suballoc.h
+ *
+ * Defines sub allocator api
+ *
+ * Copyright (C) 2003, 2004 Oracle. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ *
+ * Authors: Mark Fasheh, Kurt Hackel, Joel Becker, Sunil Mushran,
+ * Manish Singh, Wim Coekaerts
+ */
+
+#ifndef _CHAINALLOC_H_
+#define _CHAINALLOC_H_
+
+/*
+ * Reserve space for the extent metadata needed to extend 'fe'.  On
+ * success *ac is a new allocation context for ocfs_claim_metadata().
+ */
+int ocfs_reserve_new_metadata(ocfs_super *osb,
+ ocfs_journal_handle *handle,
+ ocfs2_dinode *fe,
+ ocfs2_alloc_context **ac);
+/*
+ * Reserve space for one new inode.  On success *ac is a new
+ * allocation context for ocfs_claim_new_inode().
+ */
+int ocfs_reserve_new_inode(ocfs_super *osb,
+ ocfs_journal_handle *handle,
+ ocfs2_alloc_context **ac);
+/*
+ * Claim the reserved inode: returns its bit within the block group in
+ * *suballoc_bit and its block number in *fe_blkno.
+ */
+int ocfs_claim_new_inode(ocfs_super *osb,
+ ocfs_journal_handle *handle,
+ ocfs2_alloc_context *ac,
+ u16 *suballoc_bit,
+ u64 *fe_blkno);
+/*
+ * Claim up to bits_wanted contiguous metadata blocks.  *num_bits is
+ * the number actually claimed, starting at block *blkno_start (bit
+ * *suballoc_bit_start of its block group).
+ */
+int ocfs_claim_metadata(ocfs_super *osb,
+ ocfs_journal_handle *handle,
+ ocfs2_alloc_context *ac,
+ u32 bits_wanted,
+ u16 *suballoc_bit_start,
+ u32 *num_bits,
+ u64 *blkno_start);
+/*
+ * Give 'count' bits back to a suballocator, beginning with start_bit,
+ * which corresponds to block start_block.  The suballocator inode
+ * must already be locked by the caller.
+ */
+int ocfs_free_suballoc_bits(ocfs_super *osb,
+ ocfs_journal_handle *handle,
+ struct inode *alloc_inode,
+ struct buffer_head *alloc_bh,
+ unsigned int start_bit,
+ u64 start_block,
+ unsigned int count);
+#endif /* _CHAINALLOC_H_ */
Modified: trunk/src/super.c
===================================================================
--- trunk/src/super.c 2004-10-02 01:01:24 UTC (rev 1542)
+++ trunk/src/super.c 2004-10-02 01:08:08 UTC (rev 1543)
@@ -56,6 +56,7 @@
#include "heartbeat.h"
#include "inode.h"
#include "journal.h"
+#include "localalloc.h"
#include "nm.h"
#include "proc.h"
#include "super.h"
Modified: trunk/src/symlink.c
===================================================================
--- trunk/src/symlink.c 2004-10-02 01:01:24 UTC (rev 1542)
+++ trunk/src/symlink.c 2004-10-02 01:08:08 UTC (rev 1543)
@@ -43,6 +43,7 @@
#include "ocfs_log.h"
#include "ocfs.h"
+#include "alloc.h"
#include "file.h"
#include "inode.h"
#include "symlink.h"
Modified: trunk/src/sysfile.c
===================================================================
--- trunk/src/sysfile.c 2004-10-02 01:01:24 UTC (rev 1542)
+++ trunk/src/sysfile.c 2004-10-02 01:08:08 UTC (rev 1543)
@@ -126,78 +126,3 @@
return inode;
}
-
-/*
- * ocfs_read_system_file()
- *
- * make sure that the bhs array is either all NULL'd out or you really
- * know what you're doing! Also, those bh's will have to be brelse'd
- * after you're done with them!
- *
- */
-int ocfs_read_system_file(ocfs_super *osb, int type, __u32 node,
- struct buffer_head *bhs[], __u64 Length)
-{
- int status = 0, i;
- __u64 ret = 0;
- struct inode *inode = NULL;
- __u32 blocks;
- __u64 off, contig_blocks, contig_bytes;
-
- LOG_ENTRY_ARGS ("type=%d, node=%u, Length = %llu\n", type, node, Length);
-
- OCFS_ASSERT(!(Length & (osb->sb->s_blocksize - 1)));
-
- inode = ocfs_get_system_file_inode(osb, type, node);
- if (!inode) {
- LOG_ERROR_STATUS(status=-EINVAL);
- goto leave;
- }
-
- off = 0;
- i = 0;
- blocks = Length >> osb->sb->s_blocksize_bits;
- while (blocks > 0) {
- status = ocfs_lookup_file_allocation(osb, off, &ret,
- (Length - off),
- &contig_bytes,
- inode, 1);
- contig_blocks =
- contig_bytes >> osb->sb->s_blocksize_bits;
- if (contig_bytes !=
- (contig_blocks << osb->sb->s_blocksize_bits)) {
- LOG_ERROR_ARGS("unaligned system file read! off=%llu, "
- "contig_bytes=%llu\n", off, contig_bytes);
- LOG_ERROR_STATUS(status=-EINVAL);
- goto leave;
- }
- if (contig_blocks > blocks) {
- contig_blocks = blocks;
- contig_bytes = blocks << osb->sb->s_blocksize_bits;
- }
- if (status == 0) {
- // found all remaining
- } else if (status == -EFAIL && contig_blocks > 0) {
- // found some
- } else {
- // failed
- LOG_ERROR_STATUS(status = -EIO);
- goto leave;
- }
- status = ocfs_read_bhs(osb, ret, contig_blocks << osb->sb->s_blocksize_bits, &(bhs[i]), OCFS_BH_CACHED, inode);
- if (status < 0) {
- LOG_ERROR_STATUS (status);
- goto leave;
- }
- i += contig_blocks;
- blocks -= contig_blocks;
- off += contig_bytes;
- }
-
-leave:
- if (inode)
- iput(inode);
-
- LOG_EXIT_STATUS (status);
- return status;
-} /* ocfs_read_system_file */
Modified: trunk/src/sysfile.h
===================================================================
--- trunk/src/sysfile.h 2004-10-02 01:01:24 UTC (rev 1542)
+++ trunk/src/sysfile.h 2004-10-02 01:08:08 UTC (rev 1543)
@@ -30,7 +30,5 @@
#define OCFS2_SYSFILE_H
struct inode * ocfs_get_system_file_inode(ocfs_super *osb, int type, __u32 node);
-int ocfs_read_system_file (ocfs_super *osb, int type, __u32 node,
- struct buffer_head *bhs[], __u64 Length);
#endif /* OCFS2_SYSFILE_H */
Modified: trunk/src/vote.c
===================================================================
--- trunk/src/vote.c 2004-10-02 01:01:24 UTC (rev 1542)
+++ trunk/src/vote.c 2004-10-02 01:08:08 UTC (rev 1543)
@@ -885,7 +885,7 @@
#warning "should we even be erroring here at all!"
LOG_ERROR_ARGS("inode %llu, vote_status=%d, vote_state=%d, "
"lockid=%llu, flags = 0x%x, asked type = %u "
- "master = %d, state = 0x%lx, type = %u\n",
+ "master = %d, state = 0x%x, type = %u\n",
OCFS_I(inode)->ip_blkno, obj->vote_status,
obj->vote_state, lock_id, flags, lock_type,
GET_INODE_LOCKRES(inode)->master_node_num,
More information about the Ocfs2-commits
mailing list