[Ocfs2-devel] [PATCH] split inode.c
Christoph Hellwig
hch at lst.de
Sat Aug 14 01:27:16 CDT 2004
two new files:
- aops.c implementing ocfs_aops
- 24io.c implementing 2.4 specific direct I/O and AIO code
Index: src/Makefile
===================================================================
--- src/Makefile (revision 1355)
+++ src/Makefile (working copy)
@@ -54,7 +54,9 @@
endif
CFILES = \
+ 24io.c \
alloc.c \
+ aops.c \
bitmap.c \
buffer_head_io.c \
dcache.c \
Index: src/inode.c
===================================================================
--- src/inode.c (revision 1355)
+++ src/inode.c (working copy)
@@ -58,17 +58,8 @@
#define OCFS_DEBUG_CONTEXT OCFS_DEBUG_CONTEXT_INODE
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)
-# include <linux/iobuf.h>
-# if LINUX_VERSION_CODE >= KERNEL_VERSION(2,4,18)
-# define free_kiovec_sz(nr, buf, bh) free_kiovec(nr, buf)
-# define alloc_kiovec_sz(nr, buf, bh) alloc_kiovec(nr, buf)
-# endif
-#endif /* for 2.6 - no more kiovec, kiobuf structures - vfs handles
- * this for us (direct i/o) */
-
-
extern struct semaphore recovery_list_sem;
+extern struct address_space_operations ocfs_aops;
typedef struct _ocfs_find_inode_args
{
@@ -78,47 +69,14 @@
}
ocfs_find_inode_args;
-static int ocfs_readpage (struct file *file, struct page *page);
-static int ocfs_prepare_write (struct file *file, struct page *page, unsigned from, unsigned to);
-static int ocfs_commit_write (struct file *file, struct page *page, unsigned from, unsigned to);
-static int ocfs_get_block (struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create);
-static int ocfs_symlink_get_block (struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create);
-
static int ocfs_read_locked_inode(struct inode *inode, ocfs_find_inode_args *args);
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
-static sector_t ocfs_bmap(struct address_space *mapping, sector_t block);
-static int ocfs_writepage (struct page *page, struct writeback_control *wbc);
-static ssize_t ocfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, loff_t offset, unsigned long nr_segs);
static int ocfs_init_locked_inode(struct inode * inode, void * opaque);
static int ocfs_find_actor (struct inode *inode, void *opaque);
#else /* 2.4 kernel */
static int ocfs_find_inode (struct inode *inode, unsigned long ino, void *opaque);
-static int ocfs_bmap(struct address_space *mapping, long block);
-static int ocfs_writepage (struct page *page);
-static int ocfs_get_block2 (struct inode *inode, long iblock, long *oblock, int len);
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,4,20) || defined(SUSE)
-static int ocfs_direct_IO (int rw, struct file *filp, struct kiobuf *iobuf, unsigned long blocknr, int blocksize);
-#elif LINUX_VERSION_CODE >= KERNEL_VERSION(2,4,10)
-static int ocfs_direct_IO (int rw, struct inode *inode, struct kiobuf *iobuf, unsigned long blocknr, int blocksize);
#endif
-#endif
-static struct address_space_operations ocfs_aops = {
- .readpage = ocfs_readpage,
- .writepage = ocfs_writepage,
- .prepare_write = ocfs_prepare_write,
- .bmap = ocfs_bmap,
- .commit_write = ocfs_commit_write,
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,4,10)
-/*
- * On a 2.4 system, we are only adding this here as a dummy basically,
- * just need open with O_DIRECT to succeed, we still call ocfs_rw_direct().
- * For a 2.6 system, this is the way a filesystem provides direct-io support.
- */
- .direct_IO = ocfs_direct_IO
-#endif
-};
-
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
/*
* ocfs_ilookup()
@@ -813,281 +771,7 @@
return;
} /* ocfs_clear_inode */
-
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)
-inline void __mark_dirty(struct buffer_head *bh)
-{
- set_buffer_flushtime(bh);
- refile_buffer(bh);
-}
-
-static int __block_commit_write(struct inode *inode, struct page *page,
- unsigned from, unsigned to)
-{
- unsigned block_start, block_end;
- int partial = 0, need_balance_dirty = 0;
- unsigned blocksize;
- struct buffer_head *bh, *head;
-
- blocksize = 1 << inode->i_blkbits;
-
- for(bh = head = page->buffers, block_start = 0;
- bh != head || !block_start;
- block_start=block_end, bh = bh->b_this_page) {
- block_end = block_start + blocksize;
- if (block_end <= from || block_start >= to) {
- if (!buffer_uptodate(bh))
- partial = 1;
- } else {
- set_bit(BH_Uptodate, &bh->b_state);
- if (!atomic_set_buffer_dirty(bh)) {
- __mark_dirty(bh);
- buffer_insert_inode_data_queue(bh, inode);
- need_balance_dirty = 1;
- }
- }
- }
-
- if (need_balance_dirty)
- balance_dirty();
- /*
- * is this a partial write that happened to make all buffers
- * uptodate then we can optimize away a bogus readpage() for
- * the next read(). Here we 'discover' wether the page went
- * uptodate as a result of this (potentially partial) write.
- */
- if (!partial)
- SetPageUptodate(page);
- return 0;
-}
-
-static int ocfs2_cont_prepare_write(struct page *page, unsigned offset, unsigned to, get_block_t *get_block, loff_t *bytes)
-{
- struct address_space *mapping = page->mapping;
- struct inode *inode = mapping->host;
- struct page *new_page;
- unsigned long pgpos;
- long status;
- unsigned zerofrom;
- unsigned blocksize = 1 << inode->i_blkbits;
- char *kaddr;
-
- while(page->index > (pgpos = *bytes>>PAGE_CACHE_SHIFT)) {
- status = -ENOMEM;
- new_page = grab_cache_page(mapping, pgpos);
- if (!new_page)
- goto out;
- /* we might sleep */
- if (*bytes>>PAGE_CACHE_SHIFT != pgpos) {
- unlock_page(new_page);
- page_cache_release(new_page);
- continue;
- }
- zerofrom = *bytes & ~PAGE_CACHE_MASK;
- if (zerofrom & (blocksize-1)) {
- *bytes |= (blocksize-1);
- (*bytes)++;
- }
- status = block_prepare_write(new_page, zerofrom,
- PAGE_CACHE_SIZE, get_block);
- if (status)
- goto out_unmap;
- kaddr = page_address(new_page);
- memset(kaddr+zerofrom, 0, PAGE_CACHE_SIZE-zerofrom);
- flush_dcache_page(new_page);
- __block_commit_write(inode, new_page, zerofrom, PAGE_CACHE_SIZE);
- kunmap(new_page);
- unlock_page(new_page);
- page_cache_release(new_page);
- }
-
- if (page->index < pgpos) {
- /* completely inside the area */
- zerofrom = offset;
- } else {
- /* page covers the boundary, find the boundary offset */
- zerofrom = *bytes & ~PAGE_CACHE_MASK;
-
- /* if we will expand the thing last block will be filled */
- if (to > zerofrom && (zerofrom & (blocksize-1))) {
- *bytes |= (blocksize-1);
- (*bytes)++;
- }
-
- /* starting below the boundary? Nothing to zero out */
- if (offset <= zerofrom)
- zerofrom = offset;
- }
- status = block_prepare_write(page, zerofrom, to, get_block);
- if (status)
- goto out1;
- kaddr = page_address(page);
- if (zerofrom < offset) {
- memset(kaddr+zerofrom, 0, offset-zerofrom);
- flush_dcache_page(page);
- __block_commit_write(inode, page, zerofrom, offset);
- }
- return 0;
-out1:
- ClearPageUptodate(page);
- kunmap(page);
- return status;
-
-out_unmap:
- ClearPageUptodate(new_page);
- kunmap(new_page);
- UnlockPage(new_page);
- page_cache_release(new_page);
-out:
- return status;
-}
-
-/* Mark's favorite hack */
-#undef cont_prepare_write
-#define cont_prepare_write ocfs2_cont_prepare_write
-#endif /* < 2.6.0 */
-
/*
- * ocfs_prepare_write()
- *
- */
-static int ocfs_prepare_write (struct file *file, struct page *page, unsigned from, unsigned to)
-{
- int ret;
- struct inode *inode = page->mapping->host;
-
- LOG_SET_CONTEXT(PREPARE_WRITE);
-
- LOG_ENTRY_ARGS ("(0x%p, 0x%p, %u, %u)\n", file, page, from, to);
-
- if (!inode)
- BUG();
-
- ret = cont_prepare_write(page, from, to, ocfs_get_block,
- &(OCFS_I(page->mapping->host)->ip_mmu_private));
-
- LOG_EXIT_INT (ret);
-
- LOG_CLEAR_CONTEXT();
- return ret;
-} /* ocfs_prepare_write */
-
-/*
- * ocfs_commit_write()
- *
- */
-static int ocfs_commit_write (struct file *file, struct page *page, unsigned from, unsigned to)
-{
- int ret;
-
- LOG_SET_CONTEXT(COMMIT_WRITE);
-
- LOG_ENTRY_ARGS ("(0x%p, 0x%p, %u, %u)\n", file, page, from, to);
-
- ret = generic_commit_write (file, page, from, to);
-
- LOG_EXIT_INT (ret);
-
- LOG_CLEAR_CONTEXT();
- return ret;
-} /* ocfs_commit_write */
-
-/*
- * ocfs_symlink_get_block()
- *
- */
-static int ocfs_symlink_get_block (struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create)
-{
- int err = -EIO;
- int status;
- ocfs2_dinode *fe = NULL;
- struct buffer_head *bh = NULL;
- struct buffer_head *buffer_cache_bh = NULL;
- ocfs_super *osb = OCFS_SB(inode->i_sb);
- void *kaddr;
-
- LOG_ENTRY_ARGS ("(0x%p, %llu, 0x%p, %d)\n", inode,
- (unsigned long long)iblock, bh_result, create);
-
- if (!inode) {
- LOG_ERROR_STR ("bad inode");
- goto bail;
- }
-
- if ((iblock << inode->i_sb->s_blocksize_bits) > PATH_MAX + 1) {
- LOG_ERROR_ARGS ("block offset > PATH_MAX: %llu",
- (unsigned long long)iblock);
- goto bail;
- }
-
- status = ocfs_read_bh(OCFS_SB(inode->i_sb),
- OCFS_I(inode)->ip_blkno << inode->i_sb->s_blocksize_bits,
- &bh,
- OCFS_BH_CACHED, inode);
- if (status < 0) {
- LOG_ERROR_STATUS (status);
- goto bail;
- }
- fe = (ocfs2_dinode *) bh->b_data;
-
- if (!IS_VALID_FILE_ENTRY(fe)) {
- LOG_ERROR_ARGS("Invalid fe at blkno %llu",
- OCFS_I(inode)->ip_blkno);
- goto bail;
- }
-
- if ((u64)iblock >= ocfs_clusters_to_blocks(inode->i_sb,
- fe->i_clusters)) {
- LOG_ERROR_ARGS ("block offset is outside the allocated size: %llu",
- (unsigned long long)iblock);
- goto bail;
- }
-
- /* We don't use the page cache to create symlink data, so if
- * need be, copy it over from the buffer cache. */
- if (!buffer_uptodate(bh_result) && !ocfs_inode_is_new(osb, inode)) {
- buffer_cache_bh = sb_getblk(osb->sb,
- fe->id2.i_list.l_recs[0].e_blkno + iblock);
- if (!buffer_cache_bh) {
- LOG_ERROR_STR("couldn't getblock for symlink!");
- goto bail;
- }
-
- /* we haven't locked out transactions, so a commit
- * could've happened. Since we've got a reference on
- * the bh, even if it commits while we're doing the
- * copy, the data is still good. */
- if (buffer_jbd(buffer_cache_bh)
- && !ocfs_inode_is_new(osb, inode)) {
- kaddr = kmap_atomic(bh_result->b_page, KM_USER0);
- if (!kaddr) {
- LOG_ERROR_ARGS("couldn't kmap!\n");
- goto bail;
- }
- memcpy(kaddr + (bh_result->b_size * iblock),
- buffer_cache_bh->b_data,
- bh_result->b_size);
- kunmap_atomic(kaddr, KM_USER0);
- set_buffer_uptodate(bh_result);
- }
- brelse(buffer_cache_bh);
- }
-
- map_bh(bh_result, inode->i_sb,
- fe->id2.i_list.l_recs[0].e_blkno + iblock);
-
- err = 0;
-
-bail:
- if (bh)
- brelse(bh);
-
- LOG_EXIT_INT (err);
- return err;
-} /* ocfs_symlink_get_block */
-
-
-/*
* TODO: this should probably be merged into ocfs_get_block
*
* However, you now need to pay attention to the cont_prepare_write()
@@ -1194,834 +878,6 @@
}
/*
- * ocfs_get_block()
- *
- */
-static int ocfs_get_block (struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create)
-{
- int err = -EIO;
- __s64 vbo = 0;
- __s64 lbo = 0;
- __u32 len;
- int open_direct;
-
- LOG_ENTRY_ARGS ("(0x%p, %llu, 0x%p, %d)\n", inode,
- (unsigned long long)iblock, bh_result, create);
-
- if (!inode) {
- LOG_ERROR_STR ("bad inode");
- goto bail;
- }
-
- if (OCFS_I(inode)->ip_flags & OCFS_INODE_SYSTEM_FILE) {
- printk("get_block on system inode 0x%p (%lu)\n",
- inode, inode->i_ino);
- }
-
- open_direct = OCFS_I(inode)->ip_open_flags & OCFS_OIN_OPEN_FOR_DIRECTIO;
-
- if (S_ISLNK (inode->i_mode)) {
- /* this always does I/O for some reason. */
- down_read(&OCFS_I(inode)->ip_io_sem);
- err = ocfs_symlink_get_block (inode, iblock, bh_result,
- create);
- up_read(&OCFS_I(inode)->ip_io_sem);
- goto bail;
- }
-
- vbo = (__s64) iblock << inode->i_sb->s_blocksize_bits;
-
-#if 0
- if (!INODE_JOURNAL(inode) && vbo >= OCFS_I(inode)->ip_alloc_size) {
- int vbo_pad;
-
- vbo_pad = inode->i_sb->s_blocksize;
- vbo_pad -= vbo & (s64)(inode->i_sb->s_blocksize - 1);
-
- LOG_TRACE_STR("Extending allocation");
- LOG_ERROR_ARGS("extending inode %lu in get_block!!\n",
- inode->i_ino);
- down_write(&OCFS_I(inode)->ip_io_sem);
- err = ocfs_extend_file(osb, vbo + vbo_pad,
- NULL, inode, NULL, 0, NULL);
- up_write(&OCFS_I(inode)->ip_io_sem);
- if (err < 0) {
- err = -ENOSPC;
- LOG_ERROR_STATUS (err);
- goto bail;
- }
- }
-#else
- if (vbo >= OCFS_I(inode)->ip_alloc_size) {
- err = -EIO;
- LOG_ERROR_ARGS("Trying to extend in ocfs_get_block() (inode %llu, blkno %llu, vbo %llu, alloc %llu)\n", OCFS_I(inode)->ip_blkno, (u64)iblock, (u64)vbo, OCFS_I(inode)->ip_alloc_size);
- goto bail;
- }
-#endif
-
- len = inode->i_sb->s_blocksize;
- if (!open_direct)
- down_read(&OCFS_I(inode)->ip_extend_sem);
- err = ocfs_lookup_file_allocation(OCFS2_SB(inode->i_sb),
- vbo, &lbo, len, NULL,
- inode, open_direct);
- if (!open_direct)
- up_read(&OCFS_I(inode)->ip_extend_sem);
- if (err < 0) {
- LOG_ERROR_ARGS ("vbo=%lld lbo=%lld len=%u", vbo, lbo, len);
- goto bail;
- }
-
- map_bh(bh_result, inode->i_sb, lbo >> inode->i_sb->s_blocksize_bits);
-
- err = 0;
-
- if (bh_result->b_blocknr == 0) {
- err = -EIO;
- LOG_ERROR_ARGS ("vbo=%lld lbo=%lld len=%u, blkno=(%llu)\n",
- vbo, lbo, len,
- OCFS_I(inode)->ip_blkno);
- }
-
- if (vbo < OCFS_I(inode)->ip_mmu_private)
- goto bail;
- if (!create)
- goto bail;
- if (vbo != OCFS_I(inode)->ip_mmu_private) {
- LOG_ERROR_ARGS("Uh-oh, vbo = %lld, i_size = %llu, mmu = %llu, "
- "inode = %llu\n",
- vbo, inode->i_size,
- OCFS_I(inode)->ip_mmu_private,
- OCFS_I(inode)->ip_blkno);
- BUG();
- err = -EIO;
- goto bail;
- }
-
- bh_result->b_state |= (1UL << BH_New);
- OCFS_I(inode)->ip_mmu_private += inode->i_sb->s_blocksize;
-
-bail:
- if (err < 0)
- err = -EIO;
-
- LOG_EXIT_INT (err);
- return err;
-} /* ocfs_get_block */
-
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
-static sector_t ocfs_bmap(struct address_space *mapping, sector_t block)
-#else
-static int ocfs_bmap(struct address_space *mapping, long block)
-#endif
-{
- int disk_block = 0;
- ocfs_super *osb = OCFS_SB(mapping->host->i_sb);
- __s64 vbo = 0;
- __s64 lbo = 0;
- __u32 len;
- int err = 0, status;
- struct inode *inode = mapping->host;
-
- LOG_SET_CONTEXT(BMAP);
-
- LOG_ENTRY_ARGS("(block = %llu)\n", (unsigned long long)block);
-
- if (!inode) {
- LOG_ERROR_STR ("bmap: bad inode");
- err = -EINVAL;
- LOG_ERROR_STATUS(err);
- goto bail;
- }
-
- if (!INODE_JOURNAL(inode)) {
- LOG_ERROR_STR("bmap is only for journal inodes!");
- err = -EINVAL;
- LOG_ERROR_STATUS(err);
- goto bail;
- }
-
- vbo = (__s64) block << inode->i_sb->s_blocksize_bits;
- len = osb->sb->s_blocksize;
- err = ocfs_lookup_file_allocation(osb, vbo, &lbo, len, NULL,
- inode, 1);
- if (err < 0) {
- LOG_ERROR_ARGS ("vbo=%lld lbo=%lld len=%u", vbo,
- lbo, len);
- LOG_ERROR_STATUS(err);
- goto bail;
- }
-
- disk_block = lbo >> inode->i_sb->s_blocksize_bits;
-
-bail:
- status = err ? err : disk_block;
- LOG_EXIT_STATUS(status);
-
- LOG_CLEAR_CONTEXT();
- return(status);
-}
-
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)
-/*
- * ocfs_get_block2()
- *
- */
-static int ocfs_get_block2 (struct inode *inode, long iblock, long *oblock, int len)
-{
- int err = -EIO;
- ocfs_super *osb;
- __s64 vbo = 0;
- __s64 lbo = 0;
-
- LOG_ENTRY_ARGS ("(0x%p, %ld)\n", inode, iblock);
-
- if (!inode) {
- LOG_ERROR_STR ("bad inode");
- err = -1;
- goto bail;
- }
-
- osb = OCFS_SB(inode->i_sb);
-
- vbo = (__s64) iblock << osb->s_sectsize_bits;
- err = ocfs_lookup_file_allocation(osb, vbo, &lbo, len, NULL,
- inode, 1);
- if (err < 0) {
- LOG_ERROR_STATUS (err);
- err = -1;
- goto bail;
- }
-
- err = 0;
-
- *oblock = lbo >> osb->s_sectsize_bits;
- if (*oblock == 0) {
- err = -EIO;
- LOG_ERROR_ARGS ("vbo=%lld lbo=%lld len=%u, blkno=(%llu)\n",
- vbo, lbo, len,
- OCFS_I(inode)->ip_blkno);
- }
-
-bail:
- if (err < 0)
- err = -EIO;
- LOG_EXIT_INT (err);
- return err;
-} /* ocfs_get_block2 */
-#endif
-
-/*
- * ocfs_readpage()
- *
- */
-static int ocfs_readpage (struct file *file, struct page *page)
-{
- int ret;
-
- LOG_SET_CONTEXT(READPAGE);
-
- LOG_ENTRY_ARGS ("(0x%p, %lu)\n", file, (page ? page->index : 0));
-
- ret = block_read_full_page (page, ocfs_get_block);
- if (ret < 0)
- goto bail;
-
-bail:
- LOG_EXIT_INT (ret);
-
- LOG_CLEAR_CONTEXT();
- return ret;
-} /* ocfs_readpage */
-
-/*
- * ocfs_writepage()
- *
- */
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
-static int ocfs_writepage (struct page *page, struct writeback_control *wbc)
-{
- int ret;
-
- LOG_SET_CONTEXT(WRITEPAGE);
-
- LOG_ENTRY_ARGS ("(0x%p)\n", page);
-
- ret = block_write_full_page (page, ocfs_get_block, wbc);
-
- LOG_EXIT_INT (ret);
-
- LOG_CLEAR_CONTEXT();
- return ret;
-} /* ocfs_writepage */
-#else
-static int ocfs_writepage (struct page *page)
-{
- int ret;
-
- LOG_SET_CONTEXT(WRITEPAGE);
-
- LOG_ENTRY_ARGS ("(0x%p)\n", page);
-
- ret = block_write_full_page (page, ocfs_get_block);
-
- LOG_EXIT_INT (ret);
-
- LOG_CLEAR_CONTEXT();
- return ret;
-} /* ocfs_writepage */
-#endif
-
-
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
-/*
- * TODO: Make this into a generic get_blocks function.
- *
- * From do_direct_io in direct-io.c:
- * "So what we do is to permit the ->get_blocks function to populate
- * bh.b_size with the size of IO which is permitted at this offset and
- * this i_blkbits."
- *
- * This function is called directly from get_more_blocks in direct-io.c.
- *
- * We should probably have this data in the oin for the inode.
- * Otherwise, we might want to look at ocfs_rw_direct,
- * ocfs_lookup_file_allocation and ocfs_get_block
- *
- * called like this: dio->get_blocks(dio->inode, fs_startblk,
- * fs_count, map_bh, dio->rw == WRITE);
- */
-static int ocfs_direct_IO_get_blocks(struct inode *inode, sector_t iblock, unsigned long max_blocks, struct buffer_head *bh_result, int create)
-{
- int ret = -1;
- int status;
- ocfs_super *osb = NULL;
- __s64 vbo; /* file offset */
- __s64 lbo; /* logical (disk) offset */
- __s64 vbo_max; /* file offset, max_blocks from iblock */
- int set_new = 0; /* flag */
- __u64 new_size; /* In bytes, the size of the contiguous block */
- unsigned char blocksize_bits;
-
- if (!inode || !bh_result) {
- LOG_ERROR_STR("ocfs_direct_IO_get_blocks: inode or bh_result is null");
- return -EIO;
- }
-
- osb = inode->i_sb->s_fs_info;
- blocksize_bits = inode->i_sb->s_blocksize_bits;
- /* make sure we're up to date... */
- if (atomic_read(&OCFS_I(inode)->ip_needs_verification)) {
- LOG_TRACE_STR ("ocfs_direct_IO_get_blocks: verify oin.");
- status = ocfs_verify_update_inode (osb, inode, 0);
- if (status < 0) {
- LOG_TRACE_STR ("ocfs_verify_update_inode failed");
- ret = -EIO;
- goto bail;
- }
- }
-
- /* This function won't even be called if the request isn't all
- * nicely aligned and of the right size, so there's no need
- * for us to check any of that. */
-
- vbo = (__s64) iblock << blocksize_bits;
- vbo_max = vbo + ((__s64) max_blocks << blocksize_bits);
-
- /* NOTE: create flag is set when we ?may? have to allocate some
- blocks for the file. */
- if (create && vbo_max > OCFS_I(inode)->ip_alloc_size) {
- /* WARNING: How much do we really want to extend the file? */
- status = ocfs_extend_file(osb, vbo_max,
- NULL, inode, NULL, 0, NULL);
- if (status < 0) {
- status = -ENOSPC;
- LOG_ERROR_STR("ocfs_direct_IO_get_blocks: failed to extend the file!");
- goto bail;
- }
- set_new = 1;
- }
-
- /* This figure out the size of the next contiguous block, and
- * our logical offset */
- /* TODO: Try our damndest to give sizes in multiples of PAGE_SIZE */
- status = ocfs_lookup_file_allocation(osb, vbo, &lbo, max_blocks << blocksize_bits,
- &new_size, inode, 1);
-
- /* Do whatever we need to the buffer_head */
- if (set_new) {
- set_buffer_new(bh_result);
- /* Do we really want to set bh_result->b_blocknr here too? */
- bh_result->b_blocknr = lbo >> blocksize_bits;
- } else {
- clear_buffer_new(bh_result);
- /* is the last argument here correct? */
- map_bh(bh_result, inode->i_sb, lbo >> blocksize_bits);
- }
-
- /* make sure we don't map more than max_blocks blocks here as
- that's all the kernel will handle at this point. */
- if (new_size > (__u64)max_blocks << blocksize_bits)
- new_size = (__u64)max_blocks << blocksize_bits;
- bh_result->b_size = new_size;
-
- ret = 0;
-bail:
- return ret;
-}
-
-/*
- * ocfs_direct_IO()
- * used to be:
- * static int ocfs_direct_IO (int rw,
- * struct inode *inode,
- * struct kiobuf *iobuf, unsigned long blocknr, int blocksize)
- *
- * now:
- static int ocfs_direct_IO(int rw, struct kiocb *iocb,
- const struct iovec *iov, loff_t offset,
- unsigned long nr_segs)
- * int (*direct_IO)(int, struct kiocb *, const struct iovec *iov,
- * loff_t offset, unsigned long nr_segs);
- */
-static ssize_t ocfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, loff_t offset, unsigned long nr_segs)
-{
- struct file *file = iocb->ki_filp;
- struct inode *inode = file->f_dentry->d_inode->i_mapping->host;
- int ret;
-
- LOG_SET_CONTEXT(DIRECT_IO);
-
- LOG_ENTRY ();
-
- /* blockdev_direct_IO checks alignment for us, using */
- ret = blockdev_direct_IO (rw, iocb, inode, inode->i_sb->s_bdev, iov, offset, nr_segs, ocfs_direct_IO_get_blocks, NULL);
-
- LOG_EXIT_INT (ret);
-
- LOG_CLEAR_CONTEXT();
- return ret;
-} /* ocfs_direct_IO */
-
-#elif LINUX_VERSION_CODE >= KERNEL_VERSION(2,4,10)
-/*
- * ocfs_direct_IO()
- *
- * we are not using this function anymore, in fact
- * we should never get here any more
- * so let's just BUG(), hint from sct at redhat.com
- */
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,4,20) || defined(SUSE)
-static int ocfs_direct_IO (int rw, struct file *filp, struct kiobuf *iobuf, unsigned long blocknr, int blocksize)
-{
- BUG();
- return 0;
-} /* ocfs_direct_IO */
-#else
-static int ocfs_direct_IO (int rw, struct inode *inode, struct kiobuf *iobuf, unsigned long blocknr, int blocksize)
-{
- BUG();
- return 0;
-} /* ocfs_direct_IO */
-#endif
-#endif /* version >= 2.4.10 */
-
-#if defined(SUSE) && LINUX_VERSION_CODE < KERNEL_VERSION(2,4,20)
-#define OCFS_KIO_BLOCKS(_iobuf) ((_iobuf)->kio_blocks)
-#else
-#define OCFS_KIO_BLOCKS(_iobuf) ((_iobuf)->blocks)
-#endif
-
-#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,4,10)
-#define KERNEL_NO_F_IOBUF 1
-#elif defined(SUSE) && LINUX_VERSION_CODE >= KERNEL_VERSION(2,4,20)
-#define KERNEL_NO_F_IOBUF 1
-#endif
-
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)
-/*
- * ocfs_rw_direct()
- *
- */
-ssize_t ocfs_rw_direct (int rw, struct file *filp, char *buf, size_t size, loff_t * offp)
-{
-#ifdef KERNEL_NO_F_IOBUF
- struct kiobuf *iobuf;
-#else
- struct kiobuf *iobuf = filp->f_iobuf;
- int new_iobuf = 0;
-#endif
- int err = 0;
- unsigned long blocknr, blocks, myiosize;
- size_t transferred;
- int iosize, clustersize;
- int i;
- struct inode *inode = filp->f_dentry->d_inode;
- int max_sectors;
- int nbhs;
- int sector_size, sector_bits, sector_mask, sectors_per_page;
- int ret = 0;
- int large_io = 0;
- int inuse = 0;
- unsigned long blocks_end_cluster = 0;
- loff_t saved_off;
- size_t saved_size;
- unsigned long firstlogic;
- long firstphys;
- long nextphys;
- unsigned long nextlogic = 0;
- unsigned long totalioblocks = 0;
-
- saved_off = *offp;
- saved_size = size;
-
- /* FIXME: Need to differentiate between sectors and blocksize */
- sector_bits = OCFS_SB(inode->i_sb)->s_sectsize_bits;
- sector_size = 1 << OCFS_SB(inode->i_sb)->s_sectsize_bits;
- sector_mask = sector_size - 1;
- sectors_per_page = PAGE_SIZE / sector_size;
- /* max sectors is 1024 in 2.4.9
- * max data is 512kb
- */
-
- err = -EINVAL;
- if (size == 0) {
- printk("direct write of 0 byte\n");
- return 0;
- }
-
- if (rw == READ) {
- if (inode->i_size <= *offp) /* read past end of file */
- return 0;
- if (size > (inode->i_size - *offp))
- size = inode->i_size - *offp;
- }
-
- /* make sure aligned to either PAGE_SIZE or sect_size IO */
-#ifndef LARGEIOS
- if ((*offp & sector_mask) || (size & sector_mask))
- /* if not, then fail, we need either to do dio */
- return err;
-
- max_sectors = KIO_MAX_SECTORS;
- large_io = 0;
-#endif
-#ifdef LARGEIOS
- if ((*offp & ~PAGE_MASK) || (size & ~PAGE_MASK)) {
- /* if it's not PAGE_SIZE, then sect_size */
- if ((*offp & sector_mask) || (size & sector_mask))
- /* if not, then fail, we need either to do dio */
- return err;
- max_sectors = KIO_MAX_SECTORS; /* for 2.4.9 - 1024 */
- } /* ok we 're PAGE_SIZE aligned, lets see if the buffer is */
- else {
- if (!((unsigned long) buf & ~PAGE_MASK)) {
- /* yippie we are .. we can do PAGE_SIZE size io's */
- large_io = 1;
- /* for 2.4.9 */
- max_sectors = KIO_MAX_SECTORS / sectors_per_page;
- } else {
- max_sectors = KIO_MAX_SECTORS;
- large_io = 0;
- }
-
- }
-#endif
- /* find out how far we are to the end of our cluster */
-
- err = 0;
- if (size)
- err = -ENXIO;
-
- /* Split the IO into KIO_MAX_SECTORS chunks, mapping and */
- /* unmapping the single kiobuf as we go to perform each chunk of IO. */
-
- transferred = 0;
- blocknr = *offp >> sector_bits;
- clustersize = inode->i_blksize >> sector_bits;
- myiosize = size >> sector_bits;
- blocks_end_cluster = clustersize - (blocknr % clustersize);
- firstlogic = blocknr;
- totalioblocks = 0;
-
- ret = ocfs_get_block2 (inode, blocknr, &firstphys, sector_size);
- if (ret == -1) {
- err = 0;
- goto out;
- }
- while (myiosize > 0) {
- if (blocks_end_cluster + 1 > myiosize) {
- totalioblocks += myiosize;
- myiosize = 0;
- goto doio;
- } else {
- totalioblocks += blocks_end_cluster;
- myiosize -= blocks_end_cluster;
- nextlogic = firstlogic + blocks_end_cluster;
- }
-again:
- ret = ocfs_get_block2 (inode, nextlogic, &nextphys, sector_size);
- if (ret == -1) {
- err = 0;
- goto out;
- }
- if (nextphys == (firstphys + totalioblocks)) {
- // merge ok
- blocks_end_cluster = clustersize - (nextlogic % clustersize);
- if (blocks_end_cluster + 1 > myiosize) {
- totalioblocks += myiosize;
- myiosize = 0;
- } else {
- totalioblocks += blocks_end_cluster;
- myiosize -= blocks_end_cluster;
- nextlogic = nextlogic + blocks_end_cluster;
- goto again;
- }
- }
-doio:
- size = totalioblocks << sector_bits;
- if (large_io)
- nbhs = (size >> PAGE_SHIFT);
- else
- nbhs = (size >> sector_bits);
- if (nbhs > max_sectors)
- nbhs = max_sectors;
-
-#ifdef KERNEL_NO_F_IOBUF
- err = alloc_kiovec_sz (1, &iobuf, &nbhs);
- if (err)
- goto out;
-#else
- if (test_and_set_bit(0, &filp->f_iobuf_lock)) {
- /*
- * A parallel read/write is using the preallocated iobuf
- * so just run slow and allocate a new one.
- */
- err = alloc_kiovec_sz (1, &iobuf, &nbhs);
- if (err)
- goto out;
- new_iobuf = 1;
- } else
- new_iobuf = 0;
-
-#endif
- inuse = 1;
-
- totalioblocks = 0;
- while (size > 0) {
- if (large_io) {
- blocks = size >> PAGE_SHIFT;
- if (blocks > max_sectors)
- blocks = max_sectors;
- iosize = blocks << PAGE_SHIFT;
- } else {
- blocks = size >> sector_bits;
- if (blocks > max_sectors)
- blocks = max_sectors;
- iosize = blocks << sector_bits;
- }
- if (!blocks)
- break;
- err = map_user_kiobuf (rw, iobuf, (unsigned long) buf, iosize);
- if (err)
- break;
- /* get the blocknr depending on io size for all blocks */
- /* since we are awlays within the extent we only need to get the first block */
- OCFS_KIO_BLOCKS(iobuf)[0] = firstphys + totalioblocks;
-
- if (large_io) {
- blocknr += sectors_per_page;
- OCFS_KIO_BLOCKS(iobuf)[0] = OCFS_KIO_BLOCKS(iobuf)[0] / sectors_per_page;
- } else {
- blocknr++;
- }
-
- for (i = 1; i < blocks; i++) {
- if (large_io) {
- blocknr += sectors_per_page;
- } else {
- blocknr++;
- }
- OCFS_KIO_BLOCKS(iobuf)[i] = OCFS_KIO_BLOCKS(iobuf)[0] + i;
- }
- err = brw_kiovec (rw, 1, &iobuf, inode->i_dev, OCFS_KIO_BLOCKS(iobuf),
- large_io ? PAGE_SIZE : sector_size);
-#ifdef SUSE
- if (rw == READ && err > 0)
- mark_dirty_kiobuf(iobuf, err);
-#endif
- if (err >= 0) {
- transferred += err;
- size -= err;
- buf += err;
- if (large_io) {
- totalioblocks +=
- (blocks * sectors_per_page);
- } else {
- totalioblocks += blocks;
- }
- } else {
- printk( "ocfs_rw_direct : brw_kiovec() %d\n", err);
- break;
- }
- unmap_kiobuf (iobuf);
- if (err != iosize)
- break;
- }
-#ifdef KERNEL_NO_F_IOBUF
- free_kiovec_sz(1, &iobuf, &nbhs);
-#else
- if (!new_iobuf)
- clear_bit(0, &filp->f_iobuf_lock);
- else
- free_kiovec_sz(1, &iobuf, &nbhs);
-#endif
- inuse = 0;
- totalioblocks = 0;
- firstlogic = nextlogic;
- firstphys = nextphys;
- }
- if (transferred) {
- *offp += transferred;
- err = transferred;
- }
-
-out:
-#ifdef KERNEL_NO_F_IOBUF
- if (inuse)
- free_kiovec_sz (1, &iobuf, &nbhs);
-#else
- if (inuse) {
- if (!new_iobuf)
- clear_bit(0, &filp->f_iobuf_lock);
- else
- free_kiovec_sz(1, &iobuf, &nbhs);
- }
-#endif
- return err;
-} /* ocfs_rw_direct */
-#endif /* 2.4.x kernel */
-
-#ifdef AIO_ENABLED
-static int ocfs_kvec_rw(struct file *filp, int rw, kvec_cb_t cb,
- size_t size, loff_t pos)
-{
- int err = 0;
- int max_sectors = 25000;
- struct inode *inode = filp->f_dentry->d_inode;
- unsigned long blocknr, blocks, iosize,myiosize;
- long firstphys;
- int clustersize;
- unsigned long blocks_end_cluster = 0;
-
- /* FIXME: Need to differentiate betwen sectors and blocksize */
- int sector_bits = OCFS_SB(inode->i_sb)->s_sectsize_bits;
- int sector_size = 1 << OCFS_SB(inode->i_sb)->s_sectsize_bits;
- int sector_mask = sector_size - 1;
-
- int ret;
- unsigned long firstlogic;
- long nextphys;
- unsigned long nextlogic = 0;
- unsigned long totalioblocks = 0;
-
- if (!size || (pos == inode->i_size)) {
- cb.fn(cb.data, cb.vec, err);
- return err;
- }
-
- err = -ENXIO;
- if (pos >= inode->i_size) {
- return err;
- }
-
- err = -EINVAL;
- if ((pos < 0) || (pos & sector_mask) || (size & sector_mask)) {
- return err;
- }
-
- blocknr = pos >> sector_bits;
-
- blocks = size >> sector_bits;;
- if (blocks > max_sectors)
- blocks = max_sectors;
- if (!blocks) {
- err = -ENXIO;
- return err;;
- }
-
- iosize = blocks << sector_bits;
- clustersize = inode->i_blksize >> sector_bits;
- blocks_end_cluster = clustersize - (blocknr % clustersize);
- myiosize = size >> sector_bits;
- firstlogic = blocknr;
- totalioblocks = 0;
-
- err = ocfs_get_block2(inode, blocknr, &firstphys, sector_size);
- if ( err == -1 ) {
- err = 0;
- return err;
- }
- if (blocks_end_cluster + 1 > myiosize) {
- totalioblocks += myiosize;
- myiosize = 0;
- goto doio;
- } else {
- totalioblocks += blocks_end_cluster;
- myiosize -= blocks_end_cluster;
- nextlogic = firstlogic + blocks_end_cluster;
- }
-again:
- ret = ocfs_get_block2 (inode, nextlogic, &nextphys, sector_size);
- if (ret == -1) {
- err = 0;
- return err;
- }
- if (nextphys == (firstphys + totalioblocks)) {
- blocks_end_cluster = clustersize - (nextlogic % clustersize);
- if (blocks_end_cluster + 1 > myiosize) {
- totalioblocks += myiosize;
- myiosize = 0;
- } else {
- totalioblocks += blocks_end_cluster;
- myiosize -= blocks_end_cluster;
- nextlogic = nextlogic + blocks_end_cluster;
- goto again;
- }
- }
-doio:
- blocks = totalioblocks;
- err = brw_kvec_async(rw, cb, inode->i_dev, blocks, firstphys, sector_bits);
- return err;
-
-}
-
-int ocfs_kvec_read(struct file *file, kvec_cb_t cb, size_t size, loff_t pos)
-{
- int ret;
-
- LOG_SET_CONTEXT(KVEC_READ);
-
- ret = ocfs_kvec_rw(file, READ, cb, size, pos);
-
- LOG_CLEAR_CONTEXT();
- return ret;
-}
-
-int ocfs_kvec_write(struct file *file, kvec_cb_t cb, size_t size, loff_t pos)
-{
- int ret;
-
- LOG_SET_CONTEXT(KVEC_WRITE);
-
- ret = ocfs_kvec_rw(file, WRITE, cb, size, pos);
-
- LOG_CLEAR_CONTEXT();
- return ret;
-}
-#endif
-
-/*
* ocfs_inode_revalidate()
*
* In 2.4, this is called only from stat.c always without i_sem before
Index: src/aops.c
===================================================================
--- src/aops.c (revision 0)
+++ src/aops.c (revision 0)
@@ -0,0 +1,677 @@
+/* -*- mode: c; c-basic-offset: 8; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
+ * Copyright (C) 2002, 2004 Oracle. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ *
+ * Authors: Kurt Hackel, Mark Fasheh, Sunil Mushran, Wim Coekaerts,
+ * Manish Singh, Neeraj Goyal, Suchit Kaura
+ */
+#include "ocfs_compat.h"
+
+#include <linux/fs.h>
+#include <linux/slab.h>
+#include <linux/highmem.h>
+#include <linux/pagemap.h>
+#include <asm/byteorder.h>
+
+#include "ocfs_log.h"
+#include "ocfs.h"
+
+#include "alloc.h"
+#include "buffer_head_io.h"
+#include "file.h"
+#include "inode.h"
+#include "ocfs_journal.h"
+
+
+#define OCFS_DEBUG_CONTEXT OCFS_DEBUG_CONTEXT_INODE
+
+static int ocfs_symlink_get_block(struct inode *inode, sector_t iblock,
+ struct buffer_head *bh_result, int create)
+{
+ int err = -EIO;
+ int status;
+ ocfs2_dinode *fe = NULL;
+ struct buffer_head *bh = NULL;
+ struct buffer_head *buffer_cache_bh = NULL;
+ ocfs_super *osb = OCFS_SB(inode->i_sb);
+ void *kaddr;
+
+ LOG_ENTRY_ARGS("(0x%p, %llu, 0x%p, %d)\n", inode,
+ (unsigned long long)iblock, bh_result, create);
+
+ if ((iblock << inode->i_sb->s_blocksize_bits) > PATH_MAX + 1) {
+ LOG_ERROR_ARGS ("block offset > PATH_MAX: %llu",
+ (unsigned long long)iblock);
+ goto bail;
+ }
+
+ status = ocfs_read_bh(OCFS_SB(inode->i_sb),
+ OCFS_I(inode)->ip_blkno <<
+ inode->i_sb->s_blocksize_bits,
+ &bh,
+ OCFS_BH_CACHED, inode);
+ if (status < 0) {
+ LOG_ERROR_STATUS (status);
+ goto bail;
+ }
+ fe = (ocfs2_dinode *) bh->b_data;
+
+ if (!IS_VALID_FILE_ENTRY(fe)) {
+ LOG_ERROR_ARGS("Invalid fe at blkno %llu",
+ OCFS_I(inode)->ip_blkno);
+ goto bail;
+ }
+
+ if ((u64)iblock >= ocfs_clusters_to_blocks(inode->i_sb,
+ fe->i_clusters)) {
+ LOG_ERROR_ARGS ("block offset is outside the allocated size: %llu",
+ (unsigned long long)iblock);
+ goto bail;
+ }
+
+ /* We don't use the page cache to create symlink data, so if
+ * need be, copy it over from the buffer cache. */
+ if (!buffer_uptodate(bh_result) && !ocfs_inode_is_new(osb, inode)) {
+ buffer_cache_bh = sb_getblk(osb->sb,
+ fe->id2.i_list.l_recs[0].e_blkno + iblock);
+ if (!buffer_cache_bh) {
+ LOG_ERROR_STR("couldn't getblock for symlink!");
+ goto bail;
+ }
+
+ /* we haven't locked out transactions, so a commit
+ * could've happened. Since we've got a reference on
+ * the bh, even if it commits while we're doing the
+ * copy, the data is still good. */
+ if (buffer_jbd(buffer_cache_bh)
+ && !ocfs_inode_is_new(osb, inode)) {
+ kaddr = kmap_atomic(bh_result->b_page, KM_USER0);
+ if (!kaddr) {
+ LOG_ERROR_ARGS("couldn't kmap!\n");
+ goto bail;
+ }
+ memcpy(kaddr + (bh_result->b_size * iblock),
+ buffer_cache_bh->b_data,
+ bh_result->b_size);
+ kunmap_atomic(kaddr, KM_USER0);
+ set_buffer_uptodate(bh_result);
+ }
+ brelse(buffer_cache_bh);
+ }
+
+ map_bh(bh_result, inode->i_sb,
+ fe->id2.i_list.l_recs[0].e_blkno + iblock);
+
+ err = 0;
+
+bail:
+ if (bh)
+ brelse(bh);
+
+ LOG_EXIT_INT (err);
+ return err;
+}
+
+static int ocfs_get_block(struct inode *inode, sector_t iblock,
+ struct buffer_head *bh_result, int create)
+{
+ int err = -EIO;
+ __s64 vbo = 0;
+ __s64 lbo = 0;
+ __u32 len;
+ int open_direct;
+
+ LOG_ENTRY_ARGS("(0x%p, %llu, 0x%p, %d)\n", inode,
+ (unsigned long long)iblock, bh_result, create);
+
+ if (OCFS_I(inode)->ip_flags & OCFS_INODE_SYSTEM_FILE) {
+ printk("get_block on system inode 0x%p (%lu)\n",
+ inode, inode->i_ino);
+ }
+
+ open_direct = OCFS_I(inode)->ip_open_flags & OCFS_OIN_OPEN_FOR_DIRECTIO;
+
+ if (S_ISLNK(inode->i_mode)) {
+ /* this always does I/O for some reason. */
+ down_read(&OCFS_I(inode)->ip_io_sem);
+ err = ocfs_symlink_get_block (inode, iblock, bh_result,
+ create);
+ up_read(&OCFS_I(inode)->ip_io_sem);
+ goto bail;
+ }
+
+ vbo = (__s64) iblock << inode->i_sb->s_blocksize_bits;
+
+#if 0
+ if (!INODE_JOURNAL(inode) && vbo >= OCFS_I(inode)->ip_alloc_size) {
+ int vbo_pad;
+
+ vbo_pad = inode->i_sb->s_blocksize;
+ vbo_pad -= vbo & (s64)(inode->i_sb->s_blocksize - 1);
+
+ LOG_TRACE_STR("Extending allocation");
+ LOG_ERROR_ARGS("extending inode %lu in get_block!!\n",
+ inode->i_ino);
+ down_write(&OCFS_I(inode)->ip_io_sem);
+ err = ocfs_extend_file(osb, vbo + vbo_pad,
+ NULL, inode, NULL, 0, NULL);
+ up_write(&OCFS_I(inode)->ip_io_sem);
+ if (err < 0) {
+ err = -ENOSPC;
+ LOG_ERROR_STATUS (err);
+ goto bail;
+ }
+ }
+#else
+ if (vbo >= OCFS_I(inode)->ip_alloc_size) {
+ err = -EIO;
+ LOG_ERROR_ARGS("Trying to extend in ocfs_get_block() "
+ "(inode %llu, blkno %llu, vbo %llu, alloc %llu)\n",
+ OCFS_I(inode)->ip_blkno, (u64)iblock, (u64)vbo,
+ OCFS_I(inode)->ip_alloc_size);
+ goto bail;
+ }
+#endif
+
+ len = inode->i_sb->s_blocksize;
+ if (!open_direct)
+ down_read(&OCFS_I(inode)->ip_extend_sem);
+ err = ocfs_lookup_file_allocation(OCFS2_SB(inode->i_sb),
+ vbo, &lbo, len, NULL,
+ inode, open_direct);
+ if (!open_direct)
+ up_read(&OCFS_I(inode)->ip_extend_sem);
+
+ if (err < 0) {
+ LOG_ERROR_ARGS("vbo=%lld lbo=%lld len=%u", vbo, lbo, len);
+ goto bail;
+ }
+
+ map_bh(bh_result, inode->i_sb, lbo >> inode->i_sb->s_blocksize_bits);
+
+ err = 0;
+
+ if (bh_result->b_blocknr == 0) {
+ err = -EIO;
+ LOG_ERROR_ARGS ("vbo=%lld lbo=%lld len=%u, blkno=(%llu)\n",
+ vbo, lbo, len,
+ OCFS_I(inode)->ip_blkno);
+ }
+
+ if (vbo < OCFS_I(inode)->ip_mmu_private)
+ goto bail;
+ if (!create)
+ goto bail;
+ if (vbo != OCFS_I(inode)->ip_mmu_private) {
+ LOG_ERROR_ARGS("Uh-oh, vbo = %lld, i_size = %llu, mmu = %llu, "
+ "inode = %llu\n",
+ vbo, inode->i_size,
+ OCFS_I(inode)->ip_mmu_private,
+ OCFS_I(inode)->ip_blkno);
+ BUG();
+ err = -EIO;
+ goto bail;
+ }
+
+ bh_result->b_state |= (1UL << BH_New);
+ OCFS_I(inode)->ip_mmu_private += inode->i_sb->s_blocksize;
+
+bail:
+ if (err < 0)
+ err = -EIO;
+
+ LOG_EXIT_INT (err);
+ return err;
+}
+
+static int ocfs_readpage(struct file *file, struct page *page)
+{
+ int ret;
+
+ LOG_SET_CONTEXT(READPAGE);
+ LOG_ENTRY_ARGS("(0x%p, %lu)\n", file, (page ? page->index : 0));
+
+ ret = block_read_full_page(page, ocfs_get_block);
+
+ LOG_EXIT_INT(ret);
+ LOG_CLEAR_CONTEXT();
+ return ret;
+}
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
+static int ocfs_writepage(struct page *page, struct writeback_control *wbc)
+{
+ int ret;
+
+ LOG_SET_CONTEXT(WRITEPAGE);
+ LOG_ENTRY_ARGS("(0x%p)\n", page);
+
+ ret = block_write_full_page(page, ocfs_get_block, wbc);
+
+ LOG_EXIT_INT(ret);
+ LOG_CLEAR_CONTEXT();
+ return ret;
+}
+#else
+static int ocfs_writepage(struct page *page)
+{
+ int ret;
+
+ LOG_SET_CONTEXT(WRITEPAGE);
+ LOG_ENTRY_ARGS("(0x%p)\n", page);
+
+ ret = block_write_full_page(page, ocfs_get_block);
+
+ LOG_EXIT_INT(ret);
+ LOG_CLEAR_CONTEXT();
+ return ret;
+}
+#endif
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)
+inline void __mark_dirty(struct buffer_head *bh)
+{
+ set_buffer_flushtime(bh);
+ refile_buffer(bh);
+}
+
+static int __block_commit_write(struct inode *inode, struct page *page,
+ unsigned from, unsigned to)
+{
+ unsigned block_start, block_end;
+ int partial = 0, need_balance_dirty = 0;
+ unsigned blocksize;
+ struct buffer_head *bh, *head;
+
+ blocksize = 1 << inode->i_blkbits;
+
+ for(bh = head = page->buffers, block_start = 0;
+ bh != head || !block_start;
+ block_start=block_end, bh = bh->b_this_page) {
+ block_end = block_start + blocksize;
+ if (block_end <= from || block_start >= to) {
+ if (!buffer_uptodate(bh))
+ partial = 1;
+ } else {
+ set_bit(BH_Uptodate, &bh->b_state);
+ if (!atomic_set_buffer_dirty(bh)) {
+ __mark_dirty(bh);
+ buffer_insert_inode_data_queue(bh, inode);
+ need_balance_dirty = 1;
+ }
+ }
+ }
+
+ if (need_balance_dirty)
+ balance_dirty();
+ /*
+ * If this is a partial write that happened to make all buffers
+ * uptodate then we can optimize away a bogus readpage() for
+ * the next read(). Here we 'discover' whether the page went
+ * uptodate as a result of this (potentially partial) write.
+ */
+ if (!partial)
+ SetPageUptodate(page);
+ return 0;
+}
+
+static int ocfs2_cont_prepare_write(struct page *page, unsigned offset,
+ unsigned to, get_block_t *get_block, loff_t *bytes)
+{
+ struct address_space *mapping = page->mapping;
+ struct inode *inode = mapping->host;
+ struct page *new_page;
+ unsigned long pgpos;
+ long status;
+ unsigned zerofrom;
+ unsigned blocksize = 1 << inode->i_blkbits;
+ char *kaddr;
+
+ while (page->index > (pgpos = *bytes>>PAGE_CACHE_SHIFT)) {
+ status = -ENOMEM;
+ new_page = grab_cache_page(mapping, pgpos);
+ if (!new_page)
+ goto out;
+ /* we might sleep */
+ if (*bytes>>PAGE_CACHE_SHIFT != pgpos) {
+ unlock_page(new_page);
+ page_cache_release(new_page);
+ continue;
+ }
+ zerofrom = *bytes & ~PAGE_CACHE_MASK;
+ if (zerofrom & (blocksize-1)) {
+ *bytes |= (blocksize-1);
+ (*bytes)++;
+ }
+ status = block_prepare_write(new_page, zerofrom,
+ PAGE_CACHE_SIZE, get_block);
+ if (status)
+ goto out_unmap;
+ kaddr = page_address(new_page);
+ memset(kaddr+zerofrom, 0, PAGE_CACHE_SIZE-zerofrom);
+ flush_dcache_page(new_page);
+ __block_commit_write(inode, new_page, zerofrom, PAGE_CACHE_SIZE);
+ kunmap(new_page);
+ unlock_page(new_page);
+ page_cache_release(new_page);
+ }
+
+ if (page->index < pgpos) {
+ /* completely inside the area */
+ zerofrom = offset;
+ } else {
+ /* page covers the boundary, find the boundary offset */
+ zerofrom = *bytes & ~PAGE_CACHE_MASK;
+
+ /* if we will expand the thing last block will be filled */
+ if (to > zerofrom && (zerofrom & (blocksize-1))) {
+ *bytes |= (blocksize-1);
+ (*bytes)++;
+ }
+
+ /* starting below the boundary? Nothing to zero out */
+ if (offset <= zerofrom)
+ zerofrom = offset;
+ }
+ status = block_prepare_write(page, zerofrom, to, get_block);
+ if (status)
+ goto out1;
+ kaddr = page_address(page);
+ if (zerofrom < offset) {
+ memset(kaddr+zerofrom, 0, offset-zerofrom);
+ flush_dcache_page(page);
+ __block_commit_write(inode, page, zerofrom, offset);
+ }
+ return 0;
+out1:
+ ClearPageUptodate(page);
+ kunmap(page);
+ return status;
+
+out_unmap:
+ ClearPageUptodate(new_page);
+ kunmap(new_page);
+ UnlockPage(new_page);
+ page_cache_release(new_page);
+out:
+ return status;
+}
+
+/* Mark's favorite hack */
+#undef cont_prepare_write
+#define cont_prepare_write ocfs2_cont_prepare_write
+#endif /* < 2.6.0 */
+
+/*
+ * ocfs_prepare_write()
+ *
+ */
+static int ocfs_prepare_write(struct file *file, struct page *page,
+ unsigned from, unsigned to)
+{
+ int ret;
+
+ LOG_SET_CONTEXT(PREPARE_WRITE);
+ LOG_ENTRY_ARGS("(0x%p, 0x%p, %u, %u)\n", file, page, from, to);
+
+ ret = cont_prepare_write(page, from, to, ocfs_get_block,
+ &(OCFS_I(page->mapping->host)->ip_mmu_private));
+
+ LOG_EXIT_INT(ret);
+ LOG_CLEAR_CONTEXT();
+ return ret;
+}
+
+/*
+ * ocfs_commit_write()
+ *
+ */
+static int ocfs_commit_write(struct file *file, struct page *page,
+ unsigned from, unsigned to)
+{
+ int ret;
+
+ LOG_SET_CONTEXT(COMMIT_WRITE);
+ LOG_ENTRY_ARGS("(0x%p, 0x%p, %u, %u)\n", file, page, from, to);
+
+ ret = generic_commit_write(file, page, from, to);
+
+ LOG_EXIT_INT(ret);
+ LOG_CLEAR_CONTEXT();
+ return ret;
+}
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
+static sector_t ocfs_bmap(struct address_space *mapping, sector_t block)
+#else
+static int ocfs_bmap(struct address_space *mapping, long block)
+#endif
+{
+ int disk_block = 0;
+ ocfs_super *osb = OCFS_SB(mapping->host->i_sb);
+ __s64 vbo = 0;
+ __s64 lbo = 0;
+ __u32 len;
+ int err = 0, status;
+ struct inode *inode = mapping->host;
+
+ LOG_SET_CONTEXT(BMAP);
+ LOG_ENTRY_ARGS("(block = %llu)\n", (unsigned long long)block);
+
+ if (!INODE_JOURNAL(inode)) {
+ LOG_ERROR_STR("bmap is only for journal inodes!");
+ err = -EINVAL;
+ LOG_ERROR_STATUS(err);
+ goto bail;
+ }
+
+ vbo = (__s64) block << inode->i_sb->s_blocksize_bits;
+ len = osb->sb->s_blocksize;
+ err = ocfs_lookup_file_allocation(osb, vbo, &lbo, len, NULL,
+ inode, 1);
+ if (err < 0) {
+ LOG_ERROR_ARGS ("vbo=%lld lbo=%lld len=%u", vbo,
+ lbo, len);
+ LOG_ERROR_STATUS(err);
+ goto bail;
+ }
+
+ disk_block = lbo >> inode->i_sb->s_blocksize_bits;
+
+bail:
+ status = err ? err : disk_block;
+
+ LOG_EXIT_STATUS(status);
+ LOG_CLEAR_CONTEXT();
+ return status;
+}
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
+/*
+ * TODO: Make this into a generic get_blocks function.
+ *
+ * From do_direct_io in direct-io.c:
+ * "So what we do is to permit the ->get_blocks function to populate
+ * bh.b_size with the size of IO which is permitted at this offset and
+ * this i_blkbits."
+ *
+ * This function is called directly from get_more_blocks in direct-io.c.
+ *
+ * We should probably have this data in the oin for the inode.
+ * Otherwise, we might want to look at ocfs_rw_direct,
+ * ocfs_lookup_file_allocation and ocfs_get_block
+ *
+ * called like this: dio->get_blocks(dio->inode, fs_startblk,
+ * fs_count, map_bh, dio->rw == WRITE);
+ */
+static int ocfs_direct_IO_get_blocks(struct inode *inode, sector_t iblock, unsigned long max_blocks, struct buffer_head *bh_result, int create)
+{
+ int ret = -1;
+ int status;
+ ocfs_super *osb = NULL;
+ __s64 vbo; /* file offset */
+ __s64 lbo; /* logical (disk) offset */
+ __s64 vbo_max; /* file offset, max_blocks from iblock */
+ int set_new = 0; /* flag */
+ __u64 new_size; /* In bytes, the size of the contiguous block */
+ unsigned char blocksize_bits;
+
+ if (!inode || !bh_result) {
+ LOG_ERROR_STR("ocfs_direct_IO_get_blocks: inode or bh_result is null");
+ return -EIO;
+ }
+
+ osb = inode->i_sb->s_fs_info;
+ blocksize_bits = inode->i_sb->s_blocksize_bits;
+ /* make sure we're up to date... */
+ if (atomic_read(&OCFS_I(inode)->ip_needs_verification)) {
+ LOG_TRACE_STR ("ocfs_direct_IO_get_blocks: verify oin.");
+ status = ocfs_verify_update_inode (osb, inode, 0);
+ if (status < 0) {
+ LOG_TRACE_STR ("ocfs_verify_update_inode failed");
+ ret = -EIO;
+ goto bail;
+ }
+ }
+
+ /* This function won't even be called if the request isn't all
+ * nicely aligned and of the right size, so there's no need
+ * for us to check any of that. */
+
+ vbo = (__s64) iblock << blocksize_bits;
+ vbo_max = vbo + ((__s64) max_blocks << blocksize_bits);
+
+ /* NOTE: create flag is set when we ?may? have to allocate some
+ blocks for the file. */
+ if (create && vbo_max > OCFS_I(inode)->ip_alloc_size) {
+ /* WARNING: How much do we really want to extend the file? */
+ status = ocfs_extend_file(osb, vbo_max,
+ NULL, inode, NULL, 0, NULL);
+ if (status < 0) {
+ status = -ENOSPC;
+ LOG_ERROR_STR("ocfs_direct_IO_get_blocks: failed to extend the file!");
+ goto bail;
+ }
+ set_new = 1;
+ }
+
+ /* This figures out the size of the next contiguous block, and
+ * our logical offset */
+ /* TODO: Try our damndest to give sizes in multiples of PAGE_SIZE */
+ status = ocfs_lookup_file_allocation(osb, vbo, &lbo, max_blocks << blocksize_bits,
+ &new_size, inode, 1);
+
+ /* Do whatever we need to the buffer_head */
+ if (set_new) {
+ set_buffer_new(bh_result);
+ /* Do we really want to set bh_result->b_blocknr here too? */
+ bh_result->b_blocknr = lbo >> blocksize_bits;
+ } else {
+ clear_buffer_new(bh_result);
+ /* is the last argument here correct? */
+ map_bh(bh_result, inode->i_sb, lbo >> blocksize_bits);
+ }
+
+ /* make sure we don't map more than max_blocks blocks here as
+ that's all the kernel will handle at this point. */
+ if (new_size > (__u64)max_blocks << blocksize_bits)
+ new_size = (__u64)max_blocks << blocksize_bits;
+ bh_result->b_size = new_size;
+
+ ret = 0;
+bail:
+ return ret;
+}
+
+/*
+ * ocfs_direct_IO()
+ * used to be:
+ * static int ocfs_direct_IO (int rw,
+ * struct inode *inode,
+ * struct kiobuf *iobuf, unsigned long blocknr, int blocksize)
+ *
+ * now:
+ static int ocfs_direct_IO(int rw, struct kiocb *iocb,
+ const struct iovec *iov, loff_t offset,
+ unsigned long nr_segs)
+ * int (*direct_IO)(int, struct kiocb *, const struct iovec *iov,
+ * loff_t offset, unsigned long nr_segs);
+ */
+static ssize_t ocfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, loff_t offset, unsigned long nr_segs)
+{
+ struct file *file = iocb->ki_filp;
+ struct inode *inode = file->f_dentry->d_inode->i_mapping->host;
+ int ret;
+
+ LOG_SET_CONTEXT(DIRECT_IO);
+
+ LOG_ENTRY ();
+
+ /* blockdev_direct_IO checks alignment for us, using */
+ ret = blockdev_direct_IO (rw, iocb, inode, inode->i_sb->s_bdev, iov, offset, nr_segs, ocfs_direct_IO_get_blocks, NULL);
+
+ LOG_EXIT_INT (ret);
+
+ LOG_CLEAR_CONTEXT();
+ return ret;
+} /* ocfs_direct_IO */
+
+#elif LINUX_VERSION_CODE >= KERNEL_VERSION(2,4,10)
+/*
+ * ocfs_direct_IO()
+ *
+ * we are not using this function anymore, in fact
+ * we should never get here any more
+ * so let's just BUG(), hint from sct at redhat.com
+ */
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,4,20) || defined(SUSE)
+static int ocfs_direct_IO (int rw, struct file *filp, struct kiobuf *iobuf, unsigned long blocknr, int blocksize)
+{
+ BUG();
+ return 0;
+} /* ocfs_direct_IO */
+#else
+static int ocfs_direct_IO (int rw, struct inode *inode, struct kiobuf *iobuf, unsigned long blocknr, int blocksize)
+{
+ BUG();
+ return 0;
+} /* ocfs_direct_IO */
+#endif
+#endif /* version >= 2.4.10 */
+
+
+struct address_space_operations ocfs_aops = {
+ .readpage = ocfs_readpage,
+ .writepage = ocfs_writepage,
+ .prepare_write = ocfs_prepare_write,
+ .commit_write = ocfs_commit_write,
+ .bmap = ocfs_bmap,
+
+ /*
+ * On a 2.4 system, we are only adding this here as a dummy basically.
+ * Just need open with O_DIRECT to succeed, we still call
+ * ocfs_rw_direct().
+ *
+ * For a 2.6 system, this is the way a filesystem provides
+ * direct-io support.
+ */
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,4,10)
+ .direct_IO = ocfs_direct_IO
+#endif
+};
Index: src/24io.c
===================================================================
--- src/24io.c (revision 0)
+++ src/24io.c (revision 0)
@@ -0,0 +1,481 @@
+
+#include <linux/version.h>
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)
+
+#define KERNEL_NO_F_IOBUF
+#include "ocfs_compat.h"
+
+#include <linux/fs.h>
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/highmem.h>
+#include <linux/pagemap.h>
+#include <linux/iobuf.h>
+
+#include <asm/byteorder.h>
+
+#include "ocfs_log.h"
+#include "ocfs.h"
+
+#include "alloc.h"
+#include "dlm.h"
+#include "extmap.h"
+#include "file.h"
+#include "inode.h"
+#include "lockres.h"
+#include "namei.h"
+#include "super.h"
+#include "symlink.h"
+#include "sysfile.h"
+#include "util.h"
+#include "vote.h"
+
+#include "ocfs_journal.h"
+#include "buffer_head_io.h"
+
+#define OCFS_DEBUG_CONTEXT OCFS_DEBUG_CONTEXT_INODE
+
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,4,18)
+# define free_kiovec_sz(nr, buf, bh) free_kiovec(nr, buf)
+# define alloc_kiovec_sz(nr, buf, bh) alloc_kiovec(nr, buf)
+#endif
+
+#if defined(SUSE) && LINUX_VERSION_CODE < KERNEL_VERSION(2,4,20)
+#define OCFS_KIO_BLOCKS(_iobuf) ((_iobuf)->kio_blocks)
+#else
+#define OCFS_KIO_BLOCKS(_iobuf) ((_iobuf)->blocks)
+#endif
+
+#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,4,10)
+#define KERNEL_NO_F_IOBUF 1
+#elif defined(SUSE) && LINUX_VERSION_CODE >= KERNEL_VERSION(2,4,20)
+#define KERNEL_NO_F_IOBUF 1
+#endif
+
+static int ocfs_get_block2 (struct inode *inode, long iblock, long *oblock, int len)
+{
+ int err = -EIO;
+ ocfs_super *osb;
+ __s64 vbo = 0;
+ __s64 lbo = 0;
+
+ LOG_ENTRY_ARGS ("(0x%p, %ld)\n", inode, iblock);
+
+ if (!inode) {
+ LOG_ERROR_STR ("bad inode");
+ err = -1;
+ goto bail;
+ }
+
+ osb = OCFS_SB(inode->i_sb);
+
+ vbo = (__s64) iblock << osb->s_sectsize_bits;
+ err = ocfs_lookup_file_allocation(osb, vbo, &lbo, len, NULL,
+ inode, 1);
+ if (err < 0) {
+ LOG_ERROR_STATUS (err);
+ err = -1;
+ goto bail;
+ }
+
+ err = 0;
+
+ *oblock = lbo >> osb->s_sectsize_bits;
+ if (*oblock == 0) {
+ err = -EIO;
+ LOG_ERROR_ARGS ("vbo=%lld lbo=%lld len=%u, blkno=(%llu)\n",
+ vbo, lbo, len,
+ OCFS_I(inode)->ip_blkno);
+ }
+
+bail:
+ if (err < 0)
+ err = -EIO;
+ LOG_EXIT_INT (err);
+ return err;
+} /* ocfs_get_block2 */
+
+/*
+ * ocfs_rw_direct()
+ *
+ */
+ssize_t ocfs_rw_direct (int rw, struct file *filp, char *buf, size_t size, loff_t * offp)
+{
+#ifdef KERNEL_NO_F_IOBUF
+ struct kiobuf *iobuf;
+#else
+ struct kiobuf *iobuf = filp->f_iobuf;
+ int new_iobuf = 0;
+#endif
+ int err = 0;
+ unsigned long blocknr, blocks, myiosize;
+ size_t transferred;
+ int iosize, clustersize;
+ int i;
+ struct inode *inode = filp->f_dentry->d_inode;
+ int max_sectors;
+ int nbhs;
+ int sector_size, sector_bits, sector_mask, sectors_per_page;
+ int ret = 0;
+ int large_io = 0;
+ int inuse = 0;
+ unsigned long blocks_end_cluster = 0;
+ loff_t saved_off;
+ size_t saved_size;
+ unsigned long firstlogic;
+ long firstphys;
+ long nextphys;
+ unsigned long nextlogic = 0;
+ unsigned long totalioblocks = 0;
+
+ saved_off = *offp;
+ saved_size = size;
+
+ /* FIXME: Need to differentiate between sectors and blocksize */
+ sector_bits = OCFS_SB(inode->i_sb)->s_sectsize_bits;
+ sector_size = 1 << OCFS_SB(inode->i_sb)->s_sectsize_bits;
+ sector_mask = sector_size - 1;
+ sectors_per_page = PAGE_SIZE / sector_size;
+ /* max sectors is 1024 in 2.4.9
+ * max data is 512kb
+ */
+
+ err = -EINVAL;
+ if (size == 0) {
+ printk("direct write of 0 byte\n");
+ return 0;
+ }
+
+ if (rw == READ) {
+ if (inode->i_size <= *offp) /* read past end of file */
+ return 0;
+ if (size > (inode->i_size - *offp))
+ size = inode->i_size - *offp;
+ }
+
+ /* make sure aligned to either PAGE_SIZE or sect_size IO */
+#ifndef LARGEIOS
+ if ((*offp & sector_mask) || (size & sector_mask))
+ /* if not, then fail, we need either to do dio */
+ return err;
+
+ max_sectors = KIO_MAX_SECTORS;
+ large_io = 0;
+#endif
+#ifdef LARGEIOS
+ if ((*offp & ~PAGE_MASK) || (size & ~PAGE_MASK)) {
+ /* if it's not PAGE_SIZE, then sect_size */
+ if ((*offp & sector_mask) || (size & sector_mask))
+ /* if not, then fail, we need either to do dio */
+ return err;
+ max_sectors = KIO_MAX_SECTORS; /* for 2.4.9 - 1024 */
+ } /* OK, we're PAGE_SIZE aligned, let's see if the buffer is */
+ else {
+ if (!((unsigned long) buf & ~PAGE_MASK)) {
+ /* yippie we are .. we can do PAGE_SIZE size io's */
+ large_io = 1;
+ /* for 2.4.9 */
+ max_sectors = KIO_MAX_SECTORS / sectors_per_page;
+ } else {
+ max_sectors = KIO_MAX_SECTORS;
+ large_io = 0;
+ }
+
+ }
+#endif
+ /* find out how far we are to the end of our cluster */
+
+ err = 0;
+ if (size)
+ err = -ENXIO;
+
+ /* Split the IO into KIO_MAX_SECTORS chunks, mapping and */
+ /* unmapping the single kiobuf as we go to perform each chunk of IO. */
+
+ transferred = 0;
+ blocknr = *offp >> sector_bits;
+ clustersize = inode->i_blksize >> sector_bits;
+ myiosize = size >> sector_bits;
+ blocks_end_cluster = clustersize - (blocknr % clustersize);
+ firstlogic = blocknr;
+ totalioblocks = 0;
+
+ ret = ocfs_get_block2 (inode, blocknr, &firstphys, sector_size);
+ if (ret == -1) {
+ err = 0;
+ goto out;
+ }
+ while (myiosize > 0) {
+ if (blocks_end_cluster + 1 > myiosize) {
+ totalioblocks += myiosize;
+ myiosize = 0;
+ goto doio;
+ } else {
+ totalioblocks += blocks_end_cluster;
+ myiosize -= blocks_end_cluster;
+ nextlogic = firstlogic + blocks_end_cluster;
+ }
+again:
+ ret = ocfs_get_block2 (inode, nextlogic, &nextphys, sector_size);
+ if (ret == -1) {
+ err = 0;
+ goto out;
+ }
+ if (nextphys == (firstphys + totalioblocks)) {
+ // merge ok
+ blocks_end_cluster = clustersize - (nextlogic % clustersize);
+ if (blocks_end_cluster + 1 > myiosize) {
+ totalioblocks += myiosize;
+ myiosize = 0;
+ } else {
+ totalioblocks += blocks_end_cluster;
+ myiosize -= blocks_end_cluster;
+ nextlogic = nextlogic + blocks_end_cluster;
+ goto again;
+ }
+ }
+doio:
+ size = totalioblocks << sector_bits;
+ if (large_io)
+ nbhs = (size >> PAGE_SHIFT);
+ else
+ nbhs = (size >> sector_bits);
+ if (nbhs > max_sectors)
+ nbhs = max_sectors;
+
+#ifdef KERNEL_NO_F_IOBUF
+ err = alloc_kiovec_sz (1, &iobuf, &nbhs);
+ if (err)
+ goto out;
+#else
+ if (test_and_set_bit(0, &filp->f_iobuf_lock)) {
+ /*
+ * A parallel read/write is using the preallocated iobuf
+ * so just run slow and allocate a new one.
+ */
+ err = alloc_kiovec_sz (1, &iobuf, &nbhs);
+ if (err)
+ goto out;
+ new_iobuf = 1;
+ } else
+ new_iobuf = 0;
+
+#endif
+ inuse = 1;
+
+ totalioblocks = 0;
+ while (size > 0) {
+ if (large_io) {
+ blocks = size >> PAGE_SHIFT;
+ if (blocks > max_sectors)
+ blocks = max_sectors;
+ iosize = blocks << PAGE_SHIFT;
+ } else {
+ blocks = size >> sector_bits;
+ if (blocks > max_sectors)
+ blocks = max_sectors;
+ iosize = blocks << sector_bits;
+ }
+ if (!blocks)
+ break;
+ err = map_user_kiobuf (rw, iobuf, (unsigned long) buf, iosize);
+ if (err)
+ break;
+ /* get the blocknr depending on io size for all blocks */
+ /* since we are always within the extent we only need to get the first block */
+ OCFS_KIO_BLOCKS(iobuf)[0] = firstphys + totalioblocks;
+
+ if (large_io) {
+ blocknr += sectors_per_page;
+ OCFS_KIO_BLOCKS(iobuf)[0] = OCFS_KIO_BLOCKS(iobuf)[0] / sectors_per_page;
+ } else {
+ blocknr++;
+ }
+
+ for (i = 1; i < blocks; i++) {
+ if (large_io) {
+ blocknr += sectors_per_page;
+ } else {
+ blocknr++;
+ }
+ OCFS_KIO_BLOCKS(iobuf)[i] = OCFS_KIO_BLOCKS(iobuf)[0] + i;
+ }
+ err = brw_kiovec (rw, 1, &iobuf, inode->i_dev, OCFS_KIO_BLOCKS(iobuf),
+ large_io ? PAGE_SIZE : sector_size);
+#ifdef SUSE
+ if (rw == READ && err > 0)
+ mark_dirty_kiobuf(iobuf, err);
+#endif
+ if (err >= 0) {
+ transferred += err;
+ size -= err;
+ buf += err;
+ if (large_io) {
+ totalioblocks +=
+ (blocks * sectors_per_page);
+ } else {
+ totalioblocks += blocks;
+ }
+ } else {
+ printk( "ocfs_rw_direct : brw_kiovec() %d\n", err);
+ break;
+ }
+ unmap_kiobuf (iobuf);
+ if (err != iosize)
+ break;
+ }
+#ifdef KERNEL_NO_F_IOBUF
+ free_kiovec_sz(1, &iobuf, &nbhs);
+#else
+ if (!new_iobuf)
+ clear_bit(0, &filp->f_iobuf_lock);
+ else
+ free_kiovec_sz(1, &iobuf, &nbhs);
+#endif
+ inuse = 0;
+ totalioblocks = 0;
+ firstlogic = nextlogic;
+ firstphys = nextphys;
+ }
+ if (transferred) {
+ *offp += transferred;
+ err = transferred;
+ }
+
+out:
+#ifdef KERNEL_NO_F_IOBUF
+ if (inuse)
+ free_kiovec_sz (1, &iobuf, &nbhs);
+#else
+ if (inuse) {
+ if (!new_iobuf)
+ clear_bit(0, &filp->f_iobuf_lock);
+ else
+ free_kiovec_sz(1, &iobuf, &nbhs);
+ }
+#endif
+ return err;
+} /* ocfs_rw_direct */
+
+#ifdef AIO_ENABLED
+static int ocfs_kvec_rw(struct file *filp, int rw, kvec_cb_t cb,
+ size_t size, loff_t pos)
+{
+ int err = 0;
+ int max_sectors = 25000;
+ struct inode *inode = filp->f_dentry->d_inode;
+ unsigned long blocknr, blocks, iosize,myiosize;
+ long firstphys;
+ int clustersize;
+ unsigned long blocks_end_cluster = 0;
+
+ /* FIXME: Need to differentiate between sectors and blocksize */
+ int sector_bits = OCFS_SB(inode->i_sb)->s_sectsize_bits;
+ int sector_size = 1 << OCFS_SB(inode->i_sb)->s_sectsize_bits;
+ int sector_mask = sector_size - 1;
+
+ int ret;
+ unsigned long firstlogic;
+ long nextphys;
+ unsigned long nextlogic = 0;
+ unsigned long totalioblocks = 0;
+
+ if (!size || (pos == inode->i_size)) {
+ cb.fn(cb.data, cb.vec, err);
+ return err;
+ }
+
+ err = -ENXIO;
+ if (pos >= inode->i_size) {
+ return err;
+ }
+
+ err = -EINVAL;
+ if ((pos < 0) || (pos & sector_mask) || (size & sector_mask)) {
+ return err;
+ }
+
+ blocknr = pos >> sector_bits;
+
+ blocks = size >> sector_bits;;
+ if (blocks > max_sectors)
+ blocks = max_sectors;
+ if (!blocks) {
+ err = -ENXIO;
+ return err;;
+ }
+
+ iosize = blocks << sector_bits;
+ clustersize = inode->i_blksize >> sector_bits;
+ blocks_end_cluster = clustersize - (blocknr % clustersize);
+ myiosize = size >> sector_bits;
+ firstlogic = blocknr;
+ totalioblocks = 0;
+
+ err = ocfs_get_block2(inode, blocknr, &firstphys, sector_size);
+ if ( err == -1 ) {
+ err = 0;
+ return err;
+ }
+ if (blocks_end_cluster + 1 > myiosize) {
+ totalioblocks += myiosize;
+ myiosize = 0;
+ goto doio;
+ } else {
+ totalioblocks += blocks_end_cluster;
+ myiosize -= blocks_end_cluster;
+ nextlogic = firstlogic + blocks_end_cluster;
+ }
+again:
+ ret = ocfs_get_block2 (inode, nextlogic, &nextphys, sector_size);
+ if (ret == -1) {
+ err = 0;
+ return err;
+ }
+ if (nextphys == (firstphys + totalioblocks)) {
+ blocks_end_cluster = clustersize - (nextlogic % clustersize);
+ if (blocks_end_cluster + 1 > myiosize) {
+ totalioblocks += myiosize;
+ myiosize = 0;
+ } else {
+ totalioblocks += blocks_end_cluster;
+ myiosize -= blocks_end_cluster;
+ nextlogic = nextlogic + blocks_end_cluster;
+ goto again;
+ }
+ }
+doio:
+ blocks = totalioblocks;
+ err = brw_kvec_async(rw, cb, inode->i_dev, blocks, firstphys, sector_bits);
+ return err;
+
+}
+
+int ocfs_kvec_read(struct file *file, kvec_cb_t cb, size_t size, loff_t pos)
+{
+ int ret;
+
+ LOG_SET_CONTEXT(KVEC_READ);
+
+ ret = ocfs_kvec_rw(file, READ, cb, size, pos);
+
+ LOG_CLEAR_CONTEXT();
+ return ret;
+}
+
+int ocfs_kvec_write(struct file *file, kvec_cb_t cb, size_t size, loff_t pos)
+{
+ int ret;
+
+ LOG_SET_CONTEXT(KVEC_WRITE);
+
+ ret = ocfs_kvec_rw(file, WRITE, cb, size, pos);
+
+ LOG_CLEAR_CONTEXT();
+ return ret;
+}
+
+#endif /* aio */
+#endif /* 2.6 */
More information about the Ocfs2-devel
mailing list