[Ocfs2-commits] khackel commits r1314 - trunk/src

svn-commits at oss.oracle.com svn-commits at oss.oracle.com
Mon Aug 2 00:23:31 CDT 2004


Author: khackel
Date: 2004-08-01 23:23:29 -0500 (Sun, 01 Aug 2004)
New Revision: 1314

Modified:
   trunk/src/alloc.c
   trunk/src/file.c
   trunk/src/file.h
   trunk/src/inode.c
   trunk/src/namei.c
   trunk/src/ocfs.h
Log:
change bh zeroing -> pagecache zeroing, courtesy of jlbec

Modified: trunk/src/alloc.c
===================================================================
--- trunk/src/alloc.c	2004-07-30 00:32:40 UTC (rev 1313)
+++ trunk/src/alloc.c	2004-08-02 04:23:29 UTC (rev 1314)
@@ -131,7 +131,6 @@
 	return (start_blkno == blkno);
 }
 
-/* this is a good candidate for an inline function. */
 int ocfs_process_bitmap_free_head(ocfs_super *osb, ocfs_bitmap_free_head *f) 
 {
 	struct list_head *p, *n;

Modified: trunk/src/file.c
===================================================================
--- trunk/src/file.c	2004-07-30 00:32:40 UTC (rev 1313)
+++ trunk/src/file.c	2004-08-02 04:23:29 UTC (rev 1314)
@@ -32,6 +32,7 @@
 #include <linux/types.h>
 #include <linux/slab.h>
 #include <linux/highmem.h>
+#include <linux/pagemap.h>
 
 #include "ocfs_log.h"
 #include "ocfs.h"
@@ -776,6 +777,21 @@
 		    ("Generic_file_write ok, asking for OIN update now");
 		inode->i_size = newsize;
 		inode->i_blocks = (newsize + sb->s_blocksize - 1) >> sb->s_blocksize_bits;
+		if (do_direct_io) {
+			/*
+			 * This leaves dirty data in holes.
+			 * Caveat Emptor.
+			 */
+			OCFS_I(inode)->ip_mmu_private = inode->i_size;
+		} else {
+			status = ocfs2_zero_extend(inode);
+			/*
+			 * Don't overwrite the result of
+			 * generic_file_write
+			 */
+			if (status)
+				LOG_ERROR_ARGS("Unable to pre-zero extension of inode (%d)", status);
+		}
 		up(&osb->extend_sem);
 	}
 
@@ -988,6 +1004,7 @@
 
 	/* if we updated correctly then we can update the alloc_size */
 	OCFS_I(inode)->ip_alloc_size = new_alloc_size;
+	OCFS_I(inode)->ip_mmu_private = fe->i_size;
 	up (&(OCFS_I(inode)->ip_sem));
 
 leave:
@@ -1007,7 +1024,39 @@
 	return status;
 }  /* ocfs_truncate_file */
 
+int ocfs2_zero_extend(struct inode *inode)
+{
+	struct address_space *mapping = inode->i_mapping;
+	struct page *page;
+	u64 size = inode->i_size - 1;
+	unsigned int offset;
+	int res = 0;
 
+	/* Start the zeroing of blocks */
+	if (inode->i_size > OCFS_I(inode)->ip_mmu_private) {
+		page = grab_cache_page(mapping,
+				       size >> PAGE_CACHE_SHIFT);
+		if (!page) {
+			res = -ENOMEM;
+			LOG_ERROR_STATUS(res);
+			return res;
+		}
+		offset = (unsigned int)(size & (PAGE_CACHE_SIZE - 1)) + 1;
+		res = mapping->a_ops->prepare_write(NULL, page, offset,
+						    offset);
+		if (!res)
+			res = mapping->a_ops->commit_write(NULL, page,
+							   offset,
+							   offset);
+		unlock_page(page);
+		page_cache_release(page);
+		mark_inode_dirty(inode);
+	}
+
+	return res;
+}
+
+
 /* ocfs_extend_file()
  *
  */
@@ -1153,15 +1202,13 @@
 
 #define	OCFS_MAX_ZERO_BLOCKS (4096)
 
-	down(&OCFS_I(inode)->ip_sem);
-	if (!(OCFS_I(inode)->ip_open_flags & OCFS_OIN_OPEN_FOR_DIRECTIO)) {
+	if (system_file) {
 		struct buffer_head **bhs = NULL;
 		int i;
 		sector_t block;
 		u64 this_last;
 
-		up(&OCFS_I(inode)->ip_sem);
-
+		printk("Extending system inode %llu (from %u, adding %u)\n", fe->i_blkno, fe->i_clusters, numClustersAlloc);
 		LOG_TRACE_ARGS("zeroing %llu blocks from offset %llu\n", 
 			       num_blocks, block_off);
 		bhs = kmalloc(OCFS_MAX_ZERO_BLOCKS * sizeof(struct buffer_head *),
@@ -1217,8 +1264,7 @@
 			if (bhs[i])
 				brelse(bhs[i]);
 		kfree(bhs);
-	} else
-		up(&OCFS_I(inode)->ip_sem);
+	}
 
 skip_zeros:
 	ext_alloc_inode = ocfs_get_system_file_inode(osb, EXTENT_ALLOC_BITMAP_SYSTEM_INODE, osb->node_num);
@@ -1387,7 +1433,17 @@
 		}
 		inode->i_size = newsize;
 		inode->i_blocks = (newsize + sb->s_blocksize - 1) >> sb->s_blocksize_bits;
+		if (OCFS_I(inode)->ip_open_flags &
+		    OCFS_OIN_OPEN_FOR_DIRECTIO) {
+			/* This is a total broken hack for O_DIRECT crack */
+			OCFS_I(inode)->ip_mmu_private = inode->i_size;
+		}
 		up (&(OCFS_I(inode)->ip_sem));
+		status = ocfs2_zero_extend(inode);
+		if (status < 0) {
+			LOG_ERROR_STATUS(status);
+			goto bail;
+		}
 	}
 
 	status = -EFAIL;

Modified: trunk/src/file.h
===================================================================
--- trunk/src/file.h	2004-07-30 00:32:40 UTC (rev 1313)
+++ trunk/src/file.h	2004-08-02 04:23:29 UTC (rev 1314)
@@ -44,6 +44,7 @@
 			   struct inode *inode);
 int ocfs_setattr(struct dentry *dentry, struct iattr *attr);
 int ocfs_sync_inode(struct inode *inode);
+int ocfs2_zero_extend(struct inode *inode);
 
 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
 int ocfs_getattr(struct vfsmount *mnt, struct dentry *dentry,

Modified: trunk/src/inode.c
===================================================================
--- trunk/src/inode.c	2004-07-30 00:32:40 UTC (rev 1313)
+++ trunk/src/inode.c	2004-08-02 04:23:29 UTC (rev 1314)
@@ -353,6 +353,7 @@
 	/* These should be set in read_inode2. */
 	i->ip_alloc_size = 0ULL;
 	i->ip_blkno = 0ULL;
+	i->ip_mmu_private = 0ULL;
 
 	OCFS_SET_FLAG (i->ip_flags, OCFS_INODE_INITIALIZED);
 	return 0;
@@ -448,6 +449,7 @@
 		    inode->i_fop = &ocfs_fops;
 		    inode->i_op = &ocfs_file_iops;
 		    inode->i_size = fe->i_size;
+		    OCFS_I(inode)->ip_mmu_private = inode->i_size;
 		    break;
 	    case S_IFDIR:
 		    atomic_set(GET_INODE_CLEAN_SEQ(inode), atomic_read(&osb->clean_buffer_seq));
@@ -817,6 +819,139 @@
 }				/* ocfs_clear_inode */
 
 
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)
+inline void __mark_dirty(struct buffer_head *bh)
+{
+	set_buffer_flushtime(bh);
+	refile_buffer(bh);
+}
+
+static int __block_commit_write(struct inode *inode, struct page *page,
+		unsigned from, unsigned to)
+{
+	unsigned block_start, block_end;
+	int partial = 0, need_balance_dirty = 0;
+	unsigned blocksize;
+	struct buffer_head *bh, *head;
+
+	blocksize = 1 << inode->i_blkbits;
+
+	for(bh = head = page->buffers, block_start = 0;
+	    bh != head || !block_start;
+	    block_start=block_end, bh = bh->b_this_page) {
+		block_end = block_start + blocksize;
+		if (block_end <= from || block_start >= to) {
+			if (!buffer_uptodate(bh))
+				partial = 1;
+		} else {
+			set_bit(BH_Uptodate, &bh->b_state);
+			if (!atomic_set_buffer_dirty(bh)) {
+				__mark_dirty(bh);
+				buffer_insert_inode_data_queue(bh, inode);
+				need_balance_dirty = 1;
+			}
+		}
+	}
+
+	if (need_balance_dirty)
+		balance_dirty();
+	/*
+	 * If this is a partial write that happened to make all buffers
+	 * uptodate then we can optimize away a bogus readpage() for
+	 * the next read(). Here we 'discover' whether the page went
+	 * uptodate as a result of this (potentially partial) write.
+	 */
+	if (!partial)
+		SetPageUptodate(page);
+	return 0;
+}
+
+static int ocfs2_cont_prepare_write(struct page *page, unsigned offset, unsigned to, get_block_t *get_block, loff_t *bytes)
+{
+	struct address_space *mapping = page->mapping;
+	struct inode *inode = mapping->host;
+	struct page *new_page;
+	unsigned long pgpos;
+	long status;
+	unsigned zerofrom;
+	unsigned blocksize = 1 << inode->i_blkbits;
+	char *kaddr;
+
+	while(page->index > (pgpos = *bytes>>PAGE_CACHE_SHIFT)) {
+		status = -ENOMEM;
+		new_page = grab_cache_page(mapping, pgpos);
+		if (!new_page)
+			goto out;
+		/* we might sleep */
+		if (*bytes>>PAGE_CACHE_SHIFT != pgpos) {
+			unlock_page(new_page);
+			page_cache_release(new_page);
+			continue;
+		}
+		zerofrom = *bytes & ~PAGE_CACHE_MASK;
+		if (zerofrom & (blocksize-1)) {
+			*bytes |= (blocksize-1);
+			(*bytes)++;
+		}
+		status = block_prepare_write(new_page, zerofrom,
+					     PAGE_CACHE_SIZE, get_block);
+		if (status)
+			goto out_unmap;
+		kaddr = page_address(new_page);
+		memset(kaddr+zerofrom, 0, PAGE_CACHE_SIZE-zerofrom);
+		flush_dcache_page(new_page);
+		__block_commit_write(inode, new_page, zerofrom, PAGE_CACHE_SIZE);
+		kunmap(new_page);
+		unlock_page(new_page);
+		page_cache_release(new_page);
+	}
+
+	if (page->index < pgpos) {
+		/* completely inside the area */
+		zerofrom = offset;
+	} else {
+		/* page covers the boundary, find the boundary offset */
+		zerofrom = *bytes & ~PAGE_CACHE_MASK;
+
+		/* if we will expand the thing last block will be filled */
+		if (to > zerofrom && (zerofrom & (blocksize-1))) {
+			*bytes |= (blocksize-1);
+			(*bytes)++;
+		}
+
+		/* starting below the boundary? Nothing to zero out */
+		if (offset <= zerofrom)
+			zerofrom = offset;
+	}
+	status = block_prepare_write(page, zerofrom, to, get_block);
+	if (status)
+		goto out1;
+	kaddr = page_address(page);
+	if (zerofrom < offset) {
+		memset(kaddr+zerofrom, 0, offset-zerofrom);
+		flush_dcache_page(page);
+		__block_commit_write(inode, page, zerofrom, offset);
+	}
+	return 0;
+out1:
+	ClearPageUptodate(page);
+	kunmap(page);
+	return status;
+
+out_unmap:
+	ClearPageUptodate(new_page);
+	kunmap(new_page);
+	UnlockPage(new_page);
+	page_cache_release(new_page);
+out:
+	return status;
+}
+
+/* Mark's favorite hack */
+#undef cont_prepare_write
+#define cont_prepare_write ocfs2_cont_prepare_write
+#endif  /* < 2.6.0 */
+
 /*
  * ocfs_prepare_write()
  *
@@ -824,7 +959,6 @@
 static int ocfs_prepare_write (struct file *file, struct page *page, unsigned from, unsigned to)
 {
 	int ret;
-	void *buff;
 	struct inode *inode = page->mapping->host;
 
 	LOG_SET_CONTEXT(PREPARE_WRITE);
@@ -834,16 +968,9 @@
 	if (!inode)
 		BUG();
 
-	ret = block_prepare_write (page, from, to, ocfs_get_block);
+	ret = cont_prepare_write(page, from, to, ocfs_get_block,
+		&(OCFS_I(page->mapping->host)->ip_mmu_private));
 
-	/* because of our massive security hole. */
-	if ((!PageUptodate(page)) &&
-	    page->index == (unsigned long) (inode->i_size >> PAGE_CACHE_SHIFT)) {
-		buff = kmap(page);
-		memset(buff + to, 0, PAGE_CACHE_SIZE - to);
-		kunmap(page);
-	}
-
 	LOG_EXIT_INT (ret);
 
 	LOG_CLEAR_CONTEXT();
@@ -932,7 +1059,13 @@
 }				/* ocfs_symlink_get_block */
 
 
-/* TODO: this should probably be merged into ocfs_get_block */
+/*
+ * TODO: this should probably be merged into ocfs_get_block
+ * 
+ * However, you now need to pay attention to the cont_prepare_write()
+ * stuff in ocfs_get_block (that is, ocfs_get_block pretty much
+ * expects never to extend).
+ */
 struct buffer_head *ocfs_bread(ocfs_journal_handle *handle, struct inode * inode, 
 			       int block, int create, int *err, int reada)
 {
@@ -1039,7 +1172,6 @@
 static int ocfs_get_block (struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create)
 {
 	int err = -EIO;
-	ocfs_super *osb = NULL;
 	__s64 vbo = 0;
 	__s64 lbo = 0;
 	__u32 len;
@@ -1048,6 +1180,16 @@
 	LOG_ENTRY_ARGS ("(0x%p, %llu, 0x%p, %d)\n", inode,
 			(unsigned long long)iblock, bh_result, create);
 
+	if (!inode) {
+		LOG_ERROR_STR ("bad inode");
+		goto bail;
+	}
+
+	if (OCFS_I(inode)->ip_flags & OCFS_INODE_SYSTEM_FILE) {
+		printk("get_block on system inode 0x%p (%lu)\n",
+		       inode, inode->i_ino);
+	}
+
 	open_direct = OCFS_I(inode)->ip_open_flags & OCFS_OIN_OPEN_FOR_DIRECTIO;
 
 	if (S_ISLNK (inode->i_mode)) {
@@ -1059,14 +1201,9 @@
 		goto bail;
 	}
 
-	if (!inode) {
-		LOG_ERROR_STR ("bad inode");
-		goto bail;
-	}
-
-	osb = OCFS_SB(inode->i_sb);
 	vbo = (__s64) iblock << inode->i_sb->s_blocksize_bits;
 
+#if 0
 	if (!INODE_JOURNAL(inode) && vbo >= OCFS_I(inode)->ip_alloc_size) {
 		int vbo_pad;
 		
@@ -1086,12 +1223,20 @@
 			goto bail;
 		}
 	}
+#else
+	if (vbo >= OCFS_I(inode)->ip_alloc_size) {
+		err = -EIO;
+		LOG_ERROR_ARGS("Trying to extend in ocfs_get_block() (blkno %llu, vbo %llu, alloc %llu)\n", (u64)iblock, (u64)vbo, OCFS_I(inode)->ip_alloc_size);
+		goto bail;
+	}
+#endif
 
-	len = osb->sb->s_blocksize;
+	len = inode->i_sb->s_blocksize;
 	if (!open_direct)
 		down_read(&OCFS_I(inode)->ip_extend_sem);
-	err = ocfs_lookup_file_allocation(osb, vbo, &lbo, len, NULL, 
-					   inode, open_direct);
+	err = ocfs_lookup_file_allocation(OCFS2_SB(inode->i_sb),
+					  vbo, &lbo, len, NULL, 
+					  inode, open_direct);
 	if (!open_direct)
 		up_read(&OCFS_I(inode)->ip_extend_sem);
 	if (err < 0) {
@@ -1110,6 +1255,21 @@
 				OCFS_I(inode)->ip_blkno);
 	}
 
+	if (iblock < (OCFS_I(inode)->ip_mmu_private >> inode->i_sb->s_blocksize_bits))
+		goto bail;
+	if (!create)
+		goto bail;
+	if (vbo != OCFS_I(inode)->ip_mmu_private) {
+		LOG_ERROR_ARGS("Uh-oh, vbo = %lld, i_size = %llu, mmu = %llu\n",
+		       vbo, inode->i_size, OCFS_I(inode)->ip_mmu_private);
+		BUG();
+		err = -EIO;
+		goto bail;
+	}
+
+	bh_result->b_state |= (1UL << BH_New);
+	OCFS_I(inode)->ip_mmu_private += inode->i_sb->s_blocksize;
+
 bail:
 	if (err < 0)
 		err = -EIO;
@@ -1242,15 +1402,6 @@
 	if (ret < 0)
 		goto bail;
 
-	/* because of our massive security hole. */
-	if (page->index == 
-	    (unsigned long) (inode->i_size >> PAGE_CACHE_SHIFT)) {
-		local = (inode->i_size & ~PAGE_CACHE_MASK);
-		buff = kmap(page);
-		memset(buff + local, 0, PAGE_CACHE_SIZE - local);
-		kunmap(page);
-	}
-
 bail:
 	LOG_EXIT_INT (ret);
 
@@ -2001,6 +2152,9 @@
 		OCFS_I(inode)->ip_alloc_size = 
 			(u64)fe->i_clusters << osb->s_clustersize_bits;
 		inode->i_size = fe->i_size;
+		if (S_ISREG(inode->i_mode)) {
+			OCFS_I(inode)->ip_mmu_private = inode->i_size;
+		}
 		LOG_TRACE_ARGS("verifyupdate: setting nlink from %d to %d for %llu\n", 
 			       inode->i_nlink, fe->i_links_count,
 			       OCFS_I(inode)->ip_blkno);

Modified: trunk/src/namei.c
===================================================================
--- trunk/src/namei.c	2004-07-30 00:32:40 UTC (rev 1313)
+++ trunk/src/namei.c	2004-08-02 04:23:29 UTC (rev 1314)
@@ -192,8 +192,8 @@
 
 	LOG_SET_CONTEXT(MKNOD);
 
-	LOG_ENTRY_ARGS ("(0x%p, 0x%p, %d, %d, '%*s')\n", dir, dentry, mode,
-			dev, dentry->d_name.len, dentry->d_name.name);
+	LOG_ENTRY_ARGS ("(0x%p, 0x%p, %d, %lu, '%*s')\n", dir, dentry, mode,
+			(unsigned long)dev, dentry->d_name.len, dentry->d_name.name);
 
 	down_write(&OCFS_I(dir)->ip_io_sem);
 
@@ -380,8 +380,8 @@
 	struct inode *inode_alloc_inode = NULL;
 	ocfs_node_map just_me;
 
-	LOG_ENTRY_ARGS ("(0x%p, 0x%p, %d, %d, '%*s')\n", dir, dentry, mode,
-			dev, dentry->d_name.len, dentry->d_name.name);
+	LOG_ENTRY_ARGS ("(0x%p, 0x%p, %d, %lu, '%*s')\n", dir, dentry, mode,
+			(unsigned long)dev, dentry->d_name.len, dentry->d_name.name);
 
 	OCFS_ASSERT(new_fe_bh);
 	*new_fe_bh = NULL;

Modified: trunk/src/ocfs.h
===================================================================
--- trunk/src/ocfs.h	2004-07-30 00:32:40 UTC (rev 1313)
+++ trunk/src/ocfs.h	2004-08-02 04:23:29 UTC (rev 1314)
@@ -410,6 +410,7 @@
 	__u64             ip_chng_seq_num;
 	ocfs_extent_map   ip_ext_map;
 	__s64             ip_alloc_size;
+	__s64		  ip_mmu_private;
 	__u32             ip_open_flags;
 
 	atomic_t          ip_needs_verification;



More information about the Ocfs2-commits mailing list