[Ocfs2-commits] mfasheh commits r1231 - trunk/src

svn-commits at oss.oracle.com svn-commits at oss.oracle.com
Thu Jul 1 21:13:40 CDT 2004


Author: mfasheh
Date: 2004-07-01 20:13:39 -0500 (Thu, 01 Jul 2004)
New Revision: 1231

Modified:
   trunk/src/dlm.c
   trunk/src/dlm.h
   trunk/src/file.c
   trunk/src/inode.c
   trunk/src/inode.h
   trunk/src/nm.c
   trunk/src/ocfs.h
Log:
* include a patch by Sonic Zhang et al., which has been edited by me to
  fix a bug with new files and clean up lots of the code. This is a more
  performant data consistency scheme than the one we already have:

        We root caused the problem "The truncate_inode_page call in
ocfs_file_release causes the severe throughput drop of file reading in
OCFS2", which we put forward in our former mails. And now, we have also
generated a patch to resolve this problem after one week of debugging.

        This patch is against OCFS2 svn 1226.

        The average file reading throughput without our patch is 16
Mbyte/sec.
        The average file reading throughput with our patch is 1600
Mbyte/sec.
        Our patch gives a 100-fold improvement in file reading throughput.
We will submit the full iozone benchmark data in another mail soon.

        In our patch, we remove ocfs_truncate_pages() and
ocfs_extent_map_destroy() from the routines ocfs_file_open() and
ocfs_file_release(), which enables file data pages to be reused across
different, sequential accesses to a file on one node.

        In the current OCFS2 design, file data consistency among all nodes
in the cluster is only ensured if the file is accessed in sequence. Our
patch keeps the same consistency level by a new vote request
FLAG_TRUNCATE_PAGES and a new vote action TRUNCATE_PAGES. This request
is broadcast when a file is asked to be opened for write. Then the
receivers truncate all in-memory pages and extent maps of this file. The
sender truncates part of the pages and maps only when the file is
truncated (shortened).

        Please refer to the attachment.

        The throughput drop problem also occurs when creating, changing
and deleting directories on an OCFS2 volume. But it is not covered in this
patch. We will work on another patch to solve this problem.



Modified: trunk/src/dlm.c
===================================================================
--- trunk/src/dlm.c	2004-07-02 00:22:15 UTC (rev 1230)
+++ trunk/src/dlm.c	2004-07-02 01:13:39 UTC (rev 1231)
@@ -731,6 +731,54 @@
 	return status;
 }				/* ocfs_update_master_on_open */
 
+/*
+ * ocfs_notify_truncate_on_wopen()
+ * inode is definitely non NULL
+ */
+int ocfs_notify_truncate_on_wopen (ocfs_super * osb, struct inode *inode)
+{
+	int status = -EAGAIN;
+	int disk_vote = 0;
+	ocfs_lock_res *lockres = GET_INODE_LOCKRES(inode);
+	u32 flags;
+	u64 lock_id = OCFS_I(inode)->ip_blkno << inode->i_sb->s_blocksize_bits;
+
+	LOG_ENTRY ();
+
+	while (status == -EAGAIN) {
+		ocfs_acquire_lockres(lockres, 0); // ocfs_file_open 
+
+		flags = FLAG_TRUNCATE_PAGES;
+		if (!ocfs_journal_new_file_search(osb, lock_id))
+			flags |= FLAG_FAST_PATH_LOCK;
+
+		status = new_lock_function(osb, lockres->lock_type, 
+					   flags, NULL, &disk_vote, inode);
+		if (status < 0) {
+			if (status != -EAGAIN)
+				LOG_ERROR_STATUS (status);
+			ocfs_release_lockres (lockres); // ocfs_file_open ocfs_symlink
+			if (status == -EAGAIN) {
+				ocfs_sleep (500);
+				if (ocfs_task_interruptible (osb)) {
+					LOG_TRACE_ARGS("interrupted... lockid=%llu\n",
+					OCFS_I(inode)->ip_blkno << inode->i_sb->s_blocksize_bits);
+					status = -EINTR;
+					goto bail;
+				}
+				continue;
+			}
+
+			goto bail;
+		}
+		ocfs_release_lockres (lockres); // ocfs_file_open 
+	}
+
+bail:
+	LOG_EXIT_STATUS (status);
+	return status;
+}				/* ocfs_notify_truncate_on_wopen */
+
 enum {
 	invalid_path = 0,
 	fast_path, 
@@ -1277,7 +1325,8 @@
 		/* on a master change... */
 		need_to_zap_buffers = 1; /* need to dump local buffers */
 		need_lock_write = 1;     /* and rewrite the lock */
-	} else if (flags & (FLAG_ADD_OIN_MAP | FLAG_DROP_READONLY)) {
+	} else if (flags & (FLAG_ADD_OIN_MAP | FLAG_DROP_READONLY |
+			FLAG_TRUNCATE_PAGES)) {
 		need_lock_write = 0;
 		need_to_zap_buffers = 0;
 	} else if (flags & (FLAG_READDIR | FLAG_FILE_RELEASE_CACHE)) {
@@ -1305,7 +1354,7 @@
 	/* figure out who to vote with */
 	if (flags & (FLAG_REMASTER | FLAG_FILE_DELETE |
 		     FLAG_FILE_RENAME | FLAG_DROP_READONLY |
-		     FLAG_RELEASE_DENTRY)) {
+		     FLAG_RELEASE_DENTRY | FLAG_TRUNCATE_PAGES)) {
 		ocfs_node_map_dup(osb, &vote_map, &osb->publ_map); /* broadcast */
 		/* only nodes that see this is readonly */
 		if (flags & FLAG_DROP_READONLY)
@@ -1370,7 +1419,7 @@
 		ocfs_break_cache_lock_zap_buffers(osb, inode);
 	
 	/* just alerting owner on open */
-	if (flags & FLAG_ADD_OIN_MAP)
+	if (flags & (FLAG_ADD_OIN_MAP | FLAG_TRUNCATE_PAGES))
 		goto bail;
 
 	/* drop readonly should remove anyone who has responded */

Modified: trunk/src/dlm.h
===================================================================
--- trunk/src/dlm.h	2004-07-02 00:22:15 UTC (rev 1230)
+++ trunk/src/dlm.h	2004-07-02 01:13:39 UTC (rev 1231)
@@ -47,6 +47,7 @@
 			  ocfs_journal_handle *handle);
 int ocfs_update_master_on_open(ocfs_super *osb, struct inode *inode,
 			       ocfs_journal_handle *handle);
+int ocfs_notify_truncate_on_wopen(ocfs_super *osb, struct inode *inode);
 int ocfs_wait_for_lock_release(ocfs_super *osb, __u64 offset,
 			       __u32 time_to_wait, __u32 lock_type,
 			       struct inode *inode);

Modified: trunk/src/file.c
===================================================================
--- trunk/src/file.c	2004-07-02 00:22:15 UTC (rev 1230)
+++ trunk/src/file.c	2004-07-02 01:13:39 UTC (rev 1231)
@@ -241,7 +241,19 @@
 			goto leave;
 		}
 
-		status = ocfs_inode_fill_ext_map (osb, fe_bh, inode);
+		if(OCFS_I(inode)->ip_ext_map.initialized &&
+			OCFS_I(inode)->ip_ext_map.count == 0) {
+			status = ocfs_inode_fill_ext_map (osb, fe_bh, inode);
+			if (status < 0) {
+				up_write(&OCFS_I(inode)->ip_io_sem);
+				LOG_ERROR_STATUS(status);
+				goto leave;
+			}
+		}
+	}
+
+	if((mode & (O_WRONLY|O_RDWR)) && !(mode & O_DIRECT)) {
+		status = ocfs_notify_truncate_on_wopen(osb, inode);
 		if (status < 0) {
 			up_write(&OCFS_I(inode)->ip_io_sem);
 			LOG_ERROR_STATUS(status);
@@ -277,14 +289,10 @@
 		}
 		status = 0;
 	} else {
-		ocfs_extent_map_destroy (&OCFS_I(inode)->ip_ext_map);
-		ocfs_extent_map_init (&OCFS_I(inode)->ip_ext_map);
 		if (mode & O_DIRECT)
 			OCFS_SET_FLAG(OCFS_I(inode)->ip_open_flags, OCFS_OIN_OPEN_FOR_DIRECTIO);
-		else {
+		else
 			OCFS_CLEAR_FLAG(OCFS_I(inode)->ip_open_flags, OCFS_OIN_OPEN_FOR_DIRECTIO);
-			truncate_pages = 1;
-		}
 	}
 
 	OCFS_I(inode)->ip_open_cnt++;
@@ -294,18 +302,8 @@
 		have_oin_sem = 0;
 	}
 
-	if (truncate_pages) {
-		status = ocfs_sync_inode(inode);
-		if (status)
-			goto leave;
+	status = ocfs_sync_inode(inode);
 
-		if (inode->i_data.nrpages)
-			ocfs_truncate_inode_pages(inode, 0);
-	}
-
-
-	status = 0;
-
 leave:
 	if (have_oin_sem) {
 		up (&(OCFS_I(inode)->ip_sem));
@@ -374,9 +372,6 @@
 			atomic_read(&dentry->d_count)); 
 
 	if (last_close) {
-		ocfs_extent_map_destroy (&OCFS_I(inode)->ip_ext_map);
-		ocfs_extent_map_init (&OCFS_I(inode)->ip_ext_map);
-
 		if (OCFS_I(inode)->ip_open_flags & OCFS_OIN_OPEN_FOR_DIRECTIO)
 			OCFS_CLEAR_FLAG(OCFS_I(inode)->ip_open_flags, OCFS_OIN_OPEN_FOR_DIRECTIO);
 
@@ -402,11 +397,6 @@
 	up (&(OCFS_I(inode)->ip_sem));
 	ocfs_sync_inode(inode);
 
-	if (last_close) {
-		if (inode->i_data.nrpages)
-			ocfs_truncate_inode_pages(inode, 0);
-	}
-
 bail:
 	if (dec)
 		iput(inode);
@@ -1464,12 +1454,14 @@
 				goto bail;
 			}
 		}
-		if (needs_trunc)
+		if (needs_trunc) 
 			ocfs_truncate_inode_pages(inode, 0);
 
-		if (inode->i_size > newsize)
+		if (inode->i_size > newsize) {
+			ocfs_truncate_inode_pages(inode, newsize);
 			status = ocfs_truncate_file(osb, newsize, 
 						    inode);
+		}
 		else {
 			status = ocfs_extend_file(osb, newsize, NULL, 
 						  inode, attr, 0, NULL);

Modified: trunk/src/inode.c
===================================================================
--- trunk/src/inode.c	2004-07-02 00:22:15 UTC (rev 1230)
+++ trunk/src/inode.c	2004-07-02 01:13:39 UTC (rev 1231)
@@ -120,7 +120,31 @@
 #endif
 };
 
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
 /* 
+ * ocfs_ilookup()
+ * blkno is *required*
+ */
+struct inode *ocfs_ilookup(ocfs_super *osb, u64 blkno)
+{
+	ocfs_find_inode_args args;
+
+	args.blkno = blkno;
+	args.flags = 0;
+	args.ino = ino_from_blkno(osb->sb, blkno);
+	return ilookup5(osb->sb, args.ino, ocfs_find_actor, &args);
+}
+#else
+/* 
+ * 2.4 doesn't have ilookup so we just fall back to iget here.
+ */
+struct inode *ocfs_ilookup(ocfs_super *osb, u64 blkno)
+{
+	return ocfs_iget(osb, blkno);
+}
+#endif
+
+/* 
  * ocfs_iget()
  * blkno is *required*
  */

Modified: trunk/src/inode.h
===================================================================
--- trunk/src/inode.h	2004-07-02 00:22:15 UTC (rev 1230)
+++ trunk/src/inode.h	2004-07-02 01:13:39 UTC (rev 1231)
@@ -35,6 +35,7 @@
 void ocfs_clear_inode(struct inode *inode);
 void ocfs_delete_inode(struct inode *inode);
 struct inode *ocfs_iget(ocfs_super *osb, __u64 feoff);
+struct inode *ocfs_ilookup(ocfs_super *osb, __u64 feoff);
 int ocfs_inode_init_private(struct inode *inode);
 int ocfs_inode_revalidate(struct dentry *dentry);
 int ocfs_populate_inode(struct inode *inode, ocfs2_dinode *fe,

Modified: trunk/src/nm.c
===================================================================
--- trunk/src/nm.c	2004-07-02 00:22:15 UTC (rev 1230)
+++ trunk/src/nm.c	2004-07-02 01:13:39 UTC (rev 1231)
@@ -47,6 +47,7 @@
 #include "util.h"
 #include "volcfg.h"
 #include "vote.h"
+#include "extmap.h"
 
 #include "ocfs_journal.h"
 #include "buffer_head_io.h"
@@ -93,7 +94,8 @@
 	"REMASTER_REQUESTOR",   // remaster lock to requestor
 	"DROP_READONLY",        // RO cachelock needs to convert to RW
 	"READONLY",
-	"RELEASE_DENTRY"
+	"RELEASE_DENTRY",
+	"TRUNCATE_PAGES"
 };
 #endif
 
@@ -517,6 +519,8 @@
 		vote_type = RELEASE_CACHE;
 	else if (flags & FLAG_FILE_UPDATE_OIN) {
 		vote_type = UPDATE_OIN_INODE;
+	} else if (flags & FLAG_TRUNCATE_PAGES) {
+		vote_type = TRUNCATE_PAGES;
 	} else if (lockres->master_node_num == osb->node_num) {
 		if (flags & FLAG_CHANGE_MASTER)
 			vote_type = CHANGE_MASTER;
@@ -535,7 +539,8 @@
 			vote_type = REMASTER_REQUESTOR;
 	}
 	
-	if (inode == NULL && vote_type != DELETE_RELEASE)
+	if (inode == NULL && 
+		(vote_type != DELETE_RELEASE && vote_type != TRUNCATE_PAGES))
 		vote_type = INVALID_REQUEST;
 done:
 	LOG_EXIT_STATUS(vote_type);
@@ -759,8 +764,16 @@
 	if ((flags & FLAG_FILE_DELETE) && (flags & FLAG_RELEASE_LOCK))
 		inode = NULL;
 	else {
-		inode = ocfs_iget(osb,
-				  lock_id >> osb->sb->s_blocksize_bits);
+		if(flags & FLAG_TRUNCATE_PAGES) {
+			inode = ocfs_ilookup(osb, 
+					lock_id >> osb->sb->s_blocksize_bits);
+			if(!inode)
+				goto no_inode_ok;
+		} else {
+			inode = ocfs_iget(osb, 
+					lock_id >> osb->sb->s_blocksize_bits);
+		}
+
 		if (!inode) {
 			status = -EFAIL;
 			LOG_ERROR_ARGS("Could not find inode: lock_id = %llu, "
@@ -799,6 +812,7 @@
 	lockflags = (OCFS_I(inode)->ip_flags & OCFS_INODE_SYSTEM_FILE)
 	       	? 0 : OCFS_BH_CACHED;
 
+no_inode_ok:
 	/* fail here if no inode, unless this is a delete/rename release */
 	vote_type = get_process_vote_action(osb, lockres, node_num, flags, 
 					    status, &master_alive, inode);
@@ -1187,6 +1201,16 @@
 				open_handle = 1;
 			break;
 
+		case TRUNCATE_PAGES:
+			LOG_TRACE_STR("TRUNCATE_PAGES");
+			if(inode) {
+				ocfs_truncate_inode_pages(inode, 0);
+				ocfs_extent_map_destroy(&OCFS_I(inode)->ip_ext_map);
+				ocfs_extent_map_init(&OCFS_I(inode)->ip_ext_map);
+			}
+			vote_response = FLAG_VOTE_NODE;
+			break;
+
 		case INVALID_REQUEST:
 		default:
 			LOG_TRACE_STR("INVALID_REQUEST");
@@ -1194,7 +1218,8 @@
 			break;
 	}
 
-	up_write(&OCFS_I(inode)->ip_io_sem);
+	if(have_io_sem && inode)
+		up_write(&OCFS_I(inode)->ip_io_sem);
 	have_io_sem = 0;
 
 	if (inode && (flags & (FLAG_FILE_EXTEND|FLAG_FILE_TRUNCATE)) && 

Modified: trunk/src/ocfs.h
===================================================================
--- trunk/src/ocfs.h	2004-07-02 00:22:15 UTC (rev 1230)
+++ trunk/src/ocfs.h	2004-07-02 01:13:39 UTC (rev 1231)
@@ -89,7 +89,8 @@
 	REMASTER_REQUESTOR,   // remaster lock to requestor
 	DROP_READONLY,	      // RO cachelock needs to convert to RW
 	READONLY,	      // a RW or RO cachelock, requesting RO
-	RELEASE_DENTRY
+	RELEASE_DENTRY,
+	TRUNCATE_PAGES		// truncate page caches of a file
 };
 
 #define  OCFS_MAX_DLM_PKT_SIZE			256
@@ -135,7 +136,7 @@
 #define  FLAG_DIR                 0x00001000
 #define  FLAG_REMASTER            0x00002000
 #define  FLAG_FAST_PATH_LOCK      0x00004000
-#define  FLAG_FILE_UNUSED5        0x00008000
+#define  FLAG_TRUNCATE_PAGES      0x00008000
 #define  FLAG_FILE_UNUSED6        0x00010000
 //#define  FLAG_DEL_NAME            0x00020000
 //#define  FLAG_DEL_INODE           0x00040000



More information about the Ocfs2-commits mailing list