[Ocfs2-tools-devel] [PATCH 05/13] libocfs2: Provide _nocache() versions of the I/O functions.

Joel Becker joel.becker at oracle.com
Tue May 26 16:03:25 PDT 2009


Some I/O doesn't want to pollute the cache.  The _nocache() I/O
functions will not add blocks to the cache.  If the blocks are already
in the cache, the functions keep them consistent.  For example, a
write needs to update an already existing cache block so that the cache
doesn't have stale data.  The blocks are not removed from the cache -
they're already there, why make a reader go find them?  They get moved
to the front of the LRU list so that they get stolen first.

Signed-off-by: Joel Becker <joel.becker at oracle.com>
---
 include/ocfs2/ocfs2.h |   20 ++++++++++--
 libocfs2/openfs.c     |   26 ++++++++++++++--
 libocfs2/unix_io.c    |   79 ++++++++++++++++++++++++++++++++++++++++++------
 3 files changed, 109 insertions(+), 16 deletions(-)

diff --git a/include/ocfs2/ocfs2.h b/include/ocfs2/ocfs2.h
index 4dbbf43..3f2ca54 100644
--- a/include/ocfs2/ocfs2.h
+++ b/include/ocfs2/ocfs2.h
@@ -222,11 +222,22 @@ errcode_t io_set_blksize(io_channel *channel, int blksize);
 int io_get_blksize(io_channel *channel);
 int io_get_fd(io_channel *channel);
 
-/* use ocfs2_read_blocks if your application might handle o2image file */
+/*
+ * Raw I/O functions.  They will use the I/O cache if available.  The
+ * _nocache version will not add a block to the cache, but if the block is
+ * already in the cache it will be moved to the front of the LRU (where it
+ * is stolen first) and its contents kept up to date.
+ *
+ * Use ocfs2_read_blocks() if your application might handle o2image file.
+ */
 errcode_t io_read_block(io_channel *channel, int64_t blkno, int count,
 			char *data);
+errcode_t io_read_block_nocache(io_channel *channel, int64_t blkno, int count,
+			char *data);
 errcode_t io_write_block(io_channel *channel, int64_t blkno, int count,
 			 const char *data);
+errcode_t io_write_block_nocache(io_channel *channel, int64_t blkno, int count,
+			 const char *data);
 errcode_t io_init_cache(io_channel *channel, size_t nr_blocks);
 errcode_t io_init_cache_size(io_channel *channel, size_t bytes);
 void io_destroy_cache(io_channel *channel);
@@ -238,11 +249,14 @@ errcode_t ocfs2_write_primary_super(ocfs2_filesys *fs);
 errcode_t ocfs2_write_super(ocfs2_filesys *fs);
 
 /*
- * ocfs2_read_blocks is a wraper around io_read_block. If device is an image-
- * file it translates disk offset to image offset
+ * ocfs2_read_blocks() is a wrapper around io_read_block. If device is an
+ * image file it translates disk offset to image offset.
+ * ocfs2_read_blocks_nocache() calls io_read_block_nocache().
  */
 errcode_t ocfs2_read_blocks(ocfs2_filesys *fs, int64_t blkno, int count,
 			    char *data);
+errcode_t ocfs2_read_blocks_nocache(ocfs2_filesys *fs, int64_t blkno, int count,
+				    char *data);
 int ocfs2_mount_local(ocfs2_filesys *fs);
 errcode_t ocfs2_open(const char *name, int flags,
 		     unsigned int superblock, unsigned int blksize,
diff --git a/libocfs2/openfs.c b/libocfs2/openfs.c
index f10d0e8..42c3b60 100644
--- a/libocfs2/openfs.c
+++ b/libocfs2/openfs.c
@@ -31,6 +31,7 @@
 #include <string.h>
 #include <inttypes.h>
 #include <errno.h>
+#include <stdbool.h>
 
 /* I hate glibc and gcc */
 #ifndef ULLONG_MAX
@@ -48,10 +49,11 @@
  * function. At this point this function returns EIO if image file has any
  * holes
  */
-errcode_t ocfs2_read_blocks(ocfs2_filesys *fs, int64_t blkno,
-			    int count, char *data)
+static errcode_t __ocfs2_read_blocks(ocfs2_filesys *fs, int64_t blkno,
+				     int count, char *data, bool nocache)
 {
 	int i;
+	errcode_t err;
 
 	if (fs->fs_flags & OCFS2_FLAG_IMAGE_FILE) {
 		/*
@@ -66,7 +68,25 @@ errcode_t ocfs2_read_blocks(ocfs2_filesys *fs, int64_t blkno,
 		/* translate the block number */
 		blkno = ocfs2_image_get_blockno(fs, blkno);
 	}
-	return io_read_block(fs->fs_io, blkno, count, data);
+
+	if (nocache)
+		err = io_read_block_nocache(fs->fs_io, blkno, count, data);
+	else
+		err = io_read_block(fs->fs_io, blkno, count, data);
+
+	return err;
+}
+
+errcode_t ocfs2_read_blocks_nocache(ocfs2_filesys *fs, int64_t blkno,
+				    int count, char *data)
+{
+	return __ocfs2_read_blocks(fs, blkno, count, data, true);
+}
+
+errcode_t ocfs2_read_blocks(ocfs2_filesys *fs, int64_t blkno,
+			    int count, char *data)
+{
+	return __ocfs2_read_blocks(fs, blkno, count, data, false);
 }
 
 static errcode_t ocfs2_validate_ocfs1_header(ocfs2_filesys *fs)
diff --git a/libocfs2/unix_io.c b/libocfs2/unix_io.c
index 4c6b759..50fa480 100644
--- a/libocfs2/unix_io.c
+++ b/libocfs2/unix_io.c
@@ -42,6 +42,7 @@
 #include <sys/utsname.h>
 #endif
 #include <inttypes.h>
+#include <stdbool.h>
 
 #include "ocfs2/kernel-rbtree.h"
 
@@ -239,6 +240,17 @@ static void io_cache_seen(struct io_cache *ic, struct io_cache_block *icb)
 	list_add_tail(&icb->icb_list, &ic->ic_lru);
 }
 
+static void io_cache_unsee(struct io_cache *ic, struct io_cache_block *icb)
+{
+	/*
+	 * Move to the front of the LRU.  There's no point in removing an
+	 * "unseen" buffer from the cache.  It's valid, but we want the
+	 * next I/O to steal it.
+	 */
+	list_del(&icb->icb_list);
+	list_add(&icb->icb_list, &ic->ic_lru);
+}
+
 static void io_cache_disconnect(struct io_cache *ic,
 				struct io_cache_block *icb)
 {
@@ -269,9 +281,13 @@ static struct io_cache_block *io_cache_pop_lru(struct io_cache *ic)
  * the disk block, we don't need to update the cache.  This allows us
  * to look for optimal I/O sizes; it's better to call one read 1MB of
  * half-cached blocks than to read every other block.
+ *
+ * If the caller specifies "nocache", we still want to give them anything
+ * we found in the cache, but we want cached blocks moved to the front
+ * of the LRU.  That way they get stolen first.
  */
 static errcode_t io_cache_read_blocks(io_channel *channel, int64_t blkno,
-				      int count, char *data)
+				      int count, char *data, bool nocache)
 {
 	int i, good_blocks;
 	errcode_t ret = 0;
@@ -312,6 +328,9 @@ static errcode_t io_cache_read_blocks(io_channel *channel, int64_t blkno,
 			assert(icb);
 			memcpy(data, icb->icb_buf, channel->io_blksize);
 		} else if (!icb) {
+			if (nocache)
+				continue;
+
 			/* Steal the LRU buffer */
 			icb = io_cache_pop_lru(ic);
 			icb->icb_blkno = blkno + i;
@@ -331,7 +350,10 @@ static errcode_t io_cache_read_blocks(io_channel *channel, int64_t blkno,
 		 * buffer seen.
 		 */
 
-		io_cache_seen(ic, icb);
+		if (nocache)
+			io_cache_unsee(ic, icb);
+		else
+			io_cache_seen(ic, icb);
 	}
 
 out:
@@ -339,7 +361,7 @@ out:
 }
 
 static errcode_t io_cache_read_block(io_channel *channel, int64_t blkno,
-				     int count, char *data)
+				     int count, char *data, bool nocache)
 
 {
 	int todo = one_meg_of_blocks(channel);
@@ -352,7 +374,8 @@ static errcode_t io_cache_read_block(io_channel *channel, int64_t blkno,
 	while (count) {
 		if (todo > count)
 			todo = count;
-		ret = io_cache_read_blocks(channel, blkno, todo, data);
+		ret = io_cache_read_blocks(channel, blkno, todo, data,
+					   nocache);
 		if (ret)
 			break;
 
@@ -370,7 +393,8 @@ static errcode_t io_cache_read_block(io_channel *channel, int64_t blkno,
  * a whole stream and update the cache as needed.
  */
 static errcode_t io_cache_write_blocks(io_channel *channel, int64_t blkno,
-				       int count, const char *data)
+				       int count, const char *data,
+				       bool nocache)
 {
 	int i, completed = 0;
 	errcode_t ret;
@@ -384,10 +408,18 @@ static errcode_t io_cache_write_blocks(io_channel *channel, int64_t blkno,
 	/*
 	 * Now we sync up the cache with the data buffer.  We have
 	 * to sync up I/O that completed, even if the entire I/O did not.
+	 *
+	 * In the nocache case, we want to skip blocks that weren't in the
+	 * cache, but we want to update blocks that were.  Even though
+	 * the caller specified "don't cache this", it's already in the
+	 * cache.  We don't want stale data.
 	 */
 	for (i = 0; i < completed; i++, data += channel->io_blksize) {
 		icb = io_cache_lookup(ic, blkno + i);
 		if (!icb) {
+			if (nocache)
+				continue;
+
 			/*
 			 * Steal the LRU buffer.  We can't error here, so
 			 * we can safely insert it before we copy the data.
@@ -398,14 +430,18 @@ static errcode_t io_cache_write_blocks(io_channel *channel, int64_t blkno,
 		}
 
 		memcpy(icb->icb_buf, data, channel->io_blksize);
-		io_cache_seen(ic, icb);
+		if (nocache)
+			io_cache_unsee(ic, icb);
+		else
+			io_cache_seen(ic, icb);
 	}
 
 	return ret;
 }
 
 static errcode_t io_cache_write_block(io_channel *channel, int64_t blkno,
-				      int count, const char *data)
+				      int count, const char *data,
+				      bool nocache)
 {
 	/*
 	 * Unlike io_read_cache_block(), we're going to do all of the
@@ -413,7 +449,8 @@ static errcode_t io_cache_write_block(io_channel *channel, int64_t blkno,
 	 * io_cache_write_block() and io_cache_write_blocks() for
 	 * consistency.
 	 */
-	return io_cache_write_blocks(channel, blkno, count, data);
+	return io_cache_write_blocks(channel, blkno, count, data,
+				     nocache);
 }
 
 static void io_free_cache(struct io_cache *ic)
@@ -654,7 +691,18 @@ errcode_t io_read_block(io_channel *channel, int64_t blkno, int count,
 			char *data)
 {
 	if (channel->io_cache)
-		return io_cache_read_block(channel, blkno, count, data);
+		return io_cache_read_block(channel, blkno, count, data,
+					   false);
+	else
+		return unix_io_read_block(channel, blkno, count, data);
+}
+
+errcode_t io_read_block_nocache(io_channel *channel, int64_t blkno, int count,
+				char *data)
+{
+	if (channel->io_cache)
+		return io_cache_read_block(channel, blkno, count, data,
+					   true);
 	else
 		return unix_io_read_block(channel, blkno, count, data);
 }
@@ -663,7 +711,18 @@ errcode_t io_write_block(io_channel *channel, int64_t blkno, int count,
 			 const char *data)
 {
 	if (channel->io_cache)
-		return io_cache_write_block(channel, blkno, count, data);
+		return io_cache_write_block(channel, blkno, count, data,
+					    false);
+	else
+		return unix_io_write_block(channel, blkno, count, data);
+}
+
+errcode_t io_write_block_nocache(io_channel *channel, int64_t blkno, int count,
+				 const char *data)
+{
+	if (channel->io_cache)
+		return io_cache_write_block(channel, blkno, count, data,
+					    true);
 	else
 		return unix_io_write_block(channel, blkno, count, data);
 }
-- 
1.6.3




More information about the Ocfs2-tools-devel mailing list