[Ocfs2-tools-devel] [PATCH 07/11] libocfs2: Add iovec read support

Sunil Mushran sunil.mushran at oracle.com
Fri Sep 30 12:22:17 PDT 2011


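Add a public function, io_vec_read_blocks(), that performs vectored reads on
the provided set of blocks. It is io-cache friendly: the blocks that are read
also refresh the cache. One use case is warming the cache, which has proven to
be very useful in fsck. The reads are issued through libaio, so every tool
that links libocfs2 now links with -laio as well.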

Signed-off-by: Sunil Mushran <sunil.mushran at oracle.com>
---
 debugfs.ocfs2/Makefile |    2 +-
 extras/Makefile        |    2 +-
 fsck.ocfs2/Makefile    |    2 +-
 fswreck/Makefile       |    2 +-
 include/ocfs2/ocfs2.h  |   10 ++++
 libocfs2/unix_io.c     |  122 ++++++++++++++++++++++++++++++++++++++++++++++++
 listuuid/Makefile      |    2 +-
 mkfs.ocfs2/Makefile    |    2 +-
 mount.ocfs2/Makefile   |    2 +-
 mounted.ocfs2/Makefile |    2 +-
 o2cb_ctl/Makefile      |    2 +-
 o2image/Makefile       |    2 +-
 o2info/Makefile        |    2 +-
 ocfs2_hb_ctl/Makefile  |    2 +-
 tunefs.ocfs2/Makefile  |    2 +-
 15 files changed, 145 insertions(+), 13 deletions(-)

diff --git a/debugfs.ocfs2/Makefile b/debugfs.ocfs2/Makefile
index 556d284..d2ce1a9 100644
--- a/debugfs.ocfs2/Makefile
+++ b/debugfs.ocfs2/Makefile
@@ -31,7 +31,7 @@ HFILES =				\
 
 OBJS = $(subst .c,.o,$(CFILES))
 
-LIBOCFS2_LIBS = -L$(TOPDIR)/libocfs2 -locfs2
+LIBOCFS2_LIBS = -L$(TOPDIR)/libocfs2 -locfs2 -laio
 LIBO2CB_LIBS = -L$(TOPDIR)/libo2cb -lo2cb
 
 MANS = debugfs.ocfs2.8
diff --git a/extras/Makefile b/extras/Makefile
index 2d30004..7f90404 100644
--- a/extras/Makefile
+++ b/extras/Makefile
@@ -33,7 +33,7 @@ CHECK_METAECC_OBJS = $(subst .c,.o,$(CHECK_METAECC_CFILES))
 RESIZE_SLOTMAP_OBJS = $(subst .c,.o,$(RESIZE_SLOTMAP_CFILES))
 
 LIBOCFS2 = ../libocfs2/libocfs2.a
-EXTRAS_LIBS = $(LIBOCFS2) $(COM_ERR_LIBS)
+EXTRAS_LIBS = $(LIBOCFS2) $(COM_ERR_LIBS) -laio
 
 find_hardlinks: $(FIND_HARDLINKS_OBJS) $(LIBOCFS2)
 	$(LINK) $(EXTRAS_LIBS)
diff --git a/fsck.ocfs2/Makefile b/fsck.ocfs2/Makefile
index f806ba6..36f9dbc 100644
--- a/fsck.ocfs2/Makefile
+++ b/fsck.ocfs2/Makefile
@@ -8,7 +8,7 @@ SBIN_PROGRAMS = fsck.ocfs2
 DEFINES += -DVERSION=\"$(VERSION)\"
 
 INCLUDES = -I$(TOPDIR)/include -Iinclude
-LIBOCFS2_LIBS = -L$(TOPDIR)/libocfs2 -locfs2
+LIBOCFS2_LIBS = -L$(TOPDIR)/libocfs2 -locfs2 -laio
 LIBOCFS2_DEPS = $(TOPDIR)/libocfs2/libocfs2.a
 LIBO2DLM_LIBS = -L$(TOPDIR)/libo2dlm -lo2dlm $(DL_LIBS)
 LIBO2DLM_DEPS = $(TOPDIR)/libo2dlm/libo2dlm.a
diff --git a/fswreck/Makefile b/fswreck/Makefile
index b1ee546..53c2dc0 100644
--- a/fswreck/Makefile
+++ b/fswreck/Makefile
@@ -35,7 +35,7 @@ DIST_RULES = dist-subdircreate
 
 OBJS = $(subst .c,.o,$(CFILES))
 
-LIBOCFS2_LIBS = -L$(TOPDIR)/libocfs2 -locfs2
+LIBOCFS2_LIBS = -L$(TOPDIR)/libocfs2 -locfs2 -laio
 LIBOCFS2_DEPS = $(TOPDIR)/libocfs2/libocfs2.a
 
 LIBO2DLM_LIBS = -L$(TOPDIR)/libo2dlm -lo2dlm $(DL_LIBS)
diff --git a/include/ocfs2/ocfs2.h b/include/ocfs2/ocfs2.h
index 9fcb2ce..3d1f6ed 100644
--- a/include/ocfs2/ocfs2.h
+++ b/include/ocfs2/ocfs2.h
@@ -354,6 +354,16 @@ errcode_t io_share_cache(io_channel *from, io_channel *to);
 errcode_t io_mlock_cache(io_channel *channel);
 void io_destroy_cache(io_channel *channel);
 
+
+struct io_vec_unit {
+	uint64_t	ivu_blkno;	/* first block to read */
+	char		*ivu_buf;	/* destination buffer */
+	uint32_t	ivu_buflen;	/* bytes to read into ivu_buf */
+};
+
+errcode_t io_vec_read_blocks(io_channel *channel, struct io_vec_unit *ivus,
+			     int count);
+
 errcode_t ocfs2_read_super(ocfs2_filesys *fs, uint64_t superblock, char *sb);
 /* Writes the main superblock at OCFS2_SUPER_BLOCK_BLKNO */
 errcode_t ocfs2_write_primary_super(ocfs2_filesys *fs);
diff --git a/libocfs2/unix_io.c b/libocfs2/unix_io.c
index a805ffc..76636aa 100644
--- a/libocfs2/unix_io.c
+++ b/libocfs2/unix_io.c
@@ -42,6 +42,7 @@
 #include <sys/resource.h>
 #include <sys/utsname.h>
 #include <linux/fs.h>
+#include <libaio.h>
 #endif
 #include <sys/mman.h>
 #include <inttypes.h>
@@ -119,6 +120,65 @@ static inline int one_meg_of_blocks(io_channel *channel)
 	return count / channel->io_blksize;
 }
 
+/*
+ * Read count extents with libaio: unit i gets ivu_buflen bytes, starting at
+ * block ivu_blkno, into ivu_buf.  Returns 0, OCFS2_ET_* or a positive errno.
+ */
+static errcode_t unix_vec_read_blocks(io_channel *channel,
+				      struct io_vec_unit *ivus, int count)
+{
+	int i, ret, sent, done = 0;
+	io_context_t io_ctx;
+	struct iocb *iocb = NULL, **iocbs = NULL;
+	struct io_event *events = NULL;
+	errcode_t err = OCFS2_ET_NO_MEMORY;
+	uint64_t bytes = 0;
+
+	memset(&io_ctx, 0, sizeof(io_ctx));
+	ret = io_queue_init(count, &io_ctx);
+	if (ret)
+		return -ret;	/* libaio reports -errno; nothing to free yet */
+
+	iocb = malloc(sizeof(*iocb) * count);
+	iocbs = malloc(sizeof(*iocbs) * count);
+	events = malloc(sizeof(*events) * count);
+	if (!iocb || !iocbs || !events)
+		goto out;
+	for (i = 0; i < count; ++i) {
+		io_prep_pread(&iocb[i], channel->io_fd, ivus[i].ivu_buf,
+			      ivus[i].ivu_buflen,
+			      ivus[i].ivu_blkno * channel->io_blksize);
+		iocbs[i] = &iocb[i];
+		bytes += ivus[i].ivu_buflen;
+	}
+
+	/* io_submit() may accept only part of the batch, hence the loop. */
+	for (; done < count; done += sent) {
+		sent = ret = io_submit(io_ctx, count - done, &iocbs[done]);
+		if (ret > 0)
+			ret = io_getevents(io_ctx, sent, sent, events, NULL);
+		if (ret <= 0 || ret != sent) {
+			err = (ret < 0) ? -ret : OCFS2_ET_SHORT_READ;
+			goto out;
+		}
+		/* An event is posted for failed and partial reads as well. */
+		for (i = 0; i < sent; ++i) {
+			if (events[i].res != events[i].obj->u.c.nbytes) {
+				err = OCFS2_ET_SHORT_READ;
+				goto out;
+			}
+		}
+	}
+	err = 0;
+	channel->io_bytes_read += bytes;
+out:
+	io_queue_release(io_ctx);	/* first: waits for in-flight io */
+	free(iocb);
+	free(iocbs);
+	free(events);
+	return err;
+}
+
 static errcode_t unix_io_read_block(io_channel *channel, int64_t blkno,
 				    int count, char *data)
 {
@@ -299,6 +359,58 @@ static struct io_cache_block *io_cache_pop_lru(struct io_cache *ic)
 }
 
 /*
+ * Vectored read through the io cache.
+ *
+ * Every unit in ivus[0..count) is read from disk, whether or not its blocks
+ * are cached, and the data is then copied into the cache one block at a time.
+ * With nocache set, blocks that are not cached yet are left out of the cache.
+ *
+ * Returns the error from unix_vec_read_blocks(), if any, else 0.
+ */
+static errcode_t io_cache_vec_read_blocks(io_channel *channel,
+					  struct io_vec_unit *ivus,
+					  int count, bool nocache)
+{
+	struct io_cache *ic = channel->io_cache;
+	struct io_cache_block *icb;
+	int blksize = channel->io_blksize;
+	int unit, j;
+	errcode_t err;
+
+	/* No attempt is made to skip ios for blocks that are cached already. */
+	err = unix_vec_read_blocks(channel, ivus, count);
+	if (err)
+		return err;
+
+	for (unit = 0; unit < count; unit++) {
+		uint64_t blkno = ivus[unit].ivu_blkno;
+		uint32_t numblks = ivus[unit].ivu_buflen / blksize;
+		char *src = ivus[unit].ivu_buf;
+
+		for (j = 0; j < numblks; ++j, ++blkno, src += blksize) {
+			icb = io_cache_lookup(ic, blkno);
+			if (!icb && nocache)
+				continue;
+			if (!icb) {
+				/* not cached yet: claim a block for it */
+				icb = io_cache_pop_lru(ic);
+				icb->icb_blkno = blkno;
+				io_cache_insert(ic, icb);
+			}
+
+			/* refresh the cached copy with what was just read */
+			memcpy(icb->icb_buf, src, blksize);
+			if (nocache)
+				io_cache_unsee(ic, icb);
+			else
+				io_cache_seen(ic, icb);
+		}
+	}
+
+	return 0;
+}
+
+/*
  * This relies on the fact that our cache is always up to date.  If a
  * block is in the cache, the same thing is on disk.  Even if we re-read
  * the disk block, we don't need to update the cache.  This allows us
@@ -822,6 +934,16 @@ void io_set_nocache(io_channel *channel, bool nocache)
 	channel->io_nocache = nocache;
 }
 
+/* Vectored read of count ivus; the io cache, if present, is refreshed. */
+errcode_t io_vec_read_blocks(io_channel *channel, struct io_vec_unit *ivus,
+			     int count)
+{
+	if (!channel->io_cache)
+		return unix_vec_read_blocks(channel, ivus, count);
+	return io_cache_vec_read_blocks(channel, ivus, count,
+					channel->io_nocache);
+}
+
 errcode_t io_read_block(io_channel *channel, int64_t blkno, int count,
 			char *data)
 {
diff --git a/listuuid/Makefile b/listuuid/Makefile
index 784e804..cda4232 100644
--- a/listuuid/Makefile
+++ b/listuuid/Makefile
@@ -4,7 +4,7 @@ include $(TOPDIR)/Preamble.make
 
 INCLUDES = -I$(TOPDIR)/include
 
-LIBOCFS2_LIBS = -L$(TOPDIR)/libocfs2 -locfs2
+LIBOCFS2_LIBS = -L$(TOPDIR)/libocfs2 -locfs2 -laio
 LIBOCFS2_DEPS = $(TOPDIR)/libocfs2/libocfs2.a
 
 LIBO2DLM_LIBS = -L$(TOPDIR)/libo2dlm -lo2dlm $(DL_LIBS)
diff --git a/mkfs.ocfs2/Makefile b/mkfs.ocfs2/Makefile
index b80b8b7..179b145 100644
--- a/mkfs.ocfs2/Makefile
+++ b/mkfs.ocfs2/Makefile
@@ -5,7 +5,7 @@ include $(TOPDIR)/Preamble.make
 sbindir = $(root_sbindir)
 SBIN_PROGRAMS = mkfs.ocfs2
 
-LIBOCFS2_LIBS = -L$(TOPDIR)/libocfs2 -locfs2
+LIBOCFS2_LIBS = -L$(TOPDIR)/libocfs2 -locfs2 -laio
 LIBOCFS2_DEPS = $(TOPDIR)/libocfs2/libocfs2.a
 
 LIBO2CB_LIBS = -L$(TOPDIR)/libo2cb -lo2cb
diff --git a/mount.ocfs2/Makefile b/mount.ocfs2/Makefile
index 1f0e688..7b43bd0 100644
--- a/mount.ocfs2/Makefile
+++ b/mount.ocfs2/Makefile
@@ -6,7 +6,7 @@ sbindir = $(root_sbindir)
 SBIN_PROGRAMS = mount.ocfs2
 
 INCLUDES = -I$(TOPDIR)/include
-LIBOCFS2_LIBS = -L$(TOPDIR)/libocfs2 -locfs2
+LIBOCFS2_LIBS = -L$(TOPDIR)/libocfs2 -locfs2 -laio
 LIBOCFS2_DEPS = $(TOPDIR)/libocfs2/libocfs2.a
 LIBO2DLM_LIBS = -L$(TOPDIR)/libo2dlm -lo2dlm $(DL_LIBS)
 LIBO2DLM_DEPS = $(TOPDIR)/libo2dlm/libo2dlm.a
diff --git a/mounted.ocfs2/Makefile b/mounted.ocfs2/Makefile
index e63414a..039cf48 100644
--- a/mounted.ocfs2/Makefile
+++ b/mounted.ocfs2/Makefile
@@ -2,7 +2,7 @@ TOPDIR = ..
 
 include $(TOPDIR)/Preamble.make
 
-LIBOCFS2_LIBS = -L$(TOPDIR)/libocfs2 -locfs2
+LIBOCFS2_LIBS = -L$(TOPDIR)/libocfs2 -locfs2 -laio
 LIBOCFS2_DEPS = $(TOPDIR)/libocfs2/libocfs2.a
 
 LIBO2DLM_LIBS = -L$(TOPDIR)/libo2dlm -lo2dlm $(DL_LIBS)
diff --git a/o2cb_ctl/Makefile b/o2cb_ctl/Makefile
index 0db99c6..9c73d45 100644
--- a/o2cb_ctl/Makefile
+++ b/o2cb_ctl/Makefile
@@ -10,7 +10,7 @@ INCLUDES = -I$(TOPDIR)/include
 LIBTOOLS_INTERNAL_LIBS = -L$(TOPDIR)/libtools-internal -ltools-internal
 LIBTOOLS_INTERNAL_DEPS = $(TOPDIR)/libtools-internal/libtools-internal.a
 
-LIBOCFS2_LIBS = -L$(TOPDIR)/libocfs2 -locfs2
+LIBOCFS2_LIBS = -L$(TOPDIR)/libocfs2 -locfs2 -laio
 LIBOCFS2_DEPS = $(TOPDIR)/libocfs2/libocfs2.a
 
 LIBO2CB_LIBS  = -L$(TOPDIR)/libo2cb -lo2cb
diff --git a/o2image/Makefile b/o2image/Makefile
index eed2e0d..491ceec 100644
--- a/o2image/Makefile
+++ b/o2image/Makefile
@@ -7,7 +7,7 @@ WARNINGS = -Wall -Wstrict-prototypes -Wno-format -Wmissing-prototypes \
 
 CFLAGS += $(WARNINGS)
 
-LIBOCFS2_LIBS = -L$(TOPDIR)/libocfs2 -locfs2
+LIBOCFS2_LIBS = -L$(TOPDIR)/libocfs2 -locfs2 -laio
 LIBOCFS2_DEPS = $(TOPDIR)/libocfs2/libocfs2.a
 
 LIBO2DLM_LIBS = -L$(TOPDIR)/libo2dlm -lo2dlm
diff --git a/o2info/Makefile b/o2info/Makefile
index 579223a..25e4b3d 100644
--- a/o2info/Makefile
+++ b/o2info/Makefile
@@ -10,7 +10,7 @@ CFLAGS += $(WARNINGS)
 LIBTOOLS_INTERNAL_LIBS = -L$(TOPDIR)/libtools-internal -ltools-internal
 LIBTOOLS_INTERNAL_DEPS = $(TOPDIR)/libtools-internal/libtools-internal.a
 
-LIBOCFS2_LIBS = -L$(TOPDIR)/libocfs2 -locfs2
+LIBOCFS2_LIBS = -L$(TOPDIR)/libocfs2 -locfs2 -laio
 LIBOCFS2_DEPS = $(TOPDIR)/libocfs2/libocfs2.a
 
 BIN_PROGRAMS = o2info
diff --git a/ocfs2_hb_ctl/Makefile b/ocfs2_hb_ctl/Makefile
index 0e1f583..e52d422 100644
--- a/ocfs2_hb_ctl/Makefile
+++ b/ocfs2_hb_ctl/Makefile
@@ -6,7 +6,7 @@ sbindir = $(root_sbindir)
 SBIN_PROGRAMS = ocfs2_hb_ctl
 
 INCLUDES = -I$(TOPDIR)/include
-LIBOCFS2_LIBS = -L$(TOPDIR)/libocfs2 -locfs2
+LIBOCFS2_LIBS = -L$(TOPDIR)/libocfs2 -locfs2 -laio
 LIBOCFS2_DEPS = $(TOPDIR)/libocfs2/libocfs2.a
 LIBO2DLM_LIBS = -L$(TOPDIR)/libo2dlm -lo2dlm $(DL_LIBS)
 LIBO2DLM_DEPS = $(TOPDIR)/libo2dlm/libo2dlm.a
diff --git a/tunefs.ocfs2/Makefile b/tunefs.ocfs2/Makefile
index 3847d0f..81cf108 100644
--- a/tunefs.ocfs2/Makefile
+++ b/tunefs.ocfs2/Makefile
@@ -5,7 +5,7 @@ include $(TOPDIR)/Preamble.make
 LIBTOOLS_INTERNAL_LIBS = -L$(TOPDIR)/libtools-internal -ltools-internal
 LIBTOOLS_INTERNAL_DEPS = $(TOPDIR)/libtools-internal/libtools-internal.a
 
-LIBOCFS2_LIBS = -L$(TOPDIR)/libocfs2 -locfs2
+LIBOCFS2_LIBS = -L$(TOPDIR)/libocfs2 -locfs2 -laio
 LIBOCFS2_DEPS = $(TOPDIR)/libocfs2/libocfs2.a
 
 LIBO2DLM_LIBS = -L$(TOPDIR)/libo2dlm -lo2dlm $(DL_LIBS)
-- 
1.7.4.1




More information about the Ocfs2-tools-devel mailing list