[Ocfs2-tools-devel] [PATCH 07/11] libocfs2: Add aio read support
Goldwyn Rodrigues
rgoldwyn at gmail.com
Fri Sep 23 09:49:44 PDT 2011
On Thu, Sep 22, 2011 at 9:04 PM, Sunil Mushran <sunil.mushran at oracle.com> wrote:
> Added public function io_aio_read_blocks() that performs aio reads on the
> provided set of blocks. It is io cache friendly. One use case is to use this
> to warm the cache, which has proven to be very useful in fsck.
>
> Signed-off-by: Sunil Mushran <sunil.mushran at oracle.com>
> ---
> debugfs.ocfs2/Makefile | 2 +-
> extras/Makefile | 2 +-
> fsck.ocfs2/Makefile | 2 +-
> fswreck/Makefile | 2 +-
> include/ocfs2/ocfs2.h | 9 ++++
> libocfs2/unix_io.c | 114 ++++++++++++++++++++++++++++++++++++++++++++++++
> listuuid/Makefile | 2 +-
> mkfs.ocfs2/Makefile | 2 +-
> mount.ocfs2/Makefile | 2 +-
> mounted.ocfs2/Makefile | 2 +-
> o2cb_ctl/Makefile | 2 +-
> o2image/Makefile | 2 +-
> o2info/Makefile | 2 +-
> ocfs2_hb_ctl/Makefile | 2 +-
> tunefs.ocfs2/Makefile | 2 +-
> 15 files changed, 136 insertions(+), 13 deletions(-)
>
> diff --git a/debugfs.ocfs2/Makefile b/debugfs.ocfs2/Makefile
> index 556d284..d2ce1a9 100644
> --- a/debugfs.ocfs2/Makefile
> +++ b/debugfs.ocfs2/Makefile
> @@ -31,7 +31,7 @@ HFILES = \
>
> OBJS = $(subst .c,.o,$(CFILES))
>
> -LIBOCFS2_LIBS = -L$(TOPDIR)/libocfs2 -locfs2
> +LIBOCFS2_LIBS = -L$(TOPDIR)/libocfs2 -locfs2 -laio
> LIBO2CB_LIBS = -L$(TOPDIR)/libo2cb -lo2cb
>
> MANS = debugfs.ocfs2.8
> diff --git a/extras/Makefile b/extras/Makefile
> index 2d30004..7f90404 100644
> --- a/extras/Makefile
> +++ b/extras/Makefile
> @@ -33,7 +33,7 @@ CHECK_METAECC_OBJS = $(subst .c,.o,$(CHECK_METAECC_CFILES))
> RESIZE_SLOTMAP_OBJS = $(subst .c,.o,$(RESIZE_SLOTMAP_CFILES))
>
> LIBOCFS2 = ../libocfs2/libocfs2.a
> -EXTRAS_LIBS = $(LIBOCFS2) $(COM_ERR_LIBS)
> +EXTRAS_LIBS = $(LIBOCFS2) $(COM_ERR_LIBS) -laio
>
> find_hardlinks: $(FIND_HARDLINKS_OBJS) $(LIBOCFS2)
> $(LINK) $(EXTRAS_LIBS)
> diff --git a/fsck.ocfs2/Makefile b/fsck.ocfs2/Makefile
> index f806ba6..36f9dbc 100644
> --- a/fsck.ocfs2/Makefile
> +++ b/fsck.ocfs2/Makefile
> @@ -8,7 +8,7 @@ SBIN_PROGRAMS = fsck.ocfs2
> DEFINES += -DVERSION=\"$(VERSION)\"
>
> INCLUDES = -I$(TOPDIR)/include -Iinclude
> -LIBOCFS2_LIBS = -L$(TOPDIR)/libocfs2 -locfs2
> +LIBOCFS2_LIBS = -L$(TOPDIR)/libocfs2 -locfs2 -laio
> LIBOCFS2_DEPS = $(TOPDIR)/libocfs2/libocfs2.a
> LIBO2DLM_LIBS = -L$(TOPDIR)/libo2dlm -lo2dlm $(DL_LIBS)
> LIBO2DLM_DEPS = $(TOPDIR)/libo2dlm/libo2dlm.a
> diff --git a/fswreck/Makefile b/fswreck/Makefile
> index b1ee546..53c2dc0 100644
> --- a/fswreck/Makefile
> +++ b/fswreck/Makefile
> @@ -35,7 +35,7 @@ DIST_RULES = dist-subdircreate
>
> OBJS = $(subst .c,.o,$(CFILES))
>
> -LIBOCFS2_LIBS = -L$(TOPDIR)/libocfs2 -locfs2
> +LIBOCFS2_LIBS = -L$(TOPDIR)/libocfs2 -locfs2 -laio
> LIBOCFS2_DEPS = $(TOPDIR)/libocfs2/libocfs2.a
>
> LIBO2DLM_LIBS = -L$(TOPDIR)/libo2dlm -lo2dlm $(DL_LIBS)
> diff --git a/include/ocfs2/ocfs2.h b/include/ocfs2/ocfs2.h
> index 9fcb2ce..c44b764 100644
> --- a/include/ocfs2/ocfs2.h
> +++ b/include/ocfs2/ocfs2.h
> @@ -354,6 +354,15 @@ errcode_t io_share_cache(io_channel *from, io_channel *to);
> errcode_t io_mlock_cache(io_channel *channel);
> void io_destroy_cache(io_channel *channel);
>
> +
> +struct io_aio_unit {
> + int64_t aio_blkno;
> + char *aio_buf;
> +};
> +
> +errcode_t io_aio_read_blocks(io_channel *channel, struct io_aio_unit *aios,
> + int count);
> +
> errcode_t ocfs2_read_super(ocfs2_filesys *fs, uint64_t superblock, char *sb);
> /* Writes the main superblock at OCFS2_SUPER_BLOCK_BLKNO */
> errcode_t ocfs2_write_primary_super(ocfs2_filesys *fs);
> diff --git a/libocfs2/unix_io.c b/libocfs2/unix_io.c
> index a805ffc..369fc0f 100644
> --- a/libocfs2/unix_io.c
> +++ b/libocfs2/unix_io.c
> @@ -42,6 +42,7 @@
> #include <sys/resource.h>
> #include <sys/utsname.h>
> #include <linux/fs.h>
> +#include <libaio.h>
> #endif
> #include <sys/mman.h>
> #include <inttypes.h>
> @@ -119,6 +120,66 @@ static inline int one_meg_of_blocks(io_channel *channel)
> return count / channel->io_blksize;
> }
>
> +static errcode_t unix_aio_read_blocks(io_channel *channel,
> + struct io_aio_unit *aios, int count)
> +{
> + int i;
> + int ret;
> + io_context_t io_ctx;
> + struct iocb *iocb = NULL, **iocbs = NULL;
> + struct io_event *events = NULL;
> + int64_t offset;
> + int submitted, completed = 0;
> +
> + ret = OCFS2_ET_NO_MEMORY;
> + iocb = malloc((sizeof(struct iocb) * count));
> + iocbs = malloc((sizeof(struct iocb *) * count));
> + events = malloc((sizeof(struct io_event) * count));
> + if (!iocb || !iocbs || !events)
> + goto out;
> +
> + memset(&io_ctx, 0, sizeof(io_ctx));
> + ret = io_queue_init(count, &io_ctx);
> + if (ret)
> + return ret;
> +
> + for (i = 0; i < count; ++i) {
> + offset = aios[i].aio_blkno * channel->io_blksize;
> + io_prep_pread(&(iocb[i]), channel->io_fd,
> + aios[i].aio_buf,
> + channel->io_blksize, offset);
> + iocbs[i] = &iocb[i];
> + }
> +
> +resubmit:
> + ret = io_submit(io_ctx, count - completed, &iocbs[completed]);
> + if (!ret && (count - completed))
> + ret = OCFS2_ET_SHORT_READ;
> + if (ret < 0)
> + goto out;
> + submitted = ret;
> +
> + ret = io_getevents(io_ctx, submitted, submitted, events, NULL);
> + if (ret < 0)
> + goto out;
> +
> + completed += submitted;
> + if (completed < count)
> + goto resubmit;
You can replace the resubmit label and goto with a while loop.
However, are you using the full potential of asynchronous reads?
This seems like a function for performing bulk I/O of different
blocks.
By collecting the results immediately, you block until all of the
submitted I/O has completed. How about breaking the whole thing into
two parts, i.e. submission and collection? Submit the I/O when you
know what is going to be read, and collect the events when you
actually need the data.
Taking inodes as an example, you can io_submit the inode blocks when
you read the inode_alloc file in pass 0, and call io_getevents in
pass 1 when you actually need them.
> +
> +out:
> + if (ret >= 0)
> + ret = 0;
> + if (!ret)
> + channel->io_bytes_read += (count * channel->io_blksize);
> + free(iocb);
> + free(iocbs);
> + free(events);
> + io_queue_release(io_ctx);
> +
> + return ret;
> +}
> +
> static errcode_t unix_io_read_block(io_channel *channel, int64_t blkno,
> int count, char *data)
> {
> @@ -299,6 +360,49 @@ static struct io_cache_block *io_cache_pop_lru(struct io_cache *ic)
> }
>
> /*
> + * Unlike its sync counterpart, this function issues ios even for cached blocks.
> + */
> +static errcode_t io_cache_aio_read_blocks(io_channel *channel,
> + struct io_aio_unit *aios,
> + int count, bool nocache)
> +{
> + struct io_cache *ic = channel->io_cache;
> + struct io_cache_block *icb;
> + errcode_t ret = 0;
> + int i;
> +
> + /*
> + * Read all blocks. We could extend this to not issue ios for already
> + * cached blocks. But is it worth the effort?
> + */
> + ret = unix_aio_read_blocks(channel, aios, count);
> + if (ret)
> + goto out;
> +
> + /* refresh cache */
> + for (i = 0; i < count; i++) {
> + icb = io_cache_lookup(ic, aios[i].aio_blkno);
> + if (!icb) {
> + if (nocache)
> + continue;
> + icb = io_cache_pop_lru(ic);
> + icb->icb_blkno = aios[i].aio_blkno;
> + io_cache_insert(ic, icb);
> + }
> +
> + memcpy(icb->icb_buf, aios[i].aio_buf, channel->io_blksize);
> +
> + if (nocache)
> + io_cache_unsee(ic, icb);
> + else
> + io_cache_seen(ic, icb);
> + }
> +
> +out:
> + return ret;
> +}
> +
> +/*
> * This relies on the fact that our cache is always up to date. If a
> * block is in the cache, the same thing is on disk. Even if we re-read
> * the disk block, we don't need to update the cache. This allows us
> @@ -822,6 +926,16 @@ void io_set_nocache(io_channel *channel, bool nocache)
> channel->io_nocache = nocache;
> }
>
> +errcode_t io_aio_read_blocks(io_channel *channel, struct io_aio_unit *aios,
> + int count)
> +{
> + if (channel->io_cache)
> + return io_cache_aio_read_blocks(channel, aios, count,
> + channel->io_nocache);
> + else
> + return unix_aio_read_blocks(channel, aios, count);
> +}
> +
> errcode_t io_read_block(io_channel *channel, int64_t blkno, int count,
> char *data)
> {
> diff --git a/listuuid/Makefile b/listuuid/Makefile
> index 784e804..cda4232 100644
> --- a/listuuid/Makefile
> +++ b/listuuid/Makefile
> @@ -4,7 +4,7 @@ include $(TOPDIR)/Preamble.make
>
> INCLUDES = -I$(TOPDIR)/include
>
> -LIBOCFS2_LIBS = -L$(TOPDIR)/libocfs2 -locfs2
> +LIBOCFS2_LIBS = -L$(TOPDIR)/libocfs2 -locfs2 -laio
> LIBOCFS2_DEPS = $(TOPDIR)/libocfs2/libocfs2.a
>
> LIBO2DLM_LIBS = -L$(TOPDIR)/libo2dlm -lo2dlm $(DL_LIBS)
> diff --git a/mkfs.ocfs2/Makefile b/mkfs.ocfs2/Makefile
> index b80b8b7..179b145 100644
> --- a/mkfs.ocfs2/Makefile
> +++ b/mkfs.ocfs2/Makefile
> @@ -5,7 +5,7 @@ include $(TOPDIR)/Preamble.make
> sbindir = $(root_sbindir)
> SBIN_PROGRAMS = mkfs.ocfs2
>
> -LIBOCFS2_LIBS = -L$(TOPDIR)/libocfs2 -locfs2
> +LIBOCFS2_LIBS = -L$(TOPDIR)/libocfs2 -locfs2 -laio
> LIBOCFS2_DEPS = $(TOPDIR)/libocfs2/libocfs2.a
>
> LIBO2CB_LIBS = -L$(TOPDIR)/libo2cb -lo2cb
> diff --git a/mount.ocfs2/Makefile b/mount.ocfs2/Makefile
> index 1f0e688..7b43bd0 100644
> --- a/mount.ocfs2/Makefile
> +++ b/mount.ocfs2/Makefile
> @@ -6,7 +6,7 @@ sbindir = $(root_sbindir)
> SBIN_PROGRAMS = mount.ocfs2
>
> INCLUDES = -I$(TOPDIR)/include
> -LIBOCFS2_LIBS = -L$(TOPDIR)/libocfs2 -locfs2
> +LIBOCFS2_LIBS = -L$(TOPDIR)/libocfs2 -locfs2 -laio
> LIBOCFS2_DEPS = $(TOPDIR)/libocfs2/libocfs2.a
> LIBO2DLM_LIBS = -L$(TOPDIR)/libo2dlm -lo2dlm $(DL_LIBS)
> LIBO2DLM_DEPS = $(TOPDIR)/libo2dlm/libo2dlm.a
> diff --git a/mounted.ocfs2/Makefile b/mounted.ocfs2/Makefile
> index e63414a..039cf48 100644
> --- a/mounted.ocfs2/Makefile
> +++ b/mounted.ocfs2/Makefile
> @@ -2,7 +2,7 @@ TOPDIR = ..
>
> include $(TOPDIR)/Preamble.make
>
> -LIBOCFS2_LIBS = -L$(TOPDIR)/libocfs2 -locfs2
> +LIBOCFS2_LIBS = -L$(TOPDIR)/libocfs2 -locfs2 -laio
> LIBOCFS2_DEPS = $(TOPDIR)/libocfs2/libocfs2.a
>
> LIBO2DLM_LIBS = -L$(TOPDIR)/libo2dlm -lo2dlm $(DL_LIBS)
> diff --git a/o2cb_ctl/Makefile b/o2cb_ctl/Makefile
> index 0db99c6..9c73d45 100644
> --- a/o2cb_ctl/Makefile
> +++ b/o2cb_ctl/Makefile
> @@ -10,7 +10,7 @@ INCLUDES = -I$(TOPDIR)/include
> LIBTOOLS_INTERNAL_LIBS = -L$(TOPDIR)/libtools-internal -ltools-internal
> LIBTOOLS_INTERNAL_DEPS = $(TOPDIR)/libtools-internal/libtools-internal.a
>
> -LIBOCFS2_LIBS = -L$(TOPDIR)/libocfs2 -locfs2
> +LIBOCFS2_LIBS = -L$(TOPDIR)/libocfs2 -locfs2 -laio
> LIBOCFS2_DEPS = $(TOPDIR)/libocfs2/libocfs2.a
>
> LIBO2CB_LIBS = -L$(TOPDIR)/libo2cb -lo2cb
> diff --git a/o2image/Makefile b/o2image/Makefile
> index eed2e0d..491ceec 100644
> --- a/o2image/Makefile
> +++ b/o2image/Makefile
> @@ -7,7 +7,7 @@ WARNINGS = -Wall -Wstrict-prototypes -Wno-format -Wmissing-prototypes \
>
> CFLAGS += $(WARNINGS)
>
> -LIBOCFS2_LIBS = -L$(TOPDIR)/libocfs2 -locfs2
> +LIBOCFS2_LIBS = -L$(TOPDIR)/libocfs2 -locfs2 -laio
> LIBOCFS2_DEPS = $(TOPDIR)/libocfs2/libocfs2.a
>
> LIBO2DLM_LIBS = -L$(TOPDIR)/libo2dlm -lo2dlm
> diff --git a/o2info/Makefile b/o2info/Makefile
> index 579223a..25e4b3d 100644
> --- a/o2info/Makefile
> +++ b/o2info/Makefile
> @@ -10,7 +10,7 @@ CFLAGS += $(WARNINGS)
> LIBTOOLS_INTERNAL_LIBS = -L$(TOPDIR)/libtools-internal -ltools-internal
> LIBTOOLS_INTERNAL_DEPS = $(TOPDIR)/libtools-internal/libtools-internal.a
>
> -LIBOCFS2_LIBS = -L$(TOPDIR)/libocfs2 -locfs2
> +LIBOCFS2_LIBS = -L$(TOPDIR)/libocfs2 -locfs2 -laio
> LIBOCFS2_DEPS = $(TOPDIR)/libocfs2/libocfs2.a
>
> BIN_PROGRAMS = o2info
> diff --git a/ocfs2_hb_ctl/Makefile b/ocfs2_hb_ctl/Makefile
> index 0e1f583..e52d422 100644
> --- a/ocfs2_hb_ctl/Makefile
> +++ b/ocfs2_hb_ctl/Makefile
> @@ -6,7 +6,7 @@ sbindir = $(root_sbindir)
> SBIN_PROGRAMS = ocfs2_hb_ctl
>
> INCLUDES = -I$(TOPDIR)/include
> -LIBOCFS2_LIBS = -L$(TOPDIR)/libocfs2 -locfs2
> +LIBOCFS2_LIBS = -L$(TOPDIR)/libocfs2 -locfs2 -laio
> LIBOCFS2_DEPS = $(TOPDIR)/libocfs2/libocfs2.a
> LIBO2DLM_LIBS = -L$(TOPDIR)/libo2dlm -lo2dlm $(DL_LIBS)
> LIBO2DLM_DEPS = $(TOPDIR)/libo2dlm/libo2dlm.a
> diff --git a/tunefs.ocfs2/Makefile b/tunefs.ocfs2/Makefile
> index 3847d0f..81cf108 100644
> --- a/tunefs.ocfs2/Makefile
> +++ b/tunefs.ocfs2/Makefile
> @@ -5,7 +5,7 @@ include $(TOPDIR)/Preamble.make
> LIBTOOLS_INTERNAL_LIBS = -L$(TOPDIR)/libtools-internal -ltools-internal
> LIBTOOLS_INTERNAL_DEPS = $(TOPDIR)/libtools-internal/libtools-internal.a
>
> -LIBOCFS2_LIBS = -L$(TOPDIR)/libocfs2 -locfs2
> +LIBOCFS2_LIBS = -L$(TOPDIR)/libocfs2 -locfs2 -laio
> LIBOCFS2_DEPS = $(TOPDIR)/libocfs2/libocfs2.a
>
> LIBO2DLM_LIBS = -L$(TOPDIR)/libo2dlm -lo2dlm $(DL_LIBS)
> --
> 1.7.4.1
>
>
> _______________________________________________
> Ocfs2-tools-devel mailing list
> Ocfs2-tools-devel at oss.oracle.com
> http://oss.oracle.com/mailman/listinfo/ocfs2-tools-devel
>
--
Goldwyn
More information about the Ocfs2-tools-devel
mailing list