[Ocfs2-devel] [PATCH 2/8] Ocfs2: Add basic framework and source files for extent moving.
Tristan Ye
tristan.ye at oracle.com
Thu Jan 13 02:20:14 PST 2011
Patch tries to do following things:
1. Adding new files move_extents.[c|h] and fill it with basic framework.
2. Adding handler for new ioctl OCFS2_IOC_MOVE_EXT.
Signed-off-by: Tristan Ye <tristan.ye at oracle.com>
---
fs/ocfs2/Makefile | 1 +
fs/ocfs2/cluster/masklog.h | 1 +
fs/ocfs2/ioctl.c | 5 +
fs/ocfs2/move_extents.c | 240 ++++++++++++++++++++++++++++++++++++++++++++
fs/ocfs2/move_extents.h | 22 ++++
5 files changed, 269 insertions(+), 0 deletions(-)
create mode 100644 fs/ocfs2/move_extents.c
create mode 100644 fs/ocfs2/move_extents.h
diff --git a/fs/ocfs2/Makefile b/fs/ocfs2/Makefile
index 07d9fd8..5166f2d 100644
--- a/fs/ocfs2/Makefile
+++ b/fs/ocfs2/Makefile
@@ -30,6 +30,7 @@ ocfs2-objs := \
namei.o \
refcounttree.o \
reservations.o \
+ move_extents.o \
resize.o \
slot_map.o \
suballoc.o \
diff --git a/fs/ocfs2/cluster/masklog.h b/fs/ocfs2/cluster/masklog.h
index ea2ed9f..36f1cb2 100644
--- a/fs/ocfs2/cluster/masklog.h
+++ b/fs/ocfs2/cluster/masklog.h
@@ -121,6 +121,7 @@
#define ML_KTHREAD 0x0000000400000000ULL /* kernel thread activity */
#define ML_RESERVATIONS 0x0000000800000000ULL /* ocfs2 alloc reservations */
#define ML_CLUSTER 0x0000001000000000ULL /* cluster stack */
+#define ML_MOVE_EXT 0x0000002000000000ULL /* move extents */
#define MLOG_INITIAL_AND_MASK (ML_ERROR|ML_NOTICE)
#define MLOG_INITIAL_NOT_MASK (ML_ENTRY|ML_EXIT)
diff --git a/fs/ocfs2/ioctl.c b/fs/ocfs2/ioctl.c
index 7a48681..b8fe7a7 100644
--- a/fs/ocfs2/ioctl.c
+++ b/fs/ocfs2/ioctl.c
@@ -23,6 +23,7 @@
#include "ioctl.h"
#include "resize.h"
#include "refcounttree.h"
+#include "move_extents.h"
#include <linux/ext2_fs.h>
@@ -523,6 +524,8 @@ long ocfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
return -EFAULT;
return ocfs2_info_handle(inode, &info, 0);
+ case OCFS2_IOC_MOVE_EXT:
+ return ocfs2_ioctl_move_extents(filp, (void __user *)arg);
default:
return -ENOTTY;
}
@@ -565,6 +568,8 @@ long ocfs2_compat_ioctl(struct file *file, unsigned cmd, unsigned long arg)
return -EFAULT;
return ocfs2_info_handle(inode, &info, 1);
+ case OCFS2_IOC_MOVE_EXT:
+ break;
default:
return -ENOIOCTLCMD;
}
diff --git a/fs/ocfs2/move_extents.c b/fs/ocfs2/move_extents.c
new file mode 100644
index 0000000..9e9b9fd
--- /dev/null
+++ b/fs/ocfs2/move_extents.c
@@ -0,0 +1,240 @@
+/* -*- mode: c; c-basic-offset: 8; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
+ * move_extents.c
+ *
+ * Copyright (C) 2010 Oracle. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ */
+#include <linux/fs.h>
+#include <linux/types.h>
+#include <linux/mount.h>
+#include <linux/swap.h>
+
+#define MLOG_MASK_PREFIX ML_MOVE_EXT
+#include <cluster/masklog.h>
+
+#include "ocfs2.h"
+#include "ocfs2_ioctl.h"
+
+#include "alloc.h"
+#include "aops.h"
+#include "dlmglue.h"
+#include "extent_map.h"
+#include "inode.h"
+#include "journal.h"
+#include "suballoc.h"
+#include "uptodate.h"
+#include "super.h"
+#include "dir.h"
+#include "buffer_head_io.h"
+#include "sysfile.h"
+#include "suballoc.h"
+#include "move_extents.h"
+
+struct ocfs2_move_extents_context {
+ struct inode *inode;
+ struct file *file;
+ int auto_defrag;
+ int credits;
+ u32 new_phys_cpos;
+ u32 clusters_moved;
+ struct ocfs2_move_extents *range;
+ struct ocfs2_extent_tree et;
+ struct ocfs2_alloc_context *meta_ac;
+ struct ocfs2_alloc_context *data_ac;
+ struct ocfs2_cached_dealloc_ctxt dealloc;
+};
+
+static int ocfs2_move_extents(struct ocfs2_move_extents_context *context)
+{
+ int status;
+ handle_t *handle;
+ struct inode *inode = context->inode;
+ struct ocfs2_dinode *di;
+ struct buffer_head *di_bh = NULL;
+ struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
+
+ if (!inode)
+ return -ENOENT;
+
+ if (ocfs2_is_hard_readonly(osb) || ocfs2_is_soft_readonly(osb))
+ return -EROFS;
+
+ mutex_lock(&inode->i_mutex);
+
+ /*
+ * This prevents concurrent writes from other nodes
+ */
+ status = ocfs2_rw_lock(inode, 1);
+ if (status) {
+ mlog_errno(status);
+ goto out;
+ }
+
+ status = ocfs2_inode_lock(inode, &di_bh, 1);
+ if (status) {
+ mlog_errno(status);
+ goto out_rw_unlock;
+ }
+
+ /*
+ * rememer ip_xattr_sem also needs to be held if necessary
+ */
+ down_write(&OCFS2_I(inode)->ip_alloc_sem);
+ /*
+ * real extents moving codes will be fulfilled later.
+ *
+ * status = __ocfs2_move_extents_range(di_bh, context);
+ */
+ up_write(&OCFS2_I(inode)->ip_alloc_sem);
+ if (status) {
+ mlog_errno(status);
+ goto out_inode_unlock;
+ }
+
+ /*
+ * We update ctime for these changes
+ */
+ handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
+ if (IS_ERR(handle)) {
+ status = PTR_ERR(handle);
+ mlog_errno(status);
+ goto out_inode_unlock;
+ }
+
+ status = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh,
+ OCFS2_JOURNAL_ACCESS_WRITE);
+ if (status) {
+ mlog_errno(status);
+ goto out_commit;
+ }
+
+ di = (struct ocfs2_dinode *)di_bh->b_data;
+ inode->i_ctime = CURRENT_TIME;
+ di->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec);
+ di->i_ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec);
+
+ ocfs2_journal_dirty(handle, di_bh);
+
+out_commit:
+ ocfs2_commit_trans(osb, handle);
+
+out_inode_unlock:
+ brelse(di_bh);
+ ocfs2_inode_unlock(inode, 1);
+out_rw_unlock:
+ ocfs2_rw_unlock(inode, 1);
+out:
+ mutex_unlock(&inode->i_mutex);
+
+ return status;
+}
+
+int ocfs2_ioctl_move_extents(struct file *filp, void __user *argp)
+{
+ int status;
+
+ struct inode *inode = filp->f_path.dentry->d_inode;
+ struct ocfs2_move_extents range;
+ struct ocfs2_move_extents_context *context = NULL;
+
+ status = mnt_want_write(filp->f_path.mnt);
+ if (status)
+ return status;
+
+ status = -EINVAL;
+
+ if ((!S_ISREG(inode->i_mode)) || !(filp->f_mode & FMODE_WRITE))
+ goto out;
+
+ if (inode->i_flags & (S_IMMUTABLE|S_APPEND)) {
+ status = -EPERM;
+ goto out;
+ }
+
+ context = kzalloc(sizeof(struct ocfs2_move_extents_context), GFP_NOFS);
+ if (!context) {
+ status = -ENOMEM;
+ mlog_errno(status);
+ goto out;
+ }
+
+ context->inode = inode;
+ context->file = filp;
+
+ if (!argp) {
+ memset(&range, 0, sizeof(range));
+ range.me_len = (u64)-1;
+ range.me_flags |= OCFS2_MOVE_EXT_FL_AUTO_DEFRAG;
+ context->auto_defrag = 1;
+ } else {
+ if (copy_from_user(&range, (struct ocfs2_move_extents *)argp,
+ sizeof(range))) {
+ status = -EFAULT;
+ goto out;
+ }
+ }
+
+ if (range.me_start > i_size_read(inode))
+ goto out;
+
+ if (range.me_start + range.me_len > i_size_read(inode))
+ range.me_len = i_size_read(inode) - range.me_start;
+
+ context->range = ⦥
+
+ if (range.me_flags & OCFS2_MOVE_EXT_FL_AUTO_DEFRAG) {
+ context->auto_defrag = 1;
+ if (!range.me_thresh)
+ /*
+ * ok, the default theshold for the defragmentation
+ * is 1M, since our maximum clustersize was 1M also.
+ * any thought?
+ */
+ range.me_thresh = 1024 * 1024;
+ } else {
+ /*
+ * first best-effort attempt to validate and adjust the goal
+ * (physical address in block), while it can't guarantee later
+ * operation can succeed all the time since global_bitmap may
+ * change a bit over time.
+ */
+ /*
+ * real validation codes will be fulfilled later.
+ *
+ * status = ocfs2_validate_and_adjust_move_goal(inode, &range);
+ * if (status)
+ * goto out;
+ */
+ }
+
+ status = ocfs2_move_extents(context);
+ if (status)
+ mlog_errno(status);
+out:
+ /*
+ * movement/defragmentation may end up being partially completed,
+ * that's the reason why we need to return userspace the finished
+ * length and new_offset even if failure happens somewhere.
+ */
+ if (argp) {
+ if (copy_to_user((struct ocfs2_move_extents *)argp, &range,
+ sizeof(range)))
+ status = -EFAULT;
+ }
+
+ kfree(context);
+
+ mnt_drop_write(filp->f_path.mnt);
+
+ return status;
+}
diff --git a/fs/ocfs2/move_extents.h b/fs/ocfs2/move_extents.h
new file mode 100644
index 0000000..d9f0f29
--- /dev/null
+++ b/fs/ocfs2/move_extents.h
@@ -0,0 +1,22 @@
+/* -*- mode: c; c-basic-offset: 8; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
+ * move_extents.h
+ *
+ * Copyright (C) 2010 Oracle. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ */
+#ifndef OCFS2_MOVE_EXTENTS_H
+#define OCFS2_MOVE_EXTENTS_H
+
+int ocfs2_ioctl_move_extents(struct file *filp, void __user *argp);
+
+#endif /* OCFS2_MOVE_EXTENTS_H */
--
1.5.5
More information about the Ocfs2-devel
mailing list