[Ocfs2-devel] [PATCH 2/8] Ocfs2: Add basic framework and source files for extent moving.
Tristan Ye
tristan.ye at oracle.com
Tue Dec 28 07:38:00 PST 2010
On 12/28/2010 11:11 PM, Tao Ma wrote:
> Hi Tristan,
> Some comments inlined.
Tao,
It's good to see your comments here, very nice;)
> On 12/28/2010 07:40 PM, Tristan Ye wrote:
> <snip>
>> +static int ocfs2_move_extents(struct ocfs2_move_extents_context
>> *context)
>> +{
>> + int status;
>> + handle_t *handle;
>> + struct inode *inode = context->inode;
>> + struct buffer_head *di_bh = NULL;
>> + struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
>> +
>> + if (!inode)
>> + return -ENOENT;
>> +
>> + if (ocfs2_is_hard_readonly(osb) || ocfs2_is_soft_readonly(osb))
>> + return -EROFS;
>> +
>> + mutex_lock(&inode->i_mutex);
>> +
>> + /*
>> + * This prevents concurrent writes from other nodes
>> + */
>> + status = ocfs2_rw_lock(inode, 1);
>> + if (status) {
>> + mlog_errno(status);
>> + goto out;
>> + }
>> +
>> + status = ocfs2_inode_lock(inode,&di_bh, 1);
>> + if (status) {
>> + mlog_errno(status);
>> + goto out_rw_unlock;
>> + }
>> +
>> + /*
>> + * rememer ip_xattr_sem also needs to be held if necessary
>> + */
>> + down_write(&OCFS2_I(inode)->ip_alloc_sem);
>> +
>> + /*
>> + * real extents moving codes will be fulfilled later.
>> + *
>> + * status = __ocfs2_move_extents_range(di_bh, context);
>> + */
> OK, so this function does nothing by now? ;)
Actually, this function will be added in following patches, I commented
this for a better understanding ONLY.
So without collecting the last patch, it did nothing actually;-)
>> +
>> + up_write(&OCFS2_I(inode)->ip_alloc_sem);
>> + if (status) {
>> + mlog_errno(status);
>> + goto out_inode_unlock;
>> + }
>> +
>> + /*
>> + * We update mtime for these changes
>> + */
>> + handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
>> + if (IS_ERR(handle)) {
>> + status = PTR_ERR(handle);
>> + mlog_errno(status);
>> + goto out_inode_unlock;
>> + }
>> +
>> + inode->i_mtime = CURRENT_TIME;
>> + status = ocfs2_mark_inode_dirty(handle, inode, di_bh);
> We really need such a heavy function in case you just want to set
> di->i_mtime and di->i_mtime_nsec?
These above codes were almost borrowed from __ocfs2_change_file_space(),
it just called
ocfs2_mark_inode_dirty() when updating the mtime/ctime.
Maybe you're right, I'll rethink about it.
>> + if (status< 0)
>> + mlog_errno(status);
>> +
>> + ocfs2_commit_trans(osb, handle);
>> +
>> +out_inode_unlock:
>> + brelse(di_bh);
>> + ocfs2_inode_unlock(inode, 1);
>> +out_rw_unlock:
>> + ocfs2_rw_unlock(inode, 1);
>> +out:
>> + mutex_unlock(&inode->i_mutex);
>> +
>> + return status;
>> +}
>> +
>> +int ocfs2_ioctl_move_extents(struct file *filp, void __user *argp)
>> +{
>> + int status;
>> +
>> + struct inode *inode = filp->f_path.dentry->d_inode;
>> + struct ocfs2_move_extents range;
>> + struct ocfs2_move_extents_context *context = NULL;
>> +
>> + status = mnt_want_write(filp->f_path.mnt);
>> + if (status)
>> + return status;
>> +
>> + status = -EINVAL;
>> +
>> + if (!S_ISREG(inode->i_mode)) {
>> + goto out;
>> + } else {
>> + if (!(filp->f_mode& FMODE_WRITE))
>> + goto out;
>> + }
> these can be just changed to:
> if (!S_ISREG(inode->i_mode) || !(filp->f_mode & FMODE_WRITE))
> goto out;
Good point.
>> +
>> + if (inode->i_flags& (S_IMMUTABLE|S_APPEND)) {
>> + status = -EPERM;
>> + goto out;
>> + }
>> +
>> + context = kzalloc(sizeof(struct ocfs2_move_extents_context),
>> GFP_NOFS);
>> + if (!context) {
>> + status = -ENOMEM;
>> + mlog_errno(status);
>> + goto out;
>> + }
>> +
>> + context->inode = inode;
>> + context->file = filp;
>> +
>> + if (!argp) {
>> + memset(&range, 0, sizeof(range));
>> + range.me_len = (u64)-1;
>> + range.me_flags |= OCFS2_MOVE_EXT_FL_AUTO_DEFRAG;
>> + context->auto_defrag = 1;
>> + } else {
>> + if (copy_from_user(&range, (struct ocfs2_move_extents *)argp,
>> + sizeof(range))) {
>> + status = -EFAULT;
>> + goto out;
>> + }
>> + }
>> +
>> + context->range =⦥
>> +
>> + if (range.me_flags& OCFS2_MOVE_EXT_FL_AUTO_DEFRAG) {
>> + context->auto_defrag = 1;
>> + if (!range.me_thresh)
>> + range.me_thresh = 1024 * 1024;
>> + } else {
>> + /*
>> + * TO-DO XXX: validate the range.me_goal here.
>> + *
>> + * - should be cluster aligned.
>> + * - should contain enough free clusters around range.me_goal.
>> + * - strategy of moving extent to an appropriate goal is still
>> + * being discussed.
>> + */
>> + }
>> +
>> + /*
>> + * returning -EINVAL here.
>> + */
>> + if (range.me_start> i_size_read(inode))
> Do we allow overlap here? If no, I guess it should be
> if (range.me_start + range.me_len > i_size_read(inode))
What did you mean? 'range.me_start + range.me_len> i_size_read(inode)'
will be judged by following codes,
It has nothing to do with the overlap issue, range.me_start was not
range.me_new_start;)
>> + goto out;
>> +
>> + if (range.me_start + range.me_len> i_size_read(inode))
>> + range.me_len = i_size_read(inode) - range.me_start;
>> +
>> + status = ocfs2_move_extents(context);
>> + if (status)
>> + mlog_errno(status);
>> +
>> + if (argp) {
>> + if (copy_to_user((struct ocfs2_move_extents *)argp,&range,
>> + sizeof(range)))
>> + status = -EFAULT;
>> + }
>> +out:
>> + mnt_drop_write(filp->f_path.mnt);
>> +
>> + kfree(context);
>> +
>> + return status;
>> +}
>
> Regards,
> Tao
More information about the Ocfs2-devel
mailing list