[Ocfs2-devel] [PATCH 2/8] Ocfs2: Add basic framework and source files for extent moving.

Tue Dec 28 07:11:31 PST 2010

Hi Tristan,
	Some comments inlined.
On 12/28/2010 07:40 PM, Tristan Ye wrote:
<snip>
> +static int ocfs2_move_extents(struct ocfs2_move_extents_context *context)
> +{
> +	int status;
> +	handle_t *handle;
> +	struct inode *inode = context->inode;
> +	struct buffer_head *di_bh = NULL;
> +	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
> +
> +	if (!inode)
> +		return -ENOENT;
> +
> +	if (ocfs2_is_hard_readonly(osb) || ocfs2_is_soft_readonly(osb))
> +		return -EROFS;
> +
> +	mutex_lock(&inode->i_mutex);
> +
> +	/*
> +	 * This prevents concurrent writes from other nodes
> +	 */
> +	status = ocfs2_rw_lock(inode, 1);
> +	if (status) {
> +		mlog_errno(status);
> +		goto out;
> +	}
> +
> +	status = ocfs2_inode_lock(inode,&di_bh, 1);
> +	if (status) {
> +		mlog_errno(status);
> +		goto out_rw_unlock;
> +	}
> +
> +	/*
> +	 * rememer ip_xattr_sem also needs to be held if necessary
> +	 */
> +	down_write(&OCFS2_I(inode)->ip_alloc_sem);
> +
> +	/*
> +	 * real extents moving codes will be fulfilled later.
> +	 *
> +	 * status = __ocfs2_move_extents_range(di_bh, context);
> +	 */
OK, so this function does nothing by now? ;)
> +
> +	up_write(&OCFS2_I(inode)->ip_alloc_sem);
> +	if (status) {
> +		mlog_errno(status);
> +		goto out_inode_unlock;
> +	}
> +
> +	/*
> +	 * We update mtime for these changes
> +	 */
> +	handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
> +	if (IS_ERR(handle)) {
> +		status = PTR_ERR(handle);
> +		mlog_errno(status);
> +		goto out_inode_unlock;
> +	}
> +
> +	inode->i_mtime = CURRENT_TIME;
> +	status = ocfs2_mark_inode_dirty(handle, inode, di_bh);
We really need such a heavy function in case you just want to set 
di->i_mtime and di->i_mtime_nsec?
> +	if (status<  0)
> +		mlog_errno(status);
> +
> +	ocfs2_commit_trans(osb, handle);
> +
> +out_inode_unlock:
> +	brelse(di_bh);
> +	ocfs2_inode_unlock(inode, 1);
> +out_rw_unlock:
> +	ocfs2_rw_unlock(inode, 1);
> +out:
> +	mutex_unlock(&inode->i_mutex);
> +
> +	return status;
> +}
> +
> +int ocfs2_ioctl_move_extents(struct file *filp, void __user *argp)
> +{
> +	int status;
> +
> +	struct inode *inode = filp->f_path.dentry->d_inode;
> +	struct ocfs2_move_extents range;
> +	struct ocfs2_move_extents_context *context = NULL;
> +
> +	status = mnt_want_write(filp->f_path.mnt);
> +	if (status)
> +		return status;
> +
> +	status = -EINVAL;
> +
> +	if (!S_ISREG(inode->i_mode)) {
> +		goto out;
> +	} else {
> +		if (!(filp->f_mode&  FMODE_WRITE))
> +			goto out;
> +	}
these can be just changed to:
	if (!S_ISREG(inode->i_mode) || !(filp->f_mode & FMODE_WRITE))
		goto out;
> +
> +	if (inode->i_flags&  (S_IMMUTABLE|S_APPEND)) {
> +		status = -EPERM;
> +		goto out;
> +	}
> +
> +	context = kzalloc(sizeof(struct ocfs2_move_extents_context), GFP_NOFS);
> +	if (!context) {
> +		status = -ENOMEM;
> +		mlog_errno(status);
> +		goto out;
> +	}
> +
> +	context->inode = inode;
> +	context->file = filp;
> +
> +	if (!argp) {
> +		memset(&range, 0, sizeof(range));
> +		range.me_len = (u64)-1;
> +		range.me_flags |= OCFS2_MOVE_EXT_FL_AUTO_DEFRAG;
> +		context->auto_defrag = 1;
> +	} else {
> +		if (copy_from_user(&range, (struct ocfs2_move_extents *)argp,
> +				   sizeof(range))) {
> +			status = -EFAULT;
> +			goto out;
> +		}
> +	}
> +
> +	context->range =&range;
> +
> +	if (range.me_flags&  OCFS2_MOVE_EXT_FL_AUTO_DEFRAG) {
> +		context->auto_defrag = 1;
> +		if (!range.me_thresh)
> +			range.me_thresh = 1024 * 1024;
> +	} else {
> +		/*
> +		 * TO-DO XXX: validate the range.me_goal here.
> +		 *
> +		 * - should be cluster aligned.
> +		 * - should contain enough free clusters around range.me_goal.
> +		 * - strategy of moving extent to an appropriate goal is still
> +		 *   being discussed.
> +		 */
> +	}
> +
> +	/*
> +	 * returning -EINVAL here.
> +	 */
> +	if (range.me_start>  i_size_read(inode))
Do we allow overlap here? If no, I guess it should be
	if (range.me_start + range.me_len > i_size_read(inode))
> +		goto out;
> +
> +	if (range.me_start + range.me_len>  i_size_read(inode))
> +			range.me_len = i_size_read(inode) - range.me_start;
> +
> +	status = ocfs2_move_extents(context);
> +	if (status)
> +		mlog_errno(status);
> +
> +	if (argp) {
> +		if (copy_to_user((struct ocfs2_move_extents *)argp,&range,
> +				sizeof(range)))
> +			status = -EFAULT;
> +	}
> +out:
> +	mnt_drop_write(filp->f_path.mnt);
> +
> +	kfree(context);
> +
> +	return status;
> +}

Regards,
Tao