[Btrfs-devel] [RFC] All my fsync changes

Josef Bacik jwhiter at redhat.com
Thu Aug 9 14:56:18 PDT 2007


Hello,

Ok here are all my fsync changes, including the fix for the put_transaction
stuff that I posted earlier.  I figure it'll be easier to just commit it as one
thing than as individual pieces.  The additional thing that I did was add a
radix tree that tracks the current outstanding transactions.  I also added a
field to the in-memory btrfs inode that keeps track of the last transaction that
modified the inode.  So when we go to do an fsync on the inode, if the commit was
already done or the transaction was removed (meaning it had been committed and
freed), we just exit out.  So here are the numbers.  This is running fs_mark with
the following command

fs_mark -d /mnt/btrfs-test/default/ -s 10240 -n 1000

run 10 times.  First run is without any of the patches, with the exception of
the put_transaction/trans_mutex fix.

FSUse%        Count         Size    Files/sec     App Overhead
     0         1000        10240         72.4             6366
     0         1000        10240         54.4             6340
     0         1000        10240         62.3             6451
     0         1000        10240         54.9             6427
     0         1000        10240         65.7             6385
     0         1000        10240         56.0             6440
     0         1000        10240         59.9             6541
     0         1000        10240         60.7             6365
     0         1000        10240         66.7             6407
     0         1000        10240         56.9             6402

AVG: 61 files/sec

And these are the numbers with the patch that's attached to this email:

FSUse%        Count         Size    Files/sec     App Overhead
     0         1000        10240         73.8             6315
     0         1000        10240        146.9             6396
     0         1000        10240        110.9             6269
     0         1000        10240         78.1             6279
     0         1000        10240         91.5             6330
     0         1000        10240         82.9             6219
     0         1000        10240         84.4             6221
     0         1000        10240        108.6             6345
     0         1000        10240         86.5             6293
     0         1000        10240         79.8             6321

AVG: 94.32 files/sec

Now I'm kind of uncertain about how I track the last_trans for the inode.  I
think it's right, but I'm not entirely sure, so if I need to be doing this
elsewhere please let me know.  Thanks much,

Josef

diff -r f6da57af2473 btrfs_inode.h
--- a/btrfs_inode.h	Wed Aug 08 20:17:12 2007 -0400
+++ b/btrfs_inode.h	Thu Aug 09 16:04:17 2007 -0400
@@ -25,6 +25,11 @@ struct btrfs_inode {
 	struct btrfs_block_group_cache *block_group;
 	struct btrfs_key location;
 	struct inode vfs_inode;
+
+	/*
+	 * transid of the trans_handle that last modified this inode
+	 */
+	u64 last_trans;
 };
 static inline struct btrfs_inode *BTRFS_I(struct inode *inode)
 {
diff -r f6da57af2473 ctree.h
--- a/ctree.h	Wed Aug 08 20:17:12 2007 -0400
+++ b/ctree.h	Thu Aug 09 16:18:12 2007 -0400
@@ -300,6 +300,7 @@ struct btrfs_fs_info {
 	struct radix_tree_root block_group_data_radix;
 	struct radix_tree_root extent_map_radix;
 	struct radix_tree_root extent_ins_radix;
+	struct radix_tree_root transaction_map_radix;
 	u64 generation;
 	struct btrfs_transaction *running_transaction;
 	struct btrfs_super_block *disk_super;
diff -r f6da57af2473 disk-io.c
--- a/disk-io.c	Wed Aug 08 20:17:12 2007 -0400
+++ b/disk-io.c	Thu Aug 09 16:52:26 2007 -0400
@@ -431,6 +431,7 @@ struct btrfs_root *open_ctree(struct sup
 	INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_NOFS);
 	INIT_RADIX_TREE(&fs_info->block_group_radix, GFP_KERNEL);
 	INIT_RADIX_TREE(&fs_info->block_group_data_radix, GFP_KERNEL);
+	INIT_RADIX_TREE(&fs_info->transaction_map_radix, GFP_KERNEL);
 	INIT_LIST_HEAD(&fs_info->trans_list);
 	INIT_LIST_HEAD(&fs_info->dead_roots);
 	sb_set_blocksize(sb, 4096);
diff -r f6da57af2473 file.c
--- a/file.c	Wed Aug 08 20:17:12 2007 -0400
+++ b/file.c	Thu Aug 09 17:24:26 2007 -0400
@@ -694,22 +694,41 @@ static int btrfs_sync_file(struct file *
 {
 	struct inode *inode = dentry->d_inode;
 	struct btrfs_root *root = BTRFS_I(inode)->root;
-	int ret;
+	int ret = 0;
 	struct btrfs_trans_handle *trans;
+	struct btrfs_transaction *last_trans;
 
 	/*
-	 * FIXME, use inode generation number to check if we can skip the
-	 * commit
+	 * check the transaction that last modified this inode
+	 * and see if its already been committed
 	 */
 	mutex_lock(&root->fs_info->fs_mutex);
+	if (!BTRFS_I(inode)->last_trans)
+		goto out;
+	mutex_lock(&root->fs_info->trans_mutex);
+	last_trans = radix_tree_lookup(&root->fs_info->transaction_map_radix,
+					BTRFS_I(inode)->last_trans);
+	if (!last_trans) {
+		BTRFS_I(inode)->last_trans = 0;
+		mutex_unlock(&root->fs_info->trans_mutex);
+		goto out;
+	} else if (last_trans->commit_done) {
+		mutex_unlock(&root->fs_info->trans_mutex);
+		goto out;
+	}
+	mutex_unlock(&root->fs_info->trans_mutex);
+
+	/*
+ 	 * ok we haven't committed the transaction yet, lets do a commit
+ 	 */
 	trans = btrfs_start_transaction(root, 1);
 	if (!trans) {
 		ret = -ENOMEM;
 		goto out;
 	}
 	ret = btrfs_commit_transaction(trans, root);
+out:
 	mutex_unlock(&root->fs_info->fs_mutex);
-out:
 	return ret > 0 ? EIO : ret;
 }
 
diff -r f6da57af2473 inode.c
--- a/inode.c	Wed Aug 08 20:17:12 2007 -0400
+++ b/inode.c	Thu Aug 09 17:23:57 2007 -0400
@@ -193,6 +193,7 @@ static int btrfs_update_inode(struct btr
 
 	fill_inode_item(inode_item, inode);
 	btrfs_mark_buffer_dirty(path->nodes[0]);
+	btrfs_set_inode_last_trans(trans, inode);
 	ret = 0;
 failed:
 	btrfs_release_path(root, path);
@@ -2253,6 +2254,7 @@ struct inode *btrfs_alloc_inode(struct s
 	ei = kmem_cache_alloc(btrfs_inode_cachep, GFP_NOFS);
 	if (!ei)
 		return NULL;
+	ei->last_trans = 0;
 	return &ei->vfs_inode;
 }
 
diff -r f6da57af2473 transaction.c
--- a/transaction.c	Wed Aug 08 20:17:12 2007 -0400
+++ b/transaction.c	Thu Aug 09 17:33:22 2007 -0400
@@ -31,7 +31,8 @@ static struct workqueue_struct *trans_wq
 #define BTRFS_ROOT_TRANS_TAG 0
 #define BTRFS_ROOT_DEFRAG_TAG 1
 
-static void put_transaction(struct btrfs_transaction *transaction)
+static void put_transaction(struct btrfs_transaction *transaction,
+			    struct btrfs_root *root)
 {
 	WARN_ON(transaction->use_count == 0);
 	transaction->use_count--;
@@ -39,6 +40,8 @@ static void put_transaction(struct btrfs
 		WARN_ON(total_trans == 0);
 		total_trans--;
 		list_del_init(&transaction->list);
+		radix_tree_delete(&root->fs_info->transaction_map_radix,
+				  (unsigned long)transaction->transid);
 		memset(transaction, 0, sizeof(*transaction));
 		kmem_cache_free(btrfs_transaction_cachep, transaction);
 	}
@@ -49,13 +52,15 @@ static int join_transaction(struct btrfs
 	struct btrfs_transaction *cur_trans;
 	cur_trans = root->fs_info->running_transaction;
 	if (!cur_trans) {
+		int ret;
 		cur_trans = kmem_cache_alloc(btrfs_transaction_cachep,
 					     GFP_NOFS);
 		total_trans++;
 		BUG_ON(!cur_trans);
 		root->fs_info->generation++;
 		root->fs_info->running_transaction = cur_trans;
-		cur_trans->num_writers = 0;
+		cur_trans->num_writers = 1;
+		cur_trans->num_joined = 0;
 		cur_trans->transid = root->fs_info->generation;
 		init_waitqueue_head(&cur_trans->writer_wait);
 		init_waitqueue_head(&cur_trans->commit_wait);
@@ -65,8 +70,15 @@ static int join_transaction(struct btrfs
 		cur_trans->start_time = get_seconds();
 		list_add_tail(&cur_trans->list, &root->fs_info->trans_list);
 		init_bit_radix(&cur_trans->dirty_pages);
-	}
-	cur_trans->num_writers++;
+		ret = radix_tree_insert(&root->fs_info->transaction_map_radix,
+                                (unsigned long)cur_trans->transid,
+                                cur_trans);
+		BUG_ON(ret);
+	} else {
+		cur_trans->num_writers++;
+		cur_trans->num_joined++;
+	}
+
 	return 0;
 }
 
@@ -128,7 +140,7 @@ int btrfs_end_transaction(struct btrfs_t
 	cur_trans->num_writers--;
 	if (waitqueue_active(&cur_trans->writer_wait))
 		wake_up(&cur_trans->writer_wait);
-	put_transaction(cur_trans);
+	put_transaction(cur_trans, root);
 	mutex_unlock(&root->fs_info->trans_mutex);
 	memset(trans, 0, sizeof(*trans));
 	kmem_cache_free(btrfs_trans_handle_cachep, trans);
@@ -426,7 +438,8 @@ int btrfs_commit_transaction(struct btrf
 int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
 			     struct btrfs_root *root)
 {
-	int ret = 0;
+	int ret = 0, joined = 0;
+	unsigned long timeout = 1;
 	struct btrfs_transaction *cur_trans;
 	struct btrfs_transaction *prev_trans = NULL;
 	struct list_head dirty_fs_roots;
@@ -446,7 +459,11 @@ int btrfs_commit_transaction(struct btrf
 		mutex_unlock(&root->fs_info->fs_mutex);
 		ret = wait_for_commit(root, cur_trans);
 		BUG_ON(ret);
-		put_transaction(cur_trans);
+
+		mutex_lock(&root->fs_info->trans_mutex);
+		put_transaction(cur_trans, root);
+		mutex_unlock(&root->fs_info->trans_mutex);
+
 		mutex_lock(&root->fs_info->fs_mutex);
 		return 0;
 	}
@@ -461,25 +478,35 @@ int btrfs_commit_transaction(struct btrf
 			mutex_unlock(&root->fs_info->trans_mutex);
 
 			wait_for_commit(root, prev_trans);
-			put_transaction(prev_trans);
 
 			mutex_lock(&root->fs_info->fs_mutex);
 			mutex_lock(&root->fs_info->trans_mutex);
+			put_transaction(prev_trans, root);
 		}
 	}
-	while (trans->transaction->num_writers > 1) {
+	
+	do {
+		joined = cur_trans->num_joined;
 		WARN_ON(cur_trans != trans->transaction);
-		prepare_to_wait(&trans->transaction->writer_wait, &wait,
+		prepare_to_wait(&cur_trans->writer_wait, &wait,
 				TASK_UNINTERRUPTIBLE);
-		if (trans->transaction->num_writers <= 1)
-			break;
+		if (cur_trans->num_writers <= 1 && 
+		    (cur_trans->num_joined != joined))
+			break;
+
+		if (cur_trans->num_writers > 1)
+			timeout = MAX_SCHEDULE_TIMEOUT;
+		else
+			timeout = 1;
 		mutex_unlock(&root->fs_info->fs_mutex);
 		mutex_unlock(&root->fs_info->trans_mutex);
-		schedule();
+		schedule_timeout(timeout);
 		mutex_lock(&root->fs_info->fs_mutex);
 		mutex_lock(&root->fs_info->trans_mutex);
-		finish_wait(&trans->transaction->writer_wait, &wait);
-	}
+		finish_wait(&cur_trans->writer_wait, &wait);
+	} while (cur_trans->num_writers > 1 || 
+		 (cur_trans->num_joined != joined));
+
 	finish_wait(&trans->transaction->writer_wait, &wait);
 	WARN_ON(cur_trans != trans->transaction);
 	ret = add_dirty_roots(trans, &root->fs_info->fs_roots_radix,
@@ -510,8 +537,8 @@ int btrfs_commit_transaction(struct btrf
 	mutex_lock(&root->fs_info->trans_mutex);
 	cur_trans->commit_done = 1;
 	wake_up(&cur_trans->commit_wait);
-	put_transaction(cur_trans);
-	put_transaction(cur_trans);
+	put_transaction(cur_trans, root);
+	put_transaction(cur_trans, root);
 	if (root->fs_info->closing)
 		list_splice_init(&root->fs_info->dead_roots, &dirty_fs_roots);
 	else
diff -r f6da57af2473 transaction.h
--- a/transaction.h	Wed Aug 08 20:17:12 2007 -0400
+++ b/transaction.h	Thu Aug 09 16:32:04 2007 -0400
@@ -23,6 +23,7 @@ struct btrfs_transaction {
 struct btrfs_transaction {
 	u64 transid;
 	unsigned long num_writers;
+	unsigned long num_joined;
 	int in_commit;
 	int use_count;
 	int commit_done;
@@ -57,6 +58,12 @@ static inline void btrfs_update_inode_bl
 	BTRFS_I(inode)->block_group = trans->block_group;
 }
 
+static inline void btrfs_set_inode_last_trans(struct btrfs_trans_handle *trans,
+					      struct inode *inode)
+{
+	BTRFS_I(inode)->last_trans = trans->transaction->transid;
+}
+
 int btrfs_end_transaction(struct btrfs_trans_handle *trans,
 			  struct btrfs_root *root);
 struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root,



More information about the Btrfs-devel mailing list