[Btrfs-devel] [RFS] wait for more writers to join the transaction take 2

Chris Mason chris.mason at oracle.com
Wed Aug 8 16:52:28 PDT 2007


On Wed, 8 Aug 2007 17:29:22 -0400
Josef Bacik <jwhiter at redhat.com> wrote:

> Hello,
> 
> Ok so now with this patch we're walloping ext3.  ext3 was getting
> about 35 files/sec, whereas btrfs was getting 25 files/sec.  With
> this change btrfs gets around 170 files/sec.  I yanked out the writer
> waitqueue as it was causing a hang b/c we wouldn't wake up.

The waitqueue part made a big difference for reiserfs in multi-threaded
workloads, lets try to keep it.  I think the reason you're missing
wakeups is because you always wait, even when there's no one else
around.  If you used a schedule_timeout in the case where the number of
writers was zero, it should all work out.

> The only
> thing thats weird about this, and I think this is in part because of
> fs_mutex, is that we _never_ get anybody add anything to the
> transaction.  I put debug statements in the do/while loop to see if
> we ever loop, and we don't, however we manage to push alot more
> through so I'm kind of confused, so if anybody has any ideas I'd love
> to hear them. Thanks much,

Now that is confusing because the fs_mutex gets dropped during the
wait.  btrfs_transaction_cleaner should be hopping in every 5 seconds
or so, could you please toss in some printks there?  It would be the
only other writer in a single threaded fs_mark run.

-chris


> 
> Josef
> 
> diff -r 1765acb6d268 transaction.c
> --- a/transaction.c	Tue Aug 07 16:35:25 2007 -0400
> +++ b/transaction.c	Wed Aug 08 17:29:58 2007 -0400
> @@ -56,8 +56,8 @@ static int join_transaction(struct btrfs
>  		root->fs_info->generation++;
>  		root->fs_info->running_transaction = cur_trans;
>  		cur_trans->num_writers = 0;
> +		cur_trans->writers_while_asleep = 0;
>  		cur_trans->transid = root->fs_info->generation;
> -		init_waitqueue_head(&cur_trans->writer_wait);
>  		init_waitqueue_head(&cur_trans->commit_wait);
>  		cur_trans->in_commit = 0;
>  		cur_trans->use_count = 1;
> @@ -67,6 +67,7 @@ static int join_transaction(struct btrfs
>  		init_bit_radix(&cur_trans->dirty_pages);
>  	}
>  	cur_trans->num_writers++;
> +	cur_trans->writers_while_asleep++;
>  	return 0;
>  }
>  
> @@ -124,8 +125,6 @@ int btrfs_end_transaction(struct btrfs_t
>  	WARN_ON(cur_trans != trans->transaction);
>  	WARN_ON(cur_trans->num_writers < 1);
>  	cur_trans->num_writers--;
> -	if (waitqueue_active(&cur_trans->writer_wait))
> -		wake_up(&cur_trans->writer_wait);
>  	put_transaction(cur_trans);
>  	mutex_unlock(&root->fs_info->trans_mutex);
>  	memset(trans, 0, sizeof(*trans));
> @@ -212,6 +211,7 @@ static int wait_for_commit(struct btrfs_
>  			   struct btrfs_transaction *commit)
>  {
>  	DEFINE_WAIT(wait);
> +
>  	mutex_lock(&root->fs_info->trans_mutex);
>  	while(!commit->commit_done) {
>  		prepare_to_wait(&commit->commit_wait, &wait,
> @@ -413,12 +413,11 @@ int btrfs_commit_transaction(struct btrf
>  int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
>  			     struct btrfs_root *root)
>  {
> -	int ret = 0;
> +	int ret = 0, writers = 0;
>  	struct btrfs_transaction *cur_trans;
>  	struct btrfs_transaction *prev_trans = NULL;
>  	struct list_head dirty_fs_roots;
>  	struct radix_tree_root pinned_copy;
> -	DEFINE_WAIT(wait);
>  
>  	init_bit_radix(&pinned_copy);
>  	INIT_LIST_HEAD(&dirty_fs_roots);
> @@ -454,20 +453,19 @@ int btrfs_commit_transaction(struct btrf
>  			mutex_lock(&root->fs_info->trans_mutex);
>  		}
>  	}
> -	while (trans->transaction->num_writers > 1) {
> +	do{
> +		writers = trans->transaction->writers_while_asleep;
>  		WARN_ON(cur_trans != trans->transaction);
> -		prepare_to_wait(&trans->transaction->writer_wait,
> &wait,
> -				TASK_UNINTERRUPTIBLE);
> -		if (trans->transaction->num_writers <= 1)
> -			break;
>  		mutex_unlock(&root->fs_info->fs_mutex);
>  		mutex_unlock(&root->fs_info->trans_mutex);
> -		schedule();
> +
> +		schedule_timeout_uninterruptible(1);
> +
>  		mutex_lock(&root->fs_info->fs_mutex);
>  		mutex_lock(&root->fs_info->trans_mutex);
> -		finish_wait(&trans->transaction->writer_wait, &wait);
> -	}
> -	finish_wait(&trans->transaction->writer_wait, &wait);
> +	} while (trans->transaction->num_writers > 1 || 
> +		 writers !=
> trans->transaction->writers_while_asleep); +
>  	WARN_ON(cur_trans != trans->transaction);
>  	ret = add_dirty_roots(trans, &root->fs_info->fs_roots_radix,
>  			      &dirty_fs_roots);
> diff -r 1765acb6d268 transaction.h
> --- a/transaction.h	Tue Aug 07 16:35:25 2007 -0400
> +++ b/transaction.h	Wed Aug 08 17:10:51 2007 -0400
> @@ -26,10 +26,10 @@ struct btrfs_transaction {
>  	int in_commit;
>  	int use_count;
>  	int commit_done;
> +	unsigned long writers_while_asleep;
>  	struct list_head list;
>  	struct radix_tree_root dirty_pages;
>  	unsigned long start_time;
> -	wait_queue_head_t writer_wait;
>  	wait_queue_head_t commit_wait;
>  };
>  
> 
> _______________________________________________
> Btrfs-devel mailing list
> Btrfs-devel at oss.oracle.com
> http://oss.oracle.com/mailman/listinfo/btrfs-devel



More information about the Btrfs-devel mailing list