Subject: [PATCH] RPC: separate TCP and UDP write space callbacks

Split the socket write space callback function into a TCP version and UDP
version, eliminating one dependence on the "xprt->stream" variable.

Also, make both callbacks more CPU efficient by reducing the number of
conditional branches taken in the hot path in each function.

Test-plan: Write-intensive workload on a single mount point.
Created: Fri, 22 Oct 2004 17:23:54 -0400
Signed-off-by: Chuck Lever
---
 net/sunrpc/xprtsock.c | 71 ++++++++++++++++++++++++++++++++----------
 1 files changed, 55 insertions(+), 16 deletions(-)

diff -X /home/cel/src/linux/dont-diff -Naurp 23-rpc-switch-cleanup/net/sunrpc/xprtsock.c 24-rpc-write_space/net/sunrpc/xprtsock.c
--- 23-rpc-switch-cleanup/net/sunrpc/xprtsock.c 2004-10-21 12:32:35.767572000 -0400
+++ 24-rpc-write_space/net/sunrpc/xprtsock.c 2004-10-21 12:32:39.023699000 -0400
@@ -304,7 +304,7 @@ static fastcall int xs_send_request(stru if (status == -EAGAIN) { if (test_bit(SOCK_ASYNC_NOSPACE, &xprt->sock->flags)) { - /* Protect against races with xs_write_space */ + /* Protect against races with write_space */ spin_lock_bh(&xprt->xprt_lock); /* Don't race with disconnect */
@@ -735,8 +735,8 @@ static void xs_tcp_state_change(struct s } /** - * xs_write_space - callback invoked when socket buffer space becomes - * available + * xs_udp_write_space - callback invoked when socket buffer space + * becomes available * @sk: socket whose state has changed * * Called when more output buffer space is available for this socket.
@@ -744,27 +744,64 @@ static void xs_tcp_state_change(struct s * progress, otherwise we'll waste resources thrashing sock_sendmsg * with a bunch of small requests. 
*/ -static void xs_write_space(struct sock *sk) +static void xs_udp_write_space(struct sock *sk) { struct rpc_xprt *xprt; struct socket *sock; read_lock(&sk->sk_callback_lock); - if (!(xprt = xprt_from_sock(sk)) || !(sock = sk->sk_socket)) + + /* from net/core/sock.c:sock_def_write_space */ + if (!sock_writeable(sk)) + goto out; + if (!(sock = sk->sk_socket)) + goto out; + + if (!(xprt = xprt_from_sock(sk))) goto out; if (xprt->shutdown) goto out; - /* Wait until we have enough socket memory */ - if (xprt->stream) { - /* from net/core/stream.c:sk_stream_write_space */ - if (sk_stream_wspace(sk) < sk_stream_min_wspace(sk)) - goto out; - } else { - /* from net/core/sock.c:sock_def_write_space */ - if (!sock_writeable(sk)) - goto out; - } + if (!test_and_clear_bit(SOCK_NOSPACE, &sock->flags)) + goto out; + + spin_lock_bh(&xprt->xprt_lock); + if (xprt->snd_task) + rpc_wake_up_task(xprt->snd_task); + spin_unlock_bh(&xprt->xprt_lock); + +out: + read_unlock(&sk->sk_callback_lock); +} + +/** + * xs_tcp_write_space - callback invoked when socket buffer space + * becomes available + * @sk: socket whose state has changed + * + * Called when more output buffer space is available for this socket. + * We try not to wake our writers until they can make "significant" + * progress, otherwise we'll waste resources thrashing sock_sendmsg + * with a bunch of small requests. 
+ */ +static void xs_tcp_write_space(struct sock *sk) +{ + struct rpc_xprt *xprt; + struct socket *sock; + + read_lock(&sk->sk_callback_lock); + + /* from net/core/stream.c:sk_stream_write_space */ + if (sk_stream_wspace(sk) < sk_stream_min_wspace(sk)) + goto out; + if (!(sock = sk->sk_socket)) + goto out; + + /* if the xprt is closed or shutting down, punt */ + if (!(xprt = xprt_from_sock(sk))) + goto out; + if (xprt->shutdown) + goto out; if (!test_and_clear_bit(SOCK_NOSPACE, &sock->flags)) goto out; @@ -773,6 +810,7 @@ static void xs_write_space(struct sock * if (xprt->snd_task) rpc_wake_up_task(xprt->snd_task); spin_unlock_bh(&xprt->xprt_lock); + out: read_unlock(&sk->sk_callback_lock); } @@ -900,6 +938,7 @@ static void xs_bind(struct rpc_xprt *xpr xprt->old_write_space = sk->sk_write_space; if (xprt->prot == IPPROTO_UDP) { sk->sk_data_ready = xs_udp_data_ready; + sk->sk_write_space = xs_udp_write_space; sk->sk_no_check = UDP_CSUM_NORCV; xprt_set_connected(xprt); } else { @@ -907,9 +946,9 @@ static void xs_bind(struct rpc_xprt *xpr tp->nonagle = 1; /* disable Nagle's algorithm */ sk->sk_data_ready = xs_tcp_data_ready; sk->sk_state_change = xs_tcp_state_change; + sk->sk_write_space = xs_tcp_write_space; xprt_clear_connected(xprt); } - sk->sk_write_space = xs_write_space; /* Reset to new socket */ xprt->sock = sock;