[rds-devel] [PATCH 04/19] Make the RDS bind hash aware of struct net; use one hash table to handle multiple network namespaces.
Jie Liu
jeff.liu at oracle.com
Sun Oct 7 05:14:03 PDT 2012
- To keep this change simple, we use a single global bind hash table to handle multiple netns.
- Teach rds_trans_get_preferred() about this change (it now takes a struct net argument); this will be improved later.
Signed-off-by: Jie Liu <jeff.liu at oracle.com>
---
net/rds/bind.c | 42 ++++++++++++++++++++++++++----------------
net/rds/connection.c | 2 +-
net/rds/rds.h | 4 ++--
net/rds/recv.c | 3 ++-
net/rds/transport.c | 2 +-
5 files changed, 32 insertions(+), 21 deletions(-)
diff --git a/net/rds/bind.c b/net/rds/bind.c
index 637bde5..b7e7b04 100644
--- a/net/rds/bind.c
+++ b/net/rds/bind.c
@@ -32,33 +32,40 @@
*/
#include <linux/kernel.h>
#include <net/sock.h>
+#include <net/netns/hash.h>
#include <linux/in.h>
#include <linux/if_arp.h>
#include <linux/jhash.h>
#include <linux/ratelimit.h>
+
#include "rds.h"
#define BIND_HASH_SIZE 1024
static struct hlist_head bind_hash_table[BIND_HASH_SIZE];
static DEFINE_SPINLOCK(rds_bind_lock);
-static struct hlist_head *hash_to_bucket(__be32 addr, __be16 port)
+static struct hlist_head *hash_to_bucket(struct net *net,
+ __be32 addr, __be16 port)
{
- return bind_hash_table + (jhash_2words((u32)addr, (u32)port, 0) &
- (BIND_HASH_SIZE - 1));
+ u32 tmp = (net_hash_mix(net) + jhash_2words((u32)addr, (u32)port, 0)) &
+ (BIND_HASH_SIZE - 1);
+
+ return bind_hash_table + tmp;
}
-static struct rds_sock *rds_bind_lookup(__be32 addr, __be16 port,
+static struct rds_sock *rds_bind_lookup(struct net *net,
+ __be32 addr, __be16 port,
struct rds_sock *insert)
{
- struct rds_sock *rs;
- struct hlist_node *node;
- struct hlist_head *head = hash_to_bucket(addr, port);
- u64 cmp;
u64 needle = ((u64)be32_to_cpu(addr) << 32) | be16_to_cpu(port);
+ struct hlist_head *head = hash_to_bucket(net, addr, port);
+ struct hlist_node *node;
+ struct rds_sock *rs;
rcu_read_lock();
hlist_for_each_entry_rcu(rs, node, head, rs_bound_node) {
+ u64 cmp;
+
cmp = ((u64)be32_to_cpu(rs->rs_bound_addr) << 32) |
be16_to_cpu(rs->rs_bound_port);
@@ -91,11 +98,11 @@ static struct rds_sock *rds_bind_lookup(__be32 addr, __be16 port,
* The rx path can race with rds_release. We notice if rds_release() has
* marked this socket and don't return a rs ref to the rx path.
*/
-struct rds_sock *rds_find_bound(__be32 addr, __be16 port)
+struct rds_sock *rds_find_bound(struct net *net, __be32 addr, __be16 port)
{
struct rds_sock *rs;
- rs = rds_bind_lookup(addr, port, NULL);
+ rs = rds_bind_lookup(net, addr, port, NULL);
if (rs && !sock_flag(rds_rs_to_sk(rs), SOCK_DEAD))
rds_sock_addref(rs);
@@ -110,8 +117,9 @@ struct rds_sock *rds_find_bound(__be32 addr, __be16 port)
/* returns -ve errno or +ve port */
static int rds_add_bound(struct rds_sock *rs, __be32 addr, __be16 *port)
{
- unsigned long flags;
+ struct net *net = sock_net(&rs->rs_sk);
int ret = -EADDRINUSE;
+ unsigned long flags;
u16 rover, last;
if (*port != 0) {
@@ -127,7 +135,8 @@ static int rds_add_bound(struct rds_sock *rs, __be32 addr, __be16 *port)
do {
if (rover == 0)
rover++;
- if (!rds_bind_lookup(addr, cpu_to_be16(rover), rs)) {
+
+ if (!rds_bind_lookup(net, addr, cpu_to_be16(rover), rs)) {
*port = rs->rs_bound_port;
ret = 0;
rdsdebug("rs %p binding to %pI4:%d\n",
@@ -149,8 +158,8 @@ void rds_remove_bound(struct rds_sock *rs)
if (rs->rs_bound_addr) {
rdsdebug("rs %p unbinding from %pI4:%d\n",
- rs, &rs->rs_bound_addr,
- ntohs(rs->rs_bound_port));
+ rs, &rs->rs_bound_addr,
+ ntohs(rs->rs_bound_port));
hlist_del_init_rcu(&rs->rs_bound_node);
rds_sock_put(rs);
@@ -163,6 +172,7 @@ void rds_remove_bound(struct rds_sock *rs)
int rds_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
{
struct sock *sk = sock->sk;
+ struct net *net = sock_net(sk);
struct sockaddr_in *sin = (struct sockaddr_in *)uaddr;
struct rds_sock *rs = rds_sk_to_rs(sk);
struct rds_transport *trans;
@@ -182,12 +192,12 @@ int rds_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
if (ret)
goto out;
- trans = rds_trans_get_preferred(sin->sin_addr.s_addr);
+ trans = rds_trans_get_preferred(net, sin->sin_addr.s_addr);
if (!trans) {
ret = -EADDRNOTAVAIL;
rds_remove_bound(rs);
printk_ratelimited(KERN_INFO "RDS: rds_bind() could not find a transport, "
- "load rds_tcp or rds_rdma?\n");
+ "load rds_tcp or rds_rdma?\n");
goto out;
}
diff --git a/net/rds/connection.c b/net/rds/connection.c
index ba0878c..d477916 100644
--- a/net/rds/connection.c
+++ b/net/rds/connection.c
@@ -169,7 +169,7 @@ static struct rds_connection *__rds_conn_create(struct net *net,
* can bind to the destination address then we'd rather the messages
* flow through loopback rather than either transport.
*/
- loop_trans = rds_trans_get_preferred(faddr);
+ loop_trans = rds_trans_get_preferred(net, faddr);
if (loop_trans) {
rds_trans_put(loop_trans);
conn->c_loopback = 1;
diff --git a/net/rds/rds.h b/net/rds/rds.h
index 02ef0e5..9d9e247 100644
--- a/net/rds/rds.h
+++ b/net/rds/rds.h
@@ -599,7 +599,7 @@ extern wait_queue_head_t rds_poll_waitq;
/* bind.c */
int rds_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len);
void rds_remove_bound(struct rds_sock *rs);
-struct rds_sock *rds_find_bound(__be32 addr, __be16 port);
+struct rds_sock *rds_find_bound(struct net *net, __be32 addr, __be16 port);
/* cong.c */
int rds_cong_get_maps(struct rds_connection *conn);
@@ -812,7 +812,7 @@ void rds_connect_complete(struct rds_connection *conn);
/* transport.c */
int rds_trans_register(struct rds_transport *trans);
void rds_trans_unregister(struct rds_transport *trans);
-struct rds_transport *rds_trans_get_preferred(__be32 addr);
+struct rds_transport *rds_trans_get_preferred(struct net *net, __be32 addr);
void rds_trans_put(struct rds_transport *trans);
unsigned int rds_trans_stats_info_copy(struct rds_info_iterator *iter,
unsigned int avail);
diff --git a/net/rds/recv.c b/net/rds/recv.c
index 9f0f17c..2e1bee2 100644
--- a/net/rds/recv.c
+++ b/net/rds/recv.c
@@ -157,6 +157,7 @@ static void rds_recv_incoming_exthdrs(struct rds_incoming *inc, struct rds_sock
void rds_recv_incoming(struct rds_connection *conn, __be32 saddr, __be32 daddr,
struct rds_incoming *inc, gfp_t gfp)
{
+ struct net *net = rds_conn_to_net(conn);
struct rds_sock *rs = NULL;
struct sock *sk;
unsigned long flags;
@@ -208,7 +209,7 @@ void rds_recv_incoming(struct rds_connection *conn, __be32 saddr, __be32 daddr,
goto out;
}
- rs = rds_find_bound(daddr, inc->i_hdr.h_dport);
+ rs = rds_find_bound(net, daddr, inc->i_hdr.h_dport);
if (!rs) {
rds_stats_inc(s_recv_drop_no_sock);
goto out;
diff --git a/net/rds/transport.c b/net/rds/transport.c
index 7f2ac4f..a5b187b 100644
--- a/net/rds/transport.c
+++ b/net/rds/transport.c
@@ -77,7 +77,7 @@ void rds_trans_put(struct rds_transport *trans)
module_put(trans->t_owner);
}
-struct rds_transport *rds_trans_get_preferred(__be32 addr)
+struct rds_transport *rds_trans_get_preferred(struct net *net, __be32 addr)
{
struct rds_transport *ret = NULL;
struct rds_transport *trans;
--
1.7.4.1
More information about the rds-devel
mailing list