[rds-devel] [PATCH 04/19] Make the rds bind hash aware of struct net; we use one hash to deal with multiple network namespaces.

Jie Liu jeff.liu at oracle.com
Sun Oct 7 05:14:03 PDT 2012


- To keep this change simple, we use the global bind hash list to deal with multiple netns.
- Teach rds_trans_get_preferred() about this change by passing it a struct net argument; this will be improved later.

Signed-off-by: Jie Liu <jeff.liu at oracle.com>
---
 net/rds/bind.c       |   42 ++++++++++++++++++++++++++----------------
 net/rds/connection.c |    2 +-
 net/rds/rds.h        |    4 ++--
 net/rds/recv.c       |    3 ++-
 net/rds/transport.c  |    2 +-
 5 files changed, 32 insertions(+), 21 deletions(-)

diff --git a/net/rds/bind.c b/net/rds/bind.c
index 637bde5..b7e7b04 100644
--- a/net/rds/bind.c
+++ b/net/rds/bind.c
@@ -32,33 +32,40 @@
  */
 #include <linux/kernel.h>
 #include <net/sock.h>
+#include <net/netns/hash.h>
 #include <linux/in.h>
 #include <linux/if_arp.h>
 #include <linux/jhash.h>
 #include <linux/ratelimit.h>
+
 #include "rds.h"
 
 #define BIND_HASH_SIZE 1024
 static struct hlist_head bind_hash_table[BIND_HASH_SIZE];
 static DEFINE_SPINLOCK(rds_bind_lock);
 
-static struct hlist_head *hash_to_bucket(__be32 addr, __be16 port)
+static struct hlist_head *hash_to_bucket(struct net *net,
+					 __be32 addr, __be16 port)
 {
-	return bind_hash_table + (jhash_2words((u32)addr, (u32)port, 0) &
-				  (BIND_HASH_SIZE - 1));
+	u32 tmp = (net_hash_mix(net) + jhash_2words((u32)addr, (u32)port, 0)) &
+		  (BIND_HASH_SIZE - 1);
+
+	return bind_hash_table + tmp;
 }
 
-static struct rds_sock *rds_bind_lookup(__be32 addr, __be16 port,
+static struct rds_sock *rds_bind_lookup(struct net *net,
+					__be32 addr, __be16 port,
 					struct rds_sock *insert)
 {
-	struct rds_sock *rs;
-	struct hlist_node *node;
-	struct hlist_head *head = hash_to_bucket(addr, port);
-	u64 cmp;
 	u64 needle = ((u64)be32_to_cpu(addr) << 32) | be16_to_cpu(port);
+	struct hlist_head *head = hash_to_bucket(net, addr, port);
+	struct hlist_node *node;
+	struct rds_sock *rs;
 
 	rcu_read_lock();
 	hlist_for_each_entry_rcu(rs, node, head, rs_bound_node) {
+		u64 cmp;
+
 		cmp = ((u64)be32_to_cpu(rs->rs_bound_addr) << 32) |
 		      be16_to_cpu(rs->rs_bound_port);
 
@@ -91,11 +98,11 @@ static struct rds_sock *rds_bind_lookup(__be32 addr, __be16 port,
  * The rx path can race with rds_release.  We notice if rds_release() has
  * marked this socket and don't return a rs ref to the rx path.
  */
-struct rds_sock *rds_find_bound(__be32 addr, __be16 port)
+struct rds_sock *rds_find_bound(struct net *net, __be32 addr, __be16 port)
 {
 	struct rds_sock *rs;
 
-	rs = rds_bind_lookup(addr, port, NULL);
+	rs = rds_bind_lookup(net, addr, port, NULL);
 
 	if (rs && !sock_flag(rds_rs_to_sk(rs), SOCK_DEAD))
 		rds_sock_addref(rs);
@@ -110,8 +117,9 @@ struct rds_sock *rds_find_bound(__be32 addr, __be16 port)
 /* returns -ve errno or +ve port */
 static int rds_add_bound(struct rds_sock *rs, __be32 addr, __be16 *port)
 {
-	unsigned long flags;
+	struct net *net = sock_net(&rs->rs_sk);
 	int ret = -EADDRINUSE;
+	unsigned long flags;
 	u16 rover, last;
 
 	if (*port != 0) {
@@ -127,7 +135,8 @@ static int rds_add_bound(struct rds_sock *rs, __be32 addr, __be16 *port)
 	do {
 		if (rover == 0)
 			rover++;
-		if (!rds_bind_lookup(addr, cpu_to_be16(rover), rs)) {
+
+		if (!rds_bind_lookup(net, addr, cpu_to_be16(rover), rs)) {
 			*port = rs->rs_bound_port;
 			ret = 0;
 			rdsdebug("rs %p binding to %pI4:%d\n",
@@ -149,8 +158,8 @@ void rds_remove_bound(struct rds_sock *rs)
 
 	if (rs->rs_bound_addr) {
 		rdsdebug("rs %p unbinding from %pI4:%d\n",
-		  rs, &rs->rs_bound_addr,
-		  ntohs(rs->rs_bound_port));
+			 rs, &rs->rs_bound_addr,
+			 ntohs(rs->rs_bound_port));
 
 		hlist_del_init_rcu(&rs->rs_bound_node);
 		rds_sock_put(rs);
@@ -163,6 +172,7 @@ void rds_remove_bound(struct rds_sock *rs)
 int rds_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 {
 	struct sock *sk = sock->sk;
+	struct net *net = sock_net(sk);
 	struct sockaddr_in *sin = (struct sockaddr_in *)uaddr;
 	struct rds_sock *rs = rds_sk_to_rs(sk);
 	struct rds_transport *trans;
@@ -182,12 +192,12 @@ int rds_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 	if (ret)
 		goto out;
 
-	trans = rds_trans_get_preferred(sin->sin_addr.s_addr);
+	trans = rds_trans_get_preferred(net, sin->sin_addr.s_addr);
 	if (!trans) {
 		ret = -EADDRNOTAVAIL;
 		rds_remove_bound(rs);
 		printk_ratelimited(KERN_INFO "RDS: rds_bind() could not find a transport, "
-				"load rds_tcp or rds_rdma?\n");
+				   "load rds_tcp or rds_rdma?\n");
 		goto out;
 	}
 
diff --git a/net/rds/connection.c b/net/rds/connection.c
index ba0878c..d477916 100644
--- a/net/rds/connection.c
+++ b/net/rds/connection.c
@@ -169,7 +169,7 @@ static struct rds_connection *__rds_conn_create(struct net *net,
 	 * can bind to the destination address then we'd rather the messages
 	 * flow through loopback rather than either transport.
 	 */
-	loop_trans = rds_trans_get_preferred(faddr);
+	loop_trans = rds_trans_get_preferred(net, faddr);
 	if (loop_trans) {
 		rds_trans_put(loop_trans);
 		conn->c_loopback = 1;
diff --git a/net/rds/rds.h b/net/rds/rds.h
index 02ef0e5..9d9e247 100644
--- a/net/rds/rds.h
+++ b/net/rds/rds.h
@@ -599,7 +599,7 @@ extern wait_queue_head_t rds_poll_waitq;
 /* bind.c */
 int rds_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len);
 void rds_remove_bound(struct rds_sock *rs);
-struct rds_sock *rds_find_bound(__be32 addr, __be16 port);
+struct rds_sock *rds_find_bound(struct net *net, __be32 addr, __be16 port);
 
 /* cong.c */
 int rds_cong_get_maps(struct rds_connection *conn);
@@ -812,7 +812,7 @@ void rds_connect_complete(struct rds_connection *conn);
 /* transport.c */
 int rds_trans_register(struct rds_transport *trans);
 void rds_trans_unregister(struct rds_transport *trans);
-struct rds_transport *rds_trans_get_preferred(__be32 addr);
+struct rds_transport *rds_trans_get_preferred(struct net *net, __be32 addr);
 void rds_trans_put(struct rds_transport *trans);
 unsigned int rds_trans_stats_info_copy(struct rds_info_iterator *iter,
 				       unsigned int avail);
diff --git a/net/rds/recv.c b/net/rds/recv.c
index 9f0f17c..2e1bee2 100644
--- a/net/rds/recv.c
+++ b/net/rds/recv.c
@@ -157,6 +157,7 @@ static void rds_recv_incoming_exthdrs(struct rds_incoming *inc, struct rds_sock
 void rds_recv_incoming(struct rds_connection *conn, __be32 saddr, __be32 daddr,
 		       struct rds_incoming *inc, gfp_t gfp)
 {
+	struct net *net = rds_conn_to_net(conn);
 	struct rds_sock *rs = NULL;
 	struct sock *sk;
 	unsigned long flags;
@@ -208,7 +209,7 @@ void rds_recv_incoming(struct rds_connection *conn, __be32 saddr, __be32 daddr,
 		goto out;
 	}
 
-	rs = rds_find_bound(daddr, inc->i_hdr.h_dport);
+	rs = rds_find_bound(net, daddr, inc->i_hdr.h_dport);
 	if (!rs) {
 		rds_stats_inc(s_recv_drop_no_sock);
 		goto out;
diff --git a/net/rds/transport.c b/net/rds/transport.c
index 7f2ac4f..a5b187b 100644
--- a/net/rds/transport.c
+++ b/net/rds/transport.c
@@ -77,7 +77,7 @@ void rds_trans_put(struct rds_transport *trans)
 		module_put(trans->t_owner);
 }
 
-struct rds_transport *rds_trans_get_preferred(__be32 addr)
+struct rds_transport *rds_trans_get_preferred(struct net *net, __be32 addr)
 {
 	struct rds_transport *ret = NULL;
 	struct rds_transport *trans;
-- 
1.7.4.1




More information about the rds-devel mailing list