[rds-devel] [PATCH 13/19] Add infrastructure for RDS over TCP per network namespace.

Jie Liu jeff.liu at oracle.com
Sun Oct 7 05:18:05 PDT 2012


- Move the global TCP connection list/lock, as well as the info-exporting list, lock and counter, into struct netns_rds.
- Initialize those structures on each netns clone, and destroy them on netns exit.
- Make them work for basic TCP connection alloc, destroy, etc.

Signed-off-by: Jie Liu <jeff.liu at oracle.com>
---
 include/net/netns/rds.h |   10 ++++
 net/rds/rds.h           |    2 +-
 net/rds/tcp.c           |  105 +++++++++++++++++++++++++++++++----------------
 3 files changed, 81 insertions(+), 36 deletions(-)

diff --git a/include/net/netns/rds.h b/include/net/netns/rds.h
index e47bfe6..78918fa 100644
--- a/include/net/netns/rds.h
+++ b/include/net/netns/rds.h
@@ -32,6 +32,16 @@ struct netns_rds {
 	struct list_head rds_cong_monitor;
 	rwlock_t rds_cong_monitor_lock;
 
+#if defined(CONFIG_RDS_TCP) || defined(CONFIG_RDS_MODULE)
+	/* track rds_tcp_connection structs so they can be cleaned up */
+	struct list_head rds_tcp_conn_list;
+	spinlock_t rds_tcp_conn_lock;
+	/* exporting info */
+	struct list_head rds_tcp_tc_list;
+	spinlock_t rds_tcp_tc_list_lock;
+	unsigned int rds_tcp_tc_count;
+#endif
+
 	/* global statistics */
 	struct rds_statistics __percpu *rds_stats;
 
diff --git a/net/rds/rds.h b/net/rds/rds.h
index 896ed6c..f282b7e 100644
--- a/net/rds/rds.h
+++ b/net/rds/rds.h
@@ -450,7 +450,7 @@ struct rds_transport {
 
 	unsigned int (*stats_info_copy)(struct rds_info_iterator *iter,
 					unsigned int avail);
-	void (*exit)(void);
+	void (*exit)(struct net *net);
 	void *(*get_mr)(struct scatterlist *sg, unsigned long nr_sg,
 			struct rds_sock *rs, u32 *key_ret);
 	void (*sync_mr)(void *trans_private, int direction);
diff --git a/net/rds/tcp.c b/net/rds/tcp.c
index 82b692a..7a49899 100644
--- a/net/rds/tcp.c
+++ b/net/rds/tcp.c
@@ -39,15 +39,6 @@
 #include "rds.h"
 #include "tcp.h"
 
-/* only for info exporting */
-static DEFINE_SPINLOCK(rds_tcp_tc_list_lock);
-static LIST_HEAD(rds_tcp_tc_list);
-static unsigned int rds_tcp_tc_count;
-
-/* Track rds_tcp_connection structs so they can be cleaned up */
-static DEFINE_SPINLOCK(rds_tcp_conn_lock);
-static LIST_HEAD(rds_tcp_conn_list);
-
 static struct kmem_cache *rds_tcp_conn_slab;
 
 #define RDS_TCP_DEFAULT_BUFSIZE (128 * 1024)
@@ -94,14 +85,17 @@ u32 rds_tcp_snd_una(struct rds_tcp_connection *tc)
 void rds_tcp_restore_callbacks(struct socket *sock,
 			       struct rds_tcp_connection *tc)
 {
+	struct net *net = sock_net(sock->sk);
+
 	rdsdebug("restoring sock %p callbacks from tc %p\n", sock, tc);
+
 	write_lock_bh(&sock->sk->sk_callback_lock);
 
 	/* done under the callback_lock to serialize with write_space */
-	spin_lock(&rds_tcp_tc_list_lock);
+	spin_lock(&net->rds.rds_tcp_tc_list_lock);
 	list_del_init(&tc->t_list_item);
-	rds_tcp_tc_count--;
-	spin_unlock(&rds_tcp_tc_list_lock);
+	net->rds.rds_tcp_tc_count--;
+	spin_unlock(&net->rds.rds_tcp_tc_list_lock);
 
 	tc->t_sock = NULL;
 
@@ -120,16 +114,19 @@ void rds_tcp_restore_callbacks(struct socket *sock,
  */
 void rds_tcp_set_callbacks(struct socket *sock, struct rds_connection *conn)
 {
+	struct net *net = sock_net(sock->sk);
+
 	struct rds_tcp_connection *tc = conn->c_transport_data;
 
 	rdsdebug("setting sock %p callbacks to tc %p\n", sock, tc);
+
 	write_lock_bh(&sock->sk->sk_callback_lock);
 
 	/* done under the callback_lock to serialize with write_space */
-	spin_lock(&rds_tcp_tc_list_lock);
-	list_add_tail(&tc->t_list_item, &rds_tcp_tc_list);
-	rds_tcp_tc_count++;
-	spin_unlock(&rds_tcp_tc_list_lock);
+	spin_lock(&net->rds.rds_tcp_tc_list_lock);
+	list_add_tail(&tc->t_list_item, &net->rds.rds_tcp_tc_list);
+	net->rds.rds_tcp_tc_count++;
+	spin_unlock(&net->rds.rds_tcp_tc_list_lock);
 
 	/* accepted sockets need our listen data ready undone */
 	if (sock->sk->sk_data_ready == rds_tcp_listen_data_ready)
@@ -153,18 +150,19 @@ static void rds_tcp_tc_info(struct socket *sock, unsigned int len,
 			    struct rds_info_iterator *iter,
 			    struct rds_info_lengths *lens)
 {
+	struct net *net = sock_net(sock->sk);
 	struct rds_info_tcp_socket tsinfo;
 	struct rds_tcp_connection *tc;
-	unsigned long flags;
 	struct sockaddr_in sin;
+	unsigned long flags;
 	int sinlen;
 
-	spin_lock_irqsave(&rds_tcp_tc_list_lock, flags);
+	spin_lock_irqsave(&net->rds.rds_tcp_tc_list_lock, flags);
 
-	if (len / sizeof(tsinfo) < rds_tcp_tc_count)
+	if (len / sizeof(tsinfo) < net->rds.rds_tcp_tc_count)
 		goto out;
 
-	list_for_each_entry(tc, &rds_tcp_tc_list, t_list_item) {
+	list_for_each_entry(tc, &net->rds.rds_tcp_tc_list, t_list_item) {
 
 		sock->ops->getname(sock, (struct sockaddr *)&sin, &sinlen, 0);
 		tsinfo.local_addr = sin.sin_addr.s_addr;
@@ -183,10 +181,10 @@ static void rds_tcp_tc_info(struct socket *sock, unsigned int len,
 	}
 
 out:
-	lens->nr = rds_tcp_tc_count;
+	lens->nr = net->rds.rds_tcp_tc_count;
 	lens->each = sizeof(tsinfo);
 
-	spin_unlock_irqrestore(&rds_tcp_tc_list_lock, flags);
+	spin_unlock_irqrestore(&net->rds.rds_tcp_tc_list_lock, flags);
 }
 
 static int rds_tcp_laddr_check(__be32 addr)
@@ -198,6 +196,7 @@ static int rds_tcp_laddr_check(__be32 addr)
 
 static int rds_tcp_conn_alloc(struct rds_connection *conn, gfp_t gfp)
 {
+	struct net *net = rds_conn_to_net(conn);
 	struct rds_tcp_connection *tc;
 
 	tc = kmem_cache_alloc(rds_tcp_conn_slab, gfp);
@@ -211,9 +210,9 @@ static int rds_tcp_conn_alloc(struct rds_connection *conn, gfp_t gfp)
 
 	conn->c_transport_data = tc;
 
-	spin_lock_irq(&rds_tcp_conn_lock);
-	list_add_tail(&tc->t_tcp_node, &rds_tcp_conn_list);
-	spin_unlock_irq(&rds_tcp_conn_lock);
+	spin_lock_irq(&net->rds.rds_tcp_conn_lock);
+	list_add_tail(&tc->t_tcp_node, &net->rds.rds_tcp_conn_list);
+	spin_unlock_irq(&net->rds.rds_tcp_conn_lock);
 
 	rdsdebug("alloced tc %p\n", conn->c_transport_data);
 	return 0;
@@ -225,23 +224,23 @@ static void rds_tcp_conn_free(struct net *net, void *arg)
 	unsigned long flags;
 	rdsdebug("freeing tc %p\n", tc);
 
-	spin_lock_irqsave(&rds_tcp_conn_lock, flags);
+	spin_lock_irqsave(&net->rds.rds_tcp_conn_lock, flags);
 	list_del(&tc->t_tcp_node);
-	spin_unlock_irqrestore(&rds_tcp_conn_lock, flags);
+	spin_unlock_irqrestore(&net->rds.rds_tcp_conn_lock, flags);
 
 	kmem_cache_free(rds_tcp_conn_slab, tc);
 }
 
-static void rds_tcp_destroy_conns(void)
+static void rds_tcp_destroy_conns(struct net *net)
 {
 	struct rds_tcp_connection *tc, *_tc;
 	LIST_HEAD(tmp_list);
 
 	/* avoid calling conn_destroy with irqs off */
-	spin_lock_irq(&rds_tcp_conn_lock);
-	list_splice(&rds_tcp_conn_list, &tmp_list);
-	INIT_LIST_HEAD(&rds_tcp_conn_list);
-	spin_unlock_irq(&rds_tcp_conn_lock);
+	spin_lock_irq(&net->rds.rds_tcp_conn_lock);
+	list_splice(&net->rds.rds_tcp_conn_list, &tmp_list);
+	INIT_LIST_HEAD(&net->rds.rds_tcp_conn_list);
+	spin_unlock_irq(&net->rds.rds_tcp_conn_lock);
 
 	list_for_each_entry_safe(tc, _tc, &tmp_list, t_tcp_node) {
 		if (tc->conn->c_passive)
@@ -250,11 +249,41 @@ static void rds_tcp_destroy_conns(void)
 	}
 }
 
+static int __net_init rds_tcp_pernet_init(struct net *net)
+{
+	INIT_LIST_HEAD(&net->rds.rds_tcp_tc_list);
+	spin_lock_init(&net->rds.rds_tcp_tc_list_lock);
+	net->rds.rds_tcp_tc_count = 0;
+	INIT_LIST_HEAD(&net->rds.rds_tcp_conn_list);
+	spin_lock_init(&net->rds.rds_tcp_conn_lock);
+
+	return 0;
+}
+
+static void __net_exit rds_tcp_pernet_exit(struct net *net)
+{
+	rds_tcp_destroy_conns(net);
+}
+
+static struct pernet_operations __net_initdata rds_tcp_net_ops = {
+	.init = rds_tcp_pernet_init,
+	.exit = rds_tcp_pernet_exit,
+};
+
+static void rds_tcp_net_exit(void)
+{
+	unregister_pernet_subsys(&rds_tcp_net_ops);
+}
+
+static int rds_tcp_net_init(void)
+{
+	return register_pernet_subsys(&rds_tcp_net_ops);
+}
+
 static void rds_tcp_exit(void)
 {
 	rds_info_deregister_func(RDS_INFO_TCP_SOCKETS, rds_tcp_tc_info);
 	rds_tcp_listen_stop();
-	rds_tcp_destroy_conns();
 	rds_trans_unregister(&rds_tcp_transport);
 	rds_tcp_recv_exit();
 	kmem_cache_destroy(rds_tcp_conn_slab);
@@ -274,7 +303,7 @@ struct rds_transport rds_tcp_transport = {
 	.inc_copy_to_user	= rds_tcp_inc_copy_to_user,
 	.inc_free		= rds_tcp_inc_free,
 	.stats_info_copy	= rds_tcp_stats_info_copy,
-	.exit			= rds_tcp_exit,
+	.exit			= rds_tcp_pernet_exit,
 	.t_owner		= THIS_MODULE,
 	.t_name			= "tcp",
 	.t_type			= RDS_TRANS_TCP,
@@ -301,14 +330,20 @@ static int rds_tcp_init(void)
 	if (ret)
 		goto out_recv;
 
-	ret = rds_tcp_listen_init();
+	ret = rds_tcp_net_init();
 	if (ret)
 		goto out_register;
 
+	ret = rds_tcp_listen_init();
+	if (ret)
+		goto out_net;
+
 	rds_info_register_func(RDS_INFO_TCP_SOCKETS, rds_tcp_tc_info);
 
 	goto out;
 
+out_net:
+	rds_tcp_net_exit();
 out_register:
 	rds_trans_unregister(&rds_tcp_transport);
 out_recv:
-- 
1.7.4.1




More information about the rds-devel mailing list