[rds-devel] [PATCH 13/19] Add infrastructure for RDS over TCP per network namespace.
Jie Liu
jeff.liu at oracle.com
Sun Oct 7 05:18:05 PDT 2012
- Move the global TCP conn list/lock, as well as the info-exporting list/lock/count, to netns_rds.
- Initialize those structures on netns clone, and destroy them on netns exit.
- Make them work for basic TCP connection alloc, destroy, etc.
Signed-off-by: Jie Liu <jeff.liu at oracle.com>
---
include/net/netns/rds.h | 10 ++++
net/rds/rds.h | 2 +-
net/rds/tcp.c | 105 +++++++++++++++++++++++++++++++----------------
3 files changed, 81 insertions(+), 36 deletions(-)
diff --git a/include/net/netns/rds.h b/include/net/netns/rds.h
index e47bfe6..78918fa 100644
--- a/include/net/netns/rds.h
+++ b/include/net/netns/rds.h
@@ -32,6 +32,16 @@ struct netns_rds {
struct list_head rds_cong_monitor;
rwlock_t rds_cong_monitor_lock;
+#if defined(CONFIG_RDS_TCP) || defined(CONFIG_RDS_MODULE)
+ /* track rds_tcp_connection structs so they can be cleaned up */
+ struct list_head rds_tcp_conn_list;
+ spinlock_t rds_tcp_conn_lock;
+ /* exporting info */
+ struct list_head rds_tcp_tc_list;
+ spinlock_t rds_tcp_tc_list_lock;
+ unsigned int rds_tcp_tc_count;
+#endif
+
/* global statistics */
struct rds_statistics __percpu *rds_stats;
diff --git a/net/rds/rds.h b/net/rds/rds.h
index 896ed6c..f282b7e 100644
--- a/net/rds/rds.h
+++ b/net/rds/rds.h
@@ -450,7 +450,7 @@ struct rds_transport {
unsigned int (*stats_info_copy)(struct rds_info_iterator *iter,
unsigned int avail);
- void (*exit)(void);
+ void (*exit)(struct net *net);
void *(*get_mr)(struct scatterlist *sg, unsigned long nr_sg,
struct rds_sock *rs, u32 *key_ret);
void (*sync_mr)(void *trans_private, int direction);
diff --git a/net/rds/tcp.c b/net/rds/tcp.c
index 82b692a..7a49899 100644
--- a/net/rds/tcp.c
+++ b/net/rds/tcp.c
@@ -39,15 +39,6 @@
#include "rds.h"
#include "tcp.h"
-/* only for info exporting */
-static DEFINE_SPINLOCK(rds_tcp_tc_list_lock);
-static LIST_HEAD(rds_tcp_tc_list);
-static unsigned int rds_tcp_tc_count;
-
-/* Track rds_tcp_connection structs so they can be cleaned up */
-static DEFINE_SPINLOCK(rds_tcp_conn_lock);
-static LIST_HEAD(rds_tcp_conn_list);
-
static struct kmem_cache *rds_tcp_conn_slab;
#define RDS_TCP_DEFAULT_BUFSIZE (128 * 1024)
@@ -94,14 +85,17 @@ u32 rds_tcp_snd_una(struct rds_tcp_connection *tc)
void rds_tcp_restore_callbacks(struct socket *sock,
struct rds_tcp_connection *tc)
{
+ struct net *net = sock_net(sock->sk);
+
rdsdebug("restoring sock %p callbacks from tc %p\n", sock, tc);
+
write_lock_bh(&sock->sk->sk_callback_lock);
/* done under the callback_lock to serialize with write_space */
- spin_lock(&rds_tcp_tc_list_lock);
+ spin_lock(&net->rds.rds_tcp_tc_list_lock);
list_del_init(&tc->t_list_item);
- rds_tcp_tc_count--;
- spin_unlock(&rds_tcp_tc_list_lock);
+ net->rds.rds_tcp_tc_count--;
+ spin_unlock(&net->rds.rds_tcp_tc_list_lock);
tc->t_sock = NULL;
@@ -120,16 +114,19 @@ void rds_tcp_restore_callbacks(struct socket *sock,
*/
void rds_tcp_set_callbacks(struct socket *sock, struct rds_connection *conn)
{
+ struct net *net = sock_net(sock->sk);
+
struct rds_tcp_connection *tc = conn->c_transport_data;
rdsdebug("setting sock %p callbacks to tc %p\n", sock, tc);
+
write_lock_bh(&sock->sk->sk_callback_lock);
/* done under the callback_lock to serialize with write_space */
- spin_lock(&rds_tcp_tc_list_lock);
- list_add_tail(&tc->t_list_item, &rds_tcp_tc_list);
- rds_tcp_tc_count++;
- spin_unlock(&rds_tcp_tc_list_lock);
+ spin_lock(&net->rds.rds_tcp_tc_list_lock);
+ list_add_tail(&tc->t_list_item, &net->rds.rds_tcp_tc_list);
+ net->rds.rds_tcp_tc_count++;
+ spin_unlock(&net->rds.rds_tcp_tc_list_lock);
/* accepted sockets need our listen data ready undone */
if (sock->sk->sk_data_ready == rds_tcp_listen_data_ready)
@@ -153,18 +150,19 @@ static void rds_tcp_tc_info(struct socket *sock, unsigned int len,
struct rds_info_iterator *iter,
struct rds_info_lengths *lens)
{
+ struct net *net = sock_net(sock->sk);
struct rds_info_tcp_socket tsinfo;
struct rds_tcp_connection *tc;
- unsigned long flags;
struct sockaddr_in sin;
+ unsigned long flags;
int sinlen;
- spin_lock_irqsave(&rds_tcp_tc_list_lock, flags);
+ spin_lock_irqsave(&net->rds.rds_tcp_tc_list_lock, flags);
- if (len / sizeof(tsinfo) < rds_tcp_tc_count)
+ if (len / sizeof(tsinfo) < net->rds.rds_tcp_tc_count)
goto out;
- list_for_each_entry(tc, &rds_tcp_tc_list, t_list_item) {
+ list_for_each_entry(tc, &net->rds.rds_tcp_tc_list, t_list_item) {
sock->ops->getname(sock, (struct sockaddr *)&sin, &sinlen, 0);
tsinfo.local_addr = sin.sin_addr.s_addr;
@@ -183,10 +181,10 @@ static void rds_tcp_tc_info(struct socket *sock, unsigned int len,
}
out:
- lens->nr = rds_tcp_tc_count;
+ lens->nr = net->rds.rds_tcp_tc_count;
lens->each = sizeof(tsinfo);
- spin_unlock_irqrestore(&rds_tcp_tc_list_lock, flags);
+ spin_unlock_irqrestore(&net->rds.rds_tcp_tc_list_lock, flags);
}
static int rds_tcp_laddr_check(__be32 addr)
@@ -198,6 +196,7 @@ static int rds_tcp_laddr_check(__be32 addr)
static int rds_tcp_conn_alloc(struct rds_connection *conn, gfp_t gfp)
{
+ struct net *net = rds_conn_to_net(conn);
struct rds_tcp_connection *tc;
tc = kmem_cache_alloc(rds_tcp_conn_slab, gfp);
@@ -211,9 +210,9 @@ static int rds_tcp_conn_alloc(struct rds_connection *conn, gfp_t gfp)
conn->c_transport_data = tc;
- spin_lock_irq(&rds_tcp_conn_lock);
- list_add_tail(&tc->t_tcp_node, &rds_tcp_conn_list);
- spin_unlock_irq(&rds_tcp_conn_lock);
+ spin_lock_irq(&net->rds.rds_tcp_conn_lock);
+ list_add_tail(&tc->t_tcp_node, &net->rds.rds_tcp_conn_list);
+ spin_unlock_irq(&net->rds.rds_tcp_conn_lock);
rdsdebug("alloced tc %p\n", conn->c_transport_data);
return 0;
@@ -225,23 +224,23 @@ static void rds_tcp_conn_free(struct net *net, void *arg)
unsigned long flags;
rdsdebug("freeing tc %p\n", tc);
- spin_lock_irqsave(&rds_tcp_conn_lock, flags);
+ spin_lock_irqsave(&net->rds.rds_tcp_conn_lock, flags);
list_del(&tc->t_tcp_node);
- spin_unlock_irqrestore(&rds_tcp_conn_lock, flags);
+ spin_unlock_irqrestore(&net->rds.rds_tcp_conn_lock, flags);
kmem_cache_free(rds_tcp_conn_slab, tc);
}
-static void rds_tcp_destroy_conns(void)
+static void rds_tcp_destroy_conns(struct net *net)
{
struct rds_tcp_connection *tc, *_tc;
LIST_HEAD(tmp_list);
/* avoid calling conn_destroy with irqs off */
- spin_lock_irq(&rds_tcp_conn_lock);
- list_splice(&rds_tcp_conn_list, &tmp_list);
- INIT_LIST_HEAD(&rds_tcp_conn_list);
- spin_unlock_irq(&rds_tcp_conn_lock);
+ spin_lock_irq(&net->rds.rds_tcp_conn_lock);
+ list_splice(&net->rds.rds_tcp_conn_list, &tmp_list);
+ INIT_LIST_HEAD(&net->rds.rds_tcp_conn_list);
+ spin_unlock_irq(&net->rds.rds_tcp_conn_lock);
list_for_each_entry_safe(tc, _tc, &tmp_list, t_tcp_node) {
if (tc->conn->c_passive)
@@ -250,11 +249,41 @@ static void rds_tcp_destroy_conns(void)
}
}
+static int __net_init rds_tcp_pernet_init(struct net *net)
+{
+ INIT_LIST_HEAD(&net->rds.rds_tcp_tc_list);
+ spin_lock_init(&net->rds.rds_tcp_tc_list_lock);
+ net->rds.rds_tcp_tc_count = 0;
+ INIT_LIST_HEAD(&net->rds.rds_tcp_conn_list);
+ spin_lock_init(&net->rds.rds_tcp_conn_lock);
+
+ return 0;
+}
+
+static void __net_exit rds_tcp_pernet_exit(struct net *net)
+{
+ rds_tcp_destroy_conns(net);
+}
+
+static struct pernet_operations __net_initdata rds_tcp_net_ops = {
+ .init = rds_tcp_pernet_init,
+ .exit = rds_tcp_pernet_exit,
+};
+
+static void rds_tcp_net_exit(void)
+{
+ unregister_pernet_subsys(&rds_tcp_net_ops);
+}
+
+static int rds_tcp_net_init(void)
+{
+ return register_pernet_subsys(&rds_tcp_net_ops);
+}
+
static void rds_tcp_exit(void)
{
rds_info_deregister_func(RDS_INFO_TCP_SOCKETS, rds_tcp_tc_info);
rds_tcp_listen_stop();
- rds_tcp_destroy_conns();
rds_trans_unregister(&rds_tcp_transport);
rds_tcp_recv_exit();
kmem_cache_destroy(rds_tcp_conn_slab);
@@ -274,7 +303,7 @@ struct rds_transport rds_tcp_transport = {
.inc_copy_to_user = rds_tcp_inc_copy_to_user,
.inc_free = rds_tcp_inc_free,
.stats_info_copy = rds_tcp_stats_info_copy,
- .exit = rds_tcp_exit,
+ .exit = rds_tcp_pernet_exit,
.t_owner = THIS_MODULE,
.t_name = "tcp",
.t_type = RDS_TRANS_TCP,
@@ -301,14 +330,20 @@ static int rds_tcp_init(void)
if (ret)
goto out_recv;
- ret = rds_tcp_listen_init();
+ ret = rds_tcp_net_init();
if (ret)
goto out_register;
+ ret = rds_tcp_listen_init();
+ if (ret)
+ goto out_net;
+
rds_info_register_func(RDS_INFO_TCP_SOCKETS, rds_tcp_tc_info);
goto out;
+out_net:
+ rds_tcp_net_exit();
out_register:
rds_trans_unregister(&rds_tcp_transport);
out_recv:
--
1.7.4.1
More information about the rds-devel
mailing list