[rds-devel] [PATCH] RDS: Correctly reset connections to the nodev list.
Andy Grover
andy.grover at oracle.com
Wed Jan 14 16:13:28 PST 2009
Hi Steve,
Thanks for doing this work.
As I mentioned on irc I'd prefer to make do with a single list_head, and
perhaps a backpointer from rds_xx_connection to rds_xx_device to
differentiate between dev-associated connections and those which are not.
You've already indicated you were willing make this change, so if you
whip that up, I'll get it in pronto.
Thanks again -- Regards -- Andy
Steve Wise wrote:
> From: Steve Wise <swise at opengridcomputing.com>
>
> If a connection gets shut down, then the connection struct needs to be
> put back on the nodev list for proper reconnect handling.
>
> In order to correctly handle things, I ended up adding two list_head
> entries in the rds_xx_connection struct so I can tell at shutdown time
> which list the entry is already on...
>
> Signed-off-by: Steve Wise <swise at opengridcomputing.com>
> ---
>
> drivers/infiniband/ulp/rds/ib.h | 3 ++-
> drivers/infiniband/ulp/rds/ib_cm.c | 36 ++++++++++++++++++++++++++++++----
> drivers/infiniband/ulp/rds/ib_rdma.c | 11 ++++++----
> drivers/infiniband/ulp/rds/iw.h | 3 ++-
> drivers/infiniband/ulp/rds/iw_cm.c | 34 +++++++++++++++++++++++++++++---
> drivers/infiniband/ulp/rds/iw_rdma.c | 11 ++++++----
> 6 files changed, 79 insertions(+), 19 deletions(-)
>
> diff --git a/drivers/infiniband/ulp/rds/ib.h b/drivers/infiniband/ulp/rds/ib.h
> index f7ee4d6..bf3af62 100644
> --- a/drivers/infiniband/ulp/rds/ib.h
> +++ b/drivers/infiniband/ulp/rds/ib.h
> @@ -76,7 +76,8 @@ struct rds_ib_work_ring {
>
> struct rds_ib_connection {
>
> - struct list_head ib_node;
> + struct list_head ib_node_nodev;
> + struct list_head ib_node_conn;
> struct rds_connection *conn;
>
> /* alphabet soup, IBTA style */
> diff --git a/drivers/infiniband/ulp/rds/ib_cm.c b/drivers/infiniband/ulp/rds/ib_cm.c
> index 79afc66..3de2f18 100644
> --- a/drivers/infiniband/ulp/rds/ib_cm.c
> +++ b/drivers/infiniband/ulp/rds/ib_cm.c
> @@ -661,6 +661,11 @@ void rds_ib_conn_shutdown(struct rds_connection *conn)
>
> if (ic->i_cm_id) {
> struct ib_device *dev = ic->i_cm_id->device;
> + struct rds_ib_device *rds_ibdev;
> +
> + BUG_ON(!dev);
> + rds_ibdev = ib_get_client_data(dev, &rds_ib_client);
> + BUG_ON(!rds_ibdev);
>
> rdsdebug("disconnecting cm %p\n", ic->i_cm_id);
> err = rdma_disconnect(ic->i_cm_id);
> @@ -715,6 +720,15 @@ void rds_ib_conn_shutdown(struct rds_connection *conn)
> ic->i_send_hdrs = NULL;
> ic->i_recv_hdrs = NULL;
> ic->i_ack = NULL;
> +
> + /* remove from device connection list */
> + spin_lock_irq(&rds_ibdev->spinlock);
> + if (!list_empty(&ic->ib_node_conn)) {
> + BUG_ON(!list_empty(&ic->ib_node_nodev));
> + list_del_init(&ic->ib_node_conn);
> + }
> + spin_unlock_irq(&rds_ibdev->spinlock);
> +
> }
>
> /* Clear pending transmit */
> @@ -748,6 +762,12 @@ void rds_ib_conn_shutdown(struct rds_connection *conn)
> ic->i_sends = NULL;
> vfree(ic->i_recvs);
> ic->i_recvs = NULL;
> +
> + /* add back to nodev list */
> + spin_lock_irq(&ib_nodev_conns_lock);
> + if (list_empty(&ic->ib_node_nodev))
> + list_add_tail(&ic->ib_node_nodev, &ib_nodev_conns);
> + spin_unlock_irq(&ib_nodev_conns_lock);
> }
>
> int rds_ib_conn_alloc(struct rds_connection *conn, gfp_t gfp)
> @@ -760,7 +780,8 @@ int rds_ib_conn_alloc(struct rds_connection *conn, gfp_t gfp)
> if (ic == NULL)
> return -ENOMEM;
>
> - INIT_LIST_HEAD(&ic->ib_node);
> + INIT_LIST_HEAD(&ic->ib_node_nodev);
> + INIT_LIST_HEAD(&ic->ib_node_conn);
> mutex_init(&ic->i_recv_mutex);
> #ifndef KERNEL_HAS_ATOMIC64
> spin_lock_init(&ic->i_ack_lock);
> @@ -777,10 +798,9 @@ int rds_ib_conn_alloc(struct rds_connection *conn, gfp_t gfp)
> conn->c_transport_data = ic;
>
> spin_lock_irqsave(&ib_nodev_conns_lock, flags);
> - list_add_tail(&ic->ib_node, &ib_nodev_conns);
> + list_add_tail(&ic->ib_node_nodev, &ib_nodev_conns);
> spin_unlock_irqrestore(&ib_nodev_conns_lock, flags);
>
> -
> rdsdebug("conn %p conn ic %p\n", conn, conn->c_transport_data);
> return 0;
> }
> @@ -788,8 +808,16 @@ int rds_ib_conn_alloc(struct rds_connection *conn, gfp_t gfp)
> void rds_ib_conn_free(void *arg)
> {
> struct rds_ib_connection *ic = arg;
> +
> rdsdebug("ic %p\n", ic);
> - list_del(&ic->ib_node);
> +
> + BUG_ON(list_empty(&ic->ib_node_nodev));
> + BUG_ON(!list_empty(&ic->ib_node_conn));
> +
> + spin_lock_irq(&ib_nodev_conns_lock);
> + list_del(&ic->ib_node_nodev);
> + spin_unlock_irq(&ib_nodev_conns_lock);
> +
> kfree(ic);
> }
>
> diff --git a/drivers/infiniband/ulp/rds/ib_rdma.c b/drivers/infiniband/ulp/rds/ib_rdma.c
> index 79d92ee..e8da202 100644
> --- a/drivers/infiniband/ulp/rds/ib_rdma.c
> +++ b/drivers/infiniband/ulp/rds/ib_rdma.c
> @@ -146,12 +146,13 @@ int rds_ib_add_conn(struct rds_ib_device *rds_ibdev, struct rds_connection *conn
> /* conn was previously on the nodev_conns_list */
> spin_lock_irq(&ib_nodev_conns_lock);
> BUG_ON(list_empty(&ib_nodev_conns));
> - BUG_ON(list_empty(&ic->ib_node));
> - list_del(&ic->ib_node);
> + BUG_ON(list_empty(&ic->ib_node_nodev));
> + list_del_init(&ic->ib_node_nodev);
> spin_unlock_irq(&ib_nodev_conns_lock);
>
> spin_lock_irq(&rds_ibdev->spinlock);
> - list_add_tail(&ic->ib_node, &rds_ibdev->conn_list);
> + BUG_ON(!list_empty(&ic->ib_node_conn));
> + list_add_tail(&ic->ib_node_conn, &rds_ibdev->conn_list);
> spin_unlock_irq(&rds_ibdev->spinlock);
>
> return 0;
> @@ -168,7 +169,7 @@ void rds_ib_remove_nodev_conns(void)
> INIT_LIST_HEAD(&ib_nodev_conns);
> spin_unlock_irq(&ib_nodev_conns_lock);
>
> - list_for_each_entry_safe(ic, _ic, &tmp_list, ib_node) {
> + list_for_each_entry_safe(ic, _ic, &tmp_list, ib_node_nodev) {
> if (ic->conn->c_passive)
> rds_conn_destroy(ic->conn->c_passive);
> rds_conn_destroy(ic->conn);
> @@ -186,7 +187,7 @@ void rds_ib_remove_conns(struct rds_ib_device *rds_ibdev)
> INIT_LIST_HEAD(&rds_ibdev->conn_list);
> spin_unlock_irq(&rds_ibdev->spinlock);
>
> - list_for_each_entry_safe(ic, _ic, &tmp_list, ib_node) {
> + list_for_each_entry_safe(ic, _ic, &tmp_list, ib_node_conn) {
> if (ic->conn->c_passive)
> rds_conn_destroy(ic->conn->c_passive);
> rds_conn_destroy(ic->conn);
> diff --git a/drivers/infiniband/ulp/rds/iw.h b/drivers/infiniband/ulp/rds/iw.h
> index 10eebe4..d3cd237 100644
> --- a/drivers/infiniband/ulp/rds/iw.h
> +++ b/drivers/infiniband/ulp/rds/iw.h
> @@ -99,7 +99,8 @@ struct rds_iw_work_ring {
>
> struct rds_iw_connection {
>
> - struct list_head iw_node;
> + struct list_head iw_node_nodev;
> + struct list_head iw_node_conn;
> struct rds_connection *conn;
>
> /* alphabet soup, IBTA style */
> diff --git a/drivers/infiniband/ulp/rds/iw_cm.c b/drivers/infiniband/ulp/rds/iw_cm.c
> index d540e21..bbd3d51 100644
> --- a/drivers/infiniband/ulp/rds/iw_cm.c
> +++ b/drivers/infiniband/ulp/rds/iw_cm.c
> @@ -723,6 +723,11 @@ void rds_iw_conn_shutdown(struct rds_connection *conn)
>
> if (ic->i_cm_id) {
> struct ib_device *dev = ic->i_cm_id->device;
> + struct rds_iw_device *rds_ibdev;
> +
> + BUG_ON(!dev);
> + rds_ibdev = ib_get_client_data(dev, &rds_iw_client);
> + BUG_ON(!rds_ibdev);
>
> rdsdebug("disconnecting cm %p\n", ic->i_cm_id);
> err = rdma_disconnect(ic->i_cm_id);
> @@ -782,6 +787,13 @@ void rds_iw_conn_shutdown(struct rds_connection *conn)
> ic->i_send_hdrs = NULL;
> ic->i_recv_hdrs = NULL;
> ic->i_ack = NULL;
> +
> + /* remove from device connection list */
> + spin_lock_irq(&rds_ibdev->spinlock);
> + if (!list_empty(&ic->iw_node_conn)) {
> + list_del_init(&ic->iw_node_conn);
> + }
> + spin_unlock_irq(&rds_ibdev->spinlock);
> }
>
> /* Clear pending transmit */
> @@ -815,6 +827,13 @@ void rds_iw_conn_shutdown(struct rds_connection *conn)
> ic->i_sends = NULL;
> vfree(ic->i_recvs);
> ic->i_recvs = NULL;
> +
> + /* add back to nodev list */
> + spin_lock_irq(&iw_nodev_conns_lock);
> + if (list_empty(&ic->iw_node_nodev))
> + list_add_tail(&ic->iw_node_nodev, &iw_nodev_conns);
> + spin_unlock_irq(&iw_nodev_conns_lock);
> +
> rdsdebug("shutdown complete\n");
> }
>
> @@ -828,7 +847,8 @@ int rds_iw_conn_alloc(struct rds_connection *conn, gfp_t gfp)
> if (ic == NULL)
> return -ENOMEM;
>
> - INIT_LIST_HEAD(&ic->iw_node);
> + INIT_LIST_HEAD(&ic->iw_node_nodev);
> + INIT_LIST_HEAD(&ic->iw_node_conn);
> mutex_init(&ic->i_recv_mutex);
> #ifndef KERNEL_HAS_ATOMIC64
> spin_lock_init(&ic->i_ack_lock);
> @@ -845,7 +865,7 @@ int rds_iw_conn_alloc(struct rds_connection *conn, gfp_t gfp)
> conn->c_transport_data = ic;
>
> spin_lock_irqsave(&iw_nodev_conns_lock, flags);
> - list_add_tail(&ic->iw_node, &iw_nodev_conns);
> + list_add_tail(&ic->iw_node_nodev, &iw_nodev_conns);
> spin_unlock_irqrestore(&iw_nodev_conns_lock, flags);
>
>
> @@ -856,8 +876,16 @@ int rds_iw_conn_alloc(struct rds_connection *conn, gfp_t gfp)
> void rds_iw_conn_free(void *arg)
> {
> struct rds_iw_connection *ic = arg;
> +
> rdsdebug("ic %p\n", ic);
> - list_del(&ic->iw_node);
> +
> + BUG_ON(list_empty(&ic->iw_node_nodev));
> + BUG_ON(!list_empty(&ic->iw_node_conn));
> +
> + spin_lock_irq(&iw_nodev_conns_lock);
> + list_del(&ic->iw_node_nodev);
> + spin_unlock_irq(&iw_nodev_conns_lock);
> +
> kfree(ic);
> }
>
> diff --git a/drivers/infiniband/ulp/rds/iw_rdma.c b/drivers/infiniband/ulp/rds/iw_rdma.c
> index def6ee6..426d4d3 100644
> --- a/drivers/infiniband/ulp/rds/iw_rdma.c
> +++ b/drivers/infiniband/ulp/rds/iw_rdma.c
> @@ -201,12 +201,13 @@ int rds_iw_add_conn(struct rds_iw_device *rds_ibdev, struct rds_connection *conn
> /* conn was previously on the nodev_conns_list */
> spin_lock_irq(&iw_nodev_conns_lock);
> BUG_ON(list_empty(&iw_nodev_conns));
> - BUG_ON(list_empty(&ic->iw_node));
> - list_del(&ic->iw_node);
> + BUG_ON(list_empty(&ic->iw_node_nodev));
> + list_del_init(&ic->iw_node_nodev);
> spin_unlock_irq(&iw_nodev_conns_lock);
>
> spin_lock_irq(&rds_ibdev->spinlock);
> - list_add_tail(&ic->iw_node, &rds_ibdev->conn_list);
> + BUG_ON(!list_empty(&ic->iw_node_conn));
> + list_add_tail(&ic->iw_node_conn, &rds_ibdev->conn_list);
> spin_unlock_irq(&rds_ibdev->spinlock);
>
> return 0;
> @@ -223,7 +224,7 @@ void rds_iw_remove_nodev_conns(void)
> INIT_LIST_HEAD(&iw_nodev_conns);
> spin_unlock_irq(&iw_nodev_conns_lock);
>
> - list_for_each_entry_safe(ic, _ic, &tmp_list, iw_node) {
> + list_for_each_entry_safe(ic, _ic, &tmp_list, iw_node_nodev) {
> if (ic->conn->c_passive)
> rds_conn_destroy(ic->conn->c_passive);
> rds_conn_destroy(ic->conn);
> @@ -241,7 +242,7 @@ void rds_iw_remove_conns(struct rds_iw_device *rds_ibdev)
> INIT_LIST_HEAD(&rds_ibdev->conn_list);
> spin_unlock_irq(&rds_ibdev->spinlock);
>
> - list_for_each_entry_safe(ic, _ic, &tmp_list, iw_node) {
> + list_for_each_entry_safe(ic, _ic, &tmp_list, iw_node_conn) {
> if (ic->conn->c_passive)
> rds_conn_destroy(ic->conn->c_passive);
> rds_conn_destroy(ic->conn);
More information about the rds-devel
mailing list