diff --git a/net/rds/connection.c b/net/rds/connection.c index 7c902bc..5694112 100644 --- a/net/rds/connection.c +++ b/net/rds/connection.c @@ -127,7 +127,7 @@ static struct rds_connection *__rds_conn_create(__be32 laddr, __be32 faddr, struct rds_transport *trans, gfp_t gfp, int is_outgoing) { - struct rds_connection *conn, *tmp, *parent = NULL; + struct rds_connection *conn, *parent = NULL; struct hlist_head *head = rds_conn_bucket(laddr, faddr); unsigned long flags; int ret; @@ -217,26 +217,40 @@ static struct rds_connection *__rds_conn_create(__be32 laddr, __be32 faddr, trans->t_name ? trans->t_name : "[unknown]", is_outgoing ? "(outgoing)" : ""); + /* + * Since we ran without holding the conn lock, someone could + * have created the same conn (either normal or passive) in the + * interim. We check while holding the lock. If we won, we complete + * init and return our conn. If we lost, we rollback and return the + * other one. + */ spin_lock_irqsave(&rds_conn_lock, flags); - if (parent == NULL) { - tmp = rds_conn_lookup(head, laddr, faddr, trans); - if (tmp == NULL) - hlist_add_head(&conn->c_hash_node, head); - } else { - tmp = parent->c_passive; - if (!tmp) + if (parent) { + /* Creating passive conn */ + if (parent->c_passive) { + trans->conn_free(conn->c_transport_data); + kmem_cache_free(rds_conn_slab, conn); + conn = parent->c_passive; + } else { parent->c_passive = conn; - } - - if (tmp) { - trans->conn_free(conn->c_transport_data); - kmem_cache_free(rds_conn_slab, conn); - conn = tmp; + rds_cong_add_conn(conn); + rds_conn_count++; + } } else { - rds_cong_add_conn(conn); - rds_conn_count++; + /* Creating normal conn */ + struct rds_connection *found; + + found = rds_conn_lookup(head, laddr, faddr, trans); + if (found) { + trans->conn_free(conn->c_transport_data); + kmem_cache_free(rds_conn_slab, conn); + conn = found; + } else { + hlist_add_head(&conn->c_hash_node, head); + rds_cong_add_conn(conn); + rds_conn_count++; + } } - spin_unlock_irqrestore(&rds_conn_lock, flags); out: diff --git a/net/rds/ib.c b/net/rds/ib.c index dd99b44..88870d1 100644 --- a/net/rds/ib.c +++ b/net/rds/ib.c @@ -85,9 +85,6 @@ void rds_ib_add_one(struct ib_device *device) rds_ibdev->max_wrs = dev_attr->max_qp_wr; rds_ibdev->max_sge = min(dev_attr->max_sge, RDS_IB_MAX_SGE); - rds_ibdev->fmr_page_shift = max(9, ffs(dev_attr->page_size_cap) - 1); - rds_ibdev->fmr_page_size = 1 << rds_ibdev->fmr_page_shift; - rds_ibdev->fmr_page_mask = ~((u64) rds_ibdev->fmr_page_size - 1); rds_ibdev->fmr_max_remaps = dev_attr->max_map_per_fmr?: 32; rds_ibdev->max_fmrs = dev_attr->max_fmr ? min_t(unsigned int, dev_attr->max_fmr, fmr_pool_size) : diff --git a/net/rds/ib.h b/net/rds/ib.h index 1aca6d5..8b3becc 100644 --- a/net/rds/ib.h +++ b/net/rds/ib.h @@ -98,6 +98,7 @@ struct rds_ib_connection { struct rds_ib_send_work *i_sends; /* rx */ + struct tasklet_struct i_recv_tasklet; struct mutex i_recv_mutex; struct rds_ib_work_ring i_recv_ring; struct rds_ib_incoming *i_ibinc; @@ -159,9 +160,6 @@ struct rds_ib_device { struct ib_pd *pd; struct ib_mr *mr; struct rds_ib_mr_pool *mr_pool; - int fmr_page_shift; - int fmr_page_size; - u64 fmr_page_mask; unsigned int fmr_max_remaps; unsigned int max_fmrs; int max_sge; @@ -306,6 +304,7 @@ void rds_ib_inc_free(struct rds_incoming *inc); int rds_ib_inc_copy_to_user(struct rds_incoming *inc, struct iovec *iov, size_t size); void rds_ib_recv_cq_comp_handler(struct ib_cq *cq, void *context); +void rds_ib_recv_tasklet_fn(unsigned long data); void rds_ib_recv_init_ring(struct rds_ib_connection *ic); void rds_ib_recv_clear_ring(struct rds_ib_connection *ic); void rds_ib_recv_init_ack(struct rds_ib_connection *ic); diff --git a/net/rds/ib_cm.c b/net/rds/ib_cm.c index b376bd1..20191ba 100644 --- a/net/rds/ib_cm.c +++ b/net/rds/ib_cm.c @@ -203,10 +203,11 @@ static void rds_ib_qp_event_handler(struct ib_event *event, void *data) rdma_notify(ic->i_cm_id, IB_EVENT_COMM_EST); break; default: - rds_ib_conn_error(conn, "RDS/IB: Fatal QP Event %u " + rdsdebug("Fatal QP Event %u " "- connection %u.%u.%u.%u->%u.%u.%u.%u...reconnecting\n", event->event, NIPQUAD(conn->c_laddr), NIPQUAD(conn->c_faddr)); + rds_conn_drop(conn); break; } } @@ -695,6 +696,8 @@ int rds_ib_conn_alloc(struct rds_connection *conn, gfp_t gfp) return -ENOMEM; INIT_LIST_HEAD(&ic->ib_node); + tasklet_init(&ic->i_recv_tasklet, rds_ib_recv_tasklet_fn, + (unsigned long) ic); mutex_init(&ic->i_recv_mutex); #ifndef KERNEL_HAS_ATOMIC64 spin_lock_init(&ic->i_ack_lock); diff --git a/net/rds/ib_rdma.c b/net/rds/ib_rdma.c index 81033af..9c42434 100644 --- a/net/rds/ib_rdma.c +++ b/net/rds/ib_rdma.c @@ -187,11 +187,8 @@ void __rds_ib_destroy_conns(struct list_head *list, spinlock_t *list_lock) INIT_LIST_HEAD(list); spin_unlock_irq(list_lock); - list_for_each_entry_safe(ic, _ic, &tmp_list, ib_node) { - if (ic->conn->c_passive) - rds_conn_destroy(ic->conn->c_passive); + list_for_each_entry_safe(ic, _ic, &tmp_list, ib_node) rds_conn_destroy(ic->conn); - } } struct rds_ib_mr_pool *rds_ib_create_mr_pool(struct rds_ib_device *rds_ibdev) @@ -211,7 +208,7 @@ struct rds_ib_mr_pool *rds_ib_create_mr_pool(struct rds_ib_device *rds_ibdev) pool->fmr_attr.max_pages = fmr_message_size; pool->fmr_attr.max_maps = rds_ibdev->fmr_max_remaps; - pool->fmr_attr.page_shift = rds_ibdev->fmr_page_shift; + pool->fmr_attr.page_shift = PAGE_SHIFT; pool->max_free_pinned = rds_ibdev->max_fmrs * fmr_message_size / 4; /* We never allow more than max_items MRs to be allocated. @@ -237,8 +234,8 @@ void rds_ib_destroy_mr_pool(struct rds_ib_mr_pool *pool) { flush_workqueue(rds_wq); rds_ib_flush_mr_pool(pool, 1); - BUG_ON(atomic_read(&pool->item_count)); - BUG_ON(atomic_read(&pool->free_pinned)); + WARN_ON(atomic_read(&pool->item_count)); + WARN_ON(atomic_read(&pool->free_pinned)); kfree(pool); } @@ -349,13 +346,13 @@ static int rds_ib_map_fmr(struct rds_ib_device *rds_ibdev, struct rds_ib_mr *ibm unsigned int dma_len = ib_sg_dma_len(dev, &scat[i]); u64 dma_addr = ib_sg_dma_address(dev, &scat[i]); - if (dma_addr & ~rds_ibdev->fmr_page_mask) { + if (dma_addr & ~PAGE_MASK) { if (i > 0) return -EINVAL; else ++page_cnt; } - if ((dma_addr + dma_len) & ~rds_ibdev->fmr_page_mask) { + if ((dma_addr + dma_len) & ~PAGE_MASK) { if (i < sg_dma_len - 1) return -EINVAL; else @@ -365,7 +362,7 @@ static int rds_ib_map_fmr(struct rds_ib_device *rds_ibdev, struct rds_ib_mr *ibm len += dma_len; } - page_cnt += len >> rds_ibdev->fmr_page_shift; + page_cnt += len >> PAGE_SHIFT; if (page_cnt > fmr_message_size) return -EINVAL; @@ -378,9 +375,9 @@ static int rds_ib_map_fmr(struct rds_ib_device *rds_ibdev, struct rds_ib_mr *ibm unsigned int dma_len = ib_sg_dma_len(dev, &scat[i]); u64 dma_addr = ib_sg_dma_address(dev, &scat[i]); - for (j = 0; j < dma_len; j += rds_ibdev->fmr_page_size) + for (j = 0; j < dma_len; j += PAGE_SIZE) dma_pages[page_cnt++] = - (dma_addr & rds_ibdev->fmr_page_mask) + j; + (dma_addr & PAGE_MASK) + j; } ret = ib_map_phys_fmr(ibmr->fmr, @@ -443,6 +440,7 @@ static void __rds_ib_teardown_mr(struct rds_ib_mr *ibmr) /* FIXME we need a way to tell a r/w MR * from a r/o MR */ + BUG_ON(irqs_disabled()); set_page_dirty(page); put_page(page); } diff --git a/net/rds/ib_recv.c b/net/rds/ib_recv.c index 29ec8be..3513c43 100644 --- a/net/rds/ib_recv.c +++ b/net/rds/ib_recv.c @@ -143,15 +143,16 @@ static int rds_ib_recv_refill_one(struct rds_connection *conn, int ret = -ENOMEM; if (recv->r_ibinc == NULL) { - if (atomic_read(&rds_ib_allocation) >= rds_ib_sysctl_max_recv_allocation) { + if (!atomic_add_unless(&rds_ib_allocation, 1, rds_ib_sysctl_max_recv_allocation)) { rds_ib_stats_inc(s_ib_rx_alloc_limit); goto out; } recv->r_ibinc = kmem_cache_alloc(rds_ib_incoming_slab, kptr_gfp); - if (recv->r_ibinc == NULL) + if (recv->r_ibinc == NULL) { + atomic_dec(&rds_ib_allocation); goto out; - atomic_inc(&rds_ib_allocation); + } INIT_LIST_HEAD(&recv->r_ibinc->ii_frags); rds_inc_init(&recv->r_ibinc->ii_inc, conn, conn->c_faddr); } @@ -824,17 +825,22 @@ void rds_ib_recv_cq_comp_handler(struct ib_cq *cq, void *context) { struct rds_connection *conn = context; struct rds_ib_connection *ic = conn->c_transport_data; - struct ib_wc wc; - struct rds_ib_ack_state state = { 0, }; - struct rds_ib_recv_work *recv; rdsdebug("conn %p cq %p\n", conn, cq); rds_ib_stats_inc(s_ib_rx_cq_call); - ib_req_notify_cq(cq, IB_CQ_SOLICITED); + tasklet_schedule(&ic->i_recv_tasklet); +} + +static inline void rds_poll_cq(struct rds_ib_connection *ic, + struct rds_ib_ack_state *state) +{ + struct rds_connection *conn = ic->conn; + struct ib_wc wc; + struct rds_ib_recv_work *recv; - while (ib_poll_cq(cq, 1, &wc) > 0) { + while (ib_poll_cq(ic->i_recv_cq, 1, &wc) > 0) { rdsdebug("wc wr_id 0x%llx status %u byte_len %u imm_data %u\n", (unsigned long long)wc.wr_id, wc.status, wc.byte_len, be32_to_cpu(wc.ex.imm_data)); @@ -852,7 +858,7 @@ void rds_ib_recv_cq_comp_handler(struct ib_cq *cq, void *context) if (rds_conn_up(conn) || rds_conn_connecting(conn)) { /* We expect errors as the qp is drained during shutdown */ if (wc.status == IB_WC_SUCCESS) { - rds_ib_process_recv(conn, recv, wc.byte_len, &state); + rds_ib_process_recv(conn, recv, wc.byte_len, state); } else { rds_ib_conn_error(conn, "recv completion on " "%u.%u.%u.%u had status %u, disconnecting and " @@ -864,6 +870,19 @@ void rds_ib_recv_cq_comp_handler(struct ib_cq *cq, void *context) rds_ib_ring_free(&ic->i_recv_ring, 1); } +} + +void rds_ib_recv_tasklet_fn(unsigned long data) +{ + struct rds_ib_connection *ic = (struct rds_ib_connection *) data; + struct rds_connection *conn = ic->conn; + struct rds_ib_ack_state state = { 0, }; + + + rds_poll_cq(ic, &state); + ib_req_notify_cq(ic->i_recv_cq, IB_CQ_SOLICITED); + rds_poll_cq(ic, &state); + if (state.ack_next_valid) rds_ib_set_ack(ic, state.ack_next, state.ack_required); if (state.ack_recv_valid && state.ack_recv > ic->i_ack_recv) { diff --git a/net/rds/ib_send.c b/net/rds/ib_send.c index f42c512..fff7a57 100644 --- a/net/rds/ib_send.c +++ b/net/rds/ib_send.c @@ -243,8 +243,12 @@ void rds_ib_send_cq_comp_handler(struct ib_cq *cq, void *context) struct rds_message *rm; rm = rds_send_get_message(conn, send->s_op); - if (rm) + if (rm) { + if (rm->m_rdma_op) + rds_ib_send_unmap_rdma(ic, rm->m_rdma_op); rds_ib_send_rdma_complete(rm, wc.status); + rds_message_put(rm); + } } oldest = (oldest + 1) % ic->i_send_ring.w_nr; @@ -482,6 +486,13 @@ int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm, BUG_ON(off % RDS_FRAG_SIZE); BUG_ON(hdr_off != 0 && hdr_off != sizeof(struct rds_header)); + /* Do not send cong updates to IB loopback */ + if (conn->c_loopback + && rm->m_inc.i_hdr.h_flags & RDS_FLAG_CONG_BITMAP) { + rds_cong_map_updated(conn->c_fcong, ~(u64) 0); + return sizeof(struct rds_header) + RDS_CONG_MAP_BYTES; + } + /* FIXME we may overallocate here */ if (be32_to_cpu(rm->m_inc.i_hdr.h_len) == 0) i = 1; diff --git a/net/rds/iw.c b/net/rds/iw.c index 23f6e31..c0dbbee 100644 --- a/net/rds/iw.c +++ b/net/rds/iw.c @@ -83,23 +83,16 @@ void rds_iw_add_one(struct ib_device *device) rds_iwdev->max_wrs = dev_attr->max_qp_wr; rds_iwdev->max_sge = min(dev_attr->max_sge, RDS_IW_MAX_SGE); - rds_iwdev->page_shift = max(PAGE_SHIFT, ffs(dev_attr->page_size_cap) - 1); - rds_iwdev->dev = device; rds_iwdev->pd = ib_alloc_pd(device); if (IS_ERR(rds_iwdev->pd)) goto free_dev; if (!rds_iwdev->dma_local_lkey) { - if (device->node_type != RDMA_NODE_RNIC) { - rds_iwdev->mr = ib_get_dma_mr(rds_iwdev->pd, - IB_ACCESS_LOCAL_WRITE); - } else { - rds_iwdev->mr = ib_get_dma_mr(rds_iwdev->pd, - IB_ACCESS_REMOTE_READ | - IB_ACCESS_REMOTE_WRITE | - IB_ACCESS_LOCAL_WRITE); - } + rds_iwdev->mr = ib_get_dma_mr(rds_iwdev->pd, + IB_ACCESS_REMOTE_READ | + IB_ACCESS_REMOTE_WRITE | + IB_ACCESS_LOCAL_WRITE); if (IS_ERR(rds_iwdev->mr)) goto err_pd; } else diff --git a/net/rds/iw.h b/net/rds/iw.h index 60ab5d6..1084832 100644 --- a/net/rds/iw.h +++ b/net/rds/iw.h @@ -181,7 +181,6 @@ struct rds_iw_device { struct ib_pd *pd; struct ib_mr *mr; struct rds_iw_mr_pool *mr_pool; - int page_shift; int max_sge; unsigned int max_wrs; unsigned int dma_local_lkey:1; diff --git a/net/rds/iw_cm.c b/net/rds/iw_cm.c index f4e2eef..33f98fb 100644 --- a/net/rds/iw_cm.c +++ b/net/rds/iw_cm.c @@ -156,9 +156,11 @@ static void rds_iw_qp_event_handler(struct ib_event *event, void *data) case IB_EVENT_QP_REQ_ERR: case IB_EVENT_QP_FATAL: default: - rds_iw_conn_error(conn, "RDS/IW: Fatal QP Event %u - connection %u.%u.%u.%u->%u.%u.%u.%u...reconnecting\n", + rdsdebug("Fatal QP Event %u " + "- connection %u.%u.%u.%u->%u.%u.%u.%u...reconnecting\n", event->event, NIPQUAD(conn->c_laddr), NIPQUAD(conn->c_faddr)); + rds_conn_drop(conn); break; } } diff --git a/net/rds/iw_rdma.c b/net/rds/iw_rdma.c index a697b55..e1dd2bd 100644 --- a/net/rds/iw_rdma.c +++ b/net/rds/iw_rdma.c @@ -243,11 +243,8 @@ void __rds_iw_destroy_conns(struct list_head *list, spinlock_t *list_lock) INIT_LIST_HEAD(list); spin_unlock_irq(list_lock); - list_for_each_entry_safe(ic, _ic, &tmp_list, iw_node) { - if (ic->conn->c_passive) - rds_conn_destroy(ic->conn->c_passive); + list_for_each_entry_safe(ic, _ic, &tmp_list, iw_node) rds_conn_destroy(ic->conn); - } } static void rds_iw_set_scatterlist(struct rds_iw_scatterlist *sg, @@ -261,18 +258,12 @@ static void rds_iw_set_scatterlist(struct rds_iw_scatterlist *sg, } static u64 *rds_iw_map_scatterlist(struct rds_iw_device *rds_iwdev, - struct rds_iw_scatterlist *sg, - unsigned int dma_page_shift) + struct rds_iw_scatterlist *sg) { struct ib_device *dev = rds_iwdev->dev; u64 *dma_pages = NULL; - u64 dma_mask; - unsigned int dma_page_size; int i, j, ret; - dma_page_size = 1 << dma_page_shift; - dma_mask = dma_page_size - 1; - WARN_ON(sg->dma_len); sg->dma_len = ib_dma_map_sg(dev, sg->list, sg->len, DMA_BIDIRECTIONAL); @@ -293,18 +284,18 @@ static u64 *rds_iw_map_scatterlist(struct rds_iw_device *rds_iwdev, sg->bytes += dma_len; end_addr = dma_addr + dma_len; - if (dma_addr & dma_mask) { + if (dma_addr & PAGE_MASK) { if (i > 0) goto out_unmap; - dma_addr &= ~dma_mask; + dma_addr &= ~PAGE_MASK; } - if (end_addr & dma_mask) { + if (end_addr & PAGE_MASK) { if (i < sg->dma_len - 1) goto out_unmap; - end_addr = (end_addr + dma_mask) & ~dma_mask; + end_addr = (end_addr + PAGE_MASK) & ~PAGE_MASK; } - sg->dma_npages += (end_addr - dma_addr) >> dma_page_shift; + sg->dma_npages += (end_addr - dma_addr) >> PAGE_SHIFT; } /* Now gather the dma addrs into one list */ @@ -323,8 +314,8 @@ static u64 *rds_iw_map_scatterlist(struct rds_iw_device *rds_iwdev, u64 end_addr; end_addr = dma_addr + dma_len; - dma_addr &= ~dma_mask; - for (; dma_addr < end_addr; dma_addr += dma_page_size) + dma_addr &= ~PAGE_MASK; + for (; dma_addr < end_addr; dma_addr += PAGE_SIZE) dma_pages[j++] = dma_addr; BUG_ON(j > sg->dma_npages); } @@ -724,7 +715,7 @@ static int rds_iw_rdma_build_fastreg(struct rds_iw_mapping *mapping) f_wr.wr.fast_reg.rkey = mapping->m_rkey; f_wr.wr.fast_reg.page_list = ibmr->page_list; f_wr.wr.fast_reg.page_list_len = mapping->m_sg.dma_len; - f_wr.wr.fast_reg.page_shift = ibmr->device->page_shift; + f_wr.wr.fast_reg.page_shift = PAGE_SHIFT; f_wr.wr.fast_reg.access_flags = IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_READ | IB_ACCESS_REMOTE_WRITE; @@ -781,9 +772,7 @@ static int rds_iw_map_fastreg(struct rds_iw_mr_pool *pool, rds_iw_set_scatterlist(&mapping->m_sg, sg, sg_len); - dma_pages = rds_iw_map_scatterlist(rds_iwdev, - &mapping->m_sg, - rds_iwdev->page_shift); + dma_pages = rds_iw_map_scatterlist(rds_iwdev, &mapping->m_sg); if (IS_ERR(dma_pages)) { ret = PTR_ERR(dma_pages); dma_pages = NULL; diff --git a/net/rds/iw_recv.c b/net/rds/iw_recv.c index 02f0d98..3b6d418 100644 --- a/net/rds/iw_recv.c +++ b/net/rds/iw_recv.c @@ -143,15 +143,16 @@ static int rds_iw_recv_refill_one(struct rds_connection *conn, int ret = -ENOMEM; if (recv->r_iwinc == NULL) { - if (atomic_read(&rds_iw_allocation) >= rds_iw_sysctl_max_recv_allocation) { + if (!atomic_add_unless(&rds_iw_allocation, 1, rds_iw_sysctl_max_recv_allocation)) { rds_iw_stats_inc(s_iw_rx_alloc_limit); goto out; } recv->r_iwinc = kmem_cache_alloc(rds_iw_incoming_slab, kptr_gfp); - if (recv->r_iwinc == NULL) + if (recv->r_iwinc == NULL) { + atomic_dec(&rds_iw_allocation); goto out; - atomic_inc(&rds_iw_allocation); + } INIT_LIST_HEAD(&recv->r_iwinc->ii_frags); rds_inc_init(&recv->r_iwinc->ii_inc, conn, conn->c_faddr); } diff --git a/net/rds/iw_send.c b/net/rds/iw_send.c index 2a78fda..d8d8c18 100644 --- a/net/rds/iw_send.c +++ b/net/rds/iw_send.c @@ -780,7 +780,7 @@ static void rds_iw_build_send_fastreg(struct rds_iw_device *rds_iwdev, struct rd send->s_wr.wr.fast_reg.rkey = send->s_mr->rkey; send->s_wr.wr.fast_reg.page_list = send->s_page_list; send->s_wr.wr.fast_reg.page_list_len = nent; - send->s_wr.wr.fast_reg.page_shift = rds_iwdev->page_shift; + send->s_wr.wr.fast_reg.page_shift = PAGE_SHIFT; send->s_wr.wr.fast_reg.access_flags = IB_ACCESS_REMOTE_WRITE; send->s_wr.wr.fast_reg.iova_start = sg_addr; diff --git a/net/rds/loop.c b/net/rds/loop.c index 40fa729..57ea9de 100644 --- a/net/rds/loop.c +++ b/net/rds/loop.c @@ -80,16 +80,9 @@ static int rds_loop_xmit_cong_map(struct rds_connection *conn, struct rds_cong_map *map, unsigned long offset) { - unsigned long i; - BUG_ON(offset); BUG_ON(map != conn->c_lcong); - for (i = 0; i < RDS_CONG_MAP_PAGES; i++) { - memcpy((void *)conn->c_fcong->m_page_addrs[i], - (void *)map->m_page_addrs[i], PAGE_SIZE); - } - rds_cong_map_updated(conn->c_fcong, ~(u64) 0); return sizeof(struct rds_header) + RDS_CONG_MAP_BYTES; diff --git a/net/rds/rdma.c b/net/rds/rdma.c index 9c0e98a..d15e229 100644 --- a/net/rds/rdma.c +++ b/net/rds/rdma.c @@ -419,8 +419,10 @@ void rds_rdma_free_op(struct rds_rdma_op *ro) /* Mark page dirty if it was possibly modified, which * is the case for a RDMA_READ which copies from remote * to local memory */ - if (!ro->r_write) + if (!ro->r_write) { + BUG_ON(irqs_disabled()); set_page_dirty(page); + } put_page(page); } diff --git a/net/rds/rdma_transport.c b/net/rds/rdma_transport.c index 418f7d5..ddefc56 100644 --- a/net/rds/rdma_transport.c +++ b/net/rds/rdma_transport.c @@ -101,7 +101,7 @@ int rds_rdma_cm_event_handler(struct rdma_cm_id *cm_id, break; case RDMA_CM_EVENT_DISCONNECTED: - printk(KERN_WARNING "RDS/RDMA: DISCONNECT event - dropping connection " + rdsdebug("DISCONNECT event - dropping connection " "%u.%u.%u.%u->%u.%u.%u.%u\n", NIPQUAD(conn->c_laddr), NIPQUAD(conn->c_faddr)); rds_conn_drop(conn); diff --git a/net/rds/recv.c b/net/rds/recv.c index 691f8cb..18aebf2 100644 --- a/net/rds/recv.c +++ b/net/rds/recv.c @@ -417,18 +417,18 @@ int rds_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, if (msg_flags & MSG_OOB) goto out; - /* If there are pending notifications, do those - and nothing else */ - if (!list_empty(&rs->rs_notify_queue)) { - ret = rds_notify_queue_get(rs, msg); - goto out; - } + while (1) { + /* If there are pending notifications, do those - and nothing else */ + if (!list_empty(&rs->rs_notify_queue)) { + ret = rds_notify_queue_get(rs, msg); + break; + } - if (rs->rs_cong_notify) { - ret = rds_notify_cong(rs, msg); - goto out; - } + if (rs->rs_cong_notify) { + ret = rds_notify_cong(rs, msg); + break; + } - while (1) { if (!rds_next_incoming(rs, &inc)) { if (nonblock) { ret = -EAGAIN; @@ -436,7 +436,9 @@ int rds_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, } timeo = wait_event_interruptible_timeout(*sk->sk_sleep, - rds_next_incoming(rs, &inc), + (!list_empty(&rs->rs_notify_queue) + || rs->rs_cong_notify + || rds_next_incoming(rs, &inc)), timeo); rdsdebug("recvmsg woke inc %p timeo %ld\n", inc, timeo); diff --git a/net/rds/send.c b/net/rds/send.c index 841c560..518f130 100644 --- a/net/rds/send.c +++ b/net/rds/send.c @@ -507,12 +507,13 @@ EXPORT_SYMBOL_GPL(rds_send_get_message); */ void rds_send_remove_from_sock(struct list_head *messages, int status) { - unsigned long flags = 0; /* silence gcc :P */ + unsigned long flags; struct rds_sock *rs = NULL; struct rds_message *rm; - local_irq_save(flags); while (!list_empty(messages)) { + int was_on_sock = 0; + rm = list_entry(messages->next, struct rds_message, m_conn_item); list_del_init(&rm->m_conn_item); @@ -527,20 +528,19 @@ void rds_send_remove_from_sock(struct list_head *messages, int status) * while we're messing with it. It does not prevent the * message from being removed from the socket, though. */ - spin_lock(&rm->m_rs_lock); + spin_lock_irqsave(&rm->m_rs_lock, flags); if (!test_bit(RDS_MSG_ON_SOCK, &rm->m_flags)) goto unlock_and_drop; if (rs != rm->m_rs) { if (rs) { - spin_unlock(&rs->rs_lock); rds_wake_sk_sleep(rs); sock_put(rds_rs_to_sk(rs)); } rs = rm->m_rs; - spin_lock(&rs->rs_lock); sock_hold(rds_rs_to_sk(rs)); } + spin_lock(&rs->rs_lock); if (test_and_clear_bit(RDS_MSG_ON_SOCK, &rm->m_flags)) { struct rds_rdma_op *ro = rm->m_rdma_op; @@ -558,21 +558,22 @@ void rds_send_remove_from_sock(struct list_head *messages, int status) notifier->n_status = status; rm->m_rdma_op->r_notifier = NULL; } - rds_message_put(rm); + was_on_sock = 1; rm->m_rs = NULL; } + spin_unlock(&rs->rs_lock); unlock_and_drop: - spin_unlock(&rm->m_rs_lock); + spin_unlock_irqrestore(&rm->m_rs_lock, flags); rds_message_put(rm); + if (was_on_sock) + rds_message_put(rm); } if (rs) { - spin_unlock(&rs->rs_lock); rds_wake_sk_sleep(rs); sock_put(rds_rs_to_sk(rs)); } - local_irq_restore(flags); } /* @@ -634,9 +635,6 @@ void rds_send_drop_to(struct rds_sock *rs, struct sockaddr_in *dest) list_move(&rm->m_sock_item, &list); rds_send_sndbuf_remove(rs, rm); clear_bit(RDS_MSG_ON_SOCK, &rm->m_flags); - - /* If this is a RDMA operation, notify the app. */ - __rds_rdma_send_complete(rs, rm, RDS_RDMA_CANCELED); } /* order flag updates with the rs lock */ @@ -645,9 +643,6 @@ void rds_send_drop_to(struct rds_sock *rs, struct sockaddr_in *dest) spin_unlock_irqrestore(&rs->rs_lock, flags); - if (wake) - rds_wake_sk_sleep(rs); - conn = NULL; /* now remove the messages from the conn list as needed */ @@ -655,6 +650,10 @@ void rds_send_drop_to(struct rds_sock *rs, struct sockaddr_in *dest) /* We do this here rather than in the loop above, so that * we don't have to nest m_rs_lock under rs->rs_lock */ spin_lock_irqsave(&rm->m_rs_lock, flags2); + /* If this is a RDMA operation, notify the app. */ + spin_lock(&rs->rs_lock); + __rds_rdma_send_complete(rs, rm, RDS_RDMA_CANCELED); + spin_unlock(&rs->rs_lock); rm->m_rs = NULL; spin_unlock_irqrestore(&rm->m_rs_lock, flags2); @@ -683,6 +682,9 @@ void rds_send_drop_to(struct rds_sock *rs, struct sockaddr_in *dest) if (conn) spin_unlock_irqrestore(&conn->c_lock, flags); + if (wake) + rds_wake_sk_sleep(rs); + while (!list_empty(&list)) { rm = list_entry(list.next, struct rds_message, m_sock_item); list_del_init(&rm->m_sock_item); diff --git a/net/rds/af_rds.c b/net/rds/af_rds.c index c4dab9c..5734f94 100644 --- a/net/rds/af_rds.c +++ b/net/rds/af_rds.c @@ -657,7 +657,7 @@ out: } module_init(rds_init); -#define DRV_VERSION "4.0" +#define DRV_VERSION "4.0-ora-1.4.2-20" #define DRV_RELDATE "Feb 12, 2009" MODULE_AUTHOR("Oracle Corporation ");