[rds-devel] [PATCH RFC RDS/IB] version 2. increase number of
unsignaled work requests
Vladimir Sokolovsky
vlad at dev.mellanox.co.il
Thu Jan 18 04:03:32 PST 2007
Hi,
Here is version 2 of the patch, which allows the number of unsignaled work requests to be configured via the rds_ib_sysctl_max_unsig_wrs parameter:
Receiving side:
Request completion notifications for solicited events only (IB_CQ_SOLICITED instead of IB_CQ_NEXT_COMP).
Sending side:
Added the rds_ib_sysctl_max_unsig_wrs sysctl parameter (exposed as "max_unsignaled_wr"). The default value is a quarter of the send ring size.
Post every rds_ib_sysctl_max_unsig_wrs-th fragment, as well as the last fragment of a message, with both the signaled and solicited-event flags set (IB_SEND_SIGNALED | IB_SEND_SOLICITED).
Signed-off-by: Vladimir Sokolovsky <vlad at mellanox.co.il>
Index: linux/net/rds/ib.h
===================================================================
--- linux/net/rds/ib.h (revision 177)
+++ linux/net/rds/ib.h (working copy)
@@ -155,6 +155,7 @@
void rds_ib_sysctl_exit(void);
extern unsigned long rds_ib_sysctl_max_send_wr;
extern unsigned long rds_ib_sysctl_max_recv_wr;
+extern unsigned long rds_ib_sysctl_max_unsig_wrs;
extern ctl_table rds_ib_sysctl_table[];
#endif
Index: linux/net/rds/ib_send.c
===================================================================
--- linux/net/rds/ib_send.c (revision 177)
+++ linux/net/rds/ib_send.c (working copy)
@@ -51,7 +51,7 @@
send->s_wr.sg_list = send->s_sge;
send->s_wr.num_sge = 1;
send->s_wr.opcode = IB_WR_SEND;
- send->s_wr.send_flags = IB_SEND_SIGNALED;
+ send->s_wr.send_flags = 0;
send->s_wr.imm_data = 0;
send->s_sge[0].lkey = ic->i_mr->lkey;
@@ -86,25 +86,40 @@
struct rds_ib_connection *ic = conn->c_transport_data;
struct ib_wc wc;
struct rds_ib_send_work *send;
+ u32 completed;
+ u32 oldest;
+ u32 i = 0;
+ int ret;
rdsdebug("cq %p conn %p\n", cq, conn);
rds_ib_stats_inc(s_ib_tx_cq_call);
+ ret = ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
+ if (ret) {
+ rdsdebug("ib_req_notify_cq send failed: %d\n", ret);
+ }
- ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
-
while (ib_poll_cq(cq, 1, &wc) > 0 ) {
rdsdebug("wc wr_id 0x%llx status %u byte_len %u imm_data %u\n",
(unsigned long long)wc.wr_id, wc.status, wc.byte_len,
be32_to_cpu(wc.imm_data));
rds_ib_stats_inc(s_ib_tx_cq_event);
- send = &ic->i_sends[rds_ib_ring_oldest(&ic->i_send_ring)];
+ oldest = rds_ib_ring_oldest(&ic->i_send_ring);
+ send = &ic->i_sends[oldest];
- if (send->s_rm)
- rds_ib_send_unmap_rm(ic, send);
- send->s_wr.num_sge = 1;
+ if (oldest <= (unsigned long long)wc.wr_id)
+ completed = (unsigned long long)wc.wr_id - oldest + 1;
+ else
+ completed = ic->i_send_ring.w_nr - oldest + (unsigned long long)wc.wr_id + 1;
- rds_ib_ring_free(&ic->i_send_ring, 1);
+ for (; i < completed; i++) {
+ if (send->s_rm)
+ rds_ib_send_unmap_rm(ic, send);
+ send->s_wr.num_sge = 1;
+ if (++send == &ic->i_sends[ic->i_send_ring.w_nr])
+ send = ic->i_sends;
+ rds_ib_ring_free(&ic->i_send_ring, 1);
+ }
/* We expect errors as the qp is drained during shutdown */
if (wc.status != IB_WC_SUCCESS && !ic->i_wc_err) {
@@ -146,6 +161,7 @@
u32 work_alloc;
int sent;
int ret;
+ static u32 unsignaled_wrs_count;
BUG_ON(off % RDS_FRAG_SIZE);
BUG_ON(hdr_off != 0 && hdr_off != sizeof(struct rds_header));
@@ -155,6 +171,7 @@
rm->m_count = dma_map_sg(ic->i_cm_id->device->dma_device,
rm->m_sg, rm->m_nents, DMA_TO_DEVICE);
rdsdebug("ic %p mapping rm %p: %d\n", ic, rm, rm->m_count);
+ unsignaled_wrs_count = 0;
if (rm->m_count == 0) {
rds_ib_stats_inc(s_ib_tx_sg_mapping_failure);
ret = -ENOMEM; /* XXX ? */
@@ -211,6 +228,15 @@
/* if there's data reference it with a chain of work reqs */
for(; i < work_alloc && scat != &rm->m_sg[rm->m_count]; i++) {
+ ++unsignaled_wrs_count;
+
+ if ( rds_ib_sysctl_max_unsig_wrs > 0 && unsignaled_wrs_count >= rds_ib_sysctl_max_unsig_wrs ) {
+ unsignaled_wrs_count = 0;
+ send->s_wr.send_flags = IB_SEND_SIGNALED | IB_SEND_SOLICITED;
+ }
+ else
+ send->s_wr.send_flags = 0;
+
send->s_wr.next = NULL;
if (prev)
prev->s_wr.next = &send->s_wr;
@@ -237,6 +263,7 @@
/* if we finished the message then send completion owns it */
if (scat == &rm->m_sg[rm->m_count]) {
prev->s_rm = ic->i_rm;
+ prev->s_wr.send_flags = IB_SEND_SIGNALED | IB_SEND_SOLICITED;
ic->i_rm = NULL;
}
Index: linux/net/rds/ib_recv.c
===================================================================
--- linux/net/rds/ib_recv.c (revision 177)
+++ linux/net/rds/ib_recv.c (working copy)
@@ -465,8 +465,7 @@
rds_ib_stats_inc(s_ib_rx_cq_call);
- ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
-
+ ib_req_notify_cq(cq, IB_CQ_SOLICITED);
while (ib_poll_cq(cq, 1, &wc) > 0 ) {
rdsdebug("wc wr_id 0x%llx status %u byte_len %u imm_data %u\n",
(unsigned long long)wc.wr_id, wc.status, wc.byte_len,
Index: linux/net/rds/ib_sysctl.c
===================================================================
--- linux/net/rds/ib_sysctl.c (revision 177)
+++ linux/net/rds/ib_sysctl.c (working copy)
@@ -31,6 +31,11 @@
/* hardware will fail CQ creation long before this */
static unsigned long rds_ib_sysctl_max_wr_max = (u32)~0;
+/* default to rds_ib_sysctl_max_send_wr/4 */
+unsigned long rds_ib_sysctl_max_unsig_wrs = PAGE_SIZE / (4 * sizeof(struct ib_send_wr));
+static unsigned long rds_ib_sysctl_max_unsig_wr_min = 1;
+static unsigned long rds_ib_sysctl_max_unsig_wr_max = PAGE_SIZE / sizeof(struct ib_send_wr);
+
ctl_table rds_ib_sysctl_table[] = {
{
.ctl_name = 1,
@@ -52,6 +57,16 @@
.extra1 = &rds_ib_sysctl_max_wr_min,
.extra2 = &rds_ib_sysctl_max_wr_max,
},
+ {
+ .ctl_name = 3,
+ .procname = "max_unsignaled_wr",
+ .data = &rds_ib_sysctl_max_unsig_wrs,
+ .maxlen = sizeof(unsigned long),
+ .mode = 0644,
+ .proc_handler = &proc_doulongvec_minmax,
+ .extra1 = &rds_ib_sysctl_max_unsig_wr_min,
+ .extra2 = &rds_ib_sysctl_max_unsig_wr_max,
+ },
{ .ctl_name = 0}
};
Index: linux/net/rds/ib_cm.c
===================================================================
--- linux/net/rds/ib_cm.c (revision 177)
+++ linux/net/rds/ib_cm.c (working copy)
@@ -116,7 +116,7 @@
goto out;
}
- ret = ib_req_notify_cq(ic->i_recv_cq, IB_CQ_NEXT_COMP);
+ ret = ib_req_notify_cq(ic->i_recv_cq, IB_CQ_SOLICITED);
if (ret) {
rdsdebug("ib_req_notify_cq recv failed: %d\n", ret);
goto out;
Index: linux/net/rds/ib_ring.c
===================================================================
--- linux/net/rds/ib_ring.c (revision 177)
+++ linux/net/rds/ib_ring.c (working copy)
@@ -135,7 +135,11 @@
spin_lock_irqsave(&ring->w_lock, flags);
BUG_ON(ring->w_nr_free == ring->w_nr);
- ret = (ring->w_next_free + ring->w_nr_free) % ring->w_nr;
+ if (ring->w_next_free + ring->w_nr_free < ring->w_nr)
+ ret = ring->w_next_free + ring->w_nr_free;
+ else
+ ret = ring->w_next_free + ring->w_nr_free - ring->w_nr;
+
rdsdebug("ring %p ret %u next %u free %u\n", ring, ret,
ring->w_next_free, ring->w_nr_free);
spin_unlock_irqrestore(&ring->w_lock, flags);
More information about the rds-devel
mailing list