[rds-devel] [PATCH RFC RDS/IB] version 2. increase number of unsignaled work requests

Vladimir Sokolovsky vlad at dev.mellanox.co.il
Thu Jan 18 04:03:32 PST 2007


Hi,
Here is version 2 of the patch, which allows the number of unsignaled work requests to be configured via the rds_ib_sysctl_max_unsig_wrs parameter:

Receiving side:
        Request completion notifications for solicited events only.
Sending side: 
        Added the rds_ib_sysctl_max_unsig_wrs sysctl parameter. The default value is a quarter of the send ring size.
        Post every rds_ib_sysctl_max_unsig_wrs-th fragment, and the last fragment of each message, with the signaled and solicited-event flags set (IB_SEND_SIGNALED | IB_SEND_SOLICITED).

Signed-off-by: Vladimir Sokolovsky <vlad at mellanox.co.il>

Index: linux/net/rds/ib.h
===================================================================
--- linux/net/rds/ib.h	(revision 177)
+++ linux/net/rds/ib.h	(working copy)
@@ -155,6 +155,7 @@
 void rds_ib_sysctl_exit(void);
 extern unsigned long rds_ib_sysctl_max_send_wr;
 extern unsigned long rds_ib_sysctl_max_recv_wr;
+extern unsigned long rds_ib_sysctl_max_unsig_wrs;
 extern ctl_table rds_ib_sysctl_table[];
 
 #endif
Index: linux/net/rds/ib_send.c
===================================================================
--- linux/net/rds/ib_send.c	(revision 177)
+++ linux/net/rds/ib_send.c	(working copy)
@@ -51,7 +51,7 @@
 		send->s_wr.sg_list = send->s_sge;
 		send->s_wr.num_sge = 1;
 		send->s_wr.opcode = IB_WR_SEND;
-		send->s_wr.send_flags = IB_SEND_SIGNALED;
+		send->s_wr.send_flags = 0;
 		send->s_wr.imm_data = 0;
 
 		send->s_sge[0].lkey = ic->i_mr->lkey;
@@ -86,25 +86,40 @@
 	struct rds_ib_connection *ic = conn->c_transport_data;
 	struct ib_wc wc;
 	struct rds_ib_send_work *send;
+	u32 completed;
+	u32 oldest;
+	u32 i = 0;
+	int ret;
 
 	rdsdebug("cq %p conn %p\n", cq, conn);
 	rds_ib_stats_inc(s_ib_tx_cq_call);
+	ret = ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
+	if (ret) {
+		rdsdebug("ib_req_notify_cq send failed: %d\n", ret);
+	}
 
-	ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
-
 	while (ib_poll_cq(cq, 1, &wc) > 0 ) {
 		rdsdebug("wc wr_id 0x%llx status %u byte_len %u imm_data %u\n",
 			 (unsigned long long)wc.wr_id, wc.status, wc.byte_len,
 			 be32_to_cpu(wc.imm_data));
 		rds_ib_stats_inc(s_ib_tx_cq_event);
 
-		send = &ic->i_sends[rds_ib_ring_oldest(&ic->i_send_ring)];
+		oldest = rds_ib_ring_oldest(&ic->i_send_ring);
+		send = &ic->i_sends[oldest];
 
-		if (send->s_rm)
-			rds_ib_send_unmap_rm(ic, send);
-		send->s_wr.num_sge = 1;
+		if (oldest <= (unsigned long long)wc.wr_id)
+			completed = (unsigned long long)wc.wr_id - oldest + 1;
+		else
+			completed = ic->i_send_ring.w_nr - oldest + (unsigned long long)wc.wr_id + 1;
 
-		rds_ib_ring_free(&ic->i_send_ring, 1);
+		for (; i < completed; i++) {
+			if (send->s_rm)
+				rds_ib_send_unmap_rm(ic, send);
+			send->s_wr.num_sge = 1;
+			if (++send == &ic->i_sends[ic->i_send_ring.w_nr])
+				send = ic->i_sends;
+			rds_ib_ring_free(&ic->i_send_ring, 1);
+		}
 
 		/* We expect errors as the qp is drained during shutdown */
 		if (wc.status != IB_WC_SUCCESS && !ic->i_wc_err) {
@@ -146,6 +161,7 @@
 	u32 work_alloc;
 	int sent;
 	int ret;
+	static u32 unsignaled_wrs_count;
 
 	BUG_ON(off % RDS_FRAG_SIZE);
 	BUG_ON(hdr_off != 0 && hdr_off != sizeof(struct rds_header));
@@ -155,6 +171,7 @@
 		rm->m_count = dma_map_sg(ic->i_cm_id->device->dma_device,
 					 rm->m_sg, rm->m_nents, DMA_TO_DEVICE);
 		rdsdebug("ic %p mapping rm %p: %d\n", ic, rm, rm->m_count);
+		unsignaled_wrs_count = 0;
 		if (rm->m_count == 0) {
 			rds_ib_stats_inc(s_ib_tx_sg_mapping_failure);
 			ret = -ENOMEM; /* XXX ? */
@@ -211,6 +228,15 @@
 	/* if there's data reference it with a chain of work reqs */
 	for(; i < work_alloc && scat != &rm->m_sg[rm->m_count]; i++) {
 
+		++unsignaled_wrs_count;
+
+		if ( rds_ib_sysctl_max_unsig_wrs > 0 && unsignaled_wrs_count >= rds_ib_sysctl_max_unsig_wrs ) {
+			unsignaled_wrs_count = 0;
+			send->s_wr.send_flags = IB_SEND_SIGNALED | IB_SEND_SOLICITED;
+		}
+		else
+			send->s_wr.send_flags = 0;
+
 		send->s_wr.next = NULL;
 		if (prev)
 			prev->s_wr.next = &send->s_wr;
@@ -237,6 +263,7 @@
 	/* if we finished the message then send completion owns it */
 	if (scat == &rm->m_sg[rm->m_count]) {
 		prev->s_rm = ic->i_rm;
+		prev->s_wr.send_flags = IB_SEND_SIGNALED | IB_SEND_SOLICITED;
 		ic->i_rm = NULL;
 	}
 
Index: linux/net/rds/ib_recv.c
===================================================================
--- linux/net/rds/ib_recv.c	(revision 177)
+++ linux/net/rds/ib_recv.c	(working copy)
@@ -465,8 +465,7 @@
 
 	rds_ib_stats_inc(s_ib_rx_cq_call);
 
-	ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
-
+	ib_req_notify_cq(cq, IB_CQ_SOLICITED);
 	while (ib_poll_cq(cq, 1, &wc) > 0 ) {
 		rdsdebug("wc wr_id 0x%llx status %u byte_len %u imm_data %u\n",
 			 (unsigned long long)wc.wr_id, wc.status, wc.byte_len,
Index: linux/net/rds/ib_sysctl.c
===================================================================
--- linux/net/rds/ib_sysctl.c	(revision 177)
+++ linux/net/rds/ib_sysctl.c	(working copy)
@@ -31,6 +31,11 @@
 /* hardware will fail CQ creation long before this */
 static unsigned long rds_ib_sysctl_max_wr_max = (u32)~0;
 
+/* default to rds_ib_sysctl_max_send_wr/4 */
+unsigned long rds_ib_sysctl_max_unsig_wrs = PAGE_SIZE / (4 * sizeof(struct ib_send_wr));
+static unsigned long rds_ib_sysctl_max_unsig_wr_min = 1;
+static unsigned long rds_ib_sysctl_max_unsig_wr_max = PAGE_SIZE / sizeof(struct ib_send_wr);
+
 ctl_table rds_ib_sysctl_table[] = {
 	{
 		.ctl_name       = 1,
@@ -52,6 +57,16 @@
 		.extra1		= &rds_ib_sysctl_max_wr_min,
 		.extra2		= &rds_ib_sysctl_max_wr_max,
 	},
+	{
+		.ctl_name       = 3,
+		.procname       = "max_unsignaled_wr",
+		.data		= &rds_ib_sysctl_max_unsig_wrs,
+		.maxlen         = sizeof(unsigned long),
+		.mode           = 0644,
+		.proc_handler   = &proc_doulongvec_minmax,
+		.extra1		= &rds_ib_sysctl_max_unsig_wr_min,
+		.extra2		= &rds_ib_sysctl_max_unsig_wr_max,
+	},
 	{ .ctl_name = 0}
 };
 
Index: linux/net/rds/ib_cm.c
===================================================================
--- linux/net/rds/ib_cm.c	(revision 177)
+++ linux/net/rds/ib_cm.c	(working copy)
@@ -116,7 +116,7 @@
 		goto out;
 	}
 
-	ret = ib_req_notify_cq(ic->i_recv_cq, IB_CQ_NEXT_COMP);
+	ret = ib_req_notify_cq(ic->i_recv_cq, IB_CQ_SOLICITED);
 	if (ret) {
 		rdsdebug("ib_req_notify_cq recv failed: %d\n", ret);
 		goto out;
Index: linux/net/rds/ib_ring.c
===================================================================
--- linux/net/rds/ib_ring.c	(revision 177)
+++ linux/net/rds/ib_ring.c	(working copy)
@@ -135,7 +135,11 @@
 
 	spin_lock_irqsave(&ring->w_lock, flags);
 	BUG_ON(ring->w_nr_free == ring->w_nr);
-	ret = (ring->w_next_free + ring->w_nr_free) % ring->w_nr;
+	if (ring->w_next_free + ring->w_nr_free < ring->w_nr)
+		ret = ring->w_next_free + ring->w_nr_free;
+	else
+		ret = ring->w_next_free + ring->w_nr_free - ring->w_nr;
+
 	rdsdebug("ring %p ret %u next %u free %u\n", ring, ret,
 		 ring->w_next_free, ring->w_nr_free);
 	spin_unlock_irqrestore(&ring->w_lock, flags);




More information about the rds-devel mailing list