[rds-devel] [PATCH] rds: bug fixes for iwarp dev tree

Jon Mason jon at opengridcomputing.com
Tue Dec 9 15:21:30 PST 2008


Hey Andy,
This patch fixes some of the issues in the RDS iWARP code.

Specifically, it fixes an issue where iWARP will stall on zcopy runs due
to an IB_WC_LOCAL_INV or IB_WC_FAST_REG_MR completion arriving with wr_id 0
(which is a legitimate possibility).  Setting the wr_id to the max value
should prevent this from happening until a better fix comes along.

Also, the len was not being properly set for fastreg ops, which was
causing random memory corruption.  The len is being determined by the
length of the user buffer.  This was not being tracked previously.

Finally, the send fastreg wr can be chained onto the send wr going out.
This should make it more efficient due to fewer post_send calls.

This patch is based on the origin/future/2008111201 branch.

Thanks,
Jon

Signed-off-by: Jon Mason <jon at opengridcomputing.com>

diff --git a/drivers/infiniband/ulp/rds/iw_rdma.c b/drivers/infiniband/ulp/rds/iw_rdma.c
index 76109d8..0898bf8 100644
--- a/drivers/infiniband/ulp/rds/iw_rdma.c
+++ b/drivers/infiniband/ulp/rds/iw_rdma.c
@@ -965,6 +965,7 @@ static int rds_ib_rdma_build_fastreg(struct rds_ib_mapping *mapping)
 	mapping->m_rkey = ibmr->fr_mr->rkey;
 
 	memset(&f_wr, 0, sizeof(f_wr));
+	f_wr.wr_id = ~0;
 	f_wr.opcode = IB_WR_FAST_REG_MR;
 	f_wr.wr.fast_reg.length = mapping->m_sg.bytes;
 	f_wr.wr.fast_reg.rkey = mapping->m_rkey;
@@ -999,6 +1000,7 @@ static int rds_ib_rdma_fastreg_inv(struct rds_ib_mr *ibmr)
 		goto out;
 
 	memset(&s_wr, 0, sizeof(s_wr));
+	s_wr.wr_id = ~0;
 	s_wr.opcode = IB_WR_LOCAL_INV;
 	s_wr.ex.invalidate_rkey = ibmr->fr_mr->rkey;
 	s_wr.send_flags = IB_SEND_SIGNALED;
diff --git a/drivers/infiniband/ulp/rds/iw_send.c b/drivers/infiniband/ulp/rds/iw_send.c
index c67a7be..13d01df 100644
--- a/drivers/infiniband/ulp/rds/iw_send.c
+++ b/drivers/infiniband/ulp/rds/iw_send.c
@@ -227,12 +227,12 @@ void rds_ib_send_cq_comp_handler(struct ib_cq *cq, void *context)
 			break;
 		}
 
-		if (wc.opcode == IB_WC_LOCAL_INV && wc.wr_id == 0) {
+		if (wc.opcode == IB_WC_LOCAL_INV && wc.wr_id == ~0) {
 			ic->i_fastreg_posted = 0;
 			continue;
 		}
 
-		if (wc.opcode == IB_WC_FAST_REG_MR && wc.wr_id == 0) {
+		if (wc.opcode == IB_WC_FAST_REG_MR && wc.wr_id == ~0) {
 			ic->i_fastreg_posted = 1;
 			continue;
 		}
@@ -752,11 +752,11 @@ out:
 	return ret;
 }
 
-static int rds_ib_build_send_fastreg(struct rds_ib_device *rds_ibdev, struct rds_ib_connection *ic, struct rds_ib_send_work *send, int nent, int len, u64 sg_addr)
+static void rds_ib_build_send_fastreg(struct rds_ib_device *rds_ibdev,
+		struct rds_ib_connection *ic, struct rds_ib_send_work *send, int nent,
+		int len, u64 sg_addr)
 {
-	struct ib_send_wr *failed_wr;
-	int ret;
-
+	BUG_ON(nent > send->s_page_list->max_page_list_len);
 	/*
 	 * Perform a WR for the fast_reg_mr. Each individual page
 	 * in the sg list is added to the fast reg page list and placed
@@ -771,19 +771,7 @@ static int rds_ib_build_send_fastreg(struct rds_ib_device *rds_ibdev, struct rds
 	send->s_wr.wr.fast_reg.access_flags = IB_ACCESS_REMOTE_WRITE;
 	send->s_wr.wr.fast_reg.iova_start = sg_addr;
 
-	failed_wr = &send->s_wr;
-	ret = ib_post_send(ic->i_cm_id->qp, &send->s_wr, &failed_wr);
-	BUG_ON(failed_wr != &send->s_wr);
-	if (ret) {
-		printk(KERN_WARNING "RDS/IB: %s %d ib_post_send returned %d\n",
-			__func__, __LINE__, ret);
-		goto out;
-	}
-
 	ib_update_fast_reg_key(send->s_mr, send->s_remap_count++);
-
-out:
-	return ret;
 }
 
 int rds_ib_xmit_rdma(struct rds_connection *conn, struct rds_rdma_op *op)
@@ -850,8 +838,12 @@ int rds_ib_xmit_rdma(struct rds_connection *conn, struct rds_rdma_op *op)
 	}
 
 	send = &ic->i_sends[pos];
-	first = send;
-	prev = NULL;
+	if (!op->r_write && ic->i_iwarp) {
+		first = prev = &ic->i_sends[fr_pos];
+	} else {
+		first = send;
+		prev = NULL;
+	}
 	scat = &op->r_sg[0];
 	sent = 0;
 	num_sge = op->r_count;
@@ -896,12 +888,12 @@ int rds_ib_xmit_rdma(struct rds_connection *conn, struct rds_rdma_op *op)
 			prev->s_wr.next = &send->s_wr;
 
 		for (j = 0; j < send->s_wr.num_sge && scat != &op->r_sg[op->r_count]; j++) {
-			len = sg_dma_len(scat);
+			len = ib_sg_dma_len(ic->i_cm_id->device, scat);
 
 			if (send->s_wr.opcode == IB_WR_RDMA_READ_WITH_INV)
-				send->s_page_list->page_list[j] = sg_dma_address(scat);
+				send->s_page_list->page_list[j] = ib_sg_dma_address(ic->i_cm_id->device, scat);
 			else {
-				send->s_sge[j].addr = sg_dma_address(scat);
+				send->s_sge[j].addr = ib_sg_dma_address(ic->i_cm_id->device, scat);
 				send->s_sge[j].length = len;
 				send->s_sge[j].lkey = rds_ib_local_dma_lkey(ic);
 			}
@@ -916,6 +908,7 @@ int rds_ib_xmit_rdma(struct rds_connection *conn, struct rds_rdma_op *op)
 		if (send->s_wr.opcode == IB_WR_RDMA_READ_WITH_INV) {
 			send->s_wr.num_sge = 1;
 			send->s_sge[0].addr = conn->c_xmit_rm->m_rs->rs_user_addr;
+			send->s_sge[0].length = conn->c_xmit_rm->m_rs->rs_user_bytes;
 			send->s_sge[0].lkey = ((struct rds_ib_send_work)ic->i_sends[fr_pos]).s_mr->lkey;
 		}
 
@@ -929,7 +922,7 @@ int rds_ib_xmit_rdma(struct rds_connection *conn, struct rds_rdma_op *op)
 
 	/* if we finished the message then send completion owns it */
 	if (scat == &op->r_sg[op->r_count])
-		prev->s_wr.send_flags = IB_SEND_SIGNALED;
+		first->s_wr.send_flags = IB_SEND_SIGNALED;
 
 	if (i < work_alloc) {
 		rds_ib_ring_unalloc(&ic->i_send_ring, work_alloc - i);
diff --git a/drivers/infiniband/ulp/rds/rdma.c b/drivers/infiniband/ulp/rds/rdma.c
index d15d6a9..a5dbd8b 100644
--- a/drivers/infiniband/ulp/rds/rdma.c
+++ b/drivers/infiniband/ulp/rds/rdma.c
@@ -543,6 +543,7 @@ static struct rds_rdma_op *rds_rdma_prepare(struct rds_sock *rs,
 		}
 
 		rs->rs_user_addr = vec.addr;
+		rs->rs_user_bytes = vec.bytes;
 
 		/* did the user change the vec under us? */
 		if (nr > max_pages || op->r_nents + nr > nr_pages) {
diff --git a/drivers/infiniband/ulp/rds/rds.h b/drivers/infiniband/ulp/rds/rds.h
index 6406083..013a7b9 100644
--- a/drivers/infiniband/ulp/rds/rds.h
+++ b/drivers/infiniband/ulp/rds/rds.h
@@ -384,6 +384,8 @@ struct rds_sock {
 #endif
 
 	u64			rs_user_addr;
+	u64			rs_user_bytes;
+
 	/*
 	 * bound_addr used for both incoming and outgoing, no INADDR_ANY
 	 * support.



More information about the rds-devel mailing list