[rds-devel] [PATCH 1/2] RDS/IB: prepare IB specific header extensions

Olaf Kirch olaf.kirch at oracle.com
Tue Nov 13 08:24:48 PST 2007


From: Olaf Kirch <olaf.kirch at oracle.com>

[EXPERIMENTAL] RDS/IB: prepare IB specific header extensions

This patch prepares the scaffolding for RDS/IB flow control.
It creates a struct rds_ib_header, which contains the RDS
header itself and provides room for IB-specific extensions.

The patch updates the code to use the new type instead of
struct rds_header where necessary.

Signed-off-by: Olaf Kirch <olaf.kirch at oracle.com>
---
 net/rds/ib.h      |   13 ++++++++++---
 net/rds/ib_cm.c   |   12 ++++++------
 net/rds/ib_recv.c |   41 ++++++++++++++++++++++-------------------
 net/rds/ib_send.c |   13 +++++++++----
 4 files changed, 47 insertions(+), 32 deletions(-)

Index: ofa_kernel-1.2.5.1/net/rds/ib.h
===================================================================
--- ofa_kernel-1.2.5.1.orig/net/rds/ib.h
+++ ofa_kernel-1.2.5.1/net/rds/ib.h
@@ -53,6 +53,13 @@ struct rds_ib_work_ring {
 	u32		w_nr_free;
 };
 
+/* The IB transport extends the RDS header */
+struct rds_ib_header {
+	struct rds_header	i_base;
+
+	/* IB specific fields to be added by follow-up patch */
+};
+
 struct rds_ib_connection {
 	/* alphabet soup, IBTA style */
 	struct rdma_cm_id	*i_cm_id;
@@ -64,7 +71,7 @@ struct rds_ib_connection {
 	/* tx */
 	struct rds_ib_work_ring	i_send_ring;
 	struct rds_message	*i_rm;
-	struct rds_header	*i_send_hdrs;
+	struct rds_ib_header	*i_send_hdrs;
 	dma_addr_t 		i_send_hdrs_dma;
 	struct rds_ib_send_work *i_sends;
 
@@ -73,7 +80,7 @@ struct rds_ib_connection {
 	struct rds_ib_work_ring	i_recv_ring;
 	struct rds_ib_incoming	*i_ibinc;
 	u32			i_recv_data_rem;
-	struct rds_header	*i_recv_hdrs;
+	struct rds_ib_header	*i_recv_hdrs;
 	dma_addr_t 		i_recv_hdrs_dma;
 	struct rds_ib_recv_work *i_recvs;
 	struct rds_page_frag	i_frag;
@@ -89,7 +96,7 @@ struct rds_ib_connection {
 	atomic64_t		i_ack_next;	/* next ACK to send */
 #endif
 	u64			i_ack_sent;	/* last ACK sent */
-	struct rds_header	*i_ack;
+	struct rds_ib_header	*i_ack;
 	struct ib_send_wr	i_ack_wr;
 	struct ib_sge		i_ack_sge;
 	dma_addr_t 		i_ack_dma;
Index: ofa_kernel-1.2.5.1/net/rds/ib_cm.c
===================================================================
--- ofa_kernel-1.2.5.1.orig/net/rds/ib_cm.c
+++ ofa_kernel-1.2.5.1/net/rds/ib_cm.c
@@ -185,7 +185,7 @@ static int rds_ib_setup_qp(struct rds_co
 
 	ic->i_send_hdrs = dma_alloc_coherent(ic->i_cm_id->device->dma_device,
 					   ic->i_send_ring.w_nr *
-					   	sizeof(struct rds_header),
+					   	sizeof(struct rds_ib_header),
 					   &ic->i_send_hdrs_dma, GFP_KERNEL);
 	if (ic->i_send_hdrs == NULL) {
 		ret = -ENOMEM;
@@ -195,7 +195,7 @@ static int rds_ib_setup_qp(struct rds_co
 
 	ic->i_recv_hdrs = dma_alloc_coherent(ic->i_cm_id->device->dma_device,
 					   ic->i_recv_ring.w_nr *
-					   	sizeof(struct rds_header),
+					   	sizeof(struct rds_ib_header),
 					   &ic->i_recv_hdrs_dma, GFP_KERNEL);
 	if (ic->i_recv_hdrs == NULL) {
 		ret = -ENOMEM;
@@ -204,7 +204,7 @@ static int rds_ib_setup_qp(struct rds_co
 	}
 
 	ic->i_ack = dma_alloc_coherent(ic->i_cm_id->device->dma_device,
-				       sizeof(struct rds_header),
+				       sizeof(struct rds_ib_header),
 				       &ic->i_ack_dma, GFP_KERNEL);
 	if (ic->i_ack == NULL) {
 		ret = -ENOMEM;
@@ -541,20 +541,20 @@ void rds_ib_conn_shutdown(struct rds_con
 		if (ic->i_send_hdrs)
 			dma_free_coherent(ic->i_cm_id->device->dma_device,
 					   ic->i_send_ring.w_nr *
-					   	sizeof(struct rds_header),
+					   	sizeof(struct rds_ib_header),
 					   ic->i_send_hdrs,
 					   ic->i_send_hdrs_dma);
 
 		if (ic->i_recv_hdrs)
 			dma_free_coherent(ic->i_cm_id->device->dma_device,
 					   ic->i_recv_ring.w_nr *
-					   	sizeof(struct rds_header),
+					   	sizeof(struct rds_ib_header),
 					   ic->i_recv_hdrs,
 					   ic->i_recv_hdrs_dma);
 
 		if (ic->i_ack)
 			dma_free_coherent(ic->i_cm_id->device->dma_device,
-					  sizeof(struct rds_header),
+					  sizeof(struct rds_ib_header),
 					  ic->i_ack, ic->i_ack_dma);
 
 		if (ic->i_map_count)
Index: ofa_kernel-1.2.5.1/net/rds/ib_recv.c
===================================================================
--- ofa_kernel-1.2.5.1.orig/net/rds/ib_recv.c
+++ ofa_kernel-1.2.5.1/net/rds/ib_recv.c
@@ -97,8 +97,8 @@ void rds_ib_recv_init_ring(struct rds_ib
 		recv->r_sge[0].lkey = ic->i_mr->lkey;
 
 		recv->r_sge[1].addr = ic->i_recv_hdrs_dma +
-				      (i * sizeof(struct rds_header));
-		recv->r_sge[1].length = sizeof(struct rds_header);
+				      (i * sizeof(struct rds_ib_header));
+		recv->r_sge[1].length = sizeof(struct rds_ib_header);
 		recv->r_sge[1].lkey = ic->i_mr->lkey;
 	}
 }
@@ -343,7 +343,7 @@ void rds_ib_recv_init_ack(struct rds_ib_
 	struct ib_sge *sge = &ic->i_ack_sge;
 
 	sge->addr = ic->i_ack_dma;
-	sge->length = sizeof(struct rds_header);
+	sge->length = sizeof(struct rds_ib_header);
 	sge->lkey = ic->i_mr->lkey;
 
 	wr->sg_list = sge;
@@ -424,7 +424,8 @@ static u64 rds_ib_get_ack(struct rds_ib_
 
 static void rds_ib_send_ack(struct rds_ib_connection *ic)
 {
-	struct rds_header *hdr = ic->i_ack;
+	struct rds_ib_header *ihdr = ic->i_ack;
+	struct rds_header *hdr = &ihdr->i_base;
 	struct ib_send_wr *failed_wr;
 	u64 seq;
 	int ret;
@@ -530,7 +531,7 @@ u64 rds_ib_piggyb_ack(struct rds_ib_conn
  * have to zero the header and possibly the rest of the page.
  */
 static int rds_ib_copy_header(struct rds_connection *conn,
-			       struct rds_header *hdr,
+			       struct rds_ib_header *hdr,
 			       struct rds_ib_recv_work *recv, u32 start)
 {
 	struct rds_ib_connection *ic = conn->c_transport_data;
@@ -541,7 +542,7 @@ static int rds_ib_copy_header(struct rds
 	/* get the start of the header from the tail of the fragment */
 	if (start < RDS_FRAG_SIZE) {
 		len = min_t(u32, RDS_FRAG_SIZE - start,
-				  sizeof(struct rds_header));
+				  sizeof(struct rds_ib_header));
 		addr = kmap_atomic(recv->r_frag->f_page, KM_SOFTIRQ0);
 		memcpy(dst,
 		       addr + recv->r_frag->f_offset + start, 
@@ -551,14 +552,14 @@ static int rds_ib_copy_header(struct rds
 	}
 
 	/* and the rest that might have spilled into the posted header space */
-	if (len < sizeof(struct rds_header)) {
+	if (len < sizeof(struct rds_ib_header)) {
 		memcpy(dst,
 		       &ic->i_recv_hdrs[recv - ic->i_recvs],
-		       sizeof(struct rds_header) - len);
+		       sizeof(struct rds_ib_header) - len);
 	}
 
 	/* Validate the checksum. */
-	if (!rds_message_verify_checksum(hdr)) {
+	if (!rds_message_verify_checksum(&hdr->i_base)) {
 		rds_ib_conn_error(conn, "incoming message "
 		       "from %u.%u.%u.%u has corrupted header - "
 		       "forcing a reconnect\n",
@@ -649,14 +650,15 @@ static void rds_ib_process_recv(struct r
 {
 	struct rds_ib_connection *ic = conn->c_transport_data;
 	struct rds_ib_incoming *ibinc = ic->i_ibinc;
-	struct rds_header hdr_buf, *hdr;
+	struct rds_ib_header hdr_buf;
+	struct rds_header *hdr;
 
 	/* XXX shut down the connection if port 0,0 are seen? */
 
 	rdsdebug("ic %p ibinc %p recv %p byte len %u\n", ic, ibinc, recv,
 		 byte_len);
 
-	if (byte_len < sizeof(struct rds_header)) {
+	if (byte_len < sizeof(struct rds_ib_header)) {
 		rds_ib_conn_error(conn, "incoming message "
 		       "from %u.%u.%u.%u didn't inclue a "
 		       "header, disconnecting and "
@@ -664,16 +666,17 @@ static void rds_ib_process_recv(struct r
 		       NIPQUAD(conn->c_faddr));
 		return;
 	}
-	byte_len -= sizeof(struct rds_header);
+	byte_len -= sizeof(struct rds_ib_header);
 
 	if (!rds_ib_copy_header(conn, &hdr_buf, recv, byte_len))
 		return;
+	hdr = &hdr_buf.i_base;
 
 	/* Process the ACK sequence which comes with every packet */
-	state->ack_recv = be64_to_cpu(hdr_buf.h_ack);
+	state->ack_recv = be64_to_cpu(hdr->h_ack);
 	state->ack_recv_valid = 1;
 
-	if (hdr_buf.h_sport == 0 && hdr_buf.h_dport == 0 && byte_len == 0) {
+	if (hdr->h_sport == 0 && hdr->h_dport == 0 && byte_len == 0) {
 		/* This is an ACK-only packet. The fact that it gets
 		 * special treatment here is that historically, ACKs
 		 * were rather special beasts.
@@ -706,7 +709,7 @@ static void rds_ib_process_recv(struct r
 		ic->i_ibinc = ibinc;
 
 		hdr = &ibinc->ii_inc.i_hdr;
-		memcpy(hdr, &hdr_buf, sizeof(*hdr));
+		memcpy(hdr, &hdr_buf.i_base, sizeof(*hdr));
 		ic->i_recv_data_rem = be32_to_cpu(hdr->h_len);
 
 		rdsdebug("ic %p ibinc %p rem %u flag 0x%x\n", ic, ibinc,
@@ -715,10 +718,10 @@ static void rds_ib_process_recv(struct r
 		hdr = &ibinc->ii_inc.i_hdr;
 		/* We can't just use memcmp here; fragments of a
 		 * single message may carry different ACKs */
-		if (hdr->h_sequence != hdr_buf.h_sequence
-		 || hdr->h_len != hdr_buf.h_len
-		 || hdr->h_sport != hdr_buf.h_sport
-		 || hdr->h_dport != hdr_buf.h_dport) {
+		if (hdr->h_sequence != hdr_buf.i_base.h_sequence
+		 || hdr->h_len != hdr_buf.i_base.h_len
+		 || hdr->h_sport != hdr_buf.i_base.h_sport
+		 || hdr->h_dport != hdr_buf.i_base.h_dport) {
 			rds_ib_conn_error(conn,
 				"fragment header mismatch; forcing reconnect\n");
 			return;
Index: ofa_kernel-1.2.5.1/net/rds/ib_send.c
===================================================================
--- ofa_kernel-1.2.5.1.orig/net/rds/ib_send.c
+++ ofa_kernel-1.2.5.1/net/rds/ib_send.c
@@ -67,8 +67,8 @@ void rds_ib_send_init_ring(struct rds_ib
 		send->s_sge[0].lkey = ic->i_mr->lkey;
 
 		send->s_sge[1].addr = ic->i_send_hdrs_dma +
-				      (i * sizeof(struct rds_header));
-		send->s_sge[1].length = sizeof(struct rds_header);
+				      (i * sizeof(struct rds_ib_header));
+		send->s_sge[1].length = sizeof(struct rds_ib_header);
 		send->s_sge[1].lkey = ic->i_mr->lkey;
 	}
 }
@@ -159,6 +159,7 @@ int rds_ib_xmit_cong_map(struct rds_conn
 	struct rds_ib_send_work *prev;
 	struct ib_send_wr *failed_wr;
 	struct scatterlist *scat;
+	struct rds_ib_header *ihdr;
 	struct rds_header *hdr;
 	unsigned long i;
 	unsigned int off;
@@ -228,7 +229,8 @@ int rds_ib_xmit_cong_map(struct rds_conn
 		}
 
 		/* build the header and include it in the wr */
-		hdr = &ic->i_send_hdrs[pos];
+		ihdr = &ic->i_send_hdrs[pos];
+		hdr = &ihdr->i_base;
 		memset(hdr, 0, sizeof(struct rds_header));
 		hdr->h_flags = RDS_FLAG_CONG_BITMAP;
 		hdr->h_len = cpu_to_be32(RDS_CONG_MAP_BYTES);
@@ -362,6 +364,8 @@ int rds_ib_xmit(struct rds_connection *c
 
 	/* if there's data reference it with a chain of work reqs */
 	for(; i < work_alloc && scat != &rm->m_sg[rm->m_count]; i++) {
+		struct rds_ib_header *ihdr;
+
 		send->s_wr.send_flags = 0;
 		send->s_queued = jiffies;
 
@@ -401,7 +405,8 @@ int rds_ib_xmit(struct rds_connection *c
 add_header:
 		/* Tack on the header after the data. send->s_sge[1] should already
 		 * have been set up to point to the right header buffer. */
-		memcpy(&ic->i_send_hdrs[pos], &rm->m_inc.i_hdr, sizeof(struct rds_header));
+		ihdr = &ic->i_send_hdrs[pos];
+		memcpy(&ihdr->i_base, &rm->m_inc.i_hdr, sizeof(struct rds_header));
 		send->s_wr.num_sge = 2;
 
 		prev = send;

-- 
Olaf Kirch  |  --- o --- Nous sommes du soleil we love when we play
okir at lst.de |    / | \   sol.dhoop.naytheet.ah kin.ir.samse.qurax



More information about the rds-devel mailing list