[rds-devel] [PATCH] cxgb3: add iscsi patch for suse.

Steve Wise swise at opengridcomputing.com
Thu Jan 8 13:24:10 PST 2009


From: Steve Wise <swise at opengridcomputing.com>

Apply this to an ofed-1.4 kernel tree, then configure and build as
normal; the ofed cxgb3 module should then support both RDMA and iSCSI.

Signed-off-by: Steve Wise <swise at opengridcomputing.com>
Signed-off-by: Karen Xie <kxie at chelsio.com>
---

 0 files changed, 0 insertions(+), 0 deletions(-)

diff --git a/kernel_patches/fixes/cxgb3_00500_iscsi.patch b/kernel_patches/fixes/cxgb3_00500_iscsi.patch
new file mode 100644
index 0000000..4db8816
--- /dev/null
+++ b/kernel_patches/fixes/cxgb3_00500_iscsi.patch
@@ -0,0 +1,302 @@
+cxgb3 - add support for a private iSCSI IP address.
+
+From: Karen Xie <kxie at chelsio.com>
+
+The accelerated iSCSI traffic uses a private IP address unknown to the OS.
+Since the OS will not answer for it, the driver has to reply to ARP requests that target the private IP address.
+
+Signed-off-by: Karen Xie <kxie at chelsio.com>
+---
+
+diff -uNr linux-2.6.27.orig/drivers/net/cxgb3/adapter.h linux-2.6.27.iscsi/drivers/net/cxgb3/adapter.h
+--- linux-2.6.27.orig/drivers/net/cxgb3/adapter.h	2008-10-09 15:13:53.000000000 -0700
++++ linux-2.6.27.iscsi/drivers/net/cxgb3/adapter.h	2008-11-20 13:27:01.000000000 -0800
+@@ -64,6 +64,7 @@
+ 	struct link_config link_config;
+ 	struct net_device_stats netstats;
+ 	int activity;
++	__be32 iscsi_ipv4addr;
+ };
+ 
+ enum {				/* adapter flags */
+diff -uNr linux-2.6.27.orig/drivers/net/cxgb3/cxgb3_ctl_defs.h linux-2.6.27.iscsi/drivers/net/cxgb3/cxgb3_ctl_defs.h
+--- linux-2.6.27.orig/drivers/net/cxgb3/cxgb3_ctl_defs.h	2008-10-09 15:13:53.000000000 -0700
++++ linux-2.6.27.iscsi/drivers/net/cxgb3/cxgb3_ctl_defs.h	2008-11-20 13:27:03.000000000 -0800
+@@ -57,6 +57,7 @@
+ 	RDMA_GET_MIB		= 19,
+ 
+ 	GET_RX_PAGE_INFO	= 50,
++	GET_ISCSI_IPV4ADDR	= 51,
+ };
+ 
+ /*
+@@ -86,6 +87,12 @@
+ 	u16 vlan_tag;
+ };
+ 
++/* Structure used to request a port's iSCSI IPv4 address */
++struct iscsi_ipv4addr {
++	struct net_device *dev;	/* the net_device */
++	__be32 ipv4addr;	/* the return iSCSI IPv4 address */
++};
++
+ struct pci_dev;
+ 
+ /*
+diff -uNr linux-2.6.27.orig/drivers/net/cxgb3/cxgb3_main.c linux-2.6.27.iscsi/drivers/net/cxgb3/cxgb3_main.c
+--- linux-2.6.27.orig/drivers/net/cxgb3/cxgb3_main.c	2008-10-09 15:13:53.000000000 -0700
++++ linux-2.6.27.iscsi/drivers/net/cxgb3/cxgb3_main.c	2008-11-20 13:27:04.000000000 -0800
+@@ -34,6 +34,7 @@
+ #include <linux/init.h>
+ #include <linux/pci.h>
+ #include <linux/dma-mapping.h>
++#include <linux/inet.h>
+ #include <linux/netdevice.h>
+ #include <linux/etherdevice.h>
+ #include <linux/if_vlan.h>
+@@ -687,6 +688,46 @@
+ 
+ static struct attribute_group offload_attr_group = {.attrs = offload_attrs };
+ 
++static ssize_t iscsi_ipv4addr_attr_show(struct device *d, char *buf)
++{
++	struct port_info *pi = netdev_priv(to_net_dev(d));
++	__be32 a = pi->iscsi_ipv4addr;
++
++	return sprintf(buf, NIPQUAD_FMT "\n", NIPQUAD(a));
++}
++
++static ssize_t iscsi_ipv4addr_attr_store(struct device *d,
++					 const char *buf, size_t len)
++{
++	struct port_info *pi = netdev_priv(to_net_dev(d));
++
++	pi->iscsi_ipv4addr = in_aton(buf);
++	return len;
++}
++
++#define ISCSI_IPADDR_ATTR(name) \
++static ssize_t show_##name(struct device *d, struct device_attribute *attr, \
++			   char *buf) \
++{ \
++	return iscsi_ipv4addr_attr_show(d, buf); \
++} \
++static ssize_t store_##name(struct device *d, struct device_attribute *attr, \
++			    const char *buf, size_t len) \
++{ \
++	return iscsi_ipv4addr_attr_store(d, buf, len); \
++} \
++static DEVICE_ATTR(name, S_IRUGO | S_IWUSR, show_##name, store_##name)
++
++ISCSI_IPADDR_ATTR(iscsi_ipv4addr);
++
++static struct attribute *iscsi_offload_attrs[] = {
++	&dev_attr_iscsi_ipv4addr.attr,
++	NULL
++};
++
++static struct attribute_group iscsi_offload_attr_group = {
++	.attrs = iscsi_offload_attrs
++};
+ /*
+  * Sends an sk_buff to an offload queue driver
+  * after dealing with any active network taps.
+@@ -1078,6 +1119,7 @@
+ 		if (err)
+ 			printk(KERN_WARNING
+ 			       "Could not initialize offload capabilities\n");
++		sysfs_create_group(&dev->dev.kobj, &iscsi_offload_attr_group);
+ 	}
+ 
+ 	link_start(dev);
+@@ -1100,6 +1142,9 @@
+ 	netif_carrier_off(dev);
+ 	t3_mac_disable(&pi->mac, MAC_DIRECTION_TX | MAC_DIRECTION_RX);
+ 
++	if (is_offload(adapter) && !ofld_disable)
++		sysfs_remove_group(&dev->dev.kobj, &iscsi_offload_attr_group);
++
+ 	spin_lock(&adapter->work_lock);	/* sync with update task */
+ 	clear_bit(pi->port_id, &adapter->open_device_map);
+ 	spin_unlock(&adapter->work_lock);
+diff -uNr linux-2.6.27.orig/drivers/net/cxgb3/cxgb3_offload.c linux-2.6.27.iscsi/drivers/net/cxgb3/cxgb3_offload.c
+--- linux-2.6.27.orig/drivers/net/cxgb3/cxgb3_offload.c	2008-10-09 15:13:53.000000000 -0700
++++ linux-2.6.27.iscsi/drivers/net/cxgb3/cxgb3_offload.c	2008-11-20 13:27:05.000000000 -0800
+@@ -182,7 +182,9 @@
+ static int cxgb_ulp_iscsi_ctl(struct adapter *adapter, unsigned int req,
+ 			      void *data)
+ {
++	int i;
+ 	int ret = 0;
++	unsigned int val = 0;
+ 	struct ulp_iscsi_info *uiip = data;
+ 
+ 	switch (req) {
+@@ -191,6 +193,9 @@
+ 		uiip->llimit = t3_read_reg(adapter, A_ULPRX_ISCSI_LLIMIT);
+ 		uiip->ulimit = t3_read_reg(adapter, A_ULPRX_ISCSI_ULIMIT);
+ 		uiip->tagmask = t3_read_reg(adapter, A_ULPRX_ISCSI_TAGMASK);
++		val = t3_read_reg(adapter, A_ULPRX_ISCSI_PSZ);
++		for (i = 0; i < 4; i++, val >>= 8)
++			uiip->pgsz_factor[i] = val & 0xFF;
+ 		/*
+ 		 * On tx, the iscsi pdu has to be <= tx page size and has to
+ 		 * fit into the Tx PM FIFO.
+@@ -198,25 +203,25 @@
+ 		uiip->max_txsz = min(adapter->params.tp.tx_pg_size,
+ 				     t3_read_reg(adapter, A_PM1_TX_CFG) >> 17);
+ 		/* on rx, the iscsi pdu has to be < rx page size and the
+-		   whole pdu + cpl headers has to fit into one sge buffer */
+-		uiip->max_rxsz = min_t(unsigned int,
+-				       adapter->params.tp.rx_pg_size,
+-				       (adapter->sge.qs[0].fl[1].buf_size -
+-					sizeof(struct cpl_rx_data) * 2 -
+-					sizeof(struct cpl_rx_data_ddp)));
++		   the max rx data length programmed in TP */
++		uiip->max_rxsz = min(adapter->params.tp.rx_pg_size,
++				     ((t3_read_reg(adapter, A_TP_PARA_REG2))
++					>> S_MAXRXDATA) & M_MAXRXDATA);
+ 		break;
+ 	case ULP_ISCSI_SET_PARAMS:
+ 		t3_write_reg(adapter, A_ULPRX_ISCSI_TAGMASK, uiip->tagmask);
+ 		/* set MaxRxData and MaxCoalesceSize to 16224 */
+ 		t3_write_reg(adapter, A_TP_PARA_REG2, 0x3f603f60);
+ 		/* program the ddp page sizes */
+-		{
+-			int i;
+-			unsigned int val = 0;
+-			for (i = 0; i < 4; i++)
+-				val |= (uiip->pgsz_factor[i] & 0xF) << (8 * i);
+-			if (val)
+-				t3_write_reg(adapter, A_ULPRX_ISCSI_PSZ, val);
++		for (i = 0; i < 4; i++)
++			val |= (uiip->pgsz_factor[i] & 0xF) << (8 * i);
++		if (val && (val != t3_read_reg(adapter, A_ULPRX_ISCSI_PSZ))) {
++			printk(KERN_INFO
++				"%s, setting iscsi pgsz 0x%x, %u,%u,%u,%u.\n",
++				adapter->name, val, uiip->pgsz_factor[0],
++				uiip->pgsz_factor[1], uiip->pgsz_factor[2],
++				uiip->pgsz_factor[3]); 
++			t3_write_reg(adapter, A_ULPRX_ISCSI_PSZ, val);
+ 		}
+ 		break;
+ 	default:
+@@ -407,6 +412,12 @@
+ 		rx_page_info->page_size = tp->rx_pg_size;
+ 		rx_page_info->num = tp->rx_num_pgs;
+ 		break;
++	case GET_ISCSI_IPV4ADDR: {
++		struct iscsi_ipv4addr *p = data;
++		struct port_info *pi = netdev_priv(p->dev);
++		p->ipv4addr = pi->iscsi_ipv4addr;
++		break;
++	}
+ 	default:
+ 		return -EOPNOTSUPP;
+ 	}
+diff -uNr linux-2.6.27.orig/drivers/net/cxgb3/sge.c linux-2.6.27.iscsi/drivers/net/cxgb3/sge.c
+--- linux-2.6.27.orig/drivers/net/cxgb3/sge.c	2008-10-09 15:13:53.000000000 -0700
++++ linux-2.6.27.iscsi/drivers/net/cxgb3/sge.c	2008-11-20 13:27:08.000000000 -0800
+@@ -36,6 +36,7 @@
+ #include <linux/ip.h>
+ #include <linux/tcp.h>
+ #include <linux/dma-mapping.h>
++#include <net/arp.h>
+ #include "common.h"
+ #include "regs.h"
+ #include "sge_defs.h"
+@@ -1859,6 +1860,54 @@
+ }
+ 
+ /**
++ *	cxgb3_arp_process - process an ARP request probing a private IP address
++ *	@adapter: the adapter
++ *	@skb: the skbuff containing the ARP request
++ *
++ *	Check if the ARP request is probing the private IP address
++ *	dedicated to iSCSI, generate an ARP reply if so.
++ */
++static void cxgb3_arp_process(struct adapter *adapter, struct sk_buff *skb)
++{
++	struct net_device *dev = skb->dev;
++	struct port_info *pi;
++	struct arphdr *arp;
++	unsigned char *arp_ptr;
++	unsigned char *sha;
++	__be32 sip, tip;
++
++	if (!dev)
++		return;
++
++	skb_reset_network_header(skb);
++	arp = arp_hdr(skb);
++
++	if (arp->ar_op != htons(ARPOP_REQUEST))
++		return;
++
++	arp_ptr = (unsigned char *)(arp + 1);
++	sha = arp_ptr;
++	arp_ptr += dev->addr_len;
++	memcpy(&sip, arp_ptr, sizeof(sip));
++	arp_ptr += sizeof(sip);
++	arp_ptr += dev->addr_len;
++	memcpy(&tip, arp_ptr, sizeof(tip));
++
++	pi = netdev_priv(dev);
++	if (tip != pi->iscsi_ipv4addr)
++		return;
++
++	arp_send(ARPOP_REPLY, ETH_P_ARP, sip, dev, tip, sha,
++		 dev->dev_addr, sha);
++
++}
++
++static inline int is_arp(struct sk_buff *skb)
++{
++	return skb->protocol == htons(ETH_P_ARP);
++}
++
++/**
+  *	rx_eth - process an ingress ethernet packet
+  *	@adap: the adapter
+  *	@rq: the response queue that received the packet
+@@ -1882,7 +1931,7 @@
+ 	pi = netdev_priv(skb->dev);
+ 	if (pi->rx_csum_offload && p->csum_valid && p->csum == htons(0xffff) &&
+ 	    !p->fragment) {
+-		rspq_to_qset(rq)->port_stats[SGE_PSTAT_RX_CSUM_GOOD]++;
++		qs->port_stats[SGE_PSTAT_RX_CSUM_GOOD]++;
+ 		skb->ip_summed = CHECKSUM_UNNECESSARY;
+ 	} else
+ 		skb->ip_summed = CHECKSUM_NONE;
+@@ -1897,16 +1946,28 @@
+ 							     grp,
+ 							     ntohs(p->vlan),
+ 							     p);
+-			else
++			else {
++				if (unlikely(pi->iscsi_ipv4addr &&
++					     is_arp(skb))) {
++					unsigned short vtag = ntohs(p->vlan) &
++							VLAN_VID_MASK;
++                                        skb->dev = vlan_group_get_device(grp,
++									 vtag);
++					cxgb3_arp_process(adap, skb);
++				}
+ 				__vlan_hwaccel_rx(skb, grp, ntohs(p->vlan),
+ 					  	  rq->polling);
++			}
+ 		else
+ 			dev_kfree_skb_any(skb);
+ 	} else if (rq->polling) {
+ 		if (lro)
+ 			lro_receive_skb(&qs->lro_mgr, skb, p);
+-		else
++		else {
++			if (unlikely(pi->iscsi_ipv4addr && is_arp(skb)))
++				cxgb3_arp_process(adap, skb);
+ 			netif_receive_skb(skb);
++		}
+ 	} else
+ 		netif_rx(skb);
+ }



More information about the rds-devel mailing list