[rds-devel] [PATCH] cxgb3: add iscsi patch for suse.

Steve Wise swise at opengridcomputing.com
Thu Jan 8 14:25:33 PST 2009


Please ignore this.



Steve Wise wrote:
> From: Steve Wise <swise at opengridcomputing.com>
>
> Apply this to an ofed-1.4 kernel tree, then configure and build as
> normal, and the ofed cxgb3 module should support both RDMA and iSCSI.
>
> Signed-off-by: Steve Wise <swise at opengridcomputing.com>
> Signed-off-by: Karen Xie <kxie at chelsio.com>
> ---
>
>  0 files changed, 0 insertions(+), 0 deletions(-)
>
> diff --git a/kernel_patches/fixes/cxgb3_00500_iscsi.patch b/kernel_patches/fixes/cxgb3_00500_iscsi.patch
> new file mode 100644
> index 0000000..4db8816
> --- /dev/null
> +++ b/kernel_patches/fixes/cxgb3_00500_iscsi.patch
> @@ -0,0 +1,302 @@
> +cxgb3 - add support for private iscsi ip address.
> +
> +From: Karen Xie <kxie at chelsio.com>
> +
> +The accelerated iSCSI traffic uses a private IP address unknown to the OS.
> +The driver has to reply to ARP requests that target this private IP address.
> +
> +Signed-off-by: Karen Xie <kxie at chelsio.com>
> +---
> +
> +diff -uNr linux-2.6.27.orig/drivers/net/cxgb3/adapter.h linux-2.6.27.iscsi/drivers/net/cxgb3/adapter.h
> +--- linux-2.6.27.orig/drivers/net/cxgb3/adapter.h	2008-10-09 15:13:53.000000000 -0700
> ++++ linux-2.6.27.iscsi/drivers/net/cxgb3/adapter.h	2008-11-20 13:27:01.000000000 -0800
> +@@ -64,6 +64,7 @@
> + 	struct link_config link_config;
> + 	struct net_device_stats netstats;
> + 	int activity;
> ++	__be32 iscsi_ipv4addr;
> + };
> + 
> + enum {				/* adapter flags */
> +diff -uNr linux-2.6.27.orig/drivers/net/cxgb3/cxgb3_ctl_defs.h linux-2.6.27.iscsi/drivers/net/cxgb3/cxgb3_ctl_defs.h
> +--- linux-2.6.27.orig/drivers/net/cxgb3/cxgb3_ctl_defs.h	2008-10-09 15:13:53.000000000 -0700
> ++++ linux-2.6.27.iscsi/drivers/net/cxgb3/cxgb3_ctl_defs.h	2008-11-20 13:27:03.000000000 -0800
> +@@ -57,6 +57,7 @@
> + 	RDMA_GET_MIB		= 19,
> + 
> + 	GET_RX_PAGE_INFO	= 50,
> ++	GET_ISCSI_IPV4ADDR	= 51,
> + };
> + 
> + /*
> +@@ -86,6 +87,12 @@
> + 	u16 vlan_tag;
> + };
> + 
> ++/* Structure used to request a port's iSCSI IPv4 address */
> ++struct iscsi_ipv4addr {
> ++	struct net_device *dev;	/* the net_device */
> ++	__be32 ipv4addr;	/* the return iSCSI IPv4 address */
> ++};
> ++
> + struct pci_dev;
> + 
> + /*
> +diff -uNr linux-2.6.27.orig/drivers/net/cxgb3/cxgb3_main.c linux-2.6.27.iscsi/drivers/net/cxgb3/cxgb3_main.c
> +--- linux-2.6.27.orig/drivers/net/cxgb3/cxgb3_main.c	2008-10-09 15:13:53.000000000 -0700
> ++++ linux-2.6.27.iscsi/drivers/net/cxgb3/cxgb3_main.c	2008-11-20 13:27:04.000000000 -0800
> +@@ -34,6 +34,7 @@
> + #include <linux/init.h>
> + #include <linux/pci.h>
> + #include <linux/dma-mapping.h>
> ++#include <linux/inet.h>
> + #include <linux/netdevice.h>
> + #include <linux/etherdevice.h>
> + #include <linux/if_vlan.h>
> +@@ -687,6 +688,46 @@
> + 
> + static struct attribute_group offload_attr_group = {.attrs = offload_attrs };
> + 
> ++static ssize_t iscsi_ipv4addr_attr_show(struct device *d, char *buf)
> ++{
> ++	struct port_info *pi = netdev_priv(to_net_dev(d));
> ++	__be32 a = pi->iscsi_ipv4addr;
> ++
> ++	return sprintf(buf, NIPQUAD_FMT "\n", NIPQUAD(a));
> ++}
> ++
> ++static ssize_t iscsi_ipv4addr_attr_store(struct device *d,
> ++					 const char *buf, size_t len)
> ++{
> ++	struct port_info *pi = netdev_priv(to_net_dev(d));
> ++
> ++	pi->iscsi_ipv4addr = in_aton(buf);
> ++	return len;
> ++}
> ++
> ++#define ISCSI_IPADDR_ATTR(name) \
> ++static ssize_t show_##name(struct device *d, struct device_attribute *attr, \
> ++			   char *buf) \
> ++{ \
> ++	return iscsi_ipv4addr_attr_show(d, buf); \
> ++} \
> ++static ssize_t store_##name(struct device *d, struct device_attribute *attr, \
> ++			    const char *buf, size_t len) \
> ++{ \
> ++	return iscsi_ipv4addr_attr_store(d, buf, len); \
> ++} \
> ++static DEVICE_ATTR(name, S_IRUGO | S_IWUSR, show_##name, store_##name)
> ++
> ++ISCSI_IPADDR_ATTR(iscsi_ipv4addr);
> ++
> ++static struct attribute *iscsi_offload_attrs[] = {
> ++	&dev_attr_iscsi_ipv4addr.attr,
> ++	NULL
> ++};
> ++
> ++static struct attribute_group iscsi_offload_attr_group = {
> ++	.attrs = iscsi_offload_attrs
> ++};
> + /*
> +  * Sends an sk_buff to an offload queue driver
> +  * after dealing with any active network taps.
> +@@ -1078,6 +1119,7 @@
> + 		if (err)
> + 			printk(KERN_WARNING
> + 			       "Could not initialize offload capabilities\n");
> ++		sysfs_create_group(&dev->dev.kobj, &iscsi_offload_attr_group);
> + 	}
> + 
> + 	link_start(dev);
> +@@ -1100,6 +1142,9 @@
> + 	netif_carrier_off(dev);
> + 	t3_mac_disable(&pi->mac, MAC_DIRECTION_TX | MAC_DIRECTION_RX);
> + 
> ++	if (is_offload(adapter) && !ofld_disable)
> ++		sysfs_remove_group(&dev->dev.kobj, &iscsi_offload_attr_group);
> ++
> + 	spin_lock(&adapter->work_lock);	/* sync with update task */
> + 	clear_bit(pi->port_id, &adapter->open_device_map);
> + 	spin_unlock(&adapter->work_lock);
> +diff -uNr linux-2.6.27.orig/drivers/net/cxgb3/cxgb3_offload.c linux-2.6.27.iscsi/drivers/net/cxgb3/cxgb3_offload.c
> +--- linux-2.6.27.orig/drivers/net/cxgb3/cxgb3_offload.c	2008-10-09 15:13:53.000000000 -0700
> ++++ linux-2.6.27.iscsi/drivers/net/cxgb3/cxgb3_offload.c	2008-11-20 13:27:05.000000000 -0800
> +@@ -182,7 +182,9 @@
> + static int cxgb_ulp_iscsi_ctl(struct adapter *adapter, unsigned int req,
> + 			      void *data)
> + {
> ++	int i;
> + 	int ret = 0;
> ++	unsigned int val = 0;
> + 	struct ulp_iscsi_info *uiip = data;
> + 
> + 	switch (req) {
> +@@ -191,6 +193,9 @@
> + 		uiip->llimit = t3_read_reg(adapter, A_ULPRX_ISCSI_LLIMIT);
> + 		uiip->ulimit = t3_read_reg(adapter, A_ULPRX_ISCSI_ULIMIT);
> + 		uiip->tagmask = t3_read_reg(adapter, A_ULPRX_ISCSI_TAGMASK);
> ++		val = t3_read_reg(adapter, A_ULPRX_ISCSI_PSZ);
> ++		for (i = 0; i < 4; i++, val >>= 8)
> ++			uiip->pgsz_factor[i] = val & 0xFF;
> + 		/*
> + 		 * On tx, the iscsi pdu has to be <= tx page size and has to
> + 		 * fit into the Tx PM FIFO.
> +@@ -198,25 +203,25 @@
> + 		uiip->max_txsz = min(adapter->params.tp.tx_pg_size,
> + 				     t3_read_reg(adapter, A_PM1_TX_CFG) >> 17);
> + 		/* on rx, the iscsi pdu has to be < rx page size and the
> +-		   whole pdu + cpl headers has to fit into one sge buffer */
> +-		uiip->max_rxsz = min_t(unsigned int,
> +-				       adapter->params.tp.rx_pg_size,
> +-				       (adapter->sge.qs[0].fl[1].buf_size -
> +-					sizeof(struct cpl_rx_data) * 2 -
> +-					sizeof(struct cpl_rx_data_ddp)));
> ++		   the max rx data length programmed in TP */
> ++		uiip->max_rxsz = min(adapter->params.tp.rx_pg_size,
> ++				     ((t3_read_reg(adapter, A_TP_PARA_REG2))
> ++					>> S_MAXRXDATA) & M_MAXRXDATA);
> + 		break;
> + 	case ULP_ISCSI_SET_PARAMS:
> + 		t3_write_reg(adapter, A_ULPRX_ISCSI_TAGMASK, uiip->tagmask);
> + 		/* set MaxRxData and MaxCoalesceSize to 16224 */
> + 		t3_write_reg(adapter, A_TP_PARA_REG2, 0x3f603f60);
> + 		/* program the ddp page sizes */
> +-		{
> +-			int i;
> +-			unsigned int val = 0;
> +-			for (i = 0; i < 4; i++)
> +-				val |= (uiip->pgsz_factor[i] & 0xF) << (8 * i);
> +-			if (val)
> +-				t3_write_reg(adapter, A_ULPRX_ISCSI_PSZ, val);
> ++		for (i = 0; i < 4; i++)
> ++			val |= (uiip->pgsz_factor[i] & 0xF) << (8 * i);
> ++		if (val && (val != t3_read_reg(adapter, A_ULPRX_ISCSI_PSZ))) {
> ++			printk(KERN_INFO
> ++				"%s, setting iscsi pgsz 0x%x, %u,%u,%u,%u.\n",
> ++				adapter->name, val, uiip->pgsz_factor[0],
> ++				uiip->pgsz_factor[1], uiip->pgsz_factor[2],
> ++				uiip->pgsz_factor[3]); 
> ++			t3_write_reg(adapter, A_ULPRX_ISCSI_PSZ, val);
> + 		}
> + 		break;
> + 	default:
> +@@ -407,6 +412,12 @@
> + 		rx_page_info->page_size = tp->rx_pg_size;
> + 		rx_page_info->num = tp->rx_num_pgs;
> + 		break;
> ++	case GET_ISCSI_IPV4ADDR: {
> ++		struct iscsi_ipv4addr *p = data;
> ++		struct port_info *pi = netdev_priv(p->dev);
> ++		p->ipv4addr = pi->iscsi_ipv4addr;
> ++		break;
> ++	}
> + 	default:
> + 		return -EOPNOTSUPP;
> + 	}
> +diff -uNr linux-2.6.27.orig/drivers/net/cxgb3/sge.c linux-2.6.27.iscsi/drivers/net/cxgb3/sge.c
> +--- linux-2.6.27.orig/drivers/net/cxgb3/sge.c	2008-10-09 15:13:53.000000000 -0700
> ++++ linux-2.6.27.iscsi/drivers/net/cxgb3/sge.c	2008-11-20 13:27:08.000000000 -0800
> +@@ -36,6 +36,7 @@
> + #include <linux/ip.h>
> + #include <linux/tcp.h>
> + #include <linux/dma-mapping.h>
> ++#include <net/arp.h>
> + #include "common.h"
> + #include "regs.h"
> + #include "sge_defs.h"
> +@@ -1859,6 +1860,54 @@
> + }
> + 
> + /**
> ++ *	cxgb3_arp_process - process an ARP request probing a private IP address
> ++ *	@adapter: the adapter
> ++ *	@skb: the skbuff containing the ARP request
> ++ *
> ++ *	Check if the ARP request is probing the private IP address
> ++ *	dedicated to iSCSI, generate an ARP reply if so.
> ++ */
> ++static void cxgb3_arp_process(struct adapter *adapter, struct sk_buff *skb)
> ++{
> ++	struct net_device *dev = skb->dev;
> ++	struct port_info *pi;
> ++	struct arphdr *arp;
> ++	unsigned char *arp_ptr;
> ++	unsigned char *sha;
> ++	__be32 sip, tip;
> ++
> ++	if (!dev)
> ++		return;
> ++
> ++	skb_reset_network_header(skb);
> ++	arp = arp_hdr(skb);
> ++
> ++	if (arp->ar_op != htons(ARPOP_REQUEST))
> ++		return;
> ++
> ++	arp_ptr = (unsigned char *)(arp + 1);
> ++	sha = arp_ptr;
> ++	arp_ptr += dev->addr_len;
> ++	memcpy(&sip, arp_ptr, sizeof(sip));
> ++	arp_ptr += sizeof(sip);
> ++	arp_ptr += dev->addr_len;
> ++	memcpy(&tip, arp_ptr, sizeof(tip));
> ++
> ++	pi = netdev_priv(dev);
> ++	if (tip != pi->iscsi_ipv4addr)
> ++		return;
> ++
> ++	arp_send(ARPOP_REPLY, ETH_P_ARP, sip, dev, tip, sha,
> ++		 dev->dev_addr, sha);
> ++
> ++}
> ++
> ++static inline int is_arp(struct sk_buff *skb)
> ++{
> ++	return skb->protocol == htons(ETH_P_ARP);
> ++}
> ++
> ++/**
> +  *	rx_eth - process an ingress ethernet packet
> +  *	@adap: the adapter
> +  *	@rq: the response queue that received the packet
> +@@ -1882,7 +1931,7 @@
> + 	pi = netdev_priv(skb->dev);
> + 	if (pi->rx_csum_offload && p->csum_valid && p->csum == htons(0xffff) &&
> + 	    !p->fragment) {
> +-		rspq_to_qset(rq)->port_stats[SGE_PSTAT_RX_CSUM_GOOD]++;
> ++		qs->port_stats[SGE_PSTAT_RX_CSUM_GOOD]++;
> + 		skb->ip_summed = CHECKSUM_UNNECESSARY;
> + 	} else
> + 		skb->ip_summed = CHECKSUM_NONE;
> +@@ -1897,16 +1946,28 @@
> + 							     grp,
> + 							     ntohs(p->vlan),
> + 							     p);
> +-			else
> ++			else {
> ++				if (unlikely(pi->iscsi_ipv4addr &&
> ++					     is_arp(skb))) {
> ++					unsigned short vtag = ntohs(p->vlan) &
> ++							VLAN_VID_MASK;
> ++                                        skb->dev = vlan_group_get_device(grp,
> ++									 vtag);
> ++					cxgb3_arp_process(adap, skb);
> ++				}
> + 				__vlan_hwaccel_rx(skb, grp, ntohs(p->vlan),
> + 					  	  rq->polling);
> ++			}
> + 		else
> + 			dev_kfree_skb_any(skb);
> + 	} else if (rq->polling) {
> + 		if (lro)
> + 			lro_receive_skb(&qs->lro_mgr, skb, p);
> +-		else
> ++		else {
> ++			if (unlikely(pi->iscsi_ipv4addr && is_arp(skb)))
> ++				cxgb3_arp_process(adap, skb);
> + 			netif_receive_skb(skb);
> ++		}
> + 	} else
> + 		netif_rx(skb);
> + }
>
> _______________________________________________
> rds-devel mailing list
> rds-devel at oss.oracle.com
> http://oss.oracle.com/mailman/listinfo/rds-devel
>   




More information about the rds-devel mailing list