[rds-devel] [PATCH] cxgb3: add iscsi patch for suse.
Steve Wise
swise at opengridcomputing.com
Thu Jan 8 13:24:10 PST 2009
From: Steve Wise <swise at opengridcomputing.com>
Apply this to an ofed-1.4 kernel tree, then configure and build as
normal; the ofed cxgb3 module should then support both RDMA and iSCSI.
Signed-off-by: Steve Wise <swise at opengridcomputing.com>
Signed-off-by: Karen Xie <kxie at chelsio.com>
---
0 files changed, 0 insertions(+), 0 deletions(-)
diff --git a/kernel_patches/fixes/cxgb3_00500_iscsi.patch b/kernel_patches/fixes/cxgb3_00500_iscsi.patch
new file mode 100644
index 0000000..4db8816
--- /dev/null
+++ b/kernel_patches/fixes/cxgb3_00500_iscsi.patch
@@ -0,0 +1,302 @@
+cxgb3 - add support for private iscsi ip address.
+
+From: Karen Xie <kxie at chelsio.com>
+
+The accelerated iSCSI traffic uses a private IP address unknown to the OS.
+The driver has to reply to ARP requests dedicated to the private IP address.
+
+Signed-off-by: Karen Xie <kxie at chelsio.com>
+---
+
+diff -uNr linux-2.6.27.orig/drivers/net/cxgb3/adapter.h linux-2.6.27.iscsi/drivers/net/cxgb3/adapter.h
+--- linux-2.6.27.orig/drivers/net/cxgb3/adapter.h 2008-10-09 15:13:53.000000000 -0700
++++ linux-2.6.27.iscsi/drivers/net/cxgb3/adapter.h 2008-11-20 13:27:01.000000000 -0800
+@@ -64,6 +64,7 @@
+ struct link_config link_config;
+ struct net_device_stats netstats;
+ int activity;
++ __be32 iscsi_ipv4addr;
+ };
+
+ enum { /* adapter flags */
+diff -uNr linux-2.6.27.orig/drivers/net/cxgb3/cxgb3_ctl_defs.h linux-2.6.27.iscsi/drivers/net/cxgb3/cxgb3_ctl_defs.h
+--- linux-2.6.27.orig/drivers/net/cxgb3/cxgb3_ctl_defs.h 2008-10-09 15:13:53.000000000 -0700
++++ linux-2.6.27.iscsi/drivers/net/cxgb3/cxgb3_ctl_defs.h 2008-11-20 13:27:03.000000000 -0800
+@@ -57,6 +57,7 @@
+ RDMA_GET_MIB = 19,
+
+ GET_RX_PAGE_INFO = 50,
++ GET_ISCSI_IPV4ADDR = 51,
+ };
+
+ /*
+@@ -86,6 +87,12 @@
+ u16 vlan_tag;
+ };
+
++/* Structure used to request a port's iSCSI IPv4 address */
++struct iscsi_ipv4addr {
++ struct net_device *dev; /* the net_device */
++ __be32 ipv4addr; /* the return iSCSI IPv4 address */
++};
++
+ struct pci_dev;
+
+ /*
+diff -uNr linux-2.6.27.orig/drivers/net/cxgb3/cxgb3_main.c linux-2.6.27.iscsi/drivers/net/cxgb3/cxgb3_main.c
+--- linux-2.6.27.orig/drivers/net/cxgb3/cxgb3_main.c 2008-10-09 15:13:53.000000000 -0700
++++ linux-2.6.27.iscsi/drivers/net/cxgb3/cxgb3_main.c 2008-11-20 13:27:04.000000000 -0800
+@@ -34,6 +34,7 @@
+ #include <linux/init.h>
+ #include <linux/pci.h>
+ #include <linux/dma-mapping.h>
++#include <linux/inet.h>
+ #include <linux/netdevice.h>
+ #include <linux/etherdevice.h>
+ #include <linux/if_vlan.h>
+@@ -687,6 +688,46 @@
+
+ static struct attribute_group offload_attr_group = {.attrs = offload_attrs };
+
++static ssize_t iscsi_ipv4addr_attr_show(struct device *d, char *buf)
++{
++ struct port_info *pi = netdev_priv(to_net_dev(d));
++ __be32 a = pi->iscsi_ipv4addr;
++
++ return sprintf(buf, NIPQUAD_FMT "\n", NIPQUAD(a));
++}
++
++static ssize_t iscsi_ipv4addr_attr_store(struct device *d,
++ const char *buf, size_t len)
++{
++ struct port_info *pi = netdev_priv(to_net_dev(d));
++
++ pi->iscsi_ipv4addr = in_aton(buf);
++ return len;
++}
++
++#define ISCSI_IPADDR_ATTR(name) \
++static ssize_t show_##name(struct device *d, struct device_attribute *attr, \
++ char *buf) \
++{ \
++ return iscsi_ipv4addr_attr_show(d, buf); \
++} \
++static ssize_t store_##name(struct device *d, struct device_attribute *attr, \
++ const char *buf, size_t len) \
++{ \
++ return iscsi_ipv4addr_attr_store(d, buf, len); \
++} \
++static DEVICE_ATTR(name, S_IRUGO | S_IWUSR, show_##name, store_##name)
++
++ISCSI_IPADDR_ATTR(iscsi_ipv4addr);
++
++static struct attribute *iscsi_offload_attrs[] = {
++ &dev_attr_iscsi_ipv4addr.attr,
++ NULL
++};
++
++static struct attribute_group iscsi_offload_attr_group = {
++ .attrs = iscsi_offload_attrs
++};
+ /*
+ * Sends an sk_buff to an offload queue driver
+ * after dealing with any active network taps.
+@@ -1078,6 +1119,7 @@
+ if (err)
+ printk(KERN_WARNING
+ "Could not initialize offload capabilities\n");
++ sysfs_create_group(&dev->dev.kobj, &iscsi_offload_attr_group);
+ }
+
+ link_start(dev);
+@@ -1100,6 +1142,9 @@
+ netif_carrier_off(dev);
+ t3_mac_disable(&pi->mac, MAC_DIRECTION_TX | MAC_DIRECTION_RX);
+
++ if (is_offload(adapter) && !ofld_disable)
++ sysfs_remove_group(&dev->dev.kobj, &iscsi_offload_attr_group);
++
+ spin_lock(&adapter->work_lock); /* sync with update task */
+ clear_bit(pi->port_id, &adapter->open_device_map);
+ spin_unlock(&adapter->work_lock);
+diff -uNr linux-2.6.27.orig/drivers/net/cxgb3/cxgb3_offload.c linux-2.6.27.iscsi/drivers/net/cxgb3/cxgb3_offload.c
+--- linux-2.6.27.orig/drivers/net/cxgb3/cxgb3_offload.c 2008-10-09 15:13:53.000000000 -0700
++++ linux-2.6.27.iscsi/drivers/net/cxgb3/cxgb3_offload.c 2008-11-20 13:27:05.000000000 -0800
+@@ -182,7 +182,9 @@
+ static int cxgb_ulp_iscsi_ctl(struct adapter *adapter, unsigned int req,
+ void *data)
+ {
++ int i;
+ int ret = 0;
++ unsigned int val = 0;
+ struct ulp_iscsi_info *uiip = data;
+
+ switch (req) {
+@@ -191,6 +193,9 @@
+ uiip->llimit = t3_read_reg(adapter, A_ULPRX_ISCSI_LLIMIT);
+ uiip->ulimit = t3_read_reg(adapter, A_ULPRX_ISCSI_ULIMIT);
+ uiip->tagmask = t3_read_reg(adapter, A_ULPRX_ISCSI_TAGMASK);
++ val = t3_read_reg(adapter, A_ULPRX_ISCSI_PSZ);
++ for (i = 0; i < 4; i++, val >>= 8)
++ uiip->pgsz_factor[i] = val & 0xFF;
+ /*
+ * On tx, the iscsi pdu has to be <= tx page size and has to
+ * fit into the Tx PM FIFO.
+@@ -198,25 +203,25 @@
+ uiip->max_txsz = min(adapter->params.tp.tx_pg_size,
+ t3_read_reg(adapter, A_PM1_TX_CFG) >> 17);
+ /* on rx, the iscsi pdu has to be < rx page size and the
+- whole pdu + cpl headers has to fit into one sge buffer */
+- uiip->max_rxsz = min_t(unsigned int,
+- adapter->params.tp.rx_pg_size,
+- (adapter->sge.qs[0].fl[1].buf_size -
+- sizeof(struct cpl_rx_data) * 2 -
+- sizeof(struct cpl_rx_data_ddp)));
++ the max rx data length programmed in TP */
++ uiip->max_rxsz = min(adapter->params.tp.rx_pg_size,
++ ((t3_read_reg(adapter, A_TP_PARA_REG2))
++ >> S_MAXRXDATA) & M_MAXRXDATA);
+ break;
+ case ULP_ISCSI_SET_PARAMS:
+ t3_write_reg(adapter, A_ULPRX_ISCSI_TAGMASK, uiip->tagmask);
+ /* set MaxRxData and MaxCoalesceSize to 16224 */
+ t3_write_reg(adapter, A_TP_PARA_REG2, 0x3f603f60);
+ /* program the ddp page sizes */
+- {
+- int i;
+- unsigned int val = 0;
+- for (i = 0; i < 4; i++)
+- val |= (uiip->pgsz_factor[i] & 0xF) << (8 * i);
+- if (val)
+- t3_write_reg(adapter, A_ULPRX_ISCSI_PSZ, val);
++ for (i = 0; i < 4; i++)
++ val |= (uiip->pgsz_factor[i] & 0xF) << (8 * i);
++ if (val && (val != t3_read_reg(adapter, A_ULPRX_ISCSI_PSZ))) {
++ printk(KERN_INFO
++ "%s, setting iscsi pgsz 0x%x, %u,%u,%u,%u.\n",
++ adapter->name, val, uiip->pgsz_factor[0],
++ uiip->pgsz_factor[1], uiip->pgsz_factor[2],
++ uiip->pgsz_factor[3]);
++ t3_write_reg(adapter, A_ULPRX_ISCSI_PSZ, val);
+ }
+ break;
+ default:
+@@ -407,6 +412,12 @@
+ rx_page_info->page_size = tp->rx_pg_size;
+ rx_page_info->num = tp->rx_num_pgs;
+ break;
++ case GET_ISCSI_IPV4ADDR: {
++ struct iscsi_ipv4addr *p = data;
++ struct port_info *pi = netdev_priv(p->dev);
++ p->ipv4addr = pi->iscsi_ipv4addr;
++ break;
++ }
+ default:
+ return -EOPNOTSUPP;
+ }
+diff -uNr linux-2.6.27.orig/drivers/net/cxgb3/sge.c linux-2.6.27.iscsi/drivers/net/cxgb3/sge.c
+--- linux-2.6.27.orig/drivers/net/cxgb3/sge.c 2008-10-09 15:13:53.000000000 -0700
++++ linux-2.6.27.iscsi/drivers/net/cxgb3/sge.c 2008-11-20 13:27:08.000000000 -0800
+@@ -36,6 +36,7 @@
+ #include <linux/ip.h>
+ #include <linux/tcp.h>
+ #include <linux/dma-mapping.h>
++#include <net/arp.h>
+ #include "common.h"
+ #include "regs.h"
+ #include "sge_defs.h"
+@@ -1859,6 +1860,54 @@
+ }
+
+ /**
++ * cxgb3_arp_process - process an ARP request probing a private IP address
++ * @adapter: the adapter
++ * @skb: the skbuff containing the ARP request
++ *
++ * Check if the ARP request is probing the private IP address
++ * dedicated to iSCSI, generate an ARP reply if so.
++ */
++static void cxgb3_arp_process(struct adapter *adapter, struct sk_buff *skb)
++{
++ struct net_device *dev = skb->dev;
++ struct port_info *pi;
++ struct arphdr *arp;
++ unsigned char *arp_ptr;
++ unsigned char *sha;
++ __be32 sip, tip;
++
++ if (!dev)
++ return;
++
++ skb_reset_network_header(skb);
++ arp = arp_hdr(skb);
++
++ if (arp->ar_op != htons(ARPOP_REQUEST))
++ return;
++
++ arp_ptr = (unsigned char *)(arp + 1);
++ sha = arp_ptr;
++ arp_ptr += dev->addr_len;
++ memcpy(&sip, arp_ptr, sizeof(sip));
++ arp_ptr += sizeof(sip);
++ arp_ptr += dev->addr_len;
++ memcpy(&tip, arp_ptr, sizeof(tip));
++
++ pi = netdev_priv(dev);
++ if (tip != pi->iscsi_ipv4addr)
++ return;
++
++ arp_send(ARPOP_REPLY, ETH_P_ARP, sip, dev, tip, sha,
++ dev->dev_addr, sha);
++
++}
++
++static inline int is_arp(struct sk_buff *skb)
++{
++ return skb->protocol == htons(ETH_P_ARP);
++}
++
++/**
+ * rx_eth - process an ingress ethernet packet
+ * @adap: the adapter
+ * @rq: the response queue that received the packet
+@@ -1882,7 +1931,7 @@
+ pi = netdev_priv(skb->dev);
+ if (pi->rx_csum_offload && p->csum_valid && p->csum == htons(0xffff) &&
+ !p->fragment) {
+- rspq_to_qset(rq)->port_stats[SGE_PSTAT_RX_CSUM_GOOD]++;
++ qs->port_stats[SGE_PSTAT_RX_CSUM_GOOD]++;
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
+ } else
+ skb->ip_summed = CHECKSUM_NONE;
+@@ -1897,16 +1946,28 @@
+ grp,
+ ntohs(p->vlan),
+ p);
+- else
++ else {
++ if (unlikely(pi->iscsi_ipv4addr &&
++ is_arp(skb))) {
++ unsigned short vtag = ntohs(p->vlan) &
++ VLAN_VID_MASK;
++ skb->dev = vlan_group_get_device(grp,
++ vtag);
++ cxgb3_arp_process(adap, skb);
++ }
+ __vlan_hwaccel_rx(skb, grp, ntohs(p->vlan),
+ rq->polling);
++ }
+ else
+ dev_kfree_skb_any(skb);
+ } else if (rq->polling) {
+ if (lro)
+ lro_receive_skb(&qs->lro_mgr, skb, p);
+- else
++ else {
++ if (unlikely(pi->iscsi_ipv4addr && is_arp(skb)))
++ cxgb3_arp_process(adap, skb);
+ netif_receive_skb(skb);
++ }
+ } else
+ netif_rx(skb);
+ }
More information about the rds-devel
mailing list