[rds-devel] [PATCH] cxgb3: add iscsi patch for suse.
Steve Wise
swise at opengridcomputing.com
Thu Jan 8 14:25:33 PST 2009
Please ignore this.
Steve Wise wrote:
> From: Steve Wise <swise at opengridcomputing.com>
>
> Apply this to an ofed-1.4 kernel tree, then configure and build as
> normal and the ofed cxgb3 module should support both RDMA and iSCSI.
>
> Signed-off-by: Steve Wise <swise at opengridcomputing.com>
> Signed-off-by: Karen Xie <kxie at chelsio.com>
> ---
>
> 0 files changed, 0 insertions(+), 0 deletions(-)
>
> diff --git a/kernel_patches/fixes/cxgb3_00500_iscsi.patch b/kernel_patches/fixes/cxgb3_00500_iscsi.patch
> new file mode 100644
> index 0000000..4db8816
> --- /dev/null
> +++ b/kernel_patches/fixes/cxgb3_00500_iscsi.patch
> @@ -0,0 +1,302 @@
> +cxgb3 - add support for private iscsi ip address.
> +
> +From: Karen Xie <kxie at chelsio.com>
> +
> +The accelerated iSCSI traffic uses a private IP address unknown to the OS.
> +The driver has to reply to ARP requests dedicated to the private IP address.
> +
> +Signed-off-by: Karen Xie <kxie at chelsio.com>
> +---
> +
> +diff -uNr linux-2.6.27.orig/drivers/net/cxgb3/adapter.h linux-2.6.27.iscsi/drivers/net/cxgb3/adapter.h
> +--- linux-2.6.27.orig/drivers/net/cxgb3/adapter.h 2008-10-09 15:13:53.000000000 -0700
> ++++ linux-2.6.27.iscsi/drivers/net/cxgb3/adapter.h 2008-11-20 13:27:01.000000000 -0800
> +@@ -64,6 +64,7 @@
> + struct link_config link_config;
> + struct net_device_stats netstats;
> + int activity;
> ++ __be32 iscsi_ipv4addr;
> + };
> +
> + enum { /* adapter flags */
> +diff -uNr linux-2.6.27.orig/drivers/net/cxgb3/cxgb3_ctl_defs.h linux-2.6.27.iscsi/drivers/net/cxgb3/cxgb3_ctl_defs.h
> +--- linux-2.6.27.orig/drivers/net/cxgb3/cxgb3_ctl_defs.h 2008-10-09 15:13:53.000000000 -0700
> ++++ linux-2.6.27.iscsi/drivers/net/cxgb3/cxgb3_ctl_defs.h 2008-11-20 13:27:03.000000000 -0800
> +@@ -57,6 +57,7 @@
> + RDMA_GET_MIB = 19,
> +
> + GET_RX_PAGE_INFO = 50,
> ++ GET_ISCSI_IPV4ADDR = 51,
> + };
> +
> + /*
> +@@ -86,6 +87,12 @@
> + u16 vlan_tag;
> + };
> +
> ++/* Structure used to request a port's iSCSI IPv4 address */
> ++struct iscsi_ipv4addr {
> ++ struct net_device *dev; /* the net_device */
> ++ __be32 ipv4addr; /* the return iSCSI IPv4 address */
> ++};
> ++
> + struct pci_dev;
> +
> + /*
> +diff -uNr linux-2.6.27.orig/drivers/net/cxgb3/cxgb3_main.c linux-2.6.27.iscsi/drivers/net/cxgb3/cxgb3_main.c
> +--- linux-2.6.27.orig/drivers/net/cxgb3/cxgb3_main.c 2008-10-09 15:13:53.000000000 -0700
> ++++ linux-2.6.27.iscsi/drivers/net/cxgb3/cxgb3_main.c 2008-11-20 13:27:04.000000000 -0800
> +@@ -34,6 +34,7 @@
> + #include <linux/init.h>
> + #include <linux/pci.h>
> + #include <linux/dma-mapping.h>
> ++#include <linux/inet.h>
> + #include <linux/netdevice.h>
> + #include <linux/etherdevice.h>
> + #include <linux/if_vlan.h>
> +@@ -687,6 +688,46 @@
> +
> + static struct attribute_group offload_attr_group = {.attrs = offload_attrs };
> +
> ++static ssize_t iscsi_ipv4addr_attr_show(struct device *d, char *buf)
> ++{
> ++ struct port_info *pi = netdev_priv(to_net_dev(d));
> ++ __be32 a = pi->iscsi_ipv4addr;
> ++
> ++ return sprintf(buf, NIPQUAD_FMT "\n", NIPQUAD(a));
> ++}
> ++
> ++static ssize_t iscsi_ipv4addr_attr_store(struct device *d,
> ++ const char *buf, size_t len)
> ++{
> ++ struct port_info *pi = netdev_priv(to_net_dev(d));
> ++
> ++ pi->iscsi_ipv4addr = in_aton(buf);
> ++ return len;
> ++}
> ++
> ++#define ISCSI_IPADDR_ATTR(name) \
> ++static ssize_t show_##name(struct device *d, struct device_attribute *attr, \
> ++ char *buf) \
> ++{ \
> ++ return iscsi_ipv4addr_attr_show(d, buf); \
> ++} \
> ++static ssize_t store_##name(struct device *d, struct device_attribute *attr, \
> ++ const char *buf, size_t len) \
> ++{ \
> ++ return iscsi_ipv4addr_attr_store(d, buf, len); \
> ++} \
> ++static DEVICE_ATTR(name, S_IRUGO | S_IWUSR, show_##name, store_##name)
> ++
> ++ISCSI_IPADDR_ATTR(iscsi_ipv4addr);
> ++
> ++static struct attribute *iscsi_offload_attrs[] = {
> ++ &dev_attr_iscsi_ipv4addr.attr,
> ++ NULL
> ++};
> ++
> ++static struct attribute_group iscsi_offload_attr_group = {
> ++ .attrs = iscsi_offload_attrs
> ++};
> + /*
> + * Sends an sk_buff to an offload queue driver
> + * after dealing with any active network taps.
> +@@ -1078,6 +1119,7 @@
> + if (err)
> + printk(KERN_WARNING
> + "Could not initialize offload capabilities\n");
> ++ sysfs_create_group(&dev->dev.kobj, &iscsi_offload_attr_group);
> + }
> +
> + link_start(dev);
> +@@ -1100,6 +1142,9 @@
> + netif_carrier_off(dev);
> + t3_mac_disable(&pi->mac, MAC_DIRECTION_TX | MAC_DIRECTION_RX);
> +
> ++ if (is_offload(adapter) && !ofld_disable)
> ++ sysfs_remove_group(&dev->dev.kobj, &iscsi_offload_attr_group);
> ++
> + spin_lock(&adapter->work_lock); /* sync with update task */
> + clear_bit(pi->port_id, &adapter->open_device_map);
> + spin_unlock(&adapter->work_lock);
> +diff -uNr linux-2.6.27.orig/drivers/net/cxgb3/cxgb3_offload.c linux-2.6.27.iscsi/drivers/net/cxgb3/cxgb3_offload.c
> +--- linux-2.6.27.orig/drivers/net/cxgb3/cxgb3_offload.c 2008-10-09 15:13:53.000000000 -0700
> ++++ linux-2.6.27.iscsi/drivers/net/cxgb3/cxgb3_offload.c 2008-11-20 13:27:05.000000000 -0800
> +@@ -182,7 +182,9 @@
> + static int cxgb_ulp_iscsi_ctl(struct adapter *adapter, unsigned int req,
> + void *data)
> + {
> ++ int i;
> + int ret = 0;
> ++ unsigned int val = 0;
> + struct ulp_iscsi_info *uiip = data;
> +
> + switch (req) {
> +@@ -191,6 +193,9 @@
> + uiip->llimit = t3_read_reg(adapter, A_ULPRX_ISCSI_LLIMIT);
> + uiip->ulimit = t3_read_reg(adapter, A_ULPRX_ISCSI_ULIMIT);
> + uiip->tagmask = t3_read_reg(adapter, A_ULPRX_ISCSI_TAGMASK);
> ++ val = t3_read_reg(adapter, A_ULPRX_ISCSI_PSZ);
> ++ for (i = 0; i < 4; i++, val >>= 8)
> ++ uiip->pgsz_factor[i] = val & 0xFF;
> + /*
> + * On tx, the iscsi pdu has to be <= tx page size and has to
> + * fit into the Tx PM FIFO.
> +@@ -198,25 +203,25 @@
> + uiip->max_txsz = min(adapter->params.tp.tx_pg_size,
> + t3_read_reg(adapter, A_PM1_TX_CFG) >> 17);
> + /* on rx, the iscsi pdu has to be < rx page size and the
> +- whole pdu + cpl headers has to fit into one sge buffer */
> +- uiip->max_rxsz = min_t(unsigned int,
> +- adapter->params.tp.rx_pg_size,
> +- (adapter->sge.qs[0].fl[1].buf_size -
> +- sizeof(struct cpl_rx_data) * 2 -
> +- sizeof(struct cpl_rx_data_ddp)));
> ++ the max rx data length programmed in TP */
> ++ uiip->max_rxsz = min(adapter->params.tp.rx_pg_size,
> ++ ((t3_read_reg(adapter, A_TP_PARA_REG2))
> ++ >> S_MAXRXDATA) & M_MAXRXDATA);
> + break;
> + case ULP_ISCSI_SET_PARAMS:
> + t3_write_reg(adapter, A_ULPRX_ISCSI_TAGMASK, uiip->tagmask);
> + /* set MaxRxData and MaxCoalesceSize to 16224 */
> + t3_write_reg(adapter, A_TP_PARA_REG2, 0x3f603f60);
> + /* program the ddp page sizes */
> +- {
> +- int i;
> +- unsigned int val = 0;
> +- for (i = 0; i < 4; i++)
> +- val |= (uiip->pgsz_factor[i] & 0xF) << (8 * i);
> +- if (val)
> +- t3_write_reg(adapter, A_ULPRX_ISCSI_PSZ, val);
> ++ for (i = 0; i < 4; i++)
> ++ val |= (uiip->pgsz_factor[i] & 0xF) << (8 * i);
> ++ if (val && (val != t3_read_reg(adapter, A_ULPRX_ISCSI_PSZ))) {
> ++ printk(KERN_INFO
> ++ "%s, setting iscsi pgsz 0x%x, %u,%u,%u,%u.\n",
> ++ adapter->name, val, uiip->pgsz_factor[0],
> ++ uiip->pgsz_factor[1], uiip->pgsz_factor[2],
> ++ uiip->pgsz_factor[3]);
> ++ t3_write_reg(adapter, A_ULPRX_ISCSI_PSZ, val);
> + }
> + break;
> + default:
> +@@ -407,6 +412,12 @@
> + rx_page_info->page_size = tp->rx_pg_size;
> + rx_page_info->num = tp->rx_num_pgs;
> + break;
> ++ case GET_ISCSI_IPV4ADDR: {
> ++ struct iscsi_ipv4addr *p = data;
> ++ struct port_info *pi = netdev_priv(p->dev);
> ++ p->ipv4addr = pi->iscsi_ipv4addr;
> ++ break;
> ++ }
> + default:
> + return -EOPNOTSUPP;
> + }
> +diff -uNr linux-2.6.27.orig/drivers/net/cxgb3/sge.c linux-2.6.27.iscsi/drivers/net/cxgb3/sge.c
> +--- linux-2.6.27.orig/drivers/net/cxgb3/sge.c 2008-10-09 15:13:53.000000000 -0700
> ++++ linux-2.6.27.iscsi/drivers/net/cxgb3/sge.c 2008-11-20 13:27:08.000000000 -0800
> +@@ -36,6 +36,7 @@
> + #include <linux/ip.h>
> + #include <linux/tcp.h>
> + #include <linux/dma-mapping.h>
> ++#include <net/arp.h>
> + #include "common.h"
> + #include "regs.h"
> + #include "sge_defs.h"
> +@@ -1859,6 +1860,54 @@
> + }
> +
> + /**
> ++ * cxgb3_arp_process - process an ARP request probing a private IP address
> ++ * @adapter: the adapter
> ++ * @skb: the skbuff containing the ARP request
> ++ *
> ++ * Check if the ARP request is probing the private IP address
> ++ * dedicated to iSCSI, generate an ARP reply if so.
> ++ */
> ++static void cxgb3_arp_process(struct adapter *adapter, struct sk_buff *skb)
> ++{
> ++ struct net_device *dev = skb->dev;
> ++ struct port_info *pi;
> ++ struct arphdr *arp;
> ++ unsigned char *arp_ptr;
> ++ unsigned char *sha;
> ++ __be32 sip, tip;
> ++
> ++ if (!dev)
> ++ return;
> ++
> ++ skb_reset_network_header(skb);
> ++ arp = arp_hdr(skb);
> ++
> ++ if (arp->ar_op != htons(ARPOP_REQUEST))
> ++ return;
> ++
> ++ arp_ptr = (unsigned char *)(arp + 1);
> ++ sha = arp_ptr;
> ++ arp_ptr += dev->addr_len;
> ++ memcpy(&sip, arp_ptr, sizeof(sip));
> ++ arp_ptr += sizeof(sip);
> ++ arp_ptr += dev->addr_len;
> ++ memcpy(&tip, arp_ptr, sizeof(tip));
> ++
> ++ pi = netdev_priv(dev);
> ++ if (tip != pi->iscsi_ipv4addr)
> ++ return;
> ++
> ++ arp_send(ARPOP_REPLY, ETH_P_ARP, sip, dev, tip, sha,
> ++ dev->dev_addr, sha);
> ++
> ++}
> ++
> ++static inline int is_arp(struct sk_buff *skb)
> ++{
> ++ return skb->protocol == htons(ETH_P_ARP);
> ++}
> ++
> ++/**
> + * rx_eth - process an ingress ethernet packet
> + * @adap: the adapter
> + * @rq: the response queue that received the packet
> +@@ -1882,7 +1931,7 @@
> + pi = netdev_priv(skb->dev);
> + if (pi->rx_csum_offload && p->csum_valid && p->csum == htons(0xffff) &&
> + !p->fragment) {
> +- rspq_to_qset(rq)->port_stats[SGE_PSTAT_RX_CSUM_GOOD]++;
> ++ qs->port_stats[SGE_PSTAT_RX_CSUM_GOOD]++;
> + skb->ip_summed = CHECKSUM_UNNECESSARY;
> + } else
> + skb->ip_summed = CHECKSUM_NONE;
> +@@ -1897,16 +1946,28 @@
> + grp,
> + ntohs(p->vlan),
> + p);
> +- else
> ++ else {
> ++ if (unlikely(pi->iscsi_ipv4addr &&
> ++ is_arp(skb))) {
> ++ unsigned short vtag = ntohs(p->vlan) &
> ++ VLAN_VID_MASK;
> ++ skb->dev = vlan_group_get_device(grp,
> ++ vtag);
> ++ cxgb3_arp_process(adap, skb);
> ++ }
> + __vlan_hwaccel_rx(skb, grp, ntohs(p->vlan),
> + rq->polling);
> ++ }
> + else
> + dev_kfree_skb_any(skb);
> + } else if (rq->polling) {
> + if (lro)
> + lro_receive_skb(&qs->lro_mgr, skb, p);
> +- else
> ++ else {
> ++ if (unlikely(pi->iscsi_ipv4addr && is_arp(skb)))
> ++ cxgb3_arp_process(adap, skb);
> + netif_receive_skb(skb);
> ++ }
> + } else
> + netif_rx(skb);
> + }
>
> _______________________________________________
> rds-devel mailing list
> rds-devel at oss.oracle.com
> http://oss.oracle.com/mailman/listinfo/rds-devel
>
More information about the rds-devel
mailing list