diff options
author | Michael S. Tsirkin <mst@mellanox.co.il> | 2007-02-05 23:12:23 +0300 |
---|---|---|
committer | Roland Dreier <rolandd@cisco.com> | 2007-02-10 19:00:48 +0300 |
commit | 839fcaba355abaffb7b44f0f4504093acb0b11cf (patch) | |
tree | 9e23f61ab0569ff144e6d9d4cb6a0887783f923c /drivers/infiniband/ulp/ipoib/ipoib_verbs.c | |
parent | 9a6b090c0d1cd5c90f21db772dbe2fbcf14366de (diff) | |
download | linux-839fcaba355abaffb7b44f0f4504093acb0b11cf.tar.xz |
IPoIB: Connected mode experimental support
The following patch adds experimental support for IPoIB connected
mode, as defined by the draft from the IETF ipoib working group. The
idea is to increase performance by increasing the MTU from the maximum
of 2K (theoretically 4K) supported by IPoIB on top of UD. With this
code, I'm able to get 800MByte/sec or more with netperf without
options on a Mellanox 4x back-to-back DDR system.
Some notes on code:
1. SRQ is used for scalability to large cluster sizes
2. Only RC connections are used (UC does not support SRQ now)
3. Retry count is set to 0 since spec draft warns against retries
4. Each connection is used for data transfers in only 1 direction, so
each connection is either active(TX) or passive (RX). 2 sides that
want to communicate create 2 connections.
5. Each active (TX) connection has a separate CQ for send completions -
this keeps the code simple without CQ resize and other tricks
6. To detect stale passive side connections (where the remote side is
down), we keep an LRU list of passive connections (updated once per
second per connection) and destroy a connection after it has been
unused for several seconds. The LRU rule makes it possible to avoid
scanning connections that have recently been active.
Signed-off-by: Michael S. Tsirkin <mst@mellanox.co.il>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
Diffstat (limited to 'drivers/infiniband/ulp/ipoib/ipoib_verbs.c')
-rw-r--r-- | drivers/infiniband/ulp/ipoib/ipoib_verbs.c | 40 |
1 files changed, 24 insertions, 16 deletions
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c index 7b717c648f72..3cb551b88756 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c @@ -168,35 +168,41 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca) .qp_type = IB_QPT_UD }; + int ret, size; + priv->pd = ib_alloc_pd(priv->ca); if (IS_ERR(priv->pd)) { printk(KERN_WARNING "%s: failed to allocate PD\n", ca->name); return -ENODEV; } - priv->cq = ib_create_cq(priv->ca, ipoib_ib_completion, NULL, dev, - ipoib_sendq_size + ipoib_recvq_size + 1); + priv->mr = ib_get_dma_mr(priv->pd, IB_ACCESS_LOCAL_WRITE); + if (IS_ERR(priv->mr)) { + printk(KERN_WARNING "%s: ib_get_dma_mr failed\n", ca->name); + goto out_free_pd; + } + + size = ipoib_sendq_size + ipoib_recvq_size + 1; + ret = ipoib_cm_dev_init(dev); + if (!ret) + size += ipoib_recvq_size; + + priv->cq = ib_create_cq(priv->ca, ipoib_ib_completion, NULL, dev, size); if (IS_ERR(priv->cq)) { printk(KERN_WARNING "%s: failed to create CQ\n", ca->name); - goto out_free_pd; + goto out_free_mr; } if (ib_req_notify_cq(priv->cq, IB_CQ_NEXT_COMP)) goto out_free_cq; - priv->mr = ib_get_dma_mr(priv->pd, IB_ACCESS_LOCAL_WRITE); - if (IS_ERR(priv->mr)) { - printk(KERN_WARNING "%s: ib_get_dma_mr failed\n", ca->name); - goto out_free_cq; - } - init_attr.send_cq = priv->cq; init_attr.recv_cq = priv->cq, priv->qp = ib_create_qp(priv->pd, &init_attr); if (IS_ERR(priv->qp)) { printk(KERN_WARNING "%s: failed to create QP\n", ca->name); - goto out_free_mr; + goto out_free_cq; } priv->dev->dev_addr[1] = (priv->qp->qp_num >> 16) & 0xff; @@ -212,12 +218,12 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca) return 0; -out_free_mr: - ib_dereg_mr(priv->mr); - out_free_cq: ib_destroy_cq(priv->cq); +out_free_mr: + ib_dereg_mr(priv->mr); + out_free_pd: ib_dealloc_pd(priv->pd); return -ENODEV; @@ -235,12 +241,14 @@ void ipoib_transport_dev_cleanup(struct net_device *dev) clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags); } - if (ib_dereg_mr(priv->mr)) - ipoib_warn(priv, "ib_dereg_mr failed\n"); - if (ib_destroy_cq(priv->cq)) ipoib_warn(priv, "ib_cq_destroy failed\n"); + ipoib_cm_dev_cleanup(dev); + + if (ib_dereg_mr(priv->mr)) + ipoib_warn(priv, "ib_dereg_mr failed\n"); + if (ib_dealloc_pd(priv->pd)) ipoib_warn(priv, "ib_dealloc_pd failed\n"); } |