summaryrefslogtreecommitdiff
path: root/drivers/infiniband/hw
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/infiniband/hw')
-rw-r--r--drivers/infiniband/hw/mthca/Kconfig16
-rw-r--r--drivers/infiniband/hw/mthca/Makefile12
-rw-r--r--drivers/infiniband/hw/mthca/mthca_allocator.c179
-rw-r--r--drivers/infiniband/hw/mthca/mthca_av.c241
-rw-r--r--drivers/infiniband/hw/mthca/mthca_cmd.c1767
-rw-r--r--drivers/infiniband/hw/mthca/mthca_cmd.h310
-rw-r--r--drivers/infiniband/hw/mthca/mthca_config_reg.h51
-rw-r--r--drivers/infiniband/hw/mthca/mthca_cq.c918
-rw-r--r--drivers/infiniband/hw/mthca/mthca_dev.h437
-rw-r--r--drivers/infiniband/hw/mthca/mthca_doorbell.h95
-rw-r--r--drivers/infiniband/hw/mthca/mthca_eq.c964
-rw-r--r--drivers/infiniband/hw/mthca/mthca_mad.c323
-rw-r--r--drivers/infiniband/hw/mthca/mthca_main.c1123
-rw-r--r--drivers/infiniband/hw/mthca/mthca_mcg.c376
-rw-r--r--drivers/infiniband/hw/mthca/mthca_memfree.c465
-rw-r--r--drivers/infiniband/hw/mthca/mthca_memfree.h161
-rw-r--r--drivers/infiniband/hw/mthca/mthca_mr.c416
-rw-r--r--drivers/infiniband/hw/mthca/mthca_pd.c80
-rw-r--r--drivers/infiniband/hw/mthca/mthca_profile.c266
-rw-r--r--drivers/infiniband/hw/mthca/mthca_profile.h58
-rw-r--r--drivers/infiniband/hw/mthca/mthca_provider.c660
-rw-r--r--drivers/infiniband/hw/mthca/mthca_provider.h251
-rw-r--r--drivers/infiniband/hw/mthca/mthca_qp.c2056
-rw-r--r--drivers/infiniband/hw/mthca/mthca_reset.c232
-rw-r--r--drivers/infiniband/hw/mthca/mthca_uar.c78
25 files changed, 11535 insertions, 0 deletions
diff --git a/drivers/infiniband/hw/mthca/Kconfig b/drivers/infiniband/hw/mthca/Kconfig
new file mode 100644
index 000000000000..e88be85b3d5c
--- /dev/null
+++ b/drivers/infiniband/hw/mthca/Kconfig
@@ -0,0 +1,16 @@
+config INFINIBAND_MTHCA
+ tristate "Mellanox HCA support"
+ depends on PCI && INFINIBAND
+ ---help---
+ This is a low-level driver for Mellanox InfiniHost host
+ channel adapters (HCAs), including the MT23108 PCI-X HCA
+ ("Tavor") and the MT25208 PCI Express HCA ("Arbel").
+
+config INFINIBAND_MTHCA_DEBUG
+ bool "Verbose debugging output"
+ depends on INFINIBAND_MTHCA
+ default n
+ ---help---
+ This option causes the mthca driver produce a bunch of debug
+ messages. Select this is you are developing the driver or
+ trying to diagnose a problem.
diff --git a/drivers/infiniband/hw/mthca/Makefile b/drivers/infiniband/hw/mthca/Makefile
new file mode 100644
index 000000000000..5dcbd43073e2
--- /dev/null
+++ b/drivers/infiniband/hw/mthca/Makefile
@@ -0,0 +1,12 @@
+EXTRA_CFLAGS += -Idrivers/infiniband/include
+
+ifdef CONFIG_INFINIBAND_MTHCA_DEBUG
+EXTRA_CFLAGS += -DDEBUG
+endif
+
+obj-$(CONFIG_INFINIBAND_MTHCA) += ib_mthca.o
+
+ib_mthca-y := mthca_main.o mthca_cmd.o mthca_profile.o mthca_reset.o \
+ mthca_allocator.o mthca_eq.o mthca_pd.o mthca_cq.o \
+ mthca_mr.o mthca_qp.o mthca_av.o mthca_mcg.o mthca_mad.o \
+ mthca_provider.o mthca_memfree.o mthca_uar.o
diff --git a/drivers/infiniband/hw/mthca/mthca_allocator.c b/drivers/infiniband/hw/mthca/mthca_allocator.c
new file mode 100644
index 000000000000..b1db48dd91d6
--- /dev/null
+++ b/drivers/infiniband/hw/mthca/mthca_allocator.c
@@ -0,0 +1,179 @@
+/*
+ * Copyright (c) 2004 Topspin Communications. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * $Id: mthca_allocator.c 1349 2004-12-16 21:09:43Z roland $
+ */
+
+#include <linux/errno.h>
+#include <linux/slab.h>
+#include <linux/bitmap.h>
+
+#include "mthca_dev.h"
+
+/* Trivial bitmap-based allocator */
+u32 mthca_alloc(struct mthca_alloc *alloc)
+{
+ u32 obj;
+
+ spin_lock(&alloc->lock);
+ obj = find_next_zero_bit(alloc->table, alloc->max, alloc->last);
+ if (obj >= alloc->max) {
+ alloc->top = (alloc->top + alloc->max) & alloc->mask;
+ obj = find_first_zero_bit(alloc->table, alloc->max);
+ }
+
+ if (obj < alloc->max) {
+ set_bit(obj, alloc->table);
+ obj |= alloc->top;
+ } else
+ obj = -1;
+
+ spin_unlock(&alloc->lock);
+
+ return obj;
+}
+
+void mthca_free(struct mthca_alloc *alloc, u32 obj)
+{
+ obj &= alloc->max - 1;
+ spin_lock(&alloc->lock);
+ clear_bit(obj, alloc->table);
+ alloc->last = min(alloc->last, obj);
+ alloc->top = (alloc->top + alloc->max) & alloc->mask;
+ spin_unlock(&alloc->lock);
+}
+
+int mthca_alloc_init(struct mthca_alloc *alloc, u32 num, u32 mask,
+ u32 reserved)
+{
+ int i;
+
+ /* num must be a power of 2 */
+ if (num != 1 << (ffs(num) - 1))
+ return -EINVAL;
+
+ alloc->last = 0;
+ alloc->top = 0;
+ alloc->max = num;
+ alloc->mask = mask;
+ spin_lock_init(&alloc->lock);
+ alloc->table = kmalloc(BITS_TO_LONGS(num) * sizeof (long),
+ GFP_KERNEL);
+ if (!alloc->table)
+ return -ENOMEM;
+
+ bitmap_zero(alloc->table, num);
+ for (i = 0; i < reserved; ++i)
+ set_bit(i, alloc->table);
+
+ return 0;
+}
+
+void mthca_alloc_cleanup(struct mthca_alloc *alloc)
+{
+ kfree(alloc->table);
+}
+
+/*
+ * Array of pointers with lazy allocation of leaf pages. Callers of
+ * _get, _set and _clear methods must use a lock or otherwise
+ * serialize access to the array.
+ */
+
+void *mthca_array_get(struct mthca_array *array, int index)
+{
+ int p = (index * sizeof (void *)) >> PAGE_SHIFT;
+
+ if (array->page_list[p].page) {
+ int i = index & (PAGE_SIZE / sizeof (void *) - 1);
+ return array->page_list[p].page[i];
+ } else
+ return NULL;
+}
+
+int mthca_array_set(struct mthca_array *array, int index, void *value)
+{
+ int p = (index * sizeof (void *)) >> PAGE_SHIFT;
+
+ /* Allocate with GFP_ATOMIC because we'll be called with locks held. */
+ if (!array->page_list[p].page)
+ array->page_list[p].page = (void **) get_zeroed_page(GFP_ATOMIC);
+
+ if (!array->page_list[p].page)
+ return -ENOMEM;
+
+ array->page_list[p].page[index & (PAGE_SIZE / sizeof (void *) - 1)] =
+ value;
+ ++array->page_list[p].used;
+
+ return 0;
+}
+
+void mthca_array_clear(struct mthca_array *array, int index)
+{
+ int p = (index * sizeof (void *)) >> PAGE_SHIFT;
+
+ if (--array->page_list[p].used == 0) {
+ free_page((unsigned long) array->page_list[p].page);
+ array->page_list[p].page = NULL;
+ }
+
+ if (array->page_list[p].used < 0)
+ pr_debug("Array %p index %d page %d with ref count %d < 0\n",
+ array, index, p, array->page_list[p].used);
+}
+
+int mthca_array_init(struct mthca_array *array, int nent)
+{
+ int npage = (nent * sizeof (void *) + PAGE_SIZE - 1) / PAGE_SIZE;
+ int i;
+
+ array->page_list = kmalloc(npage * sizeof *array->page_list, GFP_KERNEL);
+ if (!array->page_list)
+ return -ENOMEM;
+
+ for (i = 0; i < npage; ++i) {
+ array->page_list[i].page = NULL;
+ array->page_list[i].used = 0;
+ }
+
+ return 0;
+}
+
+void mthca_array_cleanup(struct mthca_array *array, int nent)
+{
+ int i;
+
+ for (i = 0; i < (nent * sizeof (void *) + PAGE_SIZE - 1) / PAGE_SIZE; ++i)
+ free_page((unsigned long) array->page_list[i].page);
+
+ kfree(array->page_list);
+}
diff --git a/drivers/infiniband/hw/mthca/mthca_av.c b/drivers/infiniband/hw/mthca/mthca_av.c
new file mode 100644
index 000000000000..426d32778e9c
--- /dev/null
+++ b/drivers/infiniband/hw/mthca/mthca_av.c
@@ -0,0 +1,241 @@
+/*
+ * Copyright (c) 2004 Topspin Communications. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * $Id: mthca_av.c 1349 2004-12-16 21:09:43Z roland $
+ */
+
+#include <linux/init.h>
+
+#include <ib_verbs.h>
+#include <ib_cache.h>
+
+#include "mthca_dev.h"
+
+struct mthca_av {
+ u32 port_pd;
+ u8 reserved1;
+ u8 g_slid;
+ u16 dlid;
+ u8 reserved2;
+ u8 gid_index;
+ u8 msg_sr;
+ u8 hop_limit;
+ u32 sl_tclass_flowlabel;
+ u32 dgid[4];
+};
+
+int mthca_create_ah(struct mthca_dev *dev,
+ struct mthca_pd *pd,
+ struct ib_ah_attr *ah_attr,
+ struct mthca_ah *ah)
+{
+ u32 index = -1;
+ struct mthca_av *av = NULL;
+
+ ah->type = MTHCA_AH_PCI_POOL;
+
+ if (dev->hca_type == ARBEL_NATIVE) {
+ ah->av = kmalloc(sizeof *ah->av, GFP_KERNEL);
+ if (!ah->av)
+ return -ENOMEM;
+
+ ah->type = MTHCA_AH_KMALLOC;
+ av = ah->av;
+ } else if (!atomic_read(&pd->sqp_count) &&
+ !(dev->mthca_flags & MTHCA_FLAG_DDR_HIDDEN)) {
+ index = mthca_alloc(&dev->av_table.alloc);
+
+ /* fall back to allocate in host memory */
+ if (index == -1)
+ goto on_hca_fail;
+
+ av = kmalloc(sizeof *av, GFP_KERNEL);
+ if (!av)
+ goto on_hca_fail;
+
+ ah->type = MTHCA_AH_ON_HCA;
+ ah->avdma = dev->av_table.ddr_av_base +
+ index * MTHCA_AV_SIZE;
+ }
+
+on_hca_fail:
+ if (ah->type == MTHCA_AH_PCI_POOL) {
+ ah->av = pci_pool_alloc(dev->av_table.pool,
+ SLAB_KERNEL, &ah->avdma);
+ if (!ah->av)
+ return -ENOMEM;
+
+ av = ah->av;
+ }
+
+ ah->key = pd->ntmr.ibmr.lkey;
+
+ memset(av, 0, MTHCA_AV_SIZE);
+
+ av->port_pd = cpu_to_be32(pd->pd_num | (ah_attr->port_num << 24));
+ av->g_slid = ah_attr->src_path_bits;
+ av->dlid = cpu_to_be16(ah_attr->dlid);
+ av->msg_sr = (3 << 4) | /* 2K message */
+ ah_attr->static_rate;
+ av->sl_tclass_flowlabel = cpu_to_be32(ah_attr->sl << 28);
+ if (ah_attr->ah_flags & IB_AH_GRH) {
+ av->g_slid |= 0x80;
+ av->gid_index = (ah_attr->port_num - 1) * dev->limits.gid_table_len +
+ ah_attr->grh.sgid_index;
+ av->hop_limit = ah_attr->grh.hop_limit;
+ av->sl_tclass_flowlabel |=
+ cpu_to_be32((ah_attr->grh.traffic_class << 20) |
+ ah_attr->grh.flow_label);
+ memcpy(av->dgid, ah_attr->grh.dgid.raw, 16);
+ } else {
+ /* Arbel workaround -- low byte of GID must be 2 */
+ av->dgid[3] = cpu_to_be32(2);
+ }
+
+ if (0) {
+ int j;
+
+ mthca_dbg(dev, "Created UDAV at %p/%08lx:\n",
+ av, (unsigned long) ah->avdma);
+ for (j = 0; j < 8; ++j)
+ printk(KERN_DEBUG " [%2x] %08x\n",
+ j * 4, be32_to_cpu(((u32 *) av)[j]));
+ }
+
+ if (ah->type == MTHCA_AH_ON_HCA) {
+ memcpy_toio(dev->av_table.av_map + index * MTHCA_AV_SIZE,
+ av, MTHCA_AV_SIZE);
+ kfree(av);
+ }
+
+ return 0;
+}
+
+int mthca_destroy_ah(struct mthca_dev *dev, struct mthca_ah *ah)
+{
+ switch (ah->type) {
+ case MTHCA_AH_ON_HCA:
+ mthca_free(&dev->av_table.alloc,
+ (ah->avdma - dev->av_table.ddr_av_base) /
+ MTHCA_AV_SIZE);
+ break;
+
+ case MTHCA_AH_PCI_POOL:
+ pci_pool_free(dev->av_table.pool, ah->av, ah->avdma);
+ break;
+
+ case MTHCA_AH_KMALLOC:
+ kfree(ah->av);
+ break;
+ }
+
+ return 0;
+}
+
+int mthca_read_ah(struct mthca_dev *dev, struct mthca_ah *ah,
+ struct ib_ud_header *header)
+{
+ if (ah->type == MTHCA_AH_ON_HCA)
+ return -EINVAL;
+
+ header->lrh.service_level = be32_to_cpu(ah->av->sl_tclass_flowlabel) >> 28;
+ header->lrh.destination_lid = ah->av->dlid;
+ header->lrh.source_lid = ah->av->g_slid & 0x7f;
+ if (ah->av->g_slid & 0x80) {
+ header->grh_present = 1;
+ header->grh.traffic_class =
+ (be32_to_cpu(ah->av->sl_tclass_flowlabel) >> 20) & 0xff;
+ header->grh.flow_label =
+ ah->av->sl_tclass_flowlabel & cpu_to_be32(0xfffff);
+ ib_get_cached_gid(&dev->ib_dev,
+ be32_to_cpu(ah->av->port_pd) >> 24,
+ ah->av->gid_index,
+ &header->grh.source_gid);
+ memcpy(header->grh.destination_gid.raw,
+ ah->av->dgid, 16);
+ } else {
+ header->grh_present = 0;
+ }
+
+ return 0;
+}
+
+int __devinit mthca_init_av_table(struct mthca_dev *dev)
+{
+ int err;
+
+ if (dev->hca_type == ARBEL_NATIVE)
+ return 0;
+
+ err = mthca_alloc_init(&dev->av_table.alloc,
+ dev->av_table.num_ddr_avs,
+ dev->av_table.num_ddr_avs - 1,
+ 0);
+ if (err)
+ return err;
+
+ dev->av_table.pool = pci_pool_create("mthca_av", dev->pdev,
+ MTHCA_AV_SIZE,
+ MTHCA_AV_SIZE, 0);
+ if (!dev->av_table.pool)
+ goto out_free_alloc;
+
+ if (!(dev->mthca_flags & MTHCA_FLAG_DDR_HIDDEN)) {
+ dev->av_table.av_map = ioremap(pci_resource_start(dev->pdev, 4) +
+ dev->av_table.ddr_av_base -
+ dev->ddr_start,
+ dev->av_table.num_ddr_avs *
+ MTHCA_AV_SIZE);
+ if (!dev->av_table.av_map)
+ goto out_free_pool;
+ } else
+ dev->av_table.av_map = NULL;
+
+ return 0;
+
+ out_free_pool:
+ pci_pool_destroy(dev->av_table.pool);
+
+ out_free_alloc:
+ mthca_alloc_cleanup(&dev->av_table.alloc);
+ return -ENOMEM;
+}
+
+void __devexit mthca_cleanup_av_table(struct mthca_dev *dev)
+{
+ if (dev->hca_type == ARBEL_NATIVE)
+ return;
+
+ if (dev->av_table.av_map)
+ iounmap(dev->av_table.av_map);
+ pci_pool_destroy(dev->av_table.pool);
+ mthca_alloc_cleanup(&dev->av_table.alloc);
+}
diff --git a/drivers/infiniband/hw/mthca/mthca_cmd.c b/drivers/infiniband/hw/mthca/mthca_cmd.c
new file mode 100644
index 000000000000..9def0981f630
--- /dev/null
+++ b/drivers/infiniband/hw/mthca/mthca_cmd.c
@@ -0,0 +1,1767 @@
+/*
+ * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * $Id: mthca_cmd.c 1349 2004-12-16 21:09:43Z roland $
+ */
+
+#include <linux/sched.h>
+#include <linux/pci.h>
+#include <linux/errno.h>
+#include <asm/io.h>
+#include <ib_mad.h>
+
+#include "mthca_dev.h"
+#include "mthca_config_reg.h"
+#include "mthca_cmd.h"
+#include "mthca_memfree.h"
+
+#define CMD_POLL_TOKEN 0xffff
+
+enum {
+ HCR_IN_PARAM_OFFSET = 0x00,
+ HCR_IN_MODIFIER_OFFSET = 0x08,
+ HCR_OUT_PARAM_OFFSET = 0x0c,
+ HCR_TOKEN_OFFSET = 0x14,
+ HCR_STATUS_OFFSET = 0x18,
+
+ HCR_OPMOD_SHIFT = 12,
+ HCA_E_BIT = 22,
+ HCR_GO_BIT = 23
+};
+
+enum {
+ /* initialization and general commands */
+ CMD_SYS_EN = 0x1,
+ CMD_SYS_DIS = 0x2,
+ CMD_MAP_FA = 0xfff,
+ CMD_UNMAP_FA = 0xffe,
+ CMD_RUN_FW = 0xff6,
+ CMD_MOD_STAT_CFG = 0x34,
+ CMD_QUERY_DEV_LIM = 0x3,
+ CMD_QUERY_FW = 0x4,
+ CMD_ENABLE_LAM = 0xff8,
+ CMD_DISABLE_LAM = 0xff7,
+ CMD_QUERY_DDR = 0x5,
+ CMD_QUERY_ADAPTER = 0x6,
+ CMD_INIT_HCA = 0x7,
+ CMD_CLOSE_HCA = 0x8,
+ CMD_INIT_IB = 0x9,
+ CMD_CLOSE_IB = 0xa,
+ CMD_QUERY_HCA = 0xb,
+ CMD_SET_IB = 0xc,
+ CMD_ACCESS_DDR = 0x2e,
+ CMD_MAP_ICM = 0xffa,
+ CMD_UNMAP_ICM = 0xff9,
+ CMD_MAP_ICM_AUX = 0xffc,
+ CMD_UNMAP_ICM_AUX = 0xffb,
+ CMD_SET_ICM_SIZE = 0xffd,
+
+ /* TPT commands */
+ CMD_SW2HW_MPT = 0xd,
+ CMD_QUERY_MPT = 0xe,
+ CMD_HW2SW_MPT = 0xf,
+ CMD_READ_MTT = 0x10,
+ CMD_WRITE_MTT = 0x11,
+ CMD_SYNC_TPT = 0x2f,
+
+ /* EQ commands */
+ CMD_MAP_EQ = 0x12,
+ CMD_SW2HW_EQ = 0x13,
+ CMD_HW2SW_EQ = 0x14,
+ CMD_QUERY_EQ = 0x15,
+
+ /* CQ commands */
+ CMD_SW2HW_CQ = 0x16,
+ CMD_HW2SW_CQ = 0x17,
+ CMD_QUERY_CQ = 0x18,
+ CMD_RESIZE_CQ = 0x2c,
+
+ /* SRQ commands */
+ CMD_SW2HW_SRQ = 0x35,
+ CMD_HW2SW_SRQ = 0x36,
+ CMD_QUERY_SRQ = 0x37,
+
+ /* QP/EE commands */
+ CMD_RST2INIT_QPEE = 0x19,
+ CMD_INIT2RTR_QPEE = 0x1a,
+ CMD_RTR2RTS_QPEE = 0x1b,
+ CMD_RTS2RTS_QPEE = 0x1c,
+ CMD_SQERR2RTS_QPEE = 0x1d,
+ CMD_2ERR_QPEE = 0x1e,
+ CMD_RTS2SQD_QPEE = 0x1f,
+ CMD_SQD2SQD_QPEE = 0x38,
+ CMD_SQD2RTS_QPEE = 0x20,
+ CMD_ERR2RST_QPEE = 0x21,
+ CMD_QUERY_QPEE = 0x22,
+ CMD_INIT2INIT_QPEE = 0x2d,
+ CMD_SUSPEND_QPEE = 0x32,
+ CMD_UNSUSPEND_QPEE = 0x33,
+ /* special QPs and management commands */
+ CMD_CONF_SPECIAL_QP = 0x23,
+ CMD_MAD_IFC = 0x24,
+
+ /* multicast commands */
+ CMD_READ_MGM = 0x25,
+ CMD_WRITE_MGM = 0x26,
+ CMD_MGID_HASH = 0x27,
+
+ /* miscellaneous commands */
+ CMD_DIAG_RPRT = 0x30,
+ CMD_NOP = 0x31,
+
+ /* debug commands */
+ CMD_QUERY_DEBUG_MSG = 0x2a,
+ CMD_SET_DEBUG_MSG = 0x2b,
+};
+
+/*
+ * According to Mellanox code, FW may be starved and never complete
+ * commands. So we can't use strict timeouts described in PRM -- we
+ * just arbitrarily select 60 seconds for now.
+ */
+#if 0
+/*
+ * Round up and add 1 to make sure we get the full wait time (since we
+ * will be starting in the middle of a jiffy)
+ */
+enum {
+ CMD_TIME_CLASS_A = (HZ + 999) / 1000 + 1,
+ CMD_TIME_CLASS_B = (HZ + 99) / 100 + 1,
+ CMD_TIME_CLASS_C = (HZ + 9) / 10 + 1
+};
+#else
+enum {
+ CMD_TIME_CLASS_A = 60 * HZ,
+ CMD_TIME_CLASS_B = 60 * HZ,
+ CMD_TIME_CLASS_C = 60 * HZ
+};
+#endif
+
+enum {
+ GO_BIT_TIMEOUT = HZ * 10
+};
+
+struct mthca_cmd_context {
+ struct completion done;
+ struct timer_list timer;
+ int result;
+ int next;
+ u64 out_param;
+ u16 token;
+ u8 status;
+};
+
+static inline int go_bit(struct mthca_dev *dev)
+{
+ return readl(dev->hcr + HCR_STATUS_OFFSET) &
+ swab32(1 << HCR_GO_BIT);
+}
+
+static int mthca_cmd_post(struct mthca_dev *dev,
+ u64 in_param,
+ u64 out_param,
+ u32 in_modifier,
+ u8 op_modifier,
+ u16 op,
+ u16 token,
+ int event)
+{
+ int err = 0;
+
+ if (down_interruptible(&dev->cmd.hcr_sem))
+ return -EINTR;
+
+ if (event) {
+ unsigned long end = jiffies + GO_BIT_TIMEOUT;
+
+ while (go_bit(dev) && time_before(jiffies, end)) {
+ set_current_state(TASK_RUNNING);
+ schedule();
+ }
+ }
+
+ if (go_bit(dev)) {
+ err = -EAGAIN;
+ goto out;
+ }
+
+ /*
+ * We use writel (instead of something like memcpy_toio)
+ * because writes of less than 32 bits to the HCR don't work
+ * (and some architectures such as ia64 implement memcpy_toio
+ * in terms of writeb).
+ */
+ __raw_writel(cpu_to_be32(in_param >> 32), dev->hcr + 0 * 4);
+ __raw_writel(cpu_to_be32(in_param & 0xfffffffful), dev->hcr + 1 * 4);
+ __raw_writel(cpu_to_be32(in_modifier), dev->hcr + 2 * 4);
+ __raw_writel(cpu_to_be32(out_param >> 32), dev->hcr + 3 * 4);
+ __raw_writel(cpu_to_be32(out_param & 0xfffffffful), dev->hcr + 4 * 4);
+ __raw_writel(cpu_to_be32(token << 16), dev->hcr + 5 * 4);
+
+ /* __raw_writel may not order writes. */
+ wmb();
+
+ __raw_writel(cpu_to_be32((1 << HCR_GO_BIT) |
+ (event ? (1 << HCA_E_BIT) : 0) |
+ (op_modifier << HCR_OPMOD_SHIFT) |
+ op), dev->hcr + 6 * 4);
+
+out:
+ up(&dev->cmd.hcr_sem);
+ return err;
+}
+
+static int mthca_cmd_poll(struct mthca_dev *dev,
+ u64 in_param,
+ u64 *out_param,
+ int out_is_imm,
+ u32 in_modifier,
+ u8 op_modifier,
+ u16 op,
+ unsigned long timeout,
+ u8 *status)
+{
+ int err = 0;
+ unsigned long end;
+
+ if (down_interruptible(&dev->cmd.poll_sem))
+ return -EINTR;
+
+ err = mthca_cmd_post(dev, in_param,
+ out_param ? *out_param : 0,
+ in_modifier, op_modifier,
+ op, CMD_POLL_TOKEN, 0);
+ if (err)
+ goto out;
+
+ end = timeout + jiffies;
+ while (go_bit(dev) && time_before(jiffies, end)) {
+ set_current_state(TASK_RUNNING);
+ schedule();
+ }
+
+ if (go_bit(dev)) {
+ err = -EBUSY;
+ goto out;
+ }
+
+ if (out_is_imm) {
+ memcpy_fromio(out_param, dev->hcr + HCR_OUT_PARAM_OFFSET, sizeof (u64));
+ be64_to_cpus(out_param);
+ }
+
+ *status = be32_to_cpu(__raw_readl(dev->hcr + HCR_STATUS_OFFSET)) >> 24;
+
+out:
+ up(&dev->cmd.poll_sem);
+ return err;
+}
+
+void mthca_cmd_event(struct mthca_dev *dev,
+ u16 token,
+ u8 status,
+ u64 out_param)
+{
+ struct mthca_cmd_context *context =
+ &dev->cmd.context[token & dev->cmd.token_mask];
+
+ /* previously timed out command completing at long last */
+ if (token != context->token)
+ return;
+
+ context->result = 0;
+ context->status = status;
+ context->out_param = out_param;
+
+ context->token += dev->cmd.token_mask + 1;
+
+ complete(&context->done);
+}
+
+static void event_timeout(unsigned long context_ptr)
+{
+ struct mthca_cmd_context *context =
+ (struct mthca_cmd_context *) context_ptr;
+
+ context->result = -EBUSY;
+ complete(&context->done);
+}
+
+static int mthca_cmd_wait(struct mthca_dev *dev,
+ u64 in_param,
+ u64 *out_param,
+ int out_is_imm,
+ u32 in_modifier,
+ u8 op_modifier,
+ u16 op,
+ unsigned long timeout,
+ u8 *status)
+{
+ int err = 0;
+ struct mthca_cmd_context *context;
+
+ if (down_interruptible(&dev->cmd.event_sem))
+ return -EINTR;
+
+ spin_lock(&dev->cmd.context_lock);
+ BUG_ON(dev->cmd.free_head < 0);
+ context = &dev->cmd.context[dev->cmd.free_head];
+ dev->cmd.free_head = context->next;
+ spin_unlock(&dev->cmd.context_lock);
+
+ init_completion(&context->done);
+
+ err = mthca_cmd_post(dev, in_param,
+ out_param ? *out_param : 0,
+ in_modifier, op_modifier,
+ op, context->token, 1);
+ if (err)
+ goto out;
+
+ context->timer.expires = jiffies + timeout;
+ add_timer(&context->timer);
+
+ wait_for_completion(&context->done);
+ del_timer_sync(&context->timer);
+
+ err = context->result;
+ if (err)
+ goto out;
+
+ *status = context->status;
+ if (*status)
+ mthca_dbg(dev, "Command %02x completed with status %02x\n",
+ op, *status);
+
+ if (out_is_imm)
+ *out_param = context->out_param;
+
+out:
+ spin_lock(&dev->cmd.context_lock);
+ context->next = dev->cmd.free_head;
+ dev->cmd.free_head = context - dev->cmd.context;
+ spin_unlock(&dev->cmd.context_lock);
+
+ up(&dev->cmd.event_sem);
+ return err;
+}
+
+/* Invoke a command with an output mailbox */
+static int mthca_cmd_box(struct mthca_dev *dev,
+ u64 in_param,
+ u64 out_param,
+ u32 in_modifier,
+ u8 op_modifier,
+ u16 op,
+ unsigned long timeout,
+ u8 *status)
+{
+ if (dev->cmd.use_events)
+ return mthca_cmd_wait(dev, in_param, &out_param, 0,
+ in_modifier, op_modifier, op,
+ timeout, status);
+ else
+ return mthca_cmd_poll(dev, in_param, &out_param, 0,
+ in_modifier, op_modifier, op,
+ timeout, status);
+}
+
+/* Invoke a command with no output parameter */
+static int mthca_cmd(struct mthca_dev *dev,
+ u64 in_param,
+ u32 in_modifier,
+ u8 op_modifier,
+ u16 op,
+ unsigned long timeout,
+ u8 *status)
+{
+ return mthca_cmd_box(dev, in_param, 0, in_modifier,
+ op_modifier, op, timeout, status);
+}
+
+/*
+ * Invoke a command with an immediate output parameter (and copy the
+ * output into the caller's out_param pointer after the command
+ * executes).
+ */
+static int mthca_cmd_imm(struct mthca_dev *dev,
+ u64 in_param,
+ u64 *out_param,
+ u32 in_modifier,
+ u8 op_modifier,
+ u16 op,
+ unsigned long timeout,
+ u8 *status)
+{
+ if (dev->cmd.use_events)
+ return mthca_cmd_wait(dev, in_param, out_param, 1,
+ in_modifier, op_modifier, op,
+ timeout, status);
+ else
+ return mthca_cmd_poll(dev, in_param, out_param, 1,
+ in_modifier, op_modifier, op,
+ timeout, status);
+}
+
+/*
+ * Switch to using events to issue FW commands (should be called after
+ * event queue to command events has been initialized).
+ */
+int mthca_cmd_use_events(struct mthca_dev *dev)
+{
+ int i;
+
+ dev->cmd.context = kmalloc(dev->cmd.max_cmds *
+ sizeof (struct mthca_cmd_context),
+ GFP_KERNEL);
+ if (!dev->cmd.context)
+ return -ENOMEM;
+
+ for (i = 0; i < dev->cmd.max_cmds; ++i) {
+ dev->cmd.context[i].token = i;
+ dev->cmd.context[i].next = i + 1;
+ init_timer(&dev->cmd.context[i].timer);
+ dev->cmd.context[i].timer.data =
+ (unsigned long) &dev->cmd.context[i];
+ dev->cmd.context[i].timer.function = event_timeout;
+ }
+
+ dev->cmd.context[dev->cmd.max_cmds - 1].next = -1;
+ dev->cmd.free_head = 0;
+
+ sema_init(&dev->cmd.event_sem, dev->cmd.max_cmds);
+ spin_lock_init(&dev->cmd.context_lock);
+
+ for (dev->cmd.token_mask = 1;
+ dev->cmd.token_mask < dev->cmd.max_cmds;
+ dev->cmd.token_mask <<= 1)
+ ; /* nothing */
+ --dev->cmd.token_mask;
+
+ dev->cmd.use_events = 1;
+ down(&dev->cmd.poll_sem);
+
+ return 0;
+}
+
+/*
+ * Switch back to polling (used when shutting down the device)
+ */
+void mthca_cmd_use_polling(struct mthca_dev *dev)
+{
+ int i;
+
+ dev->cmd.use_events = 0;
+
+ for (i = 0; i < dev->cmd.max_cmds; ++i)
+ down(&dev->cmd.event_sem);
+
+ kfree(dev->cmd.context);
+
+ up(&dev->cmd.poll_sem);
+}
+
+int mthca_SYS_EN(struct mthca_dev *dev, u8 *status)
+{
+ u64 out;
+ int ret;
+
+ ret = mthca_cmd_imm(dev, 0, &out, 0, 0, CMD_SYS_EN, HZ, status);
+
+ if (*status == MTHCA_CMD_STAT_DDR_MEM_ERR)
+ mthca_warn(dev, "SYS_EN DDR error: syn=%x, sock=%d, "
+ "sladdr=%d, SPD source=%s\n",
+ (int) (out >> 6) & 0xf, (int) (out >> 4) & 3,
+ (int) (out >> 1) & 7, (int) out & 1 ? "NVMEM" : "DIMM");
+
+ return ret;
+}
+
+int mthca_SYS_DIS(struct mthca_dev *dev, u8 *status)
+{
+ return mthca_cmd(dev, 0, 0, 0, CMD_SYS_DIS, HZ, status);
+}
+
+static int mthca_map_cmd(struct mthca_dev *dev, u16 op, struct mthca_icm *icm,
+ u64 virt, u8 *status)
+{
+ u32 *inbox;
+ dma_addr_t indma;
+ struct mthca_icm_iter iter;
+ int lg;
+ int nent = 0;
+ int i;
+ int err = 0;
+ int ts = 0, tc = 0;
+
+ inbox = pci_alloc_consistent(dev->pdev, PAGE_SIZE, &indma);
+ if (!inbox)
+ return -ENOMEM;
+
+ memset(inbox, 0, PAGE_SIZE);
+
+ for (mthca_icm_first(icm, &iter);
+ !mthca_icm_last(&iter);
+ mthca_icm_next(&iter)) {
+ /*
+ * We have to pass pages that are aligned to their
+ * size, so find the least significant 1 in the
+ * address or size and use that as our log2 size.
+ */
+ lg = ffs(mthca_icm_addr(&iter) | mthca_icm_size(&iter)) - 1;
+ if (lg < 12) {
+ mthca_warn(dev, "Got FW area not aligned to 4K (%llx/%lx).\n",
+ (unsigned long long) mthca_icm_addr(&iter),
+ mthca_icm_size(&iter));
+ err = -EINVAL;
+ goto out;
+ }
+ for (i = 0; i < mthca_icm_size(&iter) / (1 << lg); ++i, ++nent) {
+ if (virt != -1) {
+ *((__be64 *) (inbox + nent * 4)) =
+ cpu_to_be64(virt);
+ virt += 1 << lg;
+ }
+
+ *((__be64 *) (inbox + nent * 4 + 2)) =
+ cpu_to_be64((mthca_icm_addr(&iter) +
+ (i << lg)) | (lg - 12));
+ ts += 1 << (lg - 10);
+ ++tc;
+
+ if (nent == PAGE_SIZE / 16) {
+ err = mthca_cmd(dev, indma, nent, 0, op,
+ CMD_TIME_CLASS_B, status);
+ if (err || *status)
+ goto out;
+ nent = 0;
+ }
+ }
+ }
+
+ if (nent)
+ err = mthca_cmd(dev, indma, nent, 0, op,
+ CMD_TIME_CLASS_B, status);
+
+ switch (op) {
+ case CMD_MAP_FA:
+ mthca_dbg(dev, "Mapped %d chunks/%d KB for FW.\n", tc, ts);
+ break;
+ case CMD_MAP_ICM_AUX:
+ mthca_dbg(dev, "Mapped %d chunks/%d KB for ICM aux.\n", tc, ts);
+ break;
+ case CMD_MAP_ICM:
+ mthca_dbg(dev, "Mapped %d chunks/%d KB at %llx for ICM.\n",
+ tc, ts, (unsigned long long) virt - (ts << 10));
+ break;
+ }
+
+out:
+ pci_free_consistent(dev->pdev, PAGE_SIZE, inbox, indma);
+ return err;
+}
+
+int mthca_MAP_FA(struct mthca_dev *dev, struct mthca_icm *icm, u8 *status)
+{
+ return mthca_map_cmd(dev, CMD_MAP_FA, icm, -1, status);
+}
+
+int mthca_UNMAP_FA(struct mthca_dev *dev, u8 *status)
+{
+ return mthca_cmd(dev, 0, 0, 0, CMD_UNMAP_FA, CMD_TIME_CLASS_B, status);
+}
+
+int mthca_RUN_FW(struct mthca_dev *dev, u8 *status)
+{
+ return mthca_cmd(dev, 0, 0, 0, CMD_RUN_FW, CMD_TIME_CLASS_A, status);
+}
+
+int mthca_QUERY_FW(struct mthca_dev *dev, u8 *status)
+{
+ u32 *outbox;
+ dma_addr_t outdma;
+ int err = 0;
+ u8 lg;
+
+#define QUERY_FW_OUT_SIZE 0x100
+#define QUERY_FW_VER_OFFSET 0x00
+#define QUERY_FW_MAX_CMD_OFFSET 0x0f
+#define QUERY_FW_ERR_START_OFFSET 0x30
+#define QUERY_FW_ERR_SIZE_OFFSET 0x38
+
+#define QUERY_FW_START_OFFSET 0x20
+#define QUERY_FW_END_OFFSET 0x28
+
+#define QUERY_FW_SIZE_OFFSET 0x00
+#define QUERY_FW_CLR_INT_BASE_OFFSET 0x20
+#define QUERY_FW_EQ_ARM_BASE_OFFSET 0x40
+#define QUERY_FW_EQ_SET_CI_BASE_OFFSET 0x48
+
+ outbox = pci_alloc_consistent(dev->pdev, QUERY_FW_OUT_SIZE, &outdma);
+ if (!outbox) {
+ return -ENOMEM;
+ }
+
+ err = mthca_cmd_box(dev, 0, outdma, 0, 0, CMD_QUERY_FW,
+ CMD_TIME_CLASS_A, status);
+
+ if (err)
+ goto out;
+
+ MTHCA_GET(dev->fw_ver, outbox, QUERY_FW_VER_OFFSET);
+ /*
+ * FW subminor version is at more signifant bits than minor
+ * version, so swap here.
+ */
+ dev->fw_ver = (dev->fw_ver & 0xffff00000000ull) |
+ ((dev->fw_ver & 0xffff0000ull) >> 16) |
+ ((dev->fw_ver & 0x0000ffffull) << 16);
+
+ MTHCA_GET(lg, outbox, QUERY_FW_MAX_CMD_OFFSET);
+ dev->cmd.max_cmds = 1 << lg;
+
+ mthca_dbg(dev, "FW version %012llx, max commands %d\n",
+ (unsigned long long) dev->fw_ver, dev->cmd.max_cmds);
+
+ if (dev->hca_type == ARBEL_NATIVE) {
+ MTHCA_GET(dev->fw.arbel.fw_pages, outbox, QUERY_FW_SIZE_OFFSET);
+ MTHCA_GET(dev->fw.arbel.clr_int_base, outbox, QUERY_FW_CLR_INT_BASE_OFFSET);
+ MTHCA_GET(dev->fw.arbel.eq_arm_base, outbox, QUERY_FW_EQ_ARM_BASE_OFFSET);
+ MTHCA_GET(dev->fw.arbel.eq_set_ci_base, outbox, QUERY_FW_EQ_SET_CI_BASE_OFFSET);
+ mthca_dbg(dev, "FW size %d KB\n", dev->fw.arbel.fw_pages << 2);
+
+ /*
+ * Arbel page size is always 4 KB; round up number of
+ * system pages needed.
+ */
+ dev->fw.arbel.fw_pages =
+ (dev->fw.arbel.fw_pages + (1 << (PAGE_SHIFT - 12)) - 1) >>
+ (PAGE_SHIFT - 12);
+
+ mthca_dbg(dev, "Clear int @ %llx, EQ arm @ %llx, EQ set CI @ %llx\n",
+ (unsigned long long) dev->fw.arbel.clr_int_base,
+ (unsigned long long) dev->fw.arbel.eq_arm_base,
+ (unsigned long long) dev->fw.arbel.eq_set_ci_base);
+ } else {
+ MTHCA_GET(dev->fw.tavor.fw_start, outbox, QUERY_FW_START_OFFSET);
+ MTHCA_GET(dev->fw.tavor.fw_end, outbox, QUERY_FW_END_OFFSET);
+
+ mthca_dbg(dev, "FW size %d KB (start %llx, end %llx)\n",
+ (int) ((dev->fw.tavor.fw_end - dev->fw.tavor.fw_start) >> 10),
+ (unsigned long long) dev->fw.tavor.fw_start,
+ (unsigned long long) dev->fw.tavor.fw_end);
+ }
+
+out:
+ pci_free_consistent(dev->pdev, QUERY_FW_OUT_SIZE, outbox, outdma);
+ return err;
+}
+
+int mthca_ENABLE_LAM(struct mthca_dev *dev, u8 *status)
+{
+ u8 info;
+ u32 *outbox;
+ dma_addr_t outdma;
+ int err = 0;
+
+#define ENABLE_LAM_OUT_SIZE 0x100
+#define ENABLE_LAM_START_OFFSET 0x00
+#define ENABLE_LAM_END_OFFSET 0x08
+#define ENABLE_LAM_INFO_OFFSET 0x13
+
+#define ENABLE_LAM_INFO_HIDDEN_FLAG (1 << 4)
+#define ENABLE_LAM_INFO_ECC_MASK 0x3
+
+ outbox = pci_alloc_consistent(dev->pdev, ENABLE_LAM_OUT_SIZE, &outdma);
+ if (!outbox)
+ return -ENOMEM;
+
+ err = mthca_cmd_box(dev, 0, outdma, 0, 0, CMD_ENABLE_LAM,
+ CMD_TIME_CLASS_C, status);
+
+ if (err)
+ goto out;
+
+ if (*status == MTHCA_CMD_STAT_LAM_NOT_PRE)
+ goto out;
+
+ MTHCA_GET(dev->ddr_start, outbox, ENABLE_LAM_START_OFFSET);
+ MTHCA_GET(dev->ddr_end, outbox, ENABLE_LAM_END_OFFSET);
+ MTHCA_GET(info, outbox, ENABLE_LAM_INFO_OFFSET);
+
+ if (!!(info & ENABLE_LAM_INFO_HIDDEN_FLAG) !=
+ !!(dev->mthca_flags & MTHCA_FLAG_DDR_HIDDEN)) {
+ mthca_info(dev, "FW reports that HCA-attached memory "
+ "is %s hidden; does not match PCI config\n",
+ (info & ENABLE_LAM_INFO_HIDDEN_FLAG) ?
+ "" : "not");
+ }
+ if (info & ENABLE_LAM_INFO_HIDDEN_FLAG)
+ mthca_dbg(dev, "HCA-attached memory is hidden.\n");
+
+ mthca_dbg(dev, "HCA memory size %d KB (start %llx, end %llx)\n",
+ (int) ((dev->ddr_end - dev->ddr_start) >> 10),
+ (unsigned long long) dev->ddr_start,
+ (unsigned long long) dev->ddr_end);
+
+out:
+ pci_free_consistent(dev->pdev, ENABLE_LAM_OUT_SIZE, outbox, outdma);
+ return err;
+}
+
+int mthca_DISABLE_LAM(struct mthca_dev *dev, u8 *status)
+{
+ return mthca_cmd(dev, 0, 0, 0, CMD_SYS_DIS, CMD_TIME_CLASS_C, status);
+}
+
+int mthca_QUERY_DDR(struct mthca_dev *dev, u8 *status)
+{
+ u8 info;
+ u32 *outbox;
+ dma_addr_t outdma;
+ int err = 0;
+
+#define QUERY_DDR_OUT_SIZE 0x100
+#define QUERY_DDR_START_OFFSET 0x00
+#define QUERY_DDR_END_OFFSET 0x08
+#define QUERY_DDR_INFO_OFFSET 0x13
+
+#define QUERY_DDR_INFO_HIDDEN_FLAG (1 << 4)
+#define QUERY_DDR_INFO_ECC_MASK 0x3
+
+ outbox = pci_alloc_consistent(dev->pdev, QUERY_DDR_OUT_SIZE, &outdma);
+ if (!outbox)
+ return -ENOMEM;
+
+ err = mthca_cmd_box(dev, 0, outdma, 0, 0, CMD_QUERY_DDR,
+ CMD_TIME_CLASS_A, status);
+
+ if (err)
+ goto out;
+
+ MTHCA_GET(dev->ddr_start, outbox, QUERY_DDR_START_OFFSET);
+ MTHCA_GET(dev->ddr_end, outbox, QUERY_DDR_END_OFFSET);
+ MTHCA_GET(info, outbox, QUERY_DDR_INFO_OFFSET);
+
+ if (!!(info & QUERY_DDR_INFO_HIDDEN_FLAG) !=
+ !!(dev->mthca_flags & MTHCA_FLAG_DDR_HIDDEN)) {
+ mthca_info(dev, "FW reports that HCA-attached memory "
+ "is %s hidden; does not match PCI config\n",
+ (info & QUERY_DDR_INFO_HIDDEN_FLAG) ?
+ "" : "not");
+ }
+ if (info & QUERY_DDR_INFO_HIDDEN_FLAG)
+ mthca_dbg(dev, "HCA-attached memory is hidden.\n");
+
+ mthca_dbg(dev, "HCA memory size %d KB (start %llx, end %llx)\n",
+ (int) ((dev->ddr_end - dev->ddr_start) >> 10),
+ (unsigned long long) dev->ddr_start,
+ (unsigned long long) dev->ddr_end);
+
+out:
+ pci_free_consistent(dev->pdev, QUERY_DDR_OUT_SIZE, outbox, outdma);
+ return err;
+}
+
+int mthca_QUERY_DEV_LIM(struct mthca_dev *dev,
+ struct mthca_dev_lim *dev_lim, u8 *status)
+{
+ u32 *outbox;
+ dma_addr_t outdma;
+ u8 field;
+ u16 size;
+ int err;
+
+#define QUERY_DEV_LIM_OUT_SIZE 0x100
+#define QUERY_DEV_LIM_MAX_SRQ_SZ_OFFSET 0x10
+#define QUERY_DEV_LIM_MAX_QP_SZ_OFFSET 0x11
+#define QUERY_DEV_LIM_RSVD_QP_OFFSET 0x12
+#define QUERY_DEV_LIM_MAX_QP_OFFSET 0x13
+#define QUERY_DEV_LIM_RSVD_SRQ_OFFSET 0x14
+#define QUERY_DEV_LIM_MAX_SRQ_OFFSET 0x15
+#define QUERY_DEV_LIM_RSVD_EEC_OFFSET 0x16
+#define QUERY_DEV_LIM_MAX_EEC_OFFSET 0x17
+#define QUERY_DEV_LIM_MAX_CQ_SZ_OFFSET 0x19
+#define QUERY_DEV_LIM_RSVD_CQ_OFFSET 0x1a
+#define QUERY_DEV_LIM_MAX_CQ_OFFSET 0x1b
+#define QUERY_DEV_LIM_MAX_MPT_OFFSET 0x1d
+#define QUERY_DEV_LIM_RSVD_EQ_OFFSET 0x1e
+#define QUERY_DEV_LIM_MAX_EQ_OFFSET 0x1f
+#define QUERY_DEV_LIM_RSVD_MTT_OFFSET 0x20
+#define QUERY_DEV_LIM_MAX_MRW_SZ_OFFSET 0x21
+#define QUERY_DEV_LIM_RSVD_MRW_OFFSET 0x22
+#define QUERY_DEV_LIM_MAX_MTT_SEG_OFFSET 0x23
+#define QUERY_DEV_LIM_MAX_AV_OFFSET 0x27
+#define QUERY_DEV_LIM_MAX_REQ_QP_OFFSET 0x29
+#define QUERY_DEV_LIM_MAX_RES_QP_OFFSET 0x2b
+#define QUERY_DEV_LIM_MAX_RDMA_OFFSET 0x2f
+#define QUERY_DEV_LIM_RSZ_SRQ_OFFSET 0x33
+#define QUERY_DEV_LIM_ACK_DELAY_OFFSET 0x35
+#define QUERY_DEV_LIM_MTU_WIDTH_OFFSET 0x36
+#define QUERY_DEV_LIM_VL_PORT_OFFSET 0x37
+#define QUERY_DEV_LIM_MAX_GID_OFFSET 0x3b
+#define QUERY_DEV_LIM_MAX_PKEY_OFFSET 0x3f
+#define QUERY_DEV_LIM_FLAGS_OFFSET 0x44
+#define QUERY_DEV_LIM_RSVD_UAR_OFFSET 0x48
+#define QUERY_DEV_LIM_UAR_SZ_OFFSET 0x49
+#define QUERY_DEV_LIM_PAGE_SZ_OFFSET 0x4b
+#define QUERY_DEV_LIM_MAX_SG_OFFSET 0x51
+#define QUERY_DEV_LIM_MAX_DESC_SZ_OFFSET 0x52
+#define QUERY_DEV_LIM_MAX_SG_RQ_OFFSET 0x55
+#define QUERY_DEV_LIM_MAX_DESC_SZ_RQ_OFFSET 0x56
+#define QUERY_DEV_LIM_MAX_QP_MCG_OFFSET 0x61
+#define QUERY_DEV_LIM_RSVD_MCG_OFFSET 0x62
+#define QUERY_DEV_LIM_MAX_MCG_OFFSET 0x63
+#define QUERY_DEV_LIM_RSVD_PD_OFFSET 0x64
+#define QUERY_DEV_LIM_MAX_PD_OFFSET 0x65
+#define QUERY_DEV_LIM_RSVD_RDD_OFFSET 0x66
+#define QUERY_DEV_LIM_MAX_RDD_OFFSET 0x67
+#define QUERY_DEV_LIM_EEC_ENTRY_SZ_OFFSET 0x80
+#define QUERY_DEV_LIM_QPC_ENTRY_SZ_OFFSET 0x82
+#define QUERY_DEV_LIM_EEEC_ENTRY_SZ_OFFSET 0x84
+#define QUERY_DEV_LIM_EQPC_ENTRY_SZ_OFFSET 0x86
+#define QUERY_DEV_LIM_EQC_ENTRY_SZ_OFFSET 0x88
+#define QUERY_DEV_LIM_CQC_ENTRY_SZ_OFFSET 0x8a
+#define QUERY_DEV_LIM_SRQ_ENTRY_SZ_OFFSET 0x8c
+#define QUERY_DEV_LIM_UAR_ENTRY_SZ_OFFSET 0x8e
+#define QUERY_DEV_LIM_MTT_ENTRY_SZ_OFFSET 0x90
+#define QUERY_DEV_LIM_MPT_ENTRY_SZ_OFFSET 0x92
+#define QUERY_DEV_LIM_PBL_SZ_OFFSET 0x96
+#define QUERY_DEV_LIM_BMME_FLAGS_OFFSET 0x97
+#define QUERY_DEV_LIM_RSVD_LKEY_OFFSET 0x98
+#define QUERY_DEV_LIM_LAMR_OFFSET 0x9f
+#define QUERY_DEV_LIM_MAX_ICM_SZ_OFFSET 0xa0
+
+ outbox = pci_alloc_consistent(dev->pdev, QUERY_DEV_LIM_OUT_SIZE, &outdma);
+ if (!outbox)
+ return -ENOMEM;
+
+ err = mthca_cmd_box(dev, 0, outdma, 0, 0, CMD_QUERY_DEV_LIM,
+ CMD_TIME_CLASS_A, status);
+
+ if (err)
+ goto out;
+
+ MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_SRQ_SZ_OFFSET);
+ dev_lim->max_srq_sz = 1 << field;
+ MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_QP_SZ_OFFSET);
+ dev_lim->max_qp_sz = 1 << field;
+ MTHCA_GET(field, outbox, QUERY_DEV_LIM_RSVD_QP_OFFSET);
+ dev_lim->reserved_qps = 1 << (field & 0xf);
+ MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_QP_OFFSET);
+ dev_lim->max_qps = 1 << (field & 0x1f);
+ MTHCA_GET(field, outbox, QUERY_DEV_LIM_RSVD_SRQ_OFFSET);
+ dev_lim->reserved_srqs = 1 << (field >> 4);
+ MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_SRQ_OFFSET);
+ dev_lim->max_srqs = 1 << (field & 0x1f);
+ MTHCA_GET(field, outbox, QUERY_DEV_LIM_RSVD_EEC_OFFSET);
+ dev_lim->reserved_eecs = 1 << (field & 0xf);
+ MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_EEC_OFFSET);
+ dev_lim->max_eecs = 1 << (field & 0x1f);
+ MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_CQ_SZ_OFFSET);
+ dev_lim->max_cq_sz = 1 << field;
+ MTHCA_GET(field, outbox, QUERY_DEV_LIM_RSVD_CQ_OFFSET);
+ dev_lim->reserved_cqs = 1 << (field & 0xf);
+ MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_CQ_OFFSET);
+ dev_lim->max_cqs = 1 << (field & 0x1f);
+ MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_MPT_OFFSET);
+ dev_lim->max_mpts = 1 << (field & 0x3f);
+ MTHCA_GET(field, outbox, QUERY_DEV_LIM_RSVD_EQ_OFFSET);
+ dev_lim->reserved_eqs = 1 << (field & 0xf);
+ MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_EQ_OFFSET);
+ dev_lim->max_eqs = 1 << (field & 0x7);
+ MTHCA_GET(field, outbox, QUERY_DEV_LIM_RSVD_MTT_OFFSET);
+ dev_lim->reserved_mtts = 1 << (field >> 4);
+ MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_MRW_SZ_OFFSET);
+ dev_lim->max_mrw_sz = 1 << field;
+ MTHCA_GET(field, outbox, QUERY_DEV_LIM_RSVD_MRW_OFFSET);
+ dev_lim->reserved_mrws = 1 << (field & 0xf);
+ MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_MTT_SEG_OFFSET);
+ dev_lim->max_mtt_seg = 1 << (field & 0x3f);
+ MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_REQ_QP_OFFSET);
+ dev_lim->max_requester_per_qp = 1 << (field & 0x3f);
+ MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_RES_QP_OFFSET);
+ dev_lim->max_responder_per_qp = 1 << (field & 0x3f);
+ MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_RDMA_OFFSET);
+ dev_lim->max_rdma_global = 1 << (field & 0x3f);
+ MTHCA_GET(field, outbox, QUERY_DEV_LIM_ACK_DELAY_OFFSET);
+ dev_lim->local_ca_ack_delay = field & 0x1f;
+ MTHCA_GET(field, outbox, QUERY_DEV_LIM_MTU_WIDTH_OFFSET);
+ dev_lim->max_mtu = field >> 4;
+ dev_lim->max_port_width = field & 0xf;
+ MTHCA_GET(field, outbox, QUERY_DEV_LIM_VL_PORT_OFFSET);
+ dev_lim->max_vl = field >> 4;
+ dev_lim->num_ports = field & 0xf;
+ MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_GID_OFFSET);
+ dev_lim->max_gids = 1 << (field & 0xf);
+ MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_PKEY_OFFSET);
+ dev_lim->max_pkeys = 1 << (field & 0xf);
+ MTHCA_GET(dev_lim->flags, outbox, QUERY_DEV_LIM_FLAGS_OFFSET);
+ MTHCA_GET(field, outbox, QUERY_DEV_LIM_RSVD_UAR_OFFSET);
+ dev_lim->reserved_uars = field >> 4;
+ MTHCA_GET(field, outbox, QUERY_DEV_LIM_UAR_SZ_OFFSET);
+ dev_lim->uar_size = 1 << ((field & 0x3f) + 20);
+ MTHCA_GET(field, outbox, QUERY_DEV_LIM_PAGE_SZ_OFFSET);
+ dev_lim->min_page_sz = 1 << field;
+ MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_SG_OFFSET);
+ dev_lim->max_sg = field;
+
+ MTHCA_GET(size, outbox, QUERY_DEV_LIM_MAX_DESC_SZ_OFFSET);
+ dev_lim->max_desc_sz = size;
+
+ MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_QP_MCG_OFFSET);
+ dev_lim->max_qp_per_mcg = 1 << field;
+ MTHCA_GET(field, outbox, QUERY_DEV_LIM_RSVD_MCG_OFFSET);
+ dev_lim->reserved_mgms = field & 0xf;
+ MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_MCG_OFFSET);
+ dev_lim->max_mcgs = 1 << field;
+ MTHCA_GET(field, outbox, QUERY_DEV_LIM_RSVD_PD_OFFSET);
+ dev_lim->reserved_pds = field >> 4;
+ MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_PD_OFFSET);
+ dev_lim->max_pds = 1 << (field & 0x3f);
+ MTHCA_GET(field, outbox, QUERY_DEV_LIM_RSVD_RDD_OFFSET);
+ dev_lim->reserved_rdds = field >> 4;
+ MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_RDD_OFFSET);
+ dev_lim->max_rdds = 1 << (field & 0x3f);
+
+ MTHCA_GET(size, outbox, QUERY_DEV_LIM_EEC_ENTRY_SZ_OFFSET);
+ dev_lim->eec_entry_sz = size;
+ MTHCA_GET(size, outbox, QUERY_DEV_LIM_QPC_ENTRY_SZ_OFFSET);
+ dev_lim->qpc_entry_sz = size;
+ MTHCA_GET(size, outbox, QUERY_DEV_LIM_EEEC_ENTRY_SZ_OFFSET);
+ dev_lim->eeec_entry_sz = size;
+ MTHCA_GET(size, outbox, QUERY_DEV_LIM_EQPC_ENTRY_SZ_OFFSET);
+ dev_lim->eqpc_entry_sz = size;
+ MTHCA_GET(size, outbox, QUERY_DEV_LIM_EQC_ENTRY_SZ_OFFSET);
+ dev_lim->eqc_entry_sz = size;
+ MTHCA_GET(size, outbox, QUERY_DEV_LIM_CQC_ENTRY_SZ_OFFSET);
+ dev_lim->cqc_entry_sz = size;
+ MTHCA_GET(size, outbox, QUERY_DEV_LIM_SRQ_ENTRY_SZ_OFFSET);
+ dev_lim->srq_entry_sz = size;
+ MTHCA_GET(size, outbox, QUERY_DEV_LIM_UAR_ENTRY_SZ_OFFSET);
+ dev_lim->uar_scratch_entry_sz = size;
+
+ mthca_dbg(dev, "Max QPs: %d, reserved QPs: %d, entry size: %d\n",
+ dev_lim->max_qps, dev_lim->reserved_qps, dev_lim->qpc_entry_sz);
+ mthca_dbg(dev, "Max CQs: %d, reserved CQs: %d, entry size: %d\n",
+ dev_lim->max_cqs, dev_lim->reserved_cqs, dev_lim->cqc_entry_sz);
+ mthca_dbg(dev, "Max EQs: %d, reserved EQs: %d, entry size: %d\n",
+ dev_lim->max_eqs, dev_lim->reserved_eqs, dev_lim->eqc_entry_sz);
+ mthca_dbg(dev, "reserved MPTs: %d, reserved MTTs: %d\n",
+ dev_lim->reserved_mrws, dev_lim->reserved_mtts);
+ mthca_dbg(dev, "Max PDs: %d, reserved PDs: %d, reserved UARs: %d\n",
+ dev_lim->max_pds, dev_lim->reserved_pds, dev_lim->reserved_uars);
+ mthca_dbg(dev, "Max QP/MCG: %d, reserved MGMs: %d\n",
+ dev_lim->max_pds, dev_lim->reserved_mgms);
+
+ mthca_dbg(dev, "Flags: %08x\n", dev_lim->flags);
+
+ if (dev->hca_type == ARBEL_NATIVE) {
+ MTHCA_GET(field, outbox, QUERY_DEV_LIM_RSZ_SRQ_OFFSET);
+ dev_lim->hca.arbel.resize_srq = field & 1;
+ MTHCA_GET(size, outbox, QUERY_DEV_LIM_MTT_ENTRY_SZ_OFFSET);
+ dev_lim->mtt_seg_sz = size;
+ MTHCA_GET(size, outbox, QUERY_DEV_LIM_MPT_ENTRY_SZ_OFFSET);
+ dev_lim->mpt_entry_sz = size;
+ MTHCA_GET(field, outbox, QUERY_DEV_LIM_PBL_SZ_OFFSET);
+ dev_lim->hca.arbel.max_pbl_sz = 1 << (field & 0x3f);
+ MTHCA_GET(dev_lim->hca.arbel.bmme_flags, outbox,
+ QUERY_DEV_LIM_BMME_FLAGS_OFFSET);
+ MTHCA_GET(dev_lim->hca.arbel.reserved_lkey, outbox,
+ QUERY_DEV_LIM_RSVD_LKEY_OFFSET);
+ MTHCA_GET(field, outbox, QUERY_DEV_LIM_LAMR_OFFSET);
+ dev_lim->hca.arbel.lam_required = field & 1;
+ MTHCA_GET(dev_lim->hca.arbel.max_icm_sz, outbox,
+ QUERY_DEV_LIM_MAX_ICM_SZ_OFFSET);
+
+ if (dev_lim->hca.arbel.bmme_flags & 1)
+ mthca_dbg(dev, "Base MM extensions: yes "
+ "(flags %d, max PBL %d, rsvd L_Key %08x)\n",
+ dev_lim->hca.arbel.bmme_flags,
+ dev_lim->hca.arbel.max_pbl_sz,
+ dev_lim->hca.arbel.reserved_lkey);
+ else
+ mthca_dbg(dev, "Base MM extensions: no\n");
+
+ mthca_dbg(dev, "Max ICM size %lld MB\n",
+ (unsigned long long) dev_lim->hca.arbel.max_icm_sz >> 20);
+ } else {
+ MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_AV_OFFSET);
+ dev_lim->hca.tavor.max_avs = 1 << (field & 0x3f);
+ dev_lim->mtt_seg_sz = MTHCA_MTT_SEG_SIZE;
+ dev_lim->mpt_entry_sz = MTHCA_MPT_ENTRY_SIZE;
+ }
+
+out:
+ pci_free_consistent(dev->pdev, QUERY_DEV_LIM_OUT_SIZE, outbox, outdma);
+ return err;
+}
+
+int mthca_QUERY_ADAPTER(struct mthca_dev *dev,
+ struct mthca_adapter *adapter, u8 *status)
+{
+ u32 *outbox;
+ dma_addr_t outdma;
+ int err;
+
+#define QUERY_ADAPTER_OUT_SIZE 0x100
+#define QUERY_ADAPTER_VENDOR_ID_OFFSET 0x00
+#define QUERY_ADAPTER_DEVICE_ID_OFFSET 0x04
+#define QUERY_ADAPTER_REVISION_ID_OFFSET 0x08
+#define QUERY_ADAPTER_INTA_PIN_OFFSET 0x10
+
+ outbox = pci_alloc_consistent(dev->pdev, QUERY_ADAPTER_OUT_SIZE, &outdma);
+ if (!outbox)
+ return -ENOMEM;
+
+ err = mthca_cmd_box(dev, 0, outdma, 0, 0, CMD_QUERY_ADAPTER,
+ CMD_TIME_CLASS_A, status);
+
+ if (err)
+ goto out;
+
+ MTHCA_GET(adapter->vendor_id, outbox, QUERY_ADAPTER_VENDOR_ID_OFFSET);
+ MTHCA_GET(adapter->device_id, outbox, QUERY_ADAPTER_DEVICE_ID_OFFSET);
+ MTHCA_GET(adapter->revision_id, outbox, QUERY_ADAPTER_REVISION_ID_OFFSET);
+ MTHCA_GET(adapter->inta_pin, outbox, QUERY_ADAPTER_INTA_PIN_OFFSET);
+
+out:
+ pci_free_consistent(dev->pdev, QUERY_DEV_LIM_OUT_SIZE, outbox, outdma);
+ return err;
+}
+
+int mthca_INIT_HCA(struct mthca_dev *dev,
+ struct mthca_init_hca_param *param,
+ u8 *status)
+{
+ u32 *inbox;
+ dma_addr_t indma;
+ int err;
+
+#define INIT_HCA_IN_SIZE 0x200
+#define INIT_HCA_FLAGS_OFFSET 0x014
+#define INIT_HCA_QPC_OFFSET 0x020
+#define INIT_HCA_QPC_BASE_OFFSET (INIT_HCA_QPC_OFFSET + 0x10)
+#define INIT_HCA_LOG_QP_OFFSET (INIT_HCA_QPC_OFFSET + 0x17)
+#define INIT_HCA_EEC_BASE_OFFSET (INIT_HCA_QPC_OFFSET + 0x20)
+#define INIT_HCA_LOG_EEC_OFFSET (INIT_HCA_QPC_OFFSET + 0x27)
+#define INIT_HCA_SRQC_BASE_OFFSET (INIT_HCA_QPC_OFFSET + 0x28)
+#define INIT_HCA_LOG_SRQ_OFFSET (INIT_HCA_QPC_OFFSET + 0x2f)
+#define INIT_HCA_CQC_BASE_OFFSET (INIT_HCA_QPC_OFFSET + 0x30)
+#define INIT_HCA_LOG_CQ_OFFSET (INIT_HCA_QPC_OFFSET + 0x37)
+#define INIT_HCA_EQPC_BASE_OFFSET (INIT_HCA_QPC_OFFSET + 0x40)
+#define INIT_HCA_EEEC_BASE_OFFSET (INIT_HCA_QPC_OFFSET + 0x50)
+#define INIT_HCA_EQC_BASE_OFFSET (INIT_HCA_QPC_OFFSET + 0x60)
+#define INIT_HCA_LOG_EQ_OFFSET (INIT_HCA_QPC_OFFSET + 0x67)
+#define INIT_HCA_RDB_BASE_OFFSET (INIT_HCA_QPC_OFFSET + 0x70)
+#define INIT_HCA_UDAV_OFFSET 0x0b0
+#define INIT_HCA_UDAV_LKEY_OFFSET (INIT_HCA_UDAV_OFFSET + 0x0)
+#define INIT_HCA_UDAV_PD_OFFSET (INIT_HCA_UDAV_OFFSET + 0x4)
+#define INIT_HCA_MCAST_OFFSET 0x0c0
+#define INIT_HCA_MC_BASE_OFFSET (INIT_HCA_MCAST_OFFSET + 0x00)
+#define INIT_HCA_LOG_MC_ENTRY_SZ_OFFSET (INIT_HCA_MCAST_OFFSET + 0x12)
+#define INIT_HCA_MC_HASH_SZ_OFFSET (INIT_HCA_MCAST_OFFSET + 0x16)
+#define INIT_HCA_LOG_MC_TABLE_SZ_OFFSET (INIT_HCA_MCAST_OFFSET + 0x1b)
+#define INIT_HCA_TPT_OFFSET 0x0f0
+#define INIT_HCA_MPT_BASE_OFFSET (INIT_HCA_TPT_OFFSET + 0x00)
+#define INIT_HCA_MTT_SEG_SZ_OFFSET (INIT_HCA_TPT_OFFSET + 0x09)
+#define INIT_HCA_LOG_MPT_SZ_OFFSET (INIT_HCA_TPT_OFFSET + 0x0b)
+#define INIT_HCA_MTT_BASE_OFFSET (INIT_HCA_TPT_OFFSET + 0x10)
+#define INIT_HCA_UAR_OFFSET 0x120
+#define INIT_HCA_UAR_BASE_OFFSET (INIT_HCA_UAR_OFFSET + 0x00)
+#define INIT_HCA_UARC_SZ_OFFSET (INIT_HCA_UAR_OFFSET + 0x09)
+#define INIT_HCA_LOG_UAR_SZ_OFFSET (INIT_HCA_UAR_OFFSET + 0x0a)
+#define INIT_HCA_UAR_PAGE_SZ_OFFSET (INIT_HCA_UAR_OFFSET + 0x0b)
+#define INIT_HCA_UAR_SCATCH_BASE_OFFSET (INIT_HCA_UAR_OFFSET + 0x10)
+#define INIT_HCA_UAR_CTX_BASE_OFFSET (INIT_HCA_UAR_OFFSET + 0x18)
+
+ inbox = pci_alloc_consistent(dev->pdev, INIT_HCA_IN_SIZE, &indma);
+ if (!inbox)
+ return -ENOMEM;
+
+ memset(inbox, 0, INIT_HCA_IN_SIZE);
+
+#if defined(__LITTLE_ENDIAN)
+ *(inbox + INIT_HCA_FLAGS_OFFSET / 4) &= ~cpu_to_be32(1 << 1);
+#elif defined(__BIG_ENDIAN)
+ *(inbox + INIT_HCA_FLAGS_OFFSET / 4) |= cpu_to_be32(1 << 1);
+#else
+#error Host endianness not defined
+#endif
+ /* Check port for UD address vector: */
+ *(inbox + INIT_HCA_FLAGS_OFFSET / 4) |= cpu_to_be32(1);
+
+ /* We leave wqe_quota, responder_exu, etc as 0 (default) */
+
+ /* QPC/EEC/CQC/EQC/RDB attributes */
+
+ MTHCA_PUT(inbox, param->qpc_base, INIT_HCA_QPC_BASE_OFFSET);
+ MTHCA_PUT(inbox, param->log_num_qps, INIT_HCA_LOG_QP_OFFSET);
+ MTHCA_PUT(inbox, param->eec_base, INIT_HCA_EEC_BASE_OFFSET);
+ MTHCA_PUT(inbox, param->log_num_eecs, INIT_HCA_LOG_EEC_OFFSET);
+ MTHCA_PUT(inbox, param->srqc_base, INIT_HCA_SRQC_BASE_OFFSET);
+ MTHCA_PUT(inbox, param->log_num_srqs, INIT_HCA_LOG_SRQ_OFFSET);
+ MTHCA_PUT(inbox, param->cqc_base, INIT_HCA_CQC_BASE_OFFSET);
+ MTHCA_PUT(inbox, param->log_num_cqs, INIT_HCA_LOG_CQ_OFFSET);
+ MTHCA_PUT(inbox, param->eqpc_base, INIT_HCA_EQPC_BASE_OFFSET);
+ MTHCA_PUT(inbox, param->eeec_base, INIT_HCA_EEEC_BASE_OFFSET);
+ MTHCA_PUT(inbox, param->eqc_base, INIT_HCA_EQC_BASE_OFFSET);
+ MTHCA_PUT(inbox, param->log_num_eqs, INIT_HCA_LOG_EQ_OFFSET);
+ MTHCA_PUT(inbox, param->rdb_base, INIT_HCA_RDB_BASE_OFFSET);
+
+ /* UD AV attributes */
+
+ /* multicast attributes */
+
+ MTHCA_PUT(inbox, param->mc_base, INIT_HCA_MC_BASE_OFFSET);
+ MTHCA_PUT(inbox, param->log_mc_entry_sz, INIT_HCA_LOG_MC_ENTRY_SZ_OFFSET);
+ MTHCA_PUT(inbox, param->mc_hash_sz, INIT_HCA_MC_HASH_SZ_OFFSET);
+ MTHCA_PUT(inbox, param->log_mc_table_sz, INIT_HCA_LOG_MC_TABLE_SZ_OFFSET);
+
+ /* TPT attributes */
+
+ MTHCA_PUT(inbox, param->mpt_base, INIT_HCA_MPT_BASE_OFFSET);
+ if (dev->hca_type != ARBEL_NATIVE)
+ MTHCA_PUT(inbox, param->mtt_seg_sz, INIT_HCA_MTT_SEG_SZ_OFFSET);
+ MTHCA_PUT(inbox, param->log_mpt_sz, INIT_HCA_LOG_MPT_SZ_OFFSET);
+ MTHCA_PUT(inbox, param->mtt_base, INIT_HCA_MTT_BASE_OFFSET);
+
+ /* UAR attributes */
+ {
+ u8 uar_page_sz = PAGE_SHIFT - 12;
+ MTHCA_PUT(inbox, uar_page_sz, INIT_HCA_UAR_PAGE_SZ_OFFSET);
+ }
+
+ MTHCA_PUT(inbox, param->uar_scratch_base, INIT_HCA_UAR_SCATCH_BASE_OFFSET);
+
+ if (dev->hca_type == ARBEL_NATIVE) {
+ MTHCA_PUT(inbox, param->log_uarc_sz, INIT_HCA_UARC_SZ_OFFSET);
+ MTHCA_PUT(inbox, param->log_uar_sz, INIT_HCA_LOG_UAR_SZ_OFFSET);
+ MTHCA_PUT(inbox, param->uarc_base, INIT_HCA_UAR_CTX_BASE_OFFSET);
+ }
+
+ err = mthca_cmd(dev, indma, 0, 0, CMD_INIT_HCA,
+ HZ, status);
+
+ pci_free_consistent(dev->pdev, INIT_HCA_IN_SIZE, inbox, indma);
+ return err;
+}
+
+int mthca_INIT_IB(struct mthca_dev *dev,
+ struct mthca_init_ib_param *param,
+ int port, u8 *status)
+{
+ u32 *inbox;
+ dma_addr_t indma;
+ int err;
+ u32 flags;
+
+#define INIT_IB_IN_SIZE 56
+#define INIT_IB_FLAGS_OFFSET 0x00
+#define INIT_IB_FLAG_SIG (1 << 18)
+#define INIT_IB_FLAG_NG (1 << 17)
+#define INIT_IB_FLAG_G0 (1 << 16)
+#define INIT_IB_FLAG_1X (1 << 8)
+#define INIT_IB_FLAG_4X (1 << 9)
+#define INIT_IB_FLAG_12X (1 << 11)
+#define INIT_IB_VL_SHIFT 4
+#define INIT_IB_MTU_SHIFT 12
+#define INIT_IB_MAX_GID_OFFSET 0x06
+#define INIT_IB_MAX_PKEY_OFFSET 0x0a
+#define INIT_IB_GUID0_OFFSET 0x10
+#define INIT_IB_NODE_GUID_OFFSET 0x18
+#define INIT_IB_SI_GUID_OFFSET 0x20
+
+ inbox = pci_alloc_consistent(dev->pdev, INIT_IB_IN_SIZE, &indma);
+ if (!inbox)
+ return -ENOMEM;
+
+ memset(inbox, 0, INIT_IB_IN_SIZE);
+
+ flags = 0;
+ flags |= param->enable_1x ? INIT_IB_FLAG_1X : 0;
+ flags |= param->enable_4x ? INIT_IB_FLAG_4X : 0;
+ flags |= param->set_guid0 ? INIT_IB_FLAG_G0 : 0;
+ flags |= param->set_node_guid ? INIT_IB_FLAG_NG : 0;
+ flags |= param->set_si_guid ? INIT_IB_FLAG_SIG : 0;
+ flags |= param->vl_cap << INIT_IB_VL_SHIFT;
+ flags |= param->mtu_cap << INIT_IB_MTU_SHIFT;
+ MTHCA_PUT(inbox, flags, INIT_IB_FLAGS_OFFSET);
+
+ MTHCA_PUT(inbox, param->gid_cap, INIT_IB_MAX_GID_OFFSET);
+ MTHCA_PUT(inbox, param->pkey_cap, INIT_IB_MAX_PKEY_OFFSET);
+ MTHCA_PUT(inbox, param->guid0, INIT_IB_GUID0_OFFSET);
+ MTHCA_PUT(inbox, param->node_guid, INIT_IB_NODE_GUID_OFFSET);
+ MTHCA_PUT(inbox, param->si_guid, INIT_IB_SI_GUID_OFFSET);
+
+ err = mthca_cmd(dev, indma, port, 0, CMD_INIT_IB,
+ CMD_TIME_CLASS_A, status);
+
+ pci_free_consistent(dev->pdev, INIT_HCA_IN_SIZE, inbox, indma);
+ return err;
+}
+
+int mthca_CLOSE_IB(struct mthca_dev *dev, int port, u8 *status)
+{
+ return mthca_cmd(dev, 0, port, 0, CMD_CLOSE_IB, HZ, status);
+}
+
+int mthca_CLOSE_HCA(struct mthca_dev *dev, int panic, u8 *status)
+{
+ return mthca_cmd(dev, 0, 0, panic, CMD_CLOSE_HCA, HZ, status);
+}
+
+int mthca_SET_IB(struct mthca_dev *dev, struct mthca_set_ib_param *param,
+ int port, u8 *status)
+{
+ u32 *inbox;
+ dma_addr_t indma;
+ int err;
+ u32 flags = 0;
+
+#define SET_IB_IN_SIZE 0x40
+#define SET_IB_FLAGS_OFFSET 0x00
+#define SET_IB_FLAG_SIG (1 << 18)
+#define SET_IB_FLAG_RQK (1 << 0)
+#define SET_IB_CAP_MASK_OFFSET 0x04
+#define SET_IB_SI_GUID_OFFSET 0x08
+
+ inbox = pci_alloc_consistent(dev->pdev, SET_IB_IN_SIZE, &indma);
+ if (!inbox)
+ return -ENOMEM;
+
+ memset(inbox, 0, SET_IB_IN_SIZE);
+
+ flags |= param->set_si_guid ? SET_IB_FLAG_SIG : 0;
+ flags |= param->reset_qkey_viol ? SET_IB_FLAG_RQK : 0;
+ MTHCA_PUT(inbox, flags, SET_IB_FLAGS_OFFSET);
+
+ MTHCA_PUT(inbox, param->cap_mask, SET_IB_CAP_MASK_OFFSET);
+ MTHCA_PUT(inbox, param->si_guid, SET_IB_SI_GUID_OFFSET);
+
+ err = mthca_cmd(dev, indma, port, 0, CMD_SET_IB,
+ CMD_TIME_CLASS_B, status);
+
+ pci_free_consistent(dev->pdev, INIT_HCA_IN_SIZE, inbox, indma);
+ return err;
+}
+
+int mthca_MAP_ICM(struct mthca_dev *dev, struct mthca_icm *icm, u64 virt, u8 *status)
+{
+ return mthca_map_cmd(dev, CMD_MAP_ICM, icm, virt, status);
+}
+
+int mthca_MAP_ICM_page(struct mthca_dev *dev, u64 dma_addr, u64 virt, u8 *status)
+{
+ u64 *inbox;
+ dma_addr_t indma;
+ int err;
+
+ inbox = pci_alloc_consistent(dev->pdev, 16, &indma);
+ if (!inbox)
+ return -ENOMEM;
+
+ inbox[0] = cpu_to_be64(virt);
+ inbox[1] = cpu_to_be64(dma_addr);
+
+ err = mthca_cmd(dev, indma, 1, 0, CMD_MAP_ICM, CMD_TIME_CLASS_B, status);
+
+ pci_free_consistent(dev->pdev, 16, inbox, indma);
+
+ if (!err)
+ mthca_dbg(dev, "Mapped page at %llx for ICM.\n",
+ (unsigned long long) virt);
+
+ return err;
+}
+
+int mthca_UNMAP_ICM(struct mthca_dev *dev, u64 virt, u32 page_count, u8 *status)
+{
+ mthca_dbg(dev, "Unmapping %d pages at %llx from ICM.\n",
+ page_count, (unsigned long long) virt);
+
+ return mthca_cmd(dev, virt, page_count, 0, CMD_UNMAP_ICM, CMD_TIME_CLASS_B, status);
+}
+
+int mthca_MAP_ICM_AUX(struct mthca_dev *dev, struct mthca_icm *icm, u8 *status)
+{
+ return mthca_map_cmd(dev, CMD_MAP_ICM_AUX, icm, -1, status);
+}
+
+int mthca_UNMAP_ICM_AUX(struct mthca_dev *dev, u8 *status)
+{
+ return mthca_cmd(dev, 0, 0, 0, CMD_UNMAP_ICM_AUX, CMD_TIME_CLASS_B, status);
+}
+
+int mthca_SET_ICM_SIZE(struct mthca_dev *dev, u64 icm_size, u64 *aux_pages,
+ u8 *status)
+{
+ int ret = mthca_cmd_imm(dev, icm_size, aux_pages, 0, 0, CMD_SET_ICM_SIZE,
+ CMD_TIME_CLASS_A, status);
+
+ if (ret || status)
+ return ret;
+
+ /*
+ * Arbel page size is always 4 KB; round up number of system
+ * pages needed.
+ */
+ *aux_pages = (*aux_pages + (1 << (PAGE_SHIFT - 12)) - 1) >> (PAGE_SHIFT - 12);
+
+ return 0;
+}
+
+int mthca_SW2HW_MPT(struct mthca_dev *dev, void *mpt_entry,
+ int mpt_index, u8 *status)
+{
+ dma_addr_t indma;
+ int err;
+
+ indma = pci_map_single(dev->pdev, mpt_entry,
+ MTHCA_MPT_ENTRY_SIZE,
+ PCI_DMA_TODEVICE);
+ if (pci_dma_mapping_error(indma))
+ return -ENOMEM;
+
+ err = mthca_cmd(dev, indma, mpt_index, 0, CMD_SW2HW_MPT,
+ CMD_TIME_CLASS_B, status);
+
+ pci_unmap_single(dev->pdev, indma,
+ MTHCA_MPT_ENTRY_SIZE, PCI_DMA_TODEVICE);
+ return err;
+}
+
+int mthca_HW2SW_MPT(struct mthca_dev *dev, void *mpt_entry,
+ int mpt_index, u8 *status)
+{
+ dma_addr_t outdma = 0;
+ int err;
+
+ if (mpt_entry) {
+ outdma = pci_map_single(dev->pdev, mpt_entry,
+ MTHCA_MPT_ENTRY_SIZE,
+ PCI_DMA_FROMDEVICE);
+ if (pci_dma_mapping_error(outdma))
+ return -ENOMEM;
+ }
+
+ err = mthca_cmd_box(dev, 0, outdma, mpt_index, !mpt_entry,
+ CMD_HW2SW_MPT,
+ CMD_TIME_CLASS_B, status);
+
+ if (mpt_entry)
+ pci_unmap_single(dev->pdev, outdma,
+ MTHCA_MPT_ENTRY_SIZE,
+ PCI_DMA_FROMDEVICE);
+ return err;
+}
+
+int mthca_WRITE_MTT(struct mthca_dev *dev, u64 *mtt_entry,
+ int num_mtt, u8 *status)
+{
+ dma_addr_t indma;
+ int err;
+
+ indma = pci_map_single(dev->pdev, mtt_entry,
+ (num_mtt + 2) * 8,
+ PCI_DMA_TODEVICE);
+ if (pci_dma_mapping_error(indma))
+ return -ENOMEM;
+
+ err = mthca_cmd(dev, indma, num_mtt, 0, CMD_WRITE_MTT,
+ CMD_TIME_CLASS_B, status);
+
+ pci_unmap_single(dev->pdev, indma,
+ (num_mtt + 2) * 8, PCI_DMA_TODEVICE);
+ return err;
+}
+
+int mthca_MAP_EQ(struct mthca_dev *dev, u64 event_mask, int unmap,
+ int eq_num, u8 *status)
+{
+ mthca_dbg(dev, "%s mask %016llx for eqn %d\n",
+ unmap ? "Clearing" : "Setting",
+ (unsigned long long) event_mask, eq_num);
+ return mthca_cmd(dev, event_mask, (unmap << 31) | eq_num,
+ 0, CMD_MAP_EQ, CMD_TIME_CLASS_B, status);
+}
+
+int mthca_SW2HW_EQ(struct mthca_dev *dev, void *eq_context,
+ int eq_num, u8 *status)
+{
+ dma_addr_t indma;
+ int err;
+
+ indma = pci_map_single(dev->pdev, eq_context,
+ MTHCA_EQ_CONTEXT_SIZE,
+ PCI_DMA_TODEVICE);
+ if (pci_dma_mapping_error(indma))
+ return -ENOMEM;
+
+ err = mthca_cmd(dev, indma, eq_num, 0, CMD_SW2HW_EQ,
+ CMD_TIME_CLASS_A, status);
+
+ pci_unmap_single(dev->pdev, indma,
+ MTHCA_EQ_CONTEXT_SIZE, PCI_DMA_TODEVICE);
+ return err;
+}
+
+int mthca_HW2SW_EQ(struct mthca_dev *dev, void *eq_context,
+ int eq_num, u8 *status)
+{
+ dma_addr_t outdma = 0;
+ int err;
+
+ outdma = pci_map_single(dev->pdev, eq_context,
+ MTHCA_EQ_CONTEXT_SIZE,
+ PCI_DMA_FROMDEVICE);
+ if (pci_dma_mapping_error(outdma))
+ return -ENOMEM;
+
+ err = mthca_cmd_box(dev, 0, outdma, eq_num, 0,
+ CMD_HW2SW_EQ,
+ CMD_TIME_CLASS_A, status);
+
+ pci_unmap_single(dev->pdev, outdma,
+ MTHCA_EQ_CONTEXT_SIZE,
+ PCI_DMA_FROMDEVICE);
+ return err;
+}
+
+int mthca_SW2HW_CQ(struct mthca_dev *dev, void *cq_context,
+ int cq_num, u8 *status)
+{
+ dma_addr_t indma;
+ int err;
+
+ indma = pci_map_single(dev->pdev, cq_context,
+ MTHCA_CQ_CONTEXT_SIZE,
+ PCI_DMA_TODEVICE);
+ if (pci_dma_mapping_error(indma))
+ return -ENOMEM;
+
+ err = mthca_cmd(dev, indma, cq_num, 0, CMD_SW2HW_CQ,
+ CMD_TIME_CLASS_A, status);
+
+ pci_unmap_single(dev->pdev, indma,
+ MTHCA_CQ_CONTEXT_SIZE, PCI_DMA_TODEVICE);
+ return err;
+}
+
+int mthca_HW2SW_CQ(struct mthca_dev *dev, void *cq_context,
+ int cq_num, u8 *status)
+{
+ dma_addr_t outdma = 0;
+ int err;
+
+ outdma = pci_map_single(dev->pdev, cq_context,
+ MTHCA_CQ_CONTEXT_SIZE,
+ PCI_DMA_FROMDEVICE);
+ if (pci_dma_mapping_error(outdma))
+ return -ENOMEM;
+
+ err = mthca_cmd_box(dev, 0, outdma, cq_num, 0,
+ CMD_HW2SW_CQ,
+ CMD_TIME_CLASS_A, status);
+
+ pci_unmap_single(dev->pdev, outdma,
+ MTHCA_CQ_CONTEXT_SIZE,
+ PCI_DMA_FROMDEVICE);
+ return err;
+}
+
+int mthca_MODIFY_QP(struct mthca_dev *dev, int trans, u32 num,
+ int is_ee, void *qp_context, u32 optmask,
+ u8 *status)
+{
+ static const u16 op[] = {
+ [MTHCA_TRANS_RST2INIT] = CMD_RST2INIT_QPEE,
+ [MTHCA_TRANS_INIT2INIT] = CMD_INIT2INIT_QPEE,
+ [MTHCA_TRANS_INIT2RTR] = CMD_INIT2RTR_QPEE,
+ [MTHCA_TRANS_RTR2RTS] = CMD_RTR2RTS_QPEE,
+ [MTHCA_TRANS_RTS2RTS] = CMD_RTS2RTS_QPEE,
+ [MTHCA_TRANS_SQERR2RTS] = CMD_SQERR2RTS_QPEE,
+ [MTHCA_TRANS_ANY2ERR] = CMD_2ERR_QPEE,
+ [MTHCA_TRANS_RTS2SQD] = CMD_RTS2SQD_QPEE,
+ [MTHCA_TRANS_SQD2SQD] = CMD_SQD2SQD_QPEE,
+ [MTHCA_TRANS_SQD2RTS] = CMD_SQD2RTS_QPEE,
+ [MTHCA_TRANS_ANY2RST] = CMD_ERR2RST_QPEE
+ };
+ u8 op_mod = 0;
+
+ dma_addr_t indma;
+ int err;
+
+ if (trans < 0 || trans >= ARRAY_SIZE(op))
+ return -EINVAL;
+
+ if (trans == MTHCA_TRANS_ANY2RST) {
+ indma = 0;
+ op_mod = 3; /* don't write outbox, any->reset */
+
+ /* For debugging */
+ qp_context = pci_alloc_consistent(dev->pdev, MTHCA_QP_CONTEXT_SIZE,
+ &indma);
+ op_mod = 2; /* write outbox, any->reset */
+ } else {
+ indma = pci_map_single(dev->pdev, qp_context,
+ MTHCA_QP_CONTEXT_SIZE,
+ PCI_DMA_TODEVICE);
+ if (pci_dma_mapping_error(indma))
+ return -ENOMEM;
+
+ if (0) {
+ int i;
+ mthca_dbg(dev, "Dumping QP context:\n");
+ printk(" opt param mask: %08x\n", be32_to_cpup(qp_context));
+ for (i = 0; i < 0x100 / 4; ++i) {
+ if (i % 8 == 0)
+ printk(" [%02x] ", i * 4);
+ printk(" %08x", be32_to_cpu(((u32 *) qp_context)[i + 2]));
+ if ((i + 1) % 8 == 0)
+ printk("\n");
+ }
+ }
+ }
+
+ if (trans == MTHCA_TRANS_ANY2RST) {
+ err = mthca_cmd_box(dev, 0, indma, (!!is_ee << 24) | num,
+ op_mod, op[trans], CMD_TIME_CLASS_C, status);
+
+ if (0) {
+ int i;
+ mthca_dbg(dev, "Dumping QP context:\n");
+ printk(" %08x\n", be32_to_cpup(qp_context));
+ for (i = 0; i < 0x100 / 4; ++i) {
+ if (i % 8 == 0)
+ printk("[%02x] ", i * 4);
+ printk(" %08x", be32_to_cpu(((u32 *) qp_context)[i + 2]));
+ if ((i + 1) % 8 == 0)
+ printk("\n");
+ }
+ }
+
+ } else
+ err = mthca_cmd(dev, indma, (!!is_ee << 24) | num,
+ op_mod, op[trans], CMD_TIME_CLASS_C, status);
+
+ if (trans != MTHCA_TRANS_ANY2RST)
+ pci_unmap_single(dev->pdev, indma,
+ MTHCA_QP_CONTEXT_SIZE, PCI_DMA_TODEVICE);
+ else
+ pci_free_consistent(dev->pdev, MTHCA_QP_CONTEXT_SIZE,
+ qp_context, indma);
+ return err;
+}
+
+int mthca_QUERY_QP(struct mthca_dev *dev, u32 num, int is_ee,
+ void *qp_context, u8 *status)
+{
+ dma_addr_t outdma = 0;
+ int err;
+
+ outdma = pci_map_single(dev->pdev, qp_context,
+ MTHCA_QP_CONTEXT_SIZE,
+ PCI_DMA_FROMDEVICE);
+ if (pci_dma_mapping_error(outdma))
+ return -ENOMEM;
+
+ err = mthca_cmd_box(dev, 0, outdma, (!!is_ee << 24) | num, 0,
+ CMD_QUERY_QPEE,
+ CMD_TIME_CLASS_A, status);
+
+ pci_unmap_single(dev->pdev, outdma,
+ MTHCA_QP_CONTEXT_SIZE,
+ PCI_DMA_FROMDEVICE);
+ return err;
+}
+
+int mthca_CONF_SPECIAL_QP(struct mthca_dev *dev, int type, u32 qpn,
+ u8 *status)
+{
+ u8 op_mod;
+
+ switch (type) {
+ case IB_QPT_SMI:
+ op_mod = 0;
+ break;
+ case IB_QPT_GSI:
+ op_mod = 1;
+ break;
+ case IB_QPT_RAW_IPV6:
+ op_mod = 2;
+ break;
+ case IB_QPT_RAW_ETY:
+ op_mod = 3;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return mthca_cmd(dev, 0, qpn, op_mod, CMD_CONF_SPECIAL_QP,
+ CMD_TIME_CLASS_B, status);
+}
+
+int mthca_MAD_IFC(struct mthca_dev *dev, int ignore_mkey, int ignore_bkey,
+ int port, struct ib_wc* in_wc, struct ib_grh* in_grh,
+ void *in_mad, void *response_mad, u8 *status)
+{
+ void *box;
+ dma_addr_t dma;
+ int err;
+ u32 in_modifier = port;
+ u8 op_modifier = 0;
+
+#define MAD_IFC_BOX_SIZE 0x400
+#define MAD_IFC_MY_QPN_OFFSET 0x100
+#define MAD_IFC_RQPN_OFFSET 0x104
+#define MAD_IFC_SL_OFFSET 0x108
+#define MAD_IFC_G_PATH_OFFSET 0x109
+#define MAD_IFC_RLID_OFFSET 0x10a
+#define MAD_IFC_PKEY_OFFSET 0x10e
+#define MAD_IFC_GRH_OFFSET 0x140
+
+ box = pci_alloc_consistent(dev->pdev, MAD_IFC_BOX_SIZE, &dma);
+ if (!box)
+ return -ENOMEM;
+
+ memcpy(box, in_mad, 256);
+
+ /*
+ * Key check traps can't be generated unless we have in_wc to
+ * tell us where to send the trap.
+ */
+ if (ignore_mkey || !in_wc)
+ op_modifier |= 0x1;
+ if (ignore_bkey || !in_wc)
+ op_modifier |= 0x2;
+
+ if (in_wc) {
+ u8 val;
+
+ memset(box + 256, 0, 256);
+
+ MTHCA_PUT(box, in_wc->qp_num, MAD_IFC_MY_QPN_OFFSET);
+ MTHCA_PUT(box, in_wc->src_qp, MAD_IFC_RQPN_OFFSET);
+
+ val = in_wc->sl << 4;
+ MTHCA_PUT(box, val, MAD_IFC_SL_OFFSET);
+
+ val = in_wc->dlid_path_bits |
+ (in_wc->wc_flags & IB_WC_GRH ? 0x80 : 0);
+ MTHCA_PUT(box, val, MAD_IFC_GRH_OFFSET);
+
+ MTHCA_PUT(box, in_wc->slid, MAD_IFC_RLID_OFFSET);
+ MTHCA_PUT(box, in_wc->pkey_index, MAD_IFC_PKEY_OFFSET);
+
+ if (in_grh)
+ memcpy((u8 *) box + MAD_IFC_GRH_OFFSET, in_grh, 40);
+
+ op_modifier |= 0x10;
+
+ in_modifier |= in_wc->slid << 16;
+ }
+
+ err = mthca_cmd_box(dev, dma, dma + 512, in_modifier, op_modifier,
+ CMD_MAD_IFC, CMD_TIME_CLASS_C, status);
+
+ if (!err && !*status)
+ memcpy(response_mad, box + 512, 256);
+
+ pci_free_consistent(dev->pdev, MAD_IFC_BOX_SIZE, box, dma);
+ return err;
+}
+
+int mthca_READ_MGM(struct mthca_dev *dev, int index, void *mgm,
+ u8 *status)
+{
+ dma_addr_t outdma = 0;
+ int err;
+
+ outdma = pci_map_single(dev->pdev, mgm,
+ MTHCA_MGM_ENTRY_SIZE,
+ PCI_DMA_FROMDEVICE);
+ if (pci_dma_mapping_error(outdma))
+ return -ENOMEM;
+
+ err = mthca_cmd_box(dev, 0, outdma, index, 0,
+ CMD_READ_MGM,
+ CMD_TIME_CLASS_A, status);
+
+ pci_unmap_single(dev->pdev, outdma,
+ MTHCA_MGM_ENTRY_SIZE,
+ PCI_DMA_FROMDEVICE);
+ return err;
+}
+
+int mthca_WRITE_MGM(struct mthca_dev *dev, int index, void *mgm,
+ u8 *status)
+{
+ dma_addr_t indma;
+ int err;
+
+ indma = pci_map_single(dev->pdev, mgm,
+ MTHCA_MGM_ENTRY_SIZE,
+ PCI_DMA_TODEVICE);
+ if (pci_dma_mapping_error(indma))
+ return -ENOMEM;
+
+ err = mthca_cmd(dev, indma, index, 0, CMD_WRITE_MGM,
+ CMD_TIME_CLASS_A, status);
+
+ pci_unmap_single(dev->pdev, indma,
+ MTHCA_MGM_ENTRY_SIZE, PCI_DMA_TODEVICE);
+ return err;
+}
+
+int mthca_MGID_HASH(struct mthca_dev *dev, void *gid, u16 *hash,
+ u8 *status)
+{
+ dma_addr_t indma;
+ u64 imm;
+ int err;
+
+ indma = pci_map_single(dev->pdev, gid, 16, PCI_DMA_TODEVICE);
+ if (pci_dma_mapping_error(indma))
+ return -ENOMEM;
+
+ err = mthca_cmd_imm(dev, indma, &imm, 0, 0, CMD_MGID_HASH,
+ CMD_TIME_CLASS_A, status);
+ *hash = imm;
+
+ pci_unmap_single(dev->pdev, indma, 16, PCI_DMA_TODEVICE);
+ return err;
+}
+
+int mthca_NOP(struct mthca_dev *dev, u8 *status)
+{
+ return mthca_cmd(dev, 0, 0x1f, 0, CMD_NOP, msecs_to_jiffies(100), status);
+}
diff --git a/drivers/infiniband/hw/mthca/mthca_cmd.h b/drivers/infiniband/hw/mthca/mthca_cmd.h
new file mode 100644
index 000000000000..a8bc6aa36ff1
--- /dev/null
+++ b/drivers/infiniband/hw/mthca/mthca_cmd.h
@@ -0,0 +1,310 @@
+/*
+ * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * $Id: mthca_cmd.h 1349 2004-12-16 21:09:43Z roland $
+ */
+
+#ifndef MTHCA_CMD_H
+#define MTHCA_CMD_H
+
+#include <ib_verbs.h>
+
+#define MTHCA_CMD_MAILBOX_ALIGN 16UL
+#define MTHCA_CMD_MAILBOX_EXTRA (MTHCA_CMD_MAILBOX_ALIGN - 1)
+
+enum {
+ /* command completed successfully: */
+ MTHCA_CMD_STAT_OK = 0x00,
+ /* Internal error (such as a bus error) occurred while processing command: */
+ MTHCA_CMD_STAT_INTERNAL_ERR = 0x01,
+ /* Operation/command not supported or opcode modifier not supported: */
+ MTHCA_CMD_STAT_BAD_OP = 0x02,
+ /* Parameter not supported or parameter out of range: */
+ MTHCA_CMD_STAT_BAD_PARAM = 0x03,
+ /* System not enabled or bad system state: */
+ MTHCA_CMD_STAT_BAD_SYS_STATE = 0x04,
+ /* Attempt to access reserved or unallocaterd resource: */
+ MTHCA_CMD_STAT_BAD_RESOURCE = 0x05,
+ /* Requested resource is currently executing a command, or is otherwise busy: */
+ MTHCA_CMD_STAT_RESOURCE_BUSY = 0x06,
+ /* memory error: */
+ MTHCA_CMD_STAT_DDR_MEM_ERR = 0x07,
+ /* Required capability exceeds device limits: */
+ MTHCA_CMD_STAT_EXCEED_LIM = 0x08,
+ /* Resource is not in the appropriate state or ownership: */
+ MTHCA_CMD_STAT_BAD_RES_STATE = 0x09,
+ /* Index out of range: */
+ MTHCA_CMD_STAT_BAD_INDEX = 0x0a,
+ /* FW image corrupted: */
+ MTHCA_CMD_STAT_BAD_NVMEM = 0x0b,
+ /* Attempt to modify a QP/EE which is not in the presumed state: */
+ MTHCA_CMD_STAT_BAD_QPEE_STATE = 0x10,
+ /* Bad segment parameters (Address/Size): */
+ MTHCA_CMD_STAT_BAD_SEG_PARAM = 0x20,
+ /* Memory Region has Memory Windows bound to: */
+ MTHCA_CMD_STAT_REG_BOUND = 0x21,
+ /* HCA local attached memory not present: */
+ MTHCA_CMD_STAT_LAM_NOT_PRE = 0x22,
+ /* Bad management packet (silently discarded): */
+ MTHCA_CMD_STAT_BAD_PKT = 0x30,
+ /* More outstanding CQEs in CQ than new CQ size: */
+ MTHCA_CMD_STAT_BAD_SIZE = 0x40
+};
+
+enum {
+ MTHCA_TRANS_INVALID = 0,
+ MTHCA_TRANS_RST2INIT,
+ MTHCA_TRANS_INIT2INIT,
+ MTHCA_TRANS_INIT2RTR,
+ MTHCA_TRANS_RTR2RTS,
+ MTHCA_TRANS_RTS2RTS,
+ MTHCA_TRANS_SQERR2RTS,
+ MTHCA_TRANS_ANY2ERR,
+ MTHCA_TRANS_RTS2SQD,
+ MTHCA_TRANS_SQD2SQD,
+ MTHCA_TRANS_SQD2RTS,
+ MTHCA_TRANS_ANY2RST,
+};
+
+enum {
+ DEV_LIM_FLAG_RC = 1 << 0,
+ DEV_LIM_FLAG_UC = 1 << 1,
+ DEV_LIM_FLAG_UD = 1 << 2,
+ DEV_LIM_FLAG_RD = 1 << 3,
+ DEV_LIM_FLAG_RAW_IPV6 = 1 << 4,
+ DEV_LIM_FLAG_RAW_ETHER = 1 << 5,
+ DEV_LIM_FLAG_SRQ = 1 << 6,
+ DEV_LIM_FLAG_BAD_PKEY_CNTR = 1 << 8,
+ DEV_LIM_FLAG_BAD_QKEY_CNTR = 1 << 9,
+ DEV_LIM_FLAG_MW = 1 << 16,
+ DEV_LIM_FLAG_AUTO_PATH_MIG = 1 << 17,
+ DEV_LIM_FLAG_ATOMIC = 1 << 18,
+ DEV_LIM_FLAG_RAW_MULTI = 1 << 19,
+ DEV_LIM_FLAG_UD_AV_PORT_ENFORCE = 1 << 20,
+ DEV_LIM_FLAG_UD_MULTI = 1 << 21,
+};
+
+struct mthca_dev_lim {
+ int max_srq_sz;
+ int max_qp_sz;
+ int reserved_qps;
+ int max_qps;
+ int reserved_srqs;
+ int max_srqs;
+ int reserved_eecs;
+ int max_eecs;
+ int max_cq_sz;
+ int reserved_cqs;
+ int max_cqs;
+ int max_mpts;
+ int reserved_eqs;
+ int max_eqs;
+ int reserved_mtts;
+ int max_mrw_sz;
+ int reserved_mrws;
+ int max_mtt_seg;
+ int max_requester_per_qp;
+ int max_responder_per_qp;
+ int max_rdma_global;
+ int local_ca_ack_delay;
+ int max_mtu;
+ int max_port_width;
+ int max_vl;
+ int num_ports;
+ int max_gids;
+ int max_pkeys;
+ u32 flags;
+ int reserved_uars;
+ int uar_size;
+ int min_page_sz;
+ int max_sg;
+ int max_desc_sz;
+ int max_qp_per_mcg;
+ int reserved_mgms;
+ int max_mcgs;
+ int reserved_pds;
+ int max_pds;
+ int reserved_rdds;
+ int max_rdds;
+ int eec_entry_sz;
+ int qpc_entry_sz;
+ int eeec_entry_sz;
+ int eqpc_entry_sz;
+ int eqc_entry_sz;
+ int cqc_entry_sz;
+ int srq_entry_sz;
+ int uar_scratch_entry_sz;
+ int mtt_seg_sz;
+ int mpt_entry_sz;
+ union {
+ struct {
+ int max_avs;
+ } tavor;
+ struct {
+ int resize_srq;
+ int max_pbl_sz;
+ u8 bmme_flags;
+ u32 reserved_lkey;
+ int lam_required;
+ u64 max_icm_sz;
+ } arbel;
+ } hca;
+};
+
+struct mthca_adapter {
+ u32 vendor_id;
+ u32 device_id;
+ u32 revision_id;
+ u8 inta_pin;
+};
+
+struct mthca_init_hca_param {
+ u64 qpc_base;
+ u64 eec_base;
+ u64 srqc_base;
+ u64 cqc_base;
+ u64 eqpc_base;
+ u64 eeec_base;
+ u64 eqc_base;
+ u64 rdb_base;
+ u64 mc_base;
+ u64 mpt_base;
+ u64 mtt_base;
+ u64 uar_scratch_base;
+ u64 uarc_base;
+ u16 log_mc_entry_sz;
+ u16 mc_hash_sz;
+ u8 log_num_qps;
+ u8 log_num_eecs;
+ u8 log_num_srqs;
+ u8 log_num_cqs;
+ u8 log_num_eqs;
+ u8 log_mc_table_sz;
+ u8 mtt_seg_sz;
+ u8 log_mpt_sz;
+ u8 log_uar_sz;
+ u8 log_uarc_sz;
+};
+
+struct mthca_init_ib_param {
+ int enable_1x;
+ int enable_4x;
+ int vl_cap;
+ int mtu_cap;
+ u16 gid_cap;
+ u16 pkey_cap;
+ int set_guid0;
+ u64 guid0;
+ int set_node_guid;
+ u64 node_guid;
+ int set_si_guid;
+ u64 si_guid;
+};
+
+struct mthca_set_ib_param {
+ int set_si_guid;
+ int reset_qkey_viol;
+ u64 si_guid;
+ u32 cap_mask;
+};
+
+int mthca_cmd_use_events(struct mthca_dev *dev);
+void mthca_cmd_use_polling(struct mthca_dev *dev);
+void mthca_cmd_event(struct mthca_dev *dev, u16 token,
+ u8 status, u64 out_param);
+
+int mthca_SYS_EN(struct mthca_dev *dev, u8 *status);
+int mthca_SYS_DIS(struct mthca_dev *dev, u8 *status);
+int mthca_MAP_FA(struct mthca_dev *dev, struct mthca_icm *icm, u8 *status);
+int mthca_UNMAP_FA(struct mthca_dev *dev, u8 *status);
+int mthca_RUN_FW(struct mthca_dev *dev, u8 *status);
+int mthca_QUERY_FW(struct mthca_dev *dev, u8 *status);
+int mthca_ENABLE_LAM(struct mthca_dev *dev, u8 *status);
+int mthca_DISABLE_LAM(struct mthca_dev *dev, u8 *status);
+int mthca_QUERY_DDR(struct mthca_dev *dev, u8 *status);
+int mthca_QUERY_DEV_LIM(struct mthca_dev *dev,
+ struct mthca_dev_lim *dev_lim, u8 *status);
+int mthca_QUERY_ADAPTER(struct mthca_dev *dev,
+ struct mthca_adapter *adapter, u8 *status);
+int mthca_INIT_HCA(struct mthca_dev *dev,
+ struct mthca_init_hca_param *param,
+ u8 *status);
+int mthca_INIT_IB(struct mthca_dev *dev,
+ struct mthca_init_ib_param *param,
+ int port, u8 *status);
+int mthca_CLOSE_IB(struct mthca_dev *dev, int port, u8 *status);
+int mthca_CLOSE_HCA(struct mthca_dev *dev, int panic, u8 *status);
+int mthca_SET_IB(struct mthca_dev *dev, struct mthca_set_ib_param *param,
+ int port, u8 *status);
+int mthca_MAP_ICM(struct mthca_dev *dev, struct mthca_icm *icm, u64 virt, u8 *status);
+int mthca_MAP_ICM_page(struct mthca_dev *dev, u64 dma_addr, u64 virt, u8 *status);
+int mthca_UNMAP_ICM(struct mthca_dev *dev, u64 virt, u32 page_count, u8 *status);
+int mthca_MAP_ICM_AUX(struct mthca_dev *dev, struct mthca_icm *icm, u8 *status);
+int mthca_UNMAP_ICM_AUX(struct mthca_dev *dev, u8 *status);
+int mthca_SET_ICM_SIZE(struct mthca_dev *dev, u64 icm_size, u64 *aux_pages,
+ u8 *status);
+int mthca_SW2HW_MPT(struct mthca_dev *dev, void *mpt_entry,
+ int mpt_index, u8 *status);
+int mthca_HW2SW_MPT(struct mthca_dev *dev, void *mpt_entry,
+ int mpt_index, u8 *status);
+int mthca_WRITE_MTT(struct mthca_dev *dev, u64 *mtt_entry,
+ int num_mtt, u8 *status);
+int mthca_MAP_EQ(struct mthca_dev *dev, u64 event_mask, int unmap,
+ int eq_num, u8 *status);
+int mthca_SW2HW_EQ(struct mthca_dev *dev, void *eq_context,
+ int eq_num, u8 *status);
+int mthca_HW2SW_EQ(struct mthca_dev *dev, void *eq_context,
+ int eq_num, u8 *status);
+int mthca_SW2HW_CQ(struct mthca_dev *dev, void *cq_context,
+ int cq_num, u8 *status);
+int mthca_HW2SW_CQ(struct mthca_dev *dev, void *cq_context,
+ int cq_num, u8 *status);
+int mthca_MODIFY_QP(struct mthca_dev *dev, int trans, u32 num,
+ int is_ee, void *qp_context, u32 optmask,
+ u8 *status);
+int mthca_QUERY_QP(struct mthca_dev *dev, u32 num, int is_ee,
+ void *qp_context, u8 *status);
+int mthca_CONF_SPECIAL_QP(struct mthca_dev *dev, int type, u32 qpn,
+ u8 *status);
+int mthca_MAD_IFC(struct mthca_dev *dev, int ignore_mkey, int ignore_bkey,
+ int port, struct ib_wc* in_wc, struct ib_grh* in_grh,
+ void *in_mad, void *response_mad, u8 *status);
+int mthca_READ_MGM(struct mthca_dev *dev, int index, void *mgm,
+ u8 *status);
+int mthca_WRITE_MGM(struct mthca_dev *dev, int index, void *mgm,
+ u8 *status);
+int mthca_MGID_HASH(struct mthca_dev *dev, void *gid, u16 *hash,
+ u8 *status);
+int mthca_NOP(struct mthca_dev *dev, u8 *status);
+
+#define MAILBOX_ALIGN(x) ((void *) ALIGN((unsigned long) (x), MTHCA_CMD_MAILBOX_ALIGN))
+
+#endif /* MTHCA_CMD_H */
diff --git a/drivers/infiniband/hw/mthca/mthca_config_reg.h b/drivers/infiniband/hw/mthca/mthca_config_reg.h
new file mode 100644
index 000000000000..b4bfbbfe2c3d
--- /dev/null
+++ b/drivers/infiniband/hw/mthca/mthca_config_reg.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2004 Topspin Communications. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * $Id: mthca_config_reg.h 1349 2004-12-16 21:09:43Z roland $
+ */
+
+#ifndef MTHCA_CONFIG_REG_H
+#define MTHCA_CONFIG_REG_H
+
+#include <asm/page.h>
+
+#define MTHCA_HCR_BASE 0x80680
+#define MTHCA_HCR_SIZE 0x0001c
+#define MTHCA_ECR_BASE 0x80700
+#define MTHCA_ECR_SIZE 0x00008
+#define MTHCA_ECR_CLR_BASE 0x80708
+#define MTHCA_ECR_CLR_SIZE 0x00008
+#define MTHCA_MAP_ECR_SIZE (MTHCA_ECR_SIZE + MTHCA_ECR_CLR_SIZE)
+#define MTHCA_CLR_INT_BASE 0xf00d8
+#define MTHCA_CLR_INT_SIZE 0x00008
+#define MTHCA_EQ_SET_CI_SIZE (8 * 32)
+
+#endif /* MTHCA_CONFIG_REG_H */
diff --git a/drivers/infiniband/hw/mthca/mthca_cq.c b/drivers/infiniband/hw/mthca/mthca_cq.c
new file mode 100644
index 000000000000..5dead2df7eb0
--- /dev/null
+++ b/drivers/infiniband/hw/mthca/mthca_cq.c
@@ -0,0 +1,918 @@
+/*
+ * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * $Id: mthca_cq.c 1369 2004-12-20 16:17:07Z roland $
+ */
+
+#include <linux/init.h>
+#include <linux/hardirq.h>
+
+#include <ib_pack.h>
+
+#include "mthca_dev.h"
+#include "mthca_cmd.h"
+#include "mthca_memfree.h"
+
+enum {
+ MTHCA_MAX_DIRECT_CQ_SIZE = 4 * PAGE_SIZE
+};
+
+enum {
+ MTHCA_CQ_ENTRY_SIZE = 0x20
+};
+
+/*
+ * Must be packed because start is 64 bits but only aligned to 32 bits.
+ */
+struct mthca_cq_context {
+ u32 flags;
+ u64 start;
+ u32 logsize_usrpage;
+ u32 error_eqn; /* Tavor only */
+ u32 comp_eqn;
+ u32 pd;
+ u32 lkey;
+ u32 last_notified_index;
+ u32 solicit_producer_index;
+ u32 consumer_index;
+ u32 producer_index;
+ u32 cqn;
+ u32 ci_db; /* Arbel only */
+ u32 state_db; /* Arbel only */
+ u32 reserved;
+} __attribute__((packed));
+
+#define MTHCA_CQ_STATUS_OK ( 0 << 28)
+#define MTHCA_CQ_STATUS_OVERFLOW ( 9 << 28)
+#define MTHCA_CQ_STATUS_WRITE_FAIL (10 << 28)
+#define MTHCA_CQ_FLAG_TR ( 1 << 18)
+#define MTHCA_CQ_FLAG_OI ( 1 << 17)
+#define MTHCA_CQ_STATE_DISARMED ( 0 << 8)
+#define MTHCA_CQ_STATE_ARMED ( 1 << 8)
+#define MTHCA_CQ_STATE_ARMED_SOL ( 4 << 8)
+#define MTHCA_EQ_STATE_FIRED (10 << 8)
+
+enum {
+ MTHCA_ERROR_CQE_OPCODE_MASK = 0xfe
+};
+
+enum {
+ SYNDROME_LOCAL_LENGTH_ERR = 0x01,
+ SYNDROME_LOCAL_QP_OP_ERR = 0x02,
+ SYNDROME_LOCAL_EEC_OP_ERR = 0x03,
+ SYNDROME_LOCAL_PROT_ERR = 0x04,
+ SYNDROME_WR_FLUSH_ERR = 0x05,
+ SYNDROME_MW_BIND_ERR = 0x06,
+ SYNDROME_BAD_RESP_ERR = 0x10,
+ SYNDROME_LOCAL_ACCESS_ERR = 0x11,
+ SYNDROME_REMOTE_INVAL_REQ_ERR = 0x12,
+ SYNDROME_REMOTE_ACCESS_ERR = 0x13,
+ SYNDROME_REMOTE_OP_ERR = 0x14,
+ SYNDROME_RETRY_EXC_ERR = 0x15,
+ SYNDROME_RNR_RETRY_EXC_ERR = 0x16,
+ SYNDROME_LOCAL_RDD_VIOL_ERR = 0x20,
+ SYNDROME_REMOTE_INVAL_RD_REQ_ERR = 0x21,
+ SYNDROME_REMOTE_ABORTED_ERR = 0x22,
+ SYNDROME_INVAL_EECN_ERR = 0x23,
+ SYNDROME_INVAL_EEC_STATE_ERR = 0x24
+};
+
+struct mthca_cqe {
+ u32 my_qpn;
+ u32 my_ee;
+ u32 rqpn;
+ u16 sl_g_mlpath;
+ u16 rlid;
+ u32 imm_etype_pkey_eec;
+ u32 byte_cnt;
+ u32 wqe;
+ u8 opcode;
+ u8 is_send;
+ u8 reserved;
+ u8 owner;
+};
+
+struct mthca_err_cqe {
+ u32 my_qpn;
+ u32 reserved1[3];
+ u8 syndrome;
+ u8 reserved2;
+ u16 db_cnt;
+ u32 reserved3;
+ u32 wqe;
+ u8 opcode;
+ u8 reserved4[2];
+ u8 owner;
+};
+
+#define MTHCA_CQ_ENTRY_OWNER_SW (0 << 7)
+#define MTHCA_CQ_ENTRY_OWNER_HW (1 << 7)
+
+#define MTHCA_TAVOR_CQ_DB_INC_CI (1 << 24)
+#define MTHCA_TAVOR_CQ_DB_REQ_NOT (2 << 24)
+#define MTHCA_TAVOR_CQ_DB_REQ_NOT_SOL (3 << 24)
+#define MTHCA_TAVOR_CQ_DB_SET_CI (4 << 24)
+#define MTHCA_TAVOR_CQ_DB_REQ_NOT_MULT (5 << 24)
+
+#define MTHCA_ARBEL_CQ_DB_REQ_NOT_SOL (1 << 24)
+#define MTHCA_ARBEL_CQ_DB_REQ_NOT (2 << 24)
+#define MTHCA_ARBEL_CQ_DB_REQ_NOT_MULT (3 << 24)
+
+static inline struct mthca_cqe *get_cqe(struct mthca_cq *cq, int entry)
+{
+ if (cq->is_direct)
+ return cq->queue.direct.buf + (entry * MTHCA_CQ_ENTRY_SIZE);
+ else
+ return cq->queue.page_list[entry * MTHCA_CQ_ENTRY_SIZE / PAGE_SIZE].buf
+ + (entry * MTHCA_CQ_ENTRY_SIZE) % PAGE_SIZE;
+}
+
+static inline struct mthca_cqe *cqe_sw(struct mthca_cq *cq, int i)
+{
+ struct mthca_cqe *cqe = get_cqe(cq, i);
+ return MTHCA_CQ_ENTRY_OWNER_HW & cqe->owner ? NULL : cqe;
+}
+
+static inline struct mthca_cqe *next_cqe_sw(struct mthca_cq *cq)
+{
+ return cqe_sw(cq, cq->cons_index & cq->ibcq.cqe);
+}
+
+static inline void set_cqe_hw(struct mthca_cqe *cqe)
+{
+ cqe->owner = MTHCA_CQ_ENTRY_OWNER_HW;
+}
+
+/*
+ * incr is ignored in native Arbel (mem-free) mode, so cq->cons_index
+ * should be correct before calling update_cons_index().
+ */
+static inline void update_cons_index(struct mthca_dev *dev, struct mthca_cq *cq,
+ int incr)
+{
+ u32 doorbell[2];
+
+ if (dev->hca_type == ARBEL_NATIVE) {
+ *cq->set_ci_db = cpu_to_be32(cq->cons_index);
+ wmb();
+ } else {
+ doorbell[0] = cpu_to_be32(MTHCA_TAVOR_CQ_DB_INC_CI | cq->cqn);
+ doorbell[1] = cpu_to_be32(incr - 1);
+
+ mthca_write64(doorbell,
+ dev->kar + MTHCA_CQ_DOORBELL,
+ MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock));
+ }
+}
+
+void mthca_cq_event(struct mthca_dev *dev, u32 cqn)
+{
+ struct mthca_cq *cq;
+
+ cq = mthca_array_get(&dev->cq_table.cq, cqn & (dev->limits.num_cqs - 1));
+
+ if (!cq) {
+ mthca_warn(dev, "Completion event for bogus CQ %08x\n", cqn);
+ return;
+ }
+
+ ++cq->arm_sn;
+
+ cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context);
+}
+
+void mthca_cq_clean(struct mthca_dev *dev, u32 cqn, u32 qpn)
+{
+ struct mthca_cq *cq;
+ struct mthca_cqe *cqe;
+ int prod_index;
+ int nfreed = 0;
+
+ spin_lock_irq(&dev->cq_table.lock);
+ cq = mthca_array_get(&dev->cq_table.cq, cqn & (dev->limits.num_cqs - 1));
+ if (cq)
+ atomic_inc(&cq->refcount);
+ spin_unlock_irq(&dev->cq_table.lock);
+
+ if (!cq)
+ return;
+
+ spin_lock_irq(&cq->lock);
+
+ /*
+ * First we need to find the current producer index, so we
+ * know where to start cleaning from. It doesn't matter if HW
+ * adds new entries after this loop -- the QP we're worried
+ * about is already in RESET, so the new entries won't come
+ * from our QP and therefore don't need to be checked.
+ */
+ for (prod_index = cq->cons_index;
+ cqe_sw(cq, prod_index & cq->ibcq.cqe);
+ ++prod_index)
+ if (prod_index == cq->cons_index + cq->ibcq.cqe)
+ break;
+
+ if (0)
+ mthca_dbg(dev, "Cleaning QPN %06x from CQN %06x; ci %d, pi %d\n",
+ qpn, cqn, cq->cons_index, prod_index);
+
+ /*
+ * Now sweep backwards through the CQ, removing CQ entries
+ * that match our QP by copying older entries on top of them.
+ */
+ while (prod_index > cq->cons_index) {
+ cqe = get_cqe(cq, (prod_index - 1) & cq->ibcq.cqe);
+ if (cqe->my_qpn == cpu_to_be32(qpn))
+ ++nfreed;
+ else if (nfreed)
+ memcpy(get_cqe(cq, (prod_index - 1 + nfreed) &
+ cq->ibcq.cqe),
+ cqe,
+ MTHCA_CQ_ENTRY_SIZE);
+ --prod_index;
+ }
+
+ if (nfreed) {
+ wmb();
+ cq->cons_index += nfreed;
+ update_cons_index(dev, cq, nfreed);
+ }
+
+ spin_unlock_irq(&cq->lock);
+ if (atomic_dec_and_test(&cq->refcount))
+ wake_up(&cq->wait);
+}
+
+static int handle_error_cqe(struct mthca_dev *dev, struct mthca_cq *cq,
+ struct mthca_qp *qp, int wqe_index, int is_send,
+ struct mthca_err_cqe *cqe,
+ struct ib_wc *entry, int *free_cqe)
+{
+ int err;
+ int dbd;
+ u32 new_wqe;
+
+ if (1 && cqe->syndrome != SYNDROME_WR_FLUSH_ERR) {
+ int j;
+
+ mthca_dbg(dev, "%x/%d: error CQE -> QPN %06x, WQE @ %08x\n",
+ cq->cqn, cq->cons_index, be32_to_cpu(cqe->my_qpn),
+ be32_to_cpu(cqe->wqe));
+
+ for (j = 0; j < 8; ++j)
+ printk(KERN_DEBUG " [%2x] %08x\n",
+ j * 4, be32_to_cpu(((u32 *) cqe)[j]));
+ }
+
+ /*
+ * For completions in error, only work request ID, status (and
+ * freed resource count for RD) have to be set.
+ */
+ switch (cqe->syndrome) {
+ case SYNDROME_LOCAL_LENGTH_ERR:
+ entry->status = IB_WC_LOC_LEN_ERR;
+ break;
+ case SYNDROME_LOCAL_QP_OP_ERR:
+ entry->status = IB_WC_LOC_QP_OP_ERR;
+ break;
+ case SYNDROME_LOCAL_EEC_OP_ERR:
+ entry->status = IB_WC_LOC_EEC_OP_ERR;
+ break;
+ case SYNDROME_LOCAL_PROT_ERR:
+ entry->status = IB_WC_LOC_PROT_ERR;
+ break;
+ case SYNDROME_WR_FLUSH_ERR:
+ entry->status = IB_WC_WR_FLUSH_ERR;
+ break;
+ case SYNDROME_MW_BIND_ERR:
+ entry->status = IB_WC_MW_BIND_ERR;
+ break;
+ case SYNDROME_BAD_RESP_ERR:
+ entry->status = IB_WC_BAD_RESP_ERR;
+ break;
+ case SYNDROME_LOCAL_ACCESS_ERR:
+ entry->status = IB_WC_LOC_ACCESS_ERR;
+ break;
+ case SYNDROME_REMOTE_INVAL_REQ_ERR:
+ entry->status = IB_WC_REM_INV_REQ_ERR;
+ break;
+ case SYNDROME_REMOTE_ACCESS_ERR:
+ entry->status = IB_WC_REM_ACCESS_ERR;
+ break;
+ case SYNDROME_REMOTE_OP_ERR:
+ entry->status = IB_WC_REM_OP_ERR;
+ break;
+ case SYNDROME_RETRY_EXC_ERR:
+ entry->status = IB_WC_RETRY_EXC_ERR;
+ break;
+ case SYNDROME_RNR_RETRY_EXC_ERR:
+ entry->status = IB_WC_RNR_RETRY_EXC_ERR;
+ break;
+ case SYNDROME_LOCAL_RDD_VIOL_ERR:
+ entry->status = IB_WC_LOC_RDD_VIOL_ERR;
+ break;
+ case SYNDROME_REMOTE_INVAL_RD_REQ_ERR:
+ entry->status = IB_WC_REM_INV_RD_REQ_ERR;
+ break;
+ case SYNDROME_REMOTE_ABORTED_ERR:
+ entry->status = IB_WC_REM_ABORT_ERR;
+ break;
+ case SYNDROME_INVAL_EECN_ERR:
+ entry->status = IB_WC_INV_EECN_ERR;
+ break;
+ case SYNDROME_INVAL_EEC_STATE_ERR:
+ entry->status = IB_WC_INV_EEC_STATE_ERR;
+ break;
+ default:
+ entry->status = IB_WC_GENERAL_ERR;
+ break;
+ }
+
+ err = mthca_free_err_wqe(dev, qp, is_send, wqe_index, &dbd, &new_wqe);
+ if (err)
+ return err;
+
+ /*
+ * If we're at the end of the WQE chain, or we've used up our
+ * doorbell count, free the CQE. Otherwise just update it for
+ * the next poll operation.
+ */
+ if (!(new_wqe & cpu_to_be32(0x3f)) || (!cqe->db_cnt && dbd))
+ return 0;
+
+ cqe->db_cnt = cpu_to_be16(be16_to_cpu(cqe->db_cnt) - dbd);
+ cqe->wqe = new_wqe;
+ cqe->syndrome = SYNDROME_WR_FLUSH_ERR;
+
+ *free_cqe = 0;
+
+ return 0;
+}
+
+static void dump_cqe(struct mthca_cqe *cqe)
+{
+ int j;
+
+ for (j = 0; j < 8; ++j)
+ printk(KERN_DEBUG " [%2x] %08x\n",
+ j * 4, be32_to_cpu(((u32 *) cqe)[j]));
+}
+
+static inline int mthca_poll_one(struct mthca_dev *dev,
+ struct mthca_cq *cq,
+ struct mthca_qp **cur_qp,
+ int *freed,
+ struct ib_wc *entry)
+{
+ struct mthca_wq *wq;
+ struct mthca_cqe *cqe;
+ int wqe_index;
+ int is_error;
+ int is_send;
+ int free_cqe = 1;
+ int err = 0;
+
+ cqe = next_cqe_sw(cq);
+ if (!cqe)
+ return -EAGAIN;
+
+ /*
+ * Make sure we read CQ entry contents after we've checked the
+ * ownership bit.
+ */
+ rmb();
+
+ if (0) {
+ mthca_dbg(dev, "%x/%d: CQE -> QPN %06x, WQE @ %08x\n",
+ cq->cqn, cq->cons_index, be32_to_cpu(cqe->my_qpn),
+ be32_to_cpu(cqe->wqe));
+
+ dump_cqe(cqe);
+ }
+
+ is_error = (cqe->opcode & MTHCA_ERROR_CQE_OPCODE_MASK) ==
+ MTHCA_ERROR_CQE_OPCODE_MASK;
+ is_send = is_error ? cqe->opcode & 0x01 : cqe->is_send & 0x80;
+
+ if (!*cur_qp || be32_to_cpu(cqe->my_qpn) != (*cur_qp)->qpn) {
+ /*
+ * We do not have to take the QP table lock here,
+ * because CQs will be locked while QPs are removed
+ * from the table.
+ */
+ *cur_qp = mthca_array_get(&dev->qp_table.qp,
+ be32_to_cpu(cqe->my_qpn) &
+ (dev->limits.num_qps - 1));
+ if (!*cur_qp) {
+ mthca_warn(dev, "CQ entry for unknown QP %06x\n",
+ be32_to_cpu(cqe->my_qpn) & 0xffffff);
+ err = -EINVAL;
+ goto out;
+ }
+ }
+
+ entry->qp_num = (*cur_qp)->qpn;
+
+ if (is_send) {
+ wq = &(*cur_qp)->sq;
+ wqe_index = ((be32_to_cpu(cqe->wqe) - (*cur_qp)->send_wqe_offset)
+ >> wq->wqe_shift);
+ entry->wr_id = (*cur_qp)->wrid[wqe_index +
+ (*cur_qp)->rq.max];
+ } else {
+ wq = &(*cur_qp)->rq;
+ wqe_index = be32_to_cpu(cqe->wqe) >> wq->wqe_shift;
+ entry->wr_id = (*cur_qp)->wrid[wqe_index];
+ }
+
+ if (wq->last_comp < wqe_index)
+ wq->tail += wqe_index - wq->last_comp;
+ else
+ wq->tail += wqe_index + wq->max - wq->last_comp;
+
+ wq->last_comp = wqe_index;
+
+ if (0)
+ mthca_dbg(dev, "%s completion for QP %06x, index %d (nr %d)\n",
+ is_send ? "Send" : "Receive",
+ (*cur_qp)->qpn, wqe_index, wq->max);
+
+ if (is_error) {
+ err = handle_error_cqe(dev, cq, *cur_qp, wqe_index, is_send,
+ (struct mthca_err_cqe *) cqe,
+ entry, &free_cqe);
+ goto out;
+ }
+
+ if (is_send) {
+ entry->opcode = IB_WC_SEND; /* XXX */
+ } else {
+ entry->byte_len = be32_to_cpu(cqe->byte_cnt);
+ switch (cqe->opcode & 0x1f) {
+ case IB_OPCODE_SEND_LAST_WITH_IMMEDIATE:
+ case IB_OPCODE_SEND_ONLY_WITH_IMMEDIATE:
+ entry->wc_flags = IB_WC_WITH_IMM;
+ entry->imm_data = cqe->imm_etype_pkey_eec;
+ entry->opcode = IB_WC_RECV;
+ break;
+ case IB_OPCODE_RDMA_WRITE_LAST_WITH_IMMEDIATE:
+ case IB_OPCODE_RDMA_WRITE_ONLY_WITH_IMMEDIATE:
+ entry->wc_flags = IB_WC_WITH_IMM;
+ entry->imm_data = cqe->imm_etype_pkey_eec;
+ entry->opcode = IB_WC_RECV_RDMA_WITH_IMM;
+ break;
+ default:
+ entry->wc_flags = 0;
+ entry->opcode = IB_WC_RECV;
+ break;
+ }
+ entry->slid = be16_to_cpu(cqe->rlid);
+ entry->sl = be16_to_cpu(cqe->sl_g_mlpath) >> 12;
+ entry->src_qp = be32_to_cpu(cqe->rqpn) & 0xffffff;
+ entry->dlid_path_bits = be16_to_cpu(cqe->sl_g_mlpath) & 0x7f;
+ entry->pkey_index = be32_to_cpu(cqe->imm_etype_pkey_eec) >> 16;
+ entry->wc_flags |= be16_to_cpu(cqe->sl_g_mlpath) & 0x80 ?
+ IB_WC_GRH : 0;
+ }
+
+ entry->status = IB_WC_SUCCESS;
+
+ out:
+ if (likely(free_cqe)) {
+ set_cqe_hw(cqe);
+ ++(*freed);
+ ++cq->cons_index;
+ }
+
+ return err;
+}
+
+int mthca_poll_cq(struct ib_cq *ibcq, int num_entries,
+ struct ib_wc *entry)
+{
+ struct mthca_dev *dev = to_mdev(ibcq->device);
+ struct mthca_cq *cq = to_mcq(ibcq);
+ struct mthca_qp *qp = NULL;
+ unsigned long flags;
+ int err = 0;
+ int freed = 0;
+ int npolled;
+
+ spin_lock_irqsave(&cq->lock, flags);
+
+ for (npolled = 0; npolled < num_entries; ++npolled) {
+ err = mthca_poll_one(dev, cq, &qp,
+ &freed, entry + npolled);
+ if (err)
+ break;
+ }
+
+ if (freed) {
+ wmb();
+ update_cons_index(dev, cq, freed);
+ }
+
+ spin_unlock_irqrestore(&cq->lock, flags);
+
+ return err == 0 || err == -EAGAIN ? npolled : err;
+}
+
+int mthca_tavor_arm_cq(struct ib_cq *cq, enum ib_cq_notify notify)
+{
+ u32 doorbell[2];
+
+ doorbell[0] = cpu_to_be32((notify == IB_CQ_SOLICITED ?
+ MTHCA_TAVOR_CQ_DB_REQ_NOT_SOL :
+ MTHCA_TAVOR_CQ_DB_REQ_NOT) |
+ to_mcq(cq)->cqn);
+ doorbell[1] = 0xffffffff;
+
+ mthca_write64(doorbell,
+ to_mdev(cq->device)->kar + MTHCA_CQ_DOORBELL,
+ MTHCA_GET_DOORBELL_LOCK(&to_mdev(cq->device)->doorbell_lock));
+
+ return 0;
+}
+
+int mthca_arbel_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify notify)
+{
+ struct mthca_cq *cq = to_mcq(ibcq);
+ u32 doorbell[2];
+ u32 sn;
+ u32 ci;
+
+ sn = cq->arm_sn & 3;
+ ci = cpu_to_be32(cq->cons_index);
+
+ doorbell[0] = ci;
+ doorbell[1] = cpu_to_be32((cq->cqn << 8) | (2 << 5) | (sn << 3) |
+ (notify == IB_CQ_SOLICITED ? 1 : 2));
+
+ mthca_write_db_rec(doorbell, cq->arm_db);
+
+ /*
+ * Make sure that the doorbell record in host memory is
+ * written before ringing the doorbell via PCI MMIO.
+ */
+ wmb();
+
+ doorbell[0] = cpu_to_be32((sn << 28) |
+ (notify == IB_CQ_SOLICITED ?
+ MTHCA_ARBEL_CQ_DB_REQ_NOT_SOL :
+ MTHCA_ARBEL_CQ_DB_REQ_NOT) |
+ cq->cqn);
+ doorbell[1] = ci;
+
+ mthca_write64(doorbell,
+ to_mdev(ibcq->device)->kar + MTHCA_CQ_DOORBELL,
+ MTHCA_GET_DOORBELL_LOCK(&to_mdev(ibcq->device)->doorbell_lock));
+
+ return 0;
+}
+
+static void mthca_free_cq_buf(struct mthca_dev *dev, struct mthca_cq *cq)
+{
+ int i;
+ int size;
+
+ if (cq->is_direct)
+ pci_free_consistent(dev->pdev,
+ (cq->ibcq.cqe + 1) * MTHCA_CQ_ENTRY_SIZE,
+ cq->queue.direct.buf,
+ pci_unmap_addr(&cq->queue.direct,
+ mapping));
+ else {
+ size = (cq->ibcq.cqe + 1) * MTHCA_CQ_ENTRY_SIZE;
+ for (i = 0; i < (size + PAGE_SIZE - 1) / PAGE_SIZE; ++i)
+ if (cq->queue.page_list[i].buf)
+ pci_free_consistent(dev->pdev, PAGE_SIZE,
+ cq->queue.page_list[i].buf,
+ pci_unmap_addr(&cq->queue.page_list[i],
+ mapping));
+
+ kfree(cq->queue.page_list);
+ }
+}
+
+static int mthca_alloc_cq_buf(struct mthca_dev *dev, int size,
+ struct mthca_cq *cq)
+{
+ int err = -ENOMEM;
+ int npages, shift;
+ u64 *dma_list = NULL;
+ dma_addr_t t;
+ int i;
+
+ if (size <= MTHCA_MAX_DIRECT_CQ_SIZE) {
+ cq->is_direct = 1;
+ npages = 1;
+ shift = get_order(size) + PAGE_SHIFT;
+
+ cq->queue.direct.buf = pci_alloc_consistent(dev->pdev,
+ size, &t);
+ if (!cq->queue.direct.buf)
+ return -ENOMEM;
+
+ pci_unmap_addr_set(&cq->queue.direct, mapping, t);
+
+ memset(cq->queue.direct.buf, 0, size);
+
+ while (t & ((1 << shift) - 1)) {
+ --shift;
+ npages *= 2;
+ }
+
+ dma_list = kmalloc(npages * sizeof *dma_list, GFP_KERNEL);
+ if (!dma_list)
+ goto err_free;
+
+ for (i = 0; i < npages; ++i)
+ dma_list[i] = t + i * (1 << shift);
+ } else {
+ cq->is_direct = 0;
+ npages = (size + PAGE_SIZE - 1) / PAGE_SIZE;
+ shift = PAGE_SHIFT;
+
+ dma_list = kmalloc(npages * sizeof *dma_list, GFP_KERNEL);
+ if (!dma_list)
+ return -ENOMEM;
+
+ cq->queue.page_list = kmalloc(npages * sizeof *cq->queue.page_list,
+ GFP_KERNEL);
+ if (!cq->queue.page_list)
+ goto err_out;
+
+ for (i = 0; i < npages; ++i)
+ cq->queue.page_list[i].buf = NULL;
+
+ for (i = 0; i < npages; ++i) {
+ cq->queue.page_list[i].buf =
+ pci_alloc_consistent(dev->pdev, PAGE_SIZE, &t);
+ if (!cq->queue.page_list[i].buf)
+ goto err_free;
+
+ dma_list[i] = t;
+ pci_unmap_addr_set(&cq->queue.page_list[i], mapping, t);
+
+ memset(cq->queue.page_list[i].buf, 0, PAGE_SIZE);
+ }
+ }
+
+ err = mthca_mr_alloc_phys(dev, dev->driver_pd.pd_num,
+ dma_list, shift, npages,
+ 0, size,
+ MTHCA_MPT_FLAG_LOCAL_WRITE |
+ MTHCA_MPT_FLAG_LOCAL_READ,
+ &cq->mr);
+ if (err)
+ goto err_free;
+
+ kfree(dma_list);
+
+ return 0;
+
+err_free:
+ mthca_free_cq_buf(dev, cq);
+
+err_out:
+ kfree(dma_list);
+
+ return err;
+}
+
+int mthca_init_cq(struct mthca_dev *dev, int nent,
+ struct mthca_cq *cq)
+{
+ int size = nent * MTHCA_CQ_ENTRY_SIZE;
+ void *mailbox = NULL;
+ struct mthca_cq_context *cq_context;
+ int err = -ENOMEM;
+ u8 status;
+ int i;
+
+ might_sleep();
+
+ cq->ibcq.cqe = nent - 1;
+
+ cq->cqn = mthca_alloc(&dev->cq_table.alloc);
+ if (cq->cqn == -1)
+ return -ENOMEM;
+
+ if (dev->hca_type == ARBEL_NATIVE) {
+ cq->arm_sn = 1;
+
+ err = mthca_table_get(dev, dev->cq_table.table, cq->cqn);
+ if (err)
+ goto err_out;
+
+ err = -ENOMEM;
+
+ cq->set_ci_db_index = mthca_alloc_db(dev, MTHCA_DB_TYPE_CQ_SET_CI,
+ cq->cqn, &cq->set_ci_db);
+ if (cq->set_ci_db_index < 0)
+ goto err_out_icm;
+
+ cq->arm_db_index = mthca_alloc_db(dev, MTHCA_DB_TYPE_CQ_ARM,
+ cq->cqn, &cq->arm_db);
+ if (cq->arm_db_index < 0)
+ goto err_out_ci;
+ }
+
+ mailbox = kmalloc(sizeof (struct mthca_cq_context) + MTHCA_CMD_MAILBOX_EXTRA,
+ GFP_KERNEL);
+ if (!mailbox)
+ goto err_out_mailbox;
+
+ cq_context = MAILBOX_ALIGN(mailbox);
+
+ err = mthca_alloc_cq_buf(dev, size, cq);
+ if (err)
+ goto err_out_mailbox;
+
+ for (i = 0; i < nent; ++i)
+ set_cqe_hw(get_cqe(cq, i));
+
+ spin_lock_init(&cq->lock);
+ atomic_set(&cq->refcount, 1);
+ init_waitqueue_head(&cq->wait);
+
+ memset(cq_context, 0, sizeof *cq_context);
+ cq_context->flags = cpu_to_be32(MTHCA_CQ_STATUS_OK |
+ MTHCA_CQ_STATE_DISARMED |
+ MTHCA_CQ_FLAG_TR);
+ cq_context->start = cpu_to_be64(0);
+ cq_context->logsize_usrpage = cpu_to_be32((ffs(nent) - 1) << 24 |
+ dev->driver_uar.index);
+ cq_context->error_eqn = cpu_to_be32(dev->eq_table.eq[MTHCA_EQ_ASYNC].eqn);
+ cq_context->comp_eqn = cpu_to_be32(dev->eq_table.eq[MTHCA_EQ_COMP].eqn);
+ cq_context->pd = cpu_to_be32(dev->driver_pd.pd_num);
+ cq_context->lkey = cpu_to_be32(cq->mr.ibmr.lkey);
+ cq_context->cqn = cpu_to_be32(cq->cqn);
+
+ if (dev->hca_type == ARBEL_NATIVE) {
+ cq_context->ci_db = cpu_to_be32(cq->set_ci_db_index);
+ cq_context->state_db = cpu_to_be32(cq->arm_db_index);
+ }
+
+ err = mthca_SW2HW_CQ(dev, cq_context, cq->cqn, &status);
+ if (err) {
+ mthca_warn(dev, "SW2HW_CQ failed (%d)\n", err);
+ goto err_out_free_mr;
+ }
+
+ if (status) {
+ mthca_warn(dev, "SW2HW_CQ returned status 0x%02x\n",
+ status);
+ err = -EINVAL;
+ goto err_out_free_mr;
+ }
+
+ spin_lock_irq(&dev->cq_table.lock);
+ if (mthca_array_set(&dev->cq_table.cq,
+ cq->cqn & (dev->limits.num_cqs - 1),
+ cq)) {
+ spin_unlock_irq(&dev->cq_table.lock);
+ goto err_out_free_mr;
+ }
+ spin_unlock_irq(&dev->cq_table.lock);
+
+ cq->cons_index = 0;
+
+ kfree(mailbox);
+
+ return 0;
+
+err_out_free_mr:
+ mthca_free_mr(dev, &cq->mr);
+ mthca_free_cq_buf(dev, cq);
+
+err_out_mailbox:
+ kfree(mailbox);
+
+ mthca_free_db(dev, MTHCA_DB_TYPE_CQ_ARM, cq->arm_db_index);
+
+err_out_ci:
+ mthca_free_db(dev, MTHCA_DB_TYPE_CQ_SET_CI, cq->set_ci_db_index);
+
+err_out_icm:
+ mthca_table_put(dev, dev->cq_table.table, cq->cqn);
+
+err_out:
+ mthca_free(&dev->cq_table.alloc, cq->cqn);
+
+ return err;
+}
+
+void mthca_free_cq(struct mthca_dev *dev,
+ struct mthca_cq *cq)
+{
+ void *mailbox;
+ int err;
+ u8 status;
+
+ might_sleep();
+
+ mailbox = kmalloc(sizeof (struct mthca_cq_context) + MTHCA_CMD_MAILBOX_EXTRA,
+ GFP_KERNEL);
+ if (!mailbox) {
+ mthca_warn(dev, "No memory for mailbox to free CQ.\n");
+ return;
+ }
+
+ err = mthca_HW2SW_CQ(dev, MAILBOX_ALIGN(mailbox), cq->cqn, &status);
+ if (err)
+ mthca_warn(dev, "HW2SW_CQ failed (%d)\n", err);
+ else if (status)
+ mthca_warn(dev, "HW2SW_CQ returned status 0x%02x\n",
+ status);
+
+ if (0) {
+ u32 *ctx = MAILBOX_ALIGN(mailbox);
+ int j;
+
+ printk(KERN_ERR "context for CQN %x (cons index %x, next sw %d)\n",
+ cq->cqn, cq->cons_index, !!next_cqe_sw(cq));
+ for (j = 0; j < 16; ++j)
+ printk(KERN_ERR "[%2x] %08x\n", j * 4, be32_to_cpu(ctx[j]));
+ }
+
+ spin_lock_irq(&dev->cq_table.lock);
+ mthca_array_clear(&dev->cq_table.cq,
+ cq->cqn & (dev->limits.num_cqs - 1));
+ spin_unlock_irq(&dev->cq_table.lock);
+
+ if (dev->mthca_flags & MTHCA_FLAG_MSI_X)
+ synchronize_irq(dev->eq_table.eq[MTHCA_EQ_COMP].msi_x_vector);
+ else
+ synchronize_irq(dev->pdev->irq);
+
+ atomic_dec(&cq->refcount);
+ wait_event(cq->wait, !atomic_read(&cq->refcount));
+
+ mthca_free_mr(dev, &cq->mr);
+ mthca_free_cq_buf(dev, cq);
+
+ if (dev->hca_type == ARBEL_NATIVE) {
+ mthca_free_db(dev, MTHCA_DB_TYPE_CQ_ARM, cq->arm_db_index);
+ mthca_free_db(dev, MTHCA_DB_TYPE_CQ_SET_CI, cq->set_ci_db_index);
+ mthca_table_put(dev, dev->cq_table.table, cq->cqn);
+ }
+
+ mthca_free(&dev->cq_table.alloc, cq->cqn);
+ kfree(mailbox);
+}
+
+int __devinit mthca_init_cq_table(struct mthca_dev *dev)
+{
+ int err;
+
+ spin_lock_init(&dev->cq_table.lock);
+
+ err = mthca_alloc_init(&dev->cq_table.alloc,
+ dev->limits.num_cqs,
+ (1 << 24) - 1,
+ dev->limits.reserved_cqs);
+ if (err)
+ return err;
+
+ err = mthca_array_init(&dev->cq_table.cq,
+ dev->limits.num_cqs);
+ if (err)
+ mthca_alloc_cleanup(&dev->cq_table.alloc);
+
+ return err;
+}
+
+void __devexit mthca_cleanup_cq_table(struct mthca_dev *dev)
+{
+ mthca_array_cleanup(&dev->cq_table.cq, dev->limits.num_cqs);
+ mthca_alloc_cleanup(&dev->cq_table.alloc);
+}
diff --git a/drivers/infiniband/hw/mthca/mthca_dev.h b/drivers/infiniband/hw/mthca/mthca_dev.h
new file mode 100644
index 000000000000..56b2bfb5adb1
--- /dev/null
+++ b/drivers/infiniband/hw/mthca/mthca_dev.h
@@ -0,0 +1,437 @@
+/*
+ * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * $Id: mthca_dev.h 1349 2004-12-16 21:09:43Z roland $
+ */
+
+#ifndef MTHCA_DEV_H
+#define MTHCA_DEV_H
+
+#include <linux/spinlock.h>
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/dma-mapping.h>
+#include <asm/semaphore.h>
+
+#include "mthca_provider.h"
+#include "mthca_doorbell.h"
+
+#define DRV_NAME "ib_mthca"
+#define PFX DRV_NAME ": "
+#define DRV_VERSION "0.06-pre"
+#define DRV_RELDATE "November 8, 2004"
+
+/* Types of supported HCA */
+enum {
+ TAVOR, /* MT23108 */
+ ARBEL_COMPAT, /* MT25208 in Tavor compat mode */
+ ARBEL_NATIVE /* MT25208 with extended features */
+};
+
+enum {
+ MTHCA_FLAG_DDR_HIDDEN = 1 << 1,
+ MTHCA_FLAG_SRQ = 1 << 2,
+ MTHCA_FLAG_MSI = 1 << 3,
+ MTHCA_FLAG_MSI_X = 1 << 4,
+ MTHCA_FLAG_NO_LAM = 1 << 5
+};
+
+enum {
+ MTHCA_MAX_PORTS = 2
+};
+
+enum {
+ MTHCA_EQ_CONTEXT_SIZE = 0x40,
+ MTHCA_CQ_CONTEXT_SIZE = 0x40,
+ MTHCA_QP_CONTEXT_SIZE = 0x200,
+ MTHCA_RDB_ENTRY_SIZE = 0x20,
+ MTHCA_AV_SIZE = 0x20,
+ MTHCA_MGM_ENTRY_SIZE = 0x40,
+
+ /* Arbel FW gives us these, but we need them for Tavor */
+ MTHCA_MPT_ENTRY_SIZE = 0x40,
+ MTHCA_MTT_SEG_SIZE = 0x40,
+};
+
+enum {
+ MTHCA_EQ_CMD,
+ MTHCA_EQ_ASYNC,
+ MTHCA_EQ_COMP,
+ MTHCA_NUM_EQ
+};
+
+struct mthca_cmd {
+ int use_events;
+ struct semaphore hcr_sem;
+ struct semaphore poll_sem;
+ struct semaphore event_sem;
+ int max_cmds;
+ spinlock_t context_lock;
+ int free_head;
+ struct mthca_cmd_context *context;
+ u16 token_mask;
+};
+
+struct mthca_limits {
+ int num_ports;
+ int vl_cap;
+ int mtu_cap;
+ int gid_table_len;
+ int pkey_table_len;
+ int local_ca_ack_delay;
+ int num_uars;
+ int max_sg;
+ int num_qps;
+ int reserved_qps;
+ int num_srqs;
+ int reserved_srqs;
+ int num_eecs;
+ int reserved_eecs;
+ int num_cqs;
+ int reserved_cqs;
+ int num_eqs;
+ int reserved_eqs;
+ int num_mpts;
+ int num_mtt_segs;
+ int mtt_seg_size;
+ int reserved_mtts;
+ int reserved_mrws;
+ int reserved_uars;
+ int num_mgms;
+ int num_amgms;
+ int reserved_mcgs;
+ int num_pds;
+ int reserved_pds;
+};
+
+struct mthca_alloc {
+ u32 last;
+ u32 top;
+ u32 max;
+ u32 mask;
+ spinlock_t lock;
+ unsigned long *table;
+};
+
+struct mthca_array {
+ struct {
+ void **page;
+ int used;
+ } *page_list;
+};
+
+struct mthca_uar_table {
+ struct mthca_alloc alloc;
+ u64 uarc_base;
+ int uarc_size;
+};
+
+struct mthca_pd_table {
+ struct mthca_alloc alloc;
+};
+
+struct mthca_mr_table {
+ struct mthca_alloc mpt_alloc;
+ int max_mtt_order;
+ unsigned long **mtt_buddy;
+ u64 mtt_base;
+ struct mthca_icm_table *mtt_table;
+ struct mthca_icm_table *mpt_table;
+};
+
+struct mthca_eq_table {
+ struct mthca_alloc alloc;
+ void __iomem *clr_int;
+ u32 clr_mask;
+ u32 arm_mask;
+ struct mthca_eq eq[MTHCA_NUM_EQ];
+ u64 icm_virt;
+ struct page *icm_page;
+ dma_addr_t icm_dma;
+ int have_irq;
+ u8 inta_pin;
+};
+
+struct mthca_cq_table {
+ struct mthca_alloc alloc;
+ spinlock_t lock;
+ struct mthca_array cq;
+ struct mthca_icm_table *table;
+};
+
+struct mthca_qp_table {
+ struct mthca_alloc alloc;
+ u32 rdb_base;
+ int rdb_shift;
+ int sqp_start;
+ spinlock_t lock;
+ struct mthca_array qp;
+ struct mthca_icm_table *qp_table;
+ struct mthca_icm_table *eqp_table;
+};
+
+struct mthca_av_table {
+ struct pci_pool *pool;
+ int num_ddr_avs;
+ u64 ddr_av_base;
+ void __iomem *av_map;
+ struct mthca_alloc alloc;
+};
+
+struct mthca_mcg_table {
+ struct semaphore sem;
+ struct mthca_alloc alloc;
+ struct mthca_icm_table *table;
+};
+
+struct mthca_dev {
+ struct ib_device ib_dev;
+ struct pci_dev *pdev;
+
+ int hca_type;
+ unsigned long mthca_flags;
+ unsigned long device_cap_flags;
+
+ u32 rev_id;
+
+ /* firmware info */
+ u64 fw_ver;
+ union {
+ struct {
+ u64 fw_start;
+ u64 fw_end;
+ } tavor;
+ struct {
+ u64 clr_int_base;
+ u64 eq_arm_base;
+ u64 eq_set_ci_base;
+ struct mthca_icm *fw_icm;
+ struct mthca_icm *aux_icm;
+ u16 fw_pages;
+ } arbel;
+ } fw;
+
+ u64 ddr_start;
+ u64 ddr_end;
+
+ MTHCA_DECLARE_DOORBELL_LOCK(doorbell_lock)
+ struct semaphore cap_mask_mutex;
+
+ void __iomem *hcr;
+ void __iomem *kar;
+ void __iomem *clr_base;
+ union {
+ struct {
+ void __iomem *ecr_base;
+ } tavor;
+ struct {
+ void __iomem *eq_arm;
+ void __iomem *eq_set_ci_base;
+ } arbel;
+ } eq_regs;
+
+ struct mthca_cmd cmd;
+ struct mthca_limits limits;
+
+ struct mthca_uar_table uar_table;
+ struct mthca_pd_table pd_table;
+ struct mthca_mr_table mr_table;
+ struct mthca_eq_table eq_table;
+ struct mthca_cq_table cq_table;
+ struct mthca_qp_table qp_table;
+ struct mthca_av_table av_table;
+ struct mthca_mcg_table mcg_table;
+
+ struct mthca_uar driver_uar;
+ struct mthca_db_table *db_tab;
+ struct mthca_pd driver_pd;
+ struct mthca_mr driver_mr;
+
+ struct ib_mad_agent *send_agent[MTHCA_MAX_PORTS][2];
+ struct ib_ah *sm_ah[MTHCA_MAX_PORTS];
+ spinlock_t sm_lock;
+};
+
+#define mthca_dbg(mdev, format, arg...) \
+ dev_dbg(&mdev->pdev->dev, format, ## arg)
+#define mthca_err(mdev, format, arg...) \
+ dev_err(&mdev->pdev->dev, format, ## arg)
+#define mthca_info(mdev, format, arg...) \
+ dev_info(&mdev->pdev->dev, format, ## arg)
+#define mthca_warn(mdev, format, arg...) \
+ dev_warn(&mdev->pdev->dev, format, ## arg)
+
+extern void __buggy_use_of_MTHCA_GET(void);
+extern void __buggy_use_of_MTHCA_PUT(void);
+
+#define MTHCA_GET(dest, source, offset) \
+ do { \
+ void *__p = (char *) (source) + (offset); \
+ switch (sizeof (dest)) { \
+ case 1: (dest) = *(u8 *) __p; break; \
+ case 2: (dest) = be16_to_cpup(__p); break; \
+ case 4: (dest) = be32_to_cpup(__p); break; \
+ case 8: (dest) = be64_to_cpup(__p); break; \
+ default: __buggy_use_of_MTHCA_GET(); \
+ } \
+ } while (0)
+
+#define MTHCA_PUT(dest, source, offset) \
+ do { \
+ __typeof__(source) *__p = \
+ (__typeof__(source) *) ((char *) (dest) + (offset)); \
+ switch (sizeof(source)) { \
+ case 1: *__p = (source); break; \
+ case 2: *__p = cpu_to_be16(source); break; \
+ case 4: *__p = cpu_to_be32(source); break; \
+ case 8: *__p = cpu_to_be64(source); break; \
+ default: __buggy_use_of_MTHCA_PUT(); \
+ } \
+ } while (0)
+
+int mthca_reset(struct mthca_dev *mdev);
+
+u32 mthca_alloc(struct mthca_alloc *alloc);
+void mthca_free(struct mthca_alloc *alloc, u32 obj);
+int mthca_alloc_init(struct mthca_alloc *alloc, u32 num, u32 mask,
+ u32 reserved);
+void mthca_alloc_cleanup(struct mthca_alloc *alloc);
+void *mthca_array_get(struct mthca_array *array, int index);
+int mthca_array_set(struct mthca_array *array, int index, void *value);
+void mthca_array_clear(struct mthca_array *array, int index);
+int mthca_array_init(struct mthca_array *array, int nent);
+void mthca_array_cleanup(struct mthca_array *array, int nent);
+
+int mthca_init_uar_table(struct mthca_dev *dev);
+int mthca_init_pd_table(struct mthca_dev *dev);
+int mthca_init_mr_table(struct mthca_dev *dev);
+int mthca_init_eq_table(struct mthca_dev *dev);
+int mthca_init_cq_table(struct mthca_dev *dev);
+int mthca_init_qp_table(struct mthca_dev *dev);
+int mthca_init_av_table(struct mthca_dev *dev);
+int mthca_init_mcg_table(struct mthca_dev *dev);
+
+void mthca_cleanup_uar_table(struct mthca_dev *dev);
+void mthca_cleanup_pd_table(struct mthca_dev *dev);
+void mthca_cleanup_mr_table(struct mthca_dev *dev);
+void mthca_cleanup_eq_table(struct mthca_dev *dev);
+void mthca_cleanup_cq_table(struct mthca_dev *dev);
+void mthca_cleanup_qp_table(struct mthca_dev *dev);
+void mthca_cleanup_av_table(struct mthca_dev *dev);
+void mthca_cleanup_mcg_table(struct mthca_dev *dev);
+
+int mthca_register_device(struct mthca_dev *dev);
+void mthca_unregister_device(struct mthca_dev *dev);
+
+int mthca_uar_alloc(struct mthca_dev *dev, struct mthca_uar *uar);
+void mthca_uar_free(struct mthca_dev *dev, struct mthca_uar *uar);
+
+int mthca_pd_alloc(struct mthca_dev *dev, struct mthca_pd *pd);
+void mthca_pd_free(struct mthca_dev *dev, struct mthca_pd *pd);
+
+int mthca_mr_alloc_notrans(struct mthca_dev *dev, u32 pd,
+ u32 access, struct mthca_mr *mr);
+int mthca_mr_alloc_phys(struct mthca_dev *dev, u32 pd,
+ u64 *buffer_list, int buffer_size_shift,
+ int list_len, u64 iova, u64 total_size,
+ u32 access, struct mthca_mr *mr);
+void mthca_free_mr(struct mthca_dev *dev, struct mthca_mr *mr);
+
+int mthca_map_eq_icm(struct mthca_dev *dev, u64 icm_virt);
+void mthca_unmap_eq_icm(struct mthca_dev *dev);
+
+int mthca_poll_cq(struct ib_cq *ibcq, int num_entries,
+ struct ib_wc *entry);
+int mthca_tavor_arm_cq(struct ib_cq *cq, enum ib_cq_notify notify);
+int mthca_arbel_arm_cq(struct ib_cq *cq, enum ib_cq_notify notify);
+int mthca_init_cq(struct mthca_dev *dev, int nent,
+ struct mthca_cq *cq);
+void mthca_free_cq(struct mthca_dev *dev,
+ struct mthca_cq *cq);
+void mthca_cq_event(struct mthca_dev *dev, u32 cqn);
+void mthca_cq_clean(struct mthca_dev *dev, u32 cqn, u32 qpn);
+
+void mthca_qp_event(struct mthca_dev *dev, u32 qpn,
+ enum ib_event_type event_type);
+int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask);
+int mthca_tavor_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
+ struct ib_send_wr **bad_wr);
+int mthca_tavor_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
+ struct ib_recv_wr **bad_wr);
+int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
+ struct ib_send_wr **bad_wr);
+int mthca_arbel_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
+ struct ib_recv_wr **bad_wr);
+int mthca_free_err_wqe(struct mthca_dev *dev, struct mthca_qp *qp, int is_send,
+ int index, int *dbd, u32 *new_wqe);
+int mthca_alloc_qp(struct mthca_dev *dev,
+ struct mthca_pd *pd,
+ struct mthca_cq *send_cq,
+ struct mthca_cq *recv_cq,
+ enum ib_qp_type type,
+ enum ib_sig_type send_policy,
+ struct mthca_qp *qp);
+int mthca_alloc_sqp(struct mthca_dev *dev,
+ struct mthca_pd *pd,
+ struct mthca_cq *send_cq,
+ struct mthca_cq *recv_cq,
+ enum ib_sig_type send_policy,
+ int qpn,
+ int port,
+ struct mthca_sqp *sqp);
+void mthca_free_qp(struct mthca_dev *dev, struct mthca_qp *qp);
+int mthca_create_ah(struct mthca_dev *dev,
+ struct mthca_pd *pd,
+ struct ib_ah_attr *ah_attr,
+ struct mthca_ah *ah);
+int mthca_destroy_ah(struct mthca_dev *dev, struct mthca_ah *ah);
+int mthca_read_ah(struct mthca_dev *dev, struct mthca_ah *ah,
+ struct ib_ud_header *header);
+
+int mthca_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid);
+int mthca_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid);
+
+int mthca_process_mad(struct ib_device *ibdev,
+ int mad_flags,
+ u8 port_num,
+ struct ib_wc *in_wc,
+ struct ib_grh *in_grh,
+ struct ib_mad *in_mad,
+ struct ib_mad *out_mad);
+int mthca_create_agents(struct mthca_dev *dev);
+void mthca_free_agents(struct mthca_dev *dev);
+
+static inline struct mthca_dev *to_mdev(struct ib_device *ibdev)
+{
+ return container_of(ibdev, struct mthca_dev, ib_dev);
+}
+
+#endif /* MTHCA_DEV_H */
diff --git a/drivers/infiniband/hw/mthca/mthca_doorbell.h b/drivers/infiniband/hw/mthca/mthca_doorbell.h
new file mode 100644
index 000000000000..78b183cab54c
--- /dev/null
+++ b/drivers/infiniband/hw/mthca/mthca_doorbell.h
@@ -0,0 +1,95 @@
+/*
+ * Copyright (c) 2004 Topspin Communications. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * $Id: mthca_doorbell.h 1349 2004-12-16 21:09:43Z roland $
+ */
+
+#include <linux/types.h>
+
+#define MTHCA_RD_DOORBELL 0x00
+#define MTHCA_SEND_DOORBELL 0x10
+#define MTHCA_RECEIVE_DOORBELL 0x18
+#define MTHCA_CQ_DOORBELL 0x20
+#define MTHCA_EQ_DOORBELL 0x28
+
+#if BITS_PER_LONG == 64
+/*
+ * Assume that we can just write a 64-bit doorbell atomically. s390
+ * actually doesn't have writeq() but S/390 systems don't even have
+ * PCI so we won't worry about it.
+ */
+
+#define MTHCA_DECLARE_DOORBELL_LOCK(name)
+#define MTHCA_INIT_DOORBELL_LOCK(ptr) do { } while (0)
+#define MTHCA_GET_DOORBELL_LOCK(ptr) (NULL)
+
+static inline void mthca_write64(u32 val[2], void __iomem *dest,
+ spinlock_t *doorbell_lock)
+{
+ __raw_writeq(*(u64 *) val, dest);
+}
+
+static inline void mthca_write_db_rec(u32 val[2], u32 *db)
+{
+ *(u64 *) db = *(u64 *) val;
+}
+
+#else
+
+/*
+ * Just fall back to a spinlock to protect the doorbell if
+ * BITS_PER_LONG is 32 -- there's no portable way to do atomic 64-bit
+ * MMIO writes.
+ */
+
+#define MTHCA_DECLARE_DOORBELL_LOCK(name) spinlock_t name;
+#define MTHCA_INIT_DOORBELL_LOCK(ptr) spin_lock_init(ptr)
+#define MTHCA_GET_DOORBELL_LOCK(ptr) (ptr)
+
+static inline void mthca_write64(u32 val[2], void __iomem *dest,
+ spinlock_t *doorbell_lock)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(doorbell_lock, flags);
+ __raw_writel(val[0], dest);
+ __raw_writel(val[1], dest + 4);
+ spin_unlock_irqrestore(doorbell_lock, flags);
+}
+
+static inline void mthca_write_db_rec(u32 val[2], u32 *db)
+{
+ db[0] = val[0];
+ wmb();
+ db[1] = val[1];
+}
+
+#endif
diff --git a/drivers/infiniband/hw/mthca/mthca_eq.c b/drivers/infiniband/hw/mthca/mthca_eq.c
new file mode 100644
index 000000000000..623daab5c92b
--- /dev/null
+++ b/drivers/infiniband/hw/mthca/mthca_eq.c
@@ -0,0 +1,964 @@
+/*
+ * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * $Id: mthca_eq.c 1382 2004-12-24 02:21:02Z roland $
+ */
+
+#include <linux/init.h>
+#include <linux/errno.h>
+#include <linux/interrupt.h>
+#include <linux/pci.h>
+
+#include "mthca_dev.h"
+#include "mthca_cmd.h"
+#include "mthca_config_reg.h"
+
+enum {
+ MTHCA_NUM_ASYNC_EQE = 0x80,
+ MTHCA_NUM_CMD_EQE = 0x80,
+ MTHCA_EQ_ENTRY_SIZE = 0x20
+};
+
+/*
+ * Must be packed because start is 64 bits but only aligned to 32 bits.
+ */
+struct mthca_eq_context {
+ u32 flags;
+ u64 start;
+ u32 logsize_usrpage;
+ u32 tavor_pd; /* reserved for Arbel */
+ u8 reserved1[3];
+ u8 intr;
+ u32 arbel_pd; /* lost_count for Tavor */
+ u32 lkey;
+ u32 reserved2[2];
+ u32 consumer_index;
+ u32 producer_index;
+ u32 reserved3[4];
+} __attribute__((packed));
+
+#define MTHCA_EQ_STATUS_OK ( 0 << 28)
+#define MTHCA_EQ_STATUS_OVERFLOW ( 9 << 28)
+#define MTHCA_EQ_STATUS_WRITE_FAIL (10 << 28)
+#define MTHCA_EQ_OWNER_SW ( 0 << 24)
+#define MTHCA_EQ_OWNER_HW ( 1 << 24)
+#define MTHCA_EQ_FLAG_TR ( 1 << 18)
+#define MTHCA_EQ_FLAG_OI ( 1 << 17)
+#define MTHCA_EQ_STATE_ARMED ( 1 << 8)
+#define MTHCA_EQ_STATE_FIRED ( 2 << 8)
+#define MTHCA_EQ_STATE_ALWAYS_ARMED ( 3 << 8)
+#define MTHCA_EQ_STATE_ARBEL ( 8 << 8)
+
+enum {
+ MTHCA_EVENT_TYPE_COMP = 0x00,
+ MTHCA_EVENT_TYPE_PATH_MIG = 0x01,
+ MTHCA_EVENT_TYPE_COMM_EST = 0x02,
+ MTHCA_EVENT_TYPE_SQ_DRAINED = 0x03,
+ MTHCA_EVENT_TYPE_SRQ_LAST_WQE = 0x13,
+ MTHCA_EVENT_TYPE_CQ_ERROR = 0x04,
+ MTHCA_EVENT_TYPE_WQ_CATAS_ERROR = 0x05,
+ MTHCA_EVENT_TYPE_EEC_CATAS_ERROR = 0x06,
+ MTHCA_EVENT_TYPE_PATH_MIG_FAILED = 0x07,
+ MTHCA_EVENT_TYPE_WQ_INVAL_REQ_ERROR = 0x10,
+ MTHCA_EVENT_TYPE_WQ_ACCESS_ERROR = 0x11,
+ MTHCA_EVENT_TYPE_SRQ_CATAS_ERROR = 0x12,
+ MTHCA_EVENT_TYPE_LOCAL_CATAS_ERROR = 0x08,
+ MTHCA_EVENT_TYPE_PORT_CHANGE = 0x09,
+ MTHCA_EVENT_TYPE_EQ_OVERFLOW = 0x0f,
+ MTHCA_EVENT_TYPE_ECC_DETECT = 0x0e,
+ MTHCA_EVENT_TYPE_CMD = 0x0a
+};
+
+#define MTHCA_ASYNC_EVENT_MASK ((1ULL << MTHCA_EVENT_TYPE_PATH_MIG) | \
+ (1ULL << MTHCA_EVENT_TYPE_COMM_EST) | \
+ (1ULL << MTHCA_EVENT_TYPE_SQ_DRAINED) | \
+ (1ULL << MTHCA_EVENT_TYPE_CQ_ERROR) | \
+ (1ULL << MTHCA_EVENT_TYPE_WQ_CATAS_ERROR) | \
+ (1ULL << MTHCA_EVENT_TYPE_EEC_CATAS_ERROR) | \
+ (1ULL << MTHCA_EVENT_TYPE_PATH_MIG_FAILED) | \
+ (1ULL << MTHCA_EVENT_TYPE_WQ_INVAL_REQ_ERROR) | \
+ (1ULL << MTHCA_EVENT_TYPE_WQ_ACCESS_ERROR) | \
+ (1ULL << MTHCA_EVENT_TYPE_LOCAL_CATAS_ERROR) | \
+ (1ULL << MTHCA_EVENT_TYPE_PORT_CHANGE) | \
+ (1ULL << MTHCA_EVENT_TYPE_ECC_DETECT))
+#define MTHCA_SRQ_EVENT_MASK (1ULL << MTHCA_EVENT_TYPE_SRQ_CATAS_ERROR) | \
+ (1ULL << MTHCA_EVENT_TYPE_SRQ_LAST_WQE)
+#define MTHCA_CMD_EVENT_MASK (1ULL << MTHCA_EVENT_TYPE_CMD)
+
+#define MTHCA_EQ_DB_INC_CI (1 << 24)
+#define MTHCA_EQ_DB_REQ_NOT (2 << 24)
+#define MTHCA_EQ_DB_DISARM_CQ (3 << 24)
+#define MTHCA_EQ_DB_SET_CI (4 << 24)
+#define MTHCA_EQ_DB_ALWAYS_ARM (5 << 24)
+
+struct mthca_eqe {
+ u8 reserved1;
+ u8 type;
+ u8 reserved2;
+ u8 subtype;
+ union {
+ u32 raw[6];
+ struct {
+ u32 cqn;
+ } __attribute__((packed)) comp;
+ struct {
+ u16 reserved1;
+ u16 token;
+ u32 reserved2;
+ u8 reserved3[3];
+ u8 status;
+ u64 out_param;
+ } __attribute__((packed)) cmd;
+ struct {
+ u32 qpn;
+ } __attribute__((packed)) qp;
+ struct {
+ u32 cqn;
+ u32 reserved1;
+ u8 reserved2[3];
+ u8 syndrome;
+ } __attribute__((packed)) cq_err;
+ struct {
+ u32 reserved1[2];
+ u32 port;
+ } __attribute__((packed)) port_change;
+ } event;
+ u8 reserved3[3];
+ u8 owner;
+} __attribute__((packed));
+
+#define MTHCA_EQ_ENTRY_OWNER_SW (0 << 7)
+#define MTHCA_EQ_ENTRY_OWNER_HW (1 << 7)
+
+static inline u64 async_mask(struct mthca_dev *dev)
+{
+ return dev->mthca_flags & MTHCA_FLAG_SRQ ?
+ MTHCA_ASYNC_EVENT_MASK | MTHCA_SRQ_EVENT_MASK :
+ MTHCA_ASYNC_EVENT_MASK;
+}
+
+static inline void tavor_set_eq_ci(struct mthca_dev *dev, struct mthca_eq *eq, u32 ci)
+{
+ u32 doorbell[2];
+
+ doorbell[0] = cpu_to_be32(MTHCA_EQ_DB_SET_CI | eq->eqn);
+ doorbell[1] = cpu_to_be32(ci & (eq->nent - 1));
+
+ /*
+ * This barrier makes sure that all updates to ownership bits
+ * done by set_eqe_hw() hit memory before the consumer index
+ * is updated. set_eq_ci() allows the HCA to possibly write
+ * more EQ entries, and we want to avoid the exceedingly
+ * unlikely possibility of the HCA writing an entry and then
+ * having set_eqe_hw() overwrite the owner field.
+ */
+ wmb();
+ mthca_write64(doorbell,
+ dev->kar + MTHCA_EQ_DOORBELL,
+ MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock));
+}
+
+static inline void arbel_set_eq_ci(struct mthca_dev *dev, struct mthca_eq *eq, u32 ci)
+{
+ /* See comment in tavor_set_eq_ci() above. */
+ wmb();
+ __raw_writel(cpu_to_be32(ci), dev->eq_regs.arbel.eq_set_ci_base +
+ eq->eqn * 8);
+ /* We still want ordering, just not swabbing, so add a barrier */
+ mb();
+}
+
+static inline void set_eq_ci(struct mthca_dev *dev, struct mthca_eq *eq, u32 ci)
+{
+ if (dev->hca_type == ARBEL_NATIVE)
+ arbel_set_eq_ci(dev, eq, ci);
+ else
+ tavor_set_eq_ci(dev, eq, ci);
+}
+
+static inline void tavor_eq_req_not(struct mthca_dev *dev, int eqn)
+{
+ u32 doorbell[2];
+
+ doorbell[0] = cpu_to_be32(MTHCA_EQ_DB_REQ_NOT | eqn);
+ doorbell[1] = 0;
+
+ mthca_write64(doorbell,
+ dev->kar + MTHCA_EQ_DOORBELL,
+ MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock));
+}
+
+static inline void arbel_eq_req_not(struct mthca_dev *dev, u32 eqn_mask)
+{
+ writel(eqn_mask, dev->eq_regs.arbel.eq_arm);
+}
+
+static inline void disarm_cq(struct mthca_dev *dev, int eqn, int cqn)
+{
+ if (dev->hca_type != ARBEL_NATIVE) {
+ u32 doorbell[2];
+
+ doorbell[0] = cpu_to_be32(MTHCA_EQ_DB_DISARM_CQ | eqn);
+ doorbell[1] = cpu_to_be32(cqn);
+
+ mthca_write64(doorbell,
+ dev->kar + MTHCA_EQ_DOORBELL,
+ MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock));
+ }
+}
+
+static inline struct mthca_eqe *get_eqe(struct mthca_eq *eq, u32 entry)
+{
+ unsigned long off = (entry & (eq->nent - 1)) * MTHCA_EQ_ENTRY_SIZE;
+ return eq->page_list[off / PAGE_SIZE].buf + off % PAGE_SIZE;
+}
+
+static inline struct mthca_eqe* next_eqe_sw(struct mthca_eq *eq)
+{
+ struct mthca_eqe* eqe;
+ eqe = get_eqe(eq, eq->cons_index);
+ return (MTHCA_EQ_ENTRY_OWNER_HW & eqe->owner) ? NULL : eqe;
+}
+
+static inline void set_eqe_hw(struct mthca_eqe *eqe)
+{
+ eqe->owner = MTHCA_EQ_ENTRY_OWNER_HW;
+}
+
+static void port_change(struct mthca_dev *dev, int port, int active)
+{
+ struct ib_event record;
+
+ mthca_dbg(dev, "Port change to %s for port %d\n",
+ active ? "active" : "down", port);
+
+ record.device = &dev->ib_dev;
+ record.event = active ? IB_EVENT_PORT_ACTIVE : IB_EVENT_PORT_ERR;
+ record.element.port_num = port;
+
+ ib_dispatch_event(&record);
+}
+
+static int mthca_eq_int(struct mthca_dev *dev, struct mthca_eq *eq)
+{
+ struct mthca_eqe *eqe;
+ int disarm_cqn;
+ int eqes_found = 0;
+
+ while ((eqe = next_eqe_sw(eq))) {
+ int set_ci = 0;
+
+ /*
+ * Make sure we read EQ entry contents after we've
+ * checked the ownership bit.
+ */
+ rmb();
+
+ switch (eqe->type) {
+ case MTHCA_EVENT_TYPE_COMP:
+ disarm_cqn = be32_to_cpu(eqe->event.comp.cqn) & 0xffffff;
+ disarm_cq(dev, eq->eqn, disarm_cqn);
+ mthca_cq_event(dev, disarm_cqn);
+ break;
+
+ case MTHCA_EVENT_TYPE_PATH_MIG:
+ mthca_qp_event(dev, be32_to_cpu(eqe->event.qp.qpn) & 0xffffff,
+ IB_EVENT_PATH_MIG);
+ break;
+
+ case MTHCA_EVENT_TYPE_COMM_EST:
+ mthca_qp_event(dev, be32_to_cpu(eqe->event.qp.qpn) & 0xffffff,
+ IB_EVENT_COMM_EST);
+ break;
+
+ case MTHCA_EVENT_TYPE_SQ_DRAINED:
+ mthca_qp_event(dev, be32_to_cpu(eqe->event.qp.qpn) & 0xffffff,
+ IB_EVENT_SQ_DRAINED);
+ break;
+
+ case MTHCA_EVENT_TYPE_WQ_CATAS_ERROR:
+ mthca_qp_event(dev, be32_to_cpu(eqe->event.qp.qpn) & 0xffffff,
+ IB_EVENT_QP_FATAL);
+ break;
+
+ case MTHCA_EVENT_TYPE_PATH_MIG_FAILED:
+ mthca_qp_event(dev, be32_to_cpu(eqe->event.qp.qpn) & 0xffffff,
+ IB_EVENT_PATH_MIG_ERR);
+ break;
+
+ case MTHCA_EVENT_TYPE_WQ_INVAL_REQ_ERROR:
+ mthca_qp_event(dev, be32_to_cpu(eqe->event.qp.qpn) & 0xffffff,
+ IB_EVENT_QP_REQ_ERR);
+ break;
+
+ case MTHCA_EVENT_TYPE_WQ_ACCESS_ERROR:
+ mthca_qp_event(dev, be32_to_cpu(eqe->event.qp.qpn) & 0xffffff,
+ IB_EVENT_QP_ACCESS_ERR);
+ break;
+
+ case MTHCA_EVENT_TYPE_CMD:
+ mthca_cmd_event(dev,
+ be16_to_cpu(eqe->event.cmd.token),
+ eqe->event.cmd.status,
+ be64_to_cpu(eqe->event.cmd.out_param));
+ /*
+ * cmd_event() may add more commands.
+ * The card will think the queue has overflowed if
+ * we don't tell it we've been processing events.
+ */
+ set_ci = 1;
+ break;
+
+ case MTHCA_EVENT_TYPE_PORT_CHANGE:
+ port_change(dev,
+ (be32_to_cpu(eqe->event.port_change.port) >> 28) & 3,
+ eqe->subtype == 0x4);
+ break;
+
+ case MTHCA_EVENT_TYPE_CQ_ERROR:
+ mthca_warn(dev, "CQ %s on CQN %08x\n",
+ eqe->event.cq_err.syndrome == 1 ?
+ "overrun" : "access violation",
+ be32_to_cpu(eqe->event.cq_err.cqn));
+ break;
+
+ case MTHCA_EVENT_TYPE_EQ_OVERFLOW:
+ mthca_warn(dev, "EQ overrun on EQN %d\n", eq->eqn);
+ break;
+
+ case MTHCA_EVENT_TYPE_EEC_CATAS_ERROR:
+ case MTHCA_EVENT_TYPE_SRQ_CATAS_ERROR:
+ case MTHCA_EVENT_TYPE_LOCAL_CATAS_ERROR:
+ case MTHCA_EVENT_TYPE_ECC_DETECT:
+ default:
+ mthca_warn(dev, "Unhandled event %02x(%02x) on EQ %d\n",
+ eqe->type, eqe->subtype, eq->eqn);
+ break;
+ };
+
+ set_eqe_hw(eqe);
+ ++eq->cons_index;
+ eqes_found = 1;
+
+ if (unlikely(set_ci)) {
+ /*
+ * Conditional on hca_type is OK here because
+ * this is a rare case, not the fast path.
+ */
+ set_eq_ci(dev, eq, eq->cons_index);
+ set_ci = 0;
+ }
+ }
+
+ /*
+ * Rely on caller to set consumer index so that we don't have
+ * to test hca_type in our interrupt handling fast path.
+ */
+ return eqes_found;
+}
+
+static irqreturn_t mthca_tavor_interrupt(int irq, void *dev_ptr, struct pt_regs *regs)
+{
+ struct mthca_dev *dev = dev_ptr;
+ u32 ecr;
+ int i;
+
+ if (dev->eq_table.clr_mask)
+ writel(dev->eq_table.clr_mask, dev->eq_table.clr_int);
+
+ ecr = readl(dev->eq_regs.tavor.ecr_base + 4);
+ if (ecr) {
+ writel(ecr, dev->eq_regs.tavor.ecr_base +
+ MTHCA_ECR_CLR_BASE - MTHCA_ECR_BASE + 4);
+
+ for (i = 0; i < MTHCA_NUM_EQ; ++i)
+ if (ecr & dev->eq_table.eq[i].eqn_mask &&
+ mthca_eq_int(dev, &dev->eq_table.eq[i])) {
+ tavor_set_eq_ci(dev, &dev->eq_table.eq[i],
+ dev->eq_table.eq[i].cons_index);
+ tavor_eq_req_not(dev, dev->eq_table.eq[i].eqn);
+ }
+ }
+
+ return IRQ_RETVAL(ecr);
+}
+
+static irqreturn_t mthca_tavor_msi_x_interrupt(int irq, void *eq_ptr,
+ struct pt_regs *regs)
+{
+ struct mthca_eq *eq = eq_ptr;
+ struct mthca_dev *dev = eq->dev;
+
+ mthca_eq_int(dev, eq);
+ tavor_set_eq_ci(dev, eq, eq->cons_index);
+ tavor_eq_req_not(dev, eq->eqn);
+
+ /* MSI-X vectors always belong to us */
+ return IRQ_HANDLED;
+}
+
+static irqreturn_t mthca_arbel_interrupt(int irq, void *dev_ptr, struct pt_regs *regs)
+{
+ struct mthca_dev *dev = dev_ptr;
+ int work = 0;
+ int i;
+
+ if (dev->eq_table.clr_mask)
+ writel(dev->eq_table.clr_mask, dev->eq_table.clr_int);
+
+ for (i = 0; i < MTHCA_NUM_EQ; ++i)
+ if (mthca_eq_int(dev, &dev->eq_table.eq[i])) {
+ work = 1;
+ arbel_set_eq_ci(dev, &dev->eq_table.eq[i],
+ dev->eq_table.eq[i].cons_index);
+ }
+
+ arbel_eq_req_not(dev, dev->eq_table.arm_mask);
+
+ return IRQ_RETVAL(work);
+}
+
+static irqreturn_t mthca_arbel_msi_x_interrupt(int irq, void *eq_ptr,
+ struct pt_regs *regs)
+{
+ struct mthca_eq *eq = eq_ptr;
+ struct mthca_dev *dev = eq->dev;
+
+ mthca_eq_int(dev, eq);
+ arbel_set_eq_ci(dev, eq, eq->cons_index);
+ arbel_eq_req_not(dev, eq->eqn_mask);
+
+ /* MSI-X vectors always belong to us */
+ return IRQ_HANDLED;
+}
+
+static int __devinit mthca_create_eq(struct mthca_dev *dev,
+ int nent,
+ u8 intr,
+ struct mthca_eq *eq)
+{
+ int npages = (nent * MTHCA_EQ_ENTRY_SIZE + PAGE_SIZE - 1) /
+ PAGE_SIZE;
+ u64 *dma_list = NULL;
+ dma_addr_t t;
+ void *mailbox = NULL;
+ struct mthca_eq_context *eq_context;
+ int err = -ENOMEM;
+ int i;
+ u8 status;
+
+ /* Make sure EQ size is aligned to a power of 2 size. */
+ for (i = 1; i < nent; i <<= 1)
+ ; /* nothing */
+ nent = i;
+
+ eq->dev = dev;
+
+ eq->page_list = kmalloc(npages * sizeof *eq->page_list,
+ GFP_KERNEL);
+ if (!eq->page_list)
+ goto err_out;
+
+ for (i = 0; i < npages; ++i)
+ eq->page_list[i].buf = NULL;
+
+ dma_list = kmalloc(npages * sizeof *dma_list, GFP_KERNEL);
+ if (!dma_list)
+ goto err_out_free;
+
+ mailbox = kmalloc(sizeof *eq_context + MTHCA_CMD_MAILBOX_EXTRA,
+ GFP_KERNEL);
+ if (!mailbox)
+ goto err_out_free;
+ eq_context = MAILBOX_ALIGN(mailbox);
+
+ for (i = 0; i < npages; ++i) {
+ eq->page_list[i].buf = pci_alloc_consistent(dev->pdev,
+ PAGE_SIZE, &t);
+ if (!eq->page_list[i].buf)
+ goto err_out_free;
+
+ dma_list[i] = t;
+ pci_unmap_addr_set(&eq->page_list[i], mapping, t);
+
+ memset(eq->page_list[i].buf, 0, PAGE_SIZE);
+ }
+
+ for (i = 0; i < nent; ++i)
+ set_eqe_hw(get_eqe(eq, i));
+
+ eq->eqn = mthca_alloc(&dev->eq_table.alloc);
+ if (eq->eqn == -1)
+ goto err_out_free;
+
+ err = mthca_mr_alloc_phys(dev, dev->driver_pd.pd_num,
+ dma_list, PAGE_SHIFT, npages,
+ 0, npages * PAGE_SIZE,
+ MTHCA_MPT_FLAG_LOCAL_WRITE |
+ MTHCA_MPT_FLAG_LOCAL_READ,
+ &eq->mr);
+ if (err)
+ goto err_out_free_eq;
+
+ eq->nent = nent;
+
+ memset(eq_context, 0, sizeof *eq_context);
+ eq_context->flags = cpu_to_be32(MTHCA_EQ_STATUS_OK |
+ MTHCA_EQ_OWNER_HW |
+ MTHCA_EQ_STATE_ARMED |
+ MTHCA_EQ_FLAG_TR);
+ if (dev->hca_type == ARBEL_NATIVE)
+ eq_context->flags |= cpu_to_be32(MTHCA_EQ_STATE_ARBEL);
+
+ eq_context->logsize_usrpage = cpu_to_be32((ffs(nent) - 1) << 24);
+ if (dev->hca_type == ARBEL_NATIVE) {
+ eq_context->arbel_pd = cpu_to_be32(dev->driver_pd.pd_num);
+ } else {
+ eq_context->logsize_usrpage |= cpu_to_be32(dev->driver_uar.index);
+ eq_context->tavor_pd = cpu_to_be32(dev->driver_pd.pd_num);
+ }
+ eq_context->intr = intr;
+ eq_context->lkey = cpu_to_be32(eq->mr.ibmr.lkey);
+
+ err = mthca_SW2HW_EQ(dev, eq_context, eq->eqn, &status);
+ if (err) {
+ mthca_warn(dev, "SW2HW_EQ failed (%d)\n", err);
+ goto err_out_free_mr;
+ }
+ if (status) {
+ mthca_warn(dev, "SW2HW_EQ returned status 0x%02x\n",
+ status);
+ err = -EINVAL;
+ goto err_out_free_mr;
+ }
+
+ kfree(dma_list);
+ kfree(mailbox);
+
+ eq->eqn_mask = swab32(1 << eq->eqn);
+ eq->cons_index = 0;
+
+ dev->eq_table.arm_mask |= eq->eqn_mask;
+
+ mthca_dbg(dev, "Allocated EQ %d with %d entries\n",
+ eq->eqn, nent);
+
+ return err;
+
+ err_out_free_mr:
+ mthca_free_mr(dev, &eq->mr);
+
+ err_out_free_eq:
+ mthca_free(&dev->eq_table.alloc, eq->eqn);
+
+ err_out_free:
+ for (i = 0; i < npages; ++i)
+ if (eq->page_list[i].buf)
+ pci_free_consistent(dev->pdev, PAGE_SIZE,
+ eq->page_list[i].buf,
+ pci_unmap_addr(&eq->page_list[i],
+ mapping));
+
+ kfree(eq->page_list);
+ kfree(dma_list);
+ kfree(mailbox);
+
+ err_out:
+ return err;
+}
+
+static void mthca_free_eq(struct mthca_dev *dev,
+ struct mthca_eq *eq)
+{
+ void *mailbox = NULL;
+ int err;
+ u8 status;
+ int npages = (eq->nent * MTHCA_EQ_ENTRY_SIZE + PAGE_SIZE - 1) /
+ PAGE_SIZE;
+ int i;
+
+ mailbox = kmalloc(sizeof (struct mthca_eq_context) + MTHCA_CMD_MAILBOX_EXTRA,
+ GFP_KERNEL);
+ if (!mailbox)
+ return;
+
+ err = mthca_HW2SW_EQ(dev, MAILBOX_ALIGN(mailbox),
+ eq->eqn, &status);
+ if (err)
+ mthca_warn(dev, "HW2SW_EQ failed (%d)\n", err);
+ if (status)
+ mthca_warn(dev, "HW2SW_EQ returned status 0x%02x\n",
+ status);
+
+ dev->eq_table.arm_mask &= ~eq->eqn_mask;
+
+ if (0) {
+ mthca_dbg(dev, "Dumping EQ context %02x:\n", eq->eqn);
+ for (i = 0; i < sizeof (struct mthca_eq_context) / 4; ++i) {
+ if (i % 4 == 0)
+ printk("[%02x] ", i * 4);
+ printk(" %08x", be32_to_cpup(MAILBOX_ALIGN(mailbox) + i * 4));
+ if ((i + 1) % 4 == 0)
+ printk("\n");
+ }
+ }
+
+ mthca_free_mr(dev, &eq->mr);
+ for (i = 0; i < npages; ++i)
+ pci_free_consistent(dev->pdev, PAGE_SIZE,
+ eq->page_list[i].buf,
+ pci_unmap_addr(&eq->page_list[i], mapping));
+
+ kfree(eq->page_list);
+ kfree(mailbox);
+}
+
+static void mthca_free_irqs(struct mthca_dev *dev)
+{
+ int i;
+
+ if (dev->eq_table.have_irq)
+ free_irq(dev->pdev->irq, dev);
+ for (i = 0; i < MTHCA_NUM_EQ; ++i)
+ if (dev->eq_table.eq[i].have_irq)
+ free_irq(dev->eq_table.eq[i].msi_x_vector,
+ dev->eq_table.eq + i);
+}
+
+static int __devinit mthca_map_reg(struct mthca_dev *dev,
+ unsigned long offset, unsigned long size,
+ void __iomem **map)
+{
+ unsigned long base = pci_resource_start(dev->pdev, 0);
+
+ if (!request_mem_region(base + offset, size, DRV_NAME))
+ return -EBUSY;
+
+ *map = ioremap(base + offset, size);
+ if (!*map) {
+ release_mem_region(base + offset, size);
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+static void mthca_unmap_reg(struct mthca_dev *dev, unsigned long offset,
+ unsigned long size, void __iomem *map)
+{
+ unsigned long base = pci_resource_start(dev->pdev, 0);
+
+ release_mem_region(base + offset, size);
+ iounmap(map);
+}
+
+static int __devinit mthca_map_eq_regs(struct mthca_dev *dev)
+{
+ unsigned long mthca_base;
+
+ mthca_base = pci_resource_start(dev->pdev, 0);
+
+ if (dev->hca_type == ARBEL_NATIVE) {
+ /*
+ * We assume that the EQ arm and EQ set CI registers
+ * fall within the first BAR. We can't trust the
+ * values firmware gives us, since those addresses are
+ * valid on the HCA's side of the PCI bus but not
+ * necessarily the host side.
+ */
+ if (mthca_map_reg(dev, (pci_resource_len(dev->pdev, 0) - 1) &
+ dev->fw.arbel.clr_int_base, MTHCA_CLR_INT_SIZE,
+ &dev->clr_base)) {
+ mthca_err(dev, "Couldn't map interrupt clear register, "
+ "aborting.\n");
+ return -ENOMEM;
+ }
+
+ /*
+ * Add 4 because we limit ourselves to EQs 0 ... 31,
+ * so we only need the low word of the register.
+ */
+ if (mthca_map_reg(dev, ((pci_resource_len(dev->pdev, 0) - 1) &
+ dev->fw.arbel.eq_arm_base) + 4, 4,
+ &dev->eq_regs.arbel.eq_arm)) {
+ mthca_err(dev, "Couldn't map interrupt clear register, "
+ "aborting.\n");
+ mthca_unmap_reg(dev, (pci_resource_len(dev->pdev, 0) - 1) &
+ dev->fw.arbel.clr_int_base, MTHCA_CLR_INT_SIZE,
+ dev->clr_base);
+ return -ENOMEM;
+ }
+
+ if (mthca_map_reg(dev, (pci_resource_len(dev->pdev, 0) - 1) &
+ dev->fw.arbel.eq_set_ci_base,
+ MTHCA_EQ_SET_CI_SIZE,
+ &dev->eq_regs.arbel.eq_set_ci_base)) {
+ mthca_err(dev, "Couldn't map interrupt clear register, "
+ "aborting.\n");
+ mthca_unmap_reg(dev, ((pci_resource_len(dev->pdev, 0) - 1) &
+ dev->fw.arbel.eq_arm_base) + 4, 4,
+ dev->eq_regs.arbel.eq_arm);
+ mthca_unmap_reg(dev, (pci_resource_len(dev->pdev, 0) - 1) &
+ dev->fw.arbel.clr_int_base, MTHCA_CLR_INT_SIZE,
+ dev->clr_base);
+ return -ENOMEM;
+ }
+ } else {
+ if (mthca_map_reg(dev, MTHCA_CLR_INT_BASE, MTHCA_CLR_INT_SIZE,
+ &dev->clr_base)) {
+ mthca_err(dev, "Couldn't map interrupt clear register, "
+ "aborting.\n");
+ return -ENOMEM;
+ }
+
+ if (mthca_map_reg(dev, MTHCA_ECR_BASE,
+ MTHCA_ECR_SIZE + MTHCA_ECR_CLR_SIZE,
+ &dev->eq_regs.tavor.ecr_base)) {
+ mthca_err(dev, "Couldn't map ecr register, "
+ "aborting.\n");
+ mthca_unmap_reg(dev, MTHCA_CLR_INT_BASE, MTHCA_CLR_INT_SIZE,
+ dev->clr_base);
+ return -ENOMEM;
+ }
+ }
+
+ return 0;
+
+}
+
+static void __devexit mthca_unmap_eq_regs(struct mthca_dev *dev)
+{
+ if (dev->hca_type == ARBEL_NATIVE) {
+ mthca_unmap_reg(dev, (pci_resource_len(dev->pdev, 0) - 1) &
+ dev->fw.arbel.eq_set_ci_base,
+ MTHCA_EQ_SET_CI_SIZE,
+ dev->eq_regs.arbel.eq_set_ci_base);
+ mthca_unmap_reg(dev, ((pci_resource_len(dev->pdev, 0) - 1) &
+ dev->fw.arbel.eq_arm_base) + 4, 4,
+ dev->eq_regs.arbel.eq_arm);
+ mthca_unmap_reg(dev, (pci_resource_len(dev->pdev, 0) - 1) &
+ dev->fw.arbel.clr_int_base, MTHCA_CLR_INT_SIZE,
+ dev->clr_base);
+ } else {
+ mthca_unmap_reg(dev, MTHCA_ECR_BASE,
+ MTHCA_ECR_SIZE + MTHCA_ECR_CLR_SIZE,
+ dev->eq_regs.tavor.ecr_base);
+ mthca_unmap_reg(dev, MTHCA_CLR_INT_BASE, MTHCA_CLR_INT_SIZE,
+ dev->clr_base);
+ }
+}
+
+int __devinit mthca_map_eq_icm(struct mthca_dev *dev, u64 icm_virt)
+{
+ int ret;
+ u8 status;
+
+ /*
+ * We assume that mapping one page is enough for the whole EQ
+ * context table. This is fine with all current HCAs, because
+ * we only use 32 EQs and each EQ uses 32 bytes of context
+ * memory, or 1 KB total.
+ */
+ dev->eq_table.icm_virt = icm_virt;
+ dev->eq_table.icm_page = alloc_page(GFP_HIGHUSER);
+ if (!dev->eq_table.icm_page)
+ return -ENOMEM;
+ dev->eq_table.icm_dma = pci_map_page(dev->pdev, dev->eq_table.icm_page, 0,
+ PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
+ if (pci_dma_mapping_error(dev->eq_table.icm_dma)) {
+ __free_page(dev->eq_table.icm_page);
+ return -ENOMEM;
+ }
+
+ ret = mthca_MAP_ICM_page(dev, dev->eq_table.icm_dma, icm_virt, &status);
+ if (!ret && status)
+ ret = -EINVAL;
+ if (ret) {
+ pci_unmap_page(dev->pdev, dev->eq_table.icm_dma, PAGE_SIZE,
+ PCI_DMA_BIDIRECTIONAL);
+ __free_page(dev->eq_table.icm_page);
+ }
+
+ return ret;
+}
+
+void __devexit mthca_unmap_eq_icm(struct mthca_dev *dev)
+{
+ u8 status;
+
+ mthca_UNMAP_ICM(dev, dev->eq_table.icm_virt, PAGE_SIZE / 4096, &status);
+ pci_unmap_page(dev->pdev, dev->eq_table.icm_dma, PAGE_SIZE,
+ PCI_DMA_BIDIRECTIONAL);
+ __free_page(dev->eq_table.icm_page);
+}
+
+int __devinit mthca_init_eq_table(struct mthca_dev *dev)
+{
+ int err;
+ u8 status;
+ u8 intr;
+ int i;
+
+ err = mthca_alloc_init(&dev->eq_table.alloc,
+ dev->limits.num_eqs,
+ dev->limits.num_eqs - 1,
+ dev->limits.reserved_eqs);
+ if (err)
+ return err;
+
+ err = mthca_map_eq_regs(dev);
+ if (err)
+ goto err_out_free;
+
+ if (dev->mthca_flags & MTHCA_FLAG_MSI ||
+ dev->mthca_flags & MTHCA_FLAG_MSI_X) {
+ dev->eq_table.clr_mask = 0;
+ } else {
+ dev->eq_table.clr_mask =
+ swab32(1 << (dev->eq_table.inta_pin & 31));
+ dev->eq_table.clr_int = dev->clr_base +
+ (dev->eq_table.inta_pin < 31 ? 4 : 0);
+ }
+
+ dev->eq_table.arm_mask = 0;
+
+ intr = (dev->mthca_flags & MTHCA_FLAG_MSI) ?
+ 128 : dev->eq_table.inta_pin;
+
+ err = mthca_create_eq(dev, dev->limits.num_cqs,
+ (dev->mthca_flags & MTHCA_FLAG_MSI_X) ? 128 : intr,
+ &dev->eq_table.eq[MTHCA_EQ_COMP]);
+ if (err)
+ goto err_out_unmap;
+
+ err = mthca_create_eq(dev, MTHCA_NUM_ASYNC_EQE,
+ (dev->mthca_flags & MTHCA_FLAG_MSI_X) ? 129 : intr,
+ &dev->eq_table.eq[MTHCA_EQ_ASYNC]);
+ if (err)
+ goto err_out_comp;
+
+ err = mthca_create_eq(dev, MTHCA_NUM_CMD_EQE,
+ (dev->mthca_flags & MTHCA_FLAG_MSI_X) ? 130 : intr,
+ &dev->eq_table.eq[MTHCA_EQ_CMD]);
+ if (err)
+ goto err_out_async;
+
+ if (dev->mthca_flags & MTHCA_FLAG_MSI_X) {
+ static const char *eq_name[] = {
+ [MTHCA_EQ_COMP] = DRV_NAME " (comp)",
+ [MTHCA_EQ_ASYNC] = DRV_NAME " (async)",
+ [MTHCA_EQ_CMD] = DRV_NAME " (cmd)"
+ };
+
+ for (i = 0; i < MTHCA_NUM_EQ; ++i) {
+ err = request_irq(dev->eq_table.eq[i].msi_x_vector,
+ dev->hca_type == ARBEL_NATIVE ?
+ mthca_arbel_msi_x_interrupt :
+ mthca_tavor_msi_x_interrupt,
+ 0, eq_name[i], dev->eq_table.eq + i);
+ if (err)
+ goto err_out_cmd;
+ dev->eq_table.eq[i].have_irq = 1;
+ }
+ } else {
+ err = request_irq(dev->pdev->irq,
+ dev->hca_type == ARBEL_NATIVE ?
+ mthca_arbel_interrupt :
+ mthca_tavor_interrupt,
+ SA_SHIRQ, DRV_NAME, dev);
+ if (err)
+ goto err_out_cmd;
+ dev->eq_table.have_irq = 1;
+ }
+
+ err = mthca_MAP_EQ(dev, async_mask(dev),
+ 0, dev->eq_table.eq[MTHCA_EQ_ASYNC].eqn, &status);
+ if (err)
+ mthca_warn(dev, "MAP_EQ for async EQ %d failed (%d)\n",
+ dev->eq_table.eq[MTHCA_EQ_ASYNC].eqn, err);
+ if (status)
+ mthca_warn(dev, "MAP_EQ for async EQ %d returned status 0x%02x\n",
+ dev->eq_table.eq[MTHCA_EQ_ASYNC].eqn, status);
+
+ err = mthca_MAP_EQ(dev, MTHCA_CMD_EVENT_MASK,
+ 0, dev->eq_table.eq[MTHCA_EQ_CMD].eqn, &status);
+ if (err)
+ mthca_warn(dev, "MAP_EQ for cmd EQ %d failed (%d)\n",
+ dev->eq_table.eq[MTHCA_EQ_CMD].eqn, err);
+ if (status)
+ mthca_warn(dev, "MAP_EQ for cmd EQ %d returned status 0x%02x\n",
+ dev->eq_table.eq[MTHCA_EQ_CMD].eqn, status);
+
+ for (i = 0; i < MTHCA_EQ_CMD; ++i)
+ if (dev->hca_type == ARBEL_NATIVE)
+ arbel_eq_req_not(dev, dev->eq_table.eq[i].eqn_mask);
+ else
+ tavor_eq_req_not(dev, dev->eq_table.eq[i].eqn);
+
+ return 0;
+
+err_out_cmd:
+ mthca_free_irqs(dev);
+ mthca_free_eq(dev, &dev->eq_table.eq[MTHCA_EQ_CMD]);
+
+err_out_async:
+ mthca_free_eq(dev, &dev->eq_table.eq[MTHCA_EQ_ASYNC]);
+
+err_out_comp:
+ mthca_free_eq(dev, &dev->eq_table.eq[MTHCA_EQ_COMP]);
+
+err_out_unmap:
+ mthca_unmap_eq_regs(dev);
+
+err_out_free:
+ mthca_alloc_cleanup(&dev->eq_table.alloc);
+ return err;
+}
+
+void __devexit mthca_cleanup_eq_table(struct mthca_dev *dev)
+{
+ u8 status;
+ int i;
+
+ mthca_free_irqs(dev);
+
+ mthca_MAP_EQ(dev, async_mask(dev),
+ 1, dev->eq_table.eq[MTHCA_EQ_ASYNC].eqn, &status);
+ mthca_MAP_EQ(dev, MTHCA_CMD_EVENT_MASK,
+ 1, dev->eq_table.eq[MTHCA_EQ_CMD].eqn, &status);
+
+ for (i = 0; i < MTHCA_NUM_EQ; ++i)
+ mthca_free_eq(dev, &dev->eq_table.eq[i]);
+
+ mthca_unmap_eq_regs(dev);
+
+ mthca_alloc_cleanup(&dev->eq_table.alloc);
+}
diff --git a/drivers/infiniband/hw/mthca/mthca_mad.c b/drivers/infiniband/hw/mthca/mthca_mad.c
new file mode 100644
index 000000000000..7df223642015
--- /dev/null
+++ b/drivers/infiniband/hw/mthca/mthca_mad.c
@@ -0,0 +1,323 @@
+/*
+ * Copyright (c) 2004 Topspin Communications. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * $Id: mthca_mad.c 1349 2004-12-16 21:09:43Z roland $
+ */
+
+#include <ib_verbs.h>
+#include <ib_mad.h>
+#include <ib_smi.h>
+
+#include "mthca_dev.h"
+#include "mthca_cmd.h"
+
+enum {
+ MTHCA_VENDOR_CLASS1 = 0x9,
+ MTHCA_VENDOR_CLASS2 = 0xa
+};
+
+struct mthca_trap_mad {
+ struct ib_mad *mad;
+ DECLARE_PCI_UNMAP_ADDR(mapping)
+};
+
+static void update_sm_ah(struct mthca_dev *dev,
+ u8 port_num, u16 lid, u8 sl)
+{
+ struct ib_ah *new_ah;
+ struct ib_ah_attr ah_attr;
+ unsigned long flags;
+
+ if (!dev->send_agent[port_num - 1][0])
+ return;
+
+ memset(&ah_attr, 0, sizeof ah_attr);
+ ah_attr.dlid = lid;
+ ah_attr.sl = sl;
+ ah_attr.port_num = port_num;
+
+ new_ah = ib_create_ah(dev->send_agent[port_num - 1][0]->qp->pd,
+ &ah_attr);
+ if (IS_ERR(new_ah))
+ return;
+
+ spin_lock_irqsave(&dev->sm_lock, flags);
+ if (dev->sm_ah[port_num - 1])
+ ib_destroy_ah(dev->sm_ah[port_num - 1]);
+ dev->sm_ah[port_num - 1] = new_ah;
+ spin_unlock_irqrestore(&dev->sm_lock, flags);
+}
+
+/*
+ * Snoop SM MADs for port info and P_Key table sets, so we can
+ * synthesize LID change and P_Key change events.
+ */
+static void smp_snoop(struct ib_device *ibdev,
+ u8 port_num,
+ struct ib_mad *mad)
+{
+ struct ib_event event;
+
+ if ((mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED ||
+ mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) &&
+ mad->mad_hdr.method == IB_MGMT_METHOD_SET) {
+ if (mad->mad_hdr.attr_id == IB_SMP_ATTR_PORT_INFO) {
+ update_sm_ah(to_mdev(ibdev), port_num,
+ be16_to_cpup((__be16 *) (mad->data + 58)),
+ (*(u8 *) (mad->data + 76)) & 0xf);
+
+ event.device = ibdev;
+ event.event = IB_EVENT_LID_CHANGE;
+ event.element.port_num = port_num;
+ ib_dispatch_event(&event);
+ }
+
+ if (mad->mad_hdr.attr_id == IB_SMP_ATTR_PKEY_TABLE) {
+ event.device = ibdev;
+ event.event = IB_EVENT_PKEY_CHANGE;
+ event.element.port_num = port_num;
+ ib_dispatch_event(&event);
+ }
+ }
+}
+
+static void forward_trap(struct mthca_dev *dev,
+ u8 port_num,
+ struct ib_mad *mad)
+{
+ int qpn = mad->mad_hdr.mgmt_class != IB_MGMT_CLASS_SUBN_LID_ROUTED;
+ struct mthca_trap_mad *tmad;
+ struct ib_sge gather_list;
+ struct ib_send_wr *bad_wr, wr = {
+ .opcode = IB_WR_SEND,
+ .sg_list = &gather_list,
+ .num_sge = 1,
+ .send_flags = IB_SEND_SIGNALED,
+ .wr = {
+ .ud = {
+ .remote_qpn = qpn,
+ .remote_qkey = qpn ? IB_QP1_QKEY : 0,
+ .timeout_ms = 0
+ }
+ }
+ };
+ struct ib_mad_agent *agent = dev->send_agent[port_num - 1][qpn];
+ int ret;
+ unsigned long flags;
+
+ if (agent) {
+ tmad = kmalloc(sizeof *tmad, GFP_KERNEL);
+ if (!tmad)
+ return;
+
+ tmad->mad = kmalloc(sizeof *tmad->mad, GFP_KERNEL);
+ if (!tmad->mad) {
+ kfree(tmad);
+ return;
+ }
+
+ memcpy(tmad->mad, mad, sizeof *mad);
+
+ wr.wr.ud.mad_hdr = &tmad->mad->mad_hdr;
+ wr.wr_id = (unsigned long) tmad;
+
+ gather_list.addr = dma_map_single(agent->device->dma_device,
+ tmad->mad,
+ sizeof *tmad->mad,
+ DMA_TO_DEVICE);
+ gather_list.length = sizeof *tmad->mad;
+ gather_list.lkey = to_mpd(agent->qp->pd)->ntmr.ibmr.lkey;
+ pci_unmap_addr_set(tmad, mapping, gather_list.addr);
+
+ /*
+ * We rely here on the fact that MLX QPs don't use the
+ * address handle after the send is posted (this is
+ * wrong following the IB spec strictly, but we know
+ * it's OK for our devices).
+ */
+ spin_lock_irqsave(&dev->sm_lock, flags);
+ wr.wr.ud.ah = dev->sm_ah[port_num - 1];
+ if (wr.wr.ud.ah)
+ ret = ib_post_send_mad(agent, &wr, &bad_wr);
+ else
+ ret = -EINVAL;
+ spin_unlock_irqrestore(&dev->sm_lock, flags);
+
+ if (ret) {
+ dma_unmap_single(agent->device->dma_device,
+ pci_unmap_addr(tmad, mapping),
+ sizeof *tmad->mad,
+ DMA_TO_DEVICE);
+ kfree(tmad->mad);
+ kfree(tmad);
+ }
+ }
+}
+
+int mthca_process_mad(struct ib_device *ibdev,
+ int mad_flags,
+ u8 port_num,
+ struct ib_wc *in_wc,
+ struct ib_grh *in_grh,
+ struct ib_mad *in_mad,
+ struct ib_mad *out_mad)
+{
+ int err;
+ u8 status;
+ u16 slid = in_wc ? in_wc->slid : IB_LID_PERMISSIVE;
+
+ /* Forward locally generated traps to the SM */
+ if (in_mad->mad_hdr.method == IB_MGMT_METHOD_TRAP &&
+ slid == 0) {
+ forward_trap(to_mdev(ibdev), port_num, in_mad);
+ return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED;
+ }
+
+ /*
+ * Only handle SM gets, sets and trap represses for SM class
+ *
+ * Only handle PMA and Mellanox vendor-specific class gets and
+ * sets for other classes.
+ */
+ if (in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED ||
+ in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) {
+ if (in_mad->mad_hdr.method != IB_MGMT_METHOD_GET &&
+ in_mad->mad_hdr.method != IB_MGMT_METHOD_SET &&
+ in_mad->mad_hdr.method != IB_MGMT_METHOD_TRAP_REPRESS)
+ return IB_MAD_RESULT_SUCCESS;
+
+ /*
+ * Don't process SMInfo queries or vendor-specific
+ * MADs -- the SMA can't handle them.
+ */
+ if (in_mad->mad_hdr.attr_id == IB_SMP_ATTR_SM_INFO ||
+ ((in_mad->mad_hdr.attr_id & IB_SMP_ATTR_VENDOR_MASK) ==
+ IB_SMP_ATTR_VENDOR_MASK))
+ return IB_MAD_RESULT_SUCCESS;
+ } else if (in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_PERF_MGMT ||
+ in_mad->mad_hdr.mgmt_class == MTHCA_VENDOR_CLASS1 ||
+ in_mad->mad_hdr.mgmt_class == MTHCA_VENDOR_CLASS2) {
+ if (in_mad->mad_hdr.method != IB_MGMT_METHOD_GET &&
+ in_mad->mad_hdr.method != IB_MGMT_METHOD_SET)
+ return IB_MAD_RESULT_SUCCESS;
+ } else
+ return IB_MAD_RESULT_SUCCESS;
+
+ err = mthca_MAD_IFC(to_mdev(ibdev),
+ mad_flags & IB_MAD_IGNORE_MKEY,
+ mad_flags & IB_MAD_IGNORE_BKEY,
+ port_num, in_wc, in_grh, in_mad, out_mad,
+ &status);
+ if (err) {
+ mthca_err(to_mdev(ibdev), "MAD_IFC failed\n");
+ return IB_MAD_RESULT_FAILURE;
+ }
+ if (status == MTHCA_CMD_STAT_BAD_PKT)
+ return IB_MAD_RESULT_SUCCESS;
+ if (status) {
+ mthca_err(to_mdev(ibdev), "MAD_IFC returned status %02x\n",
+ status);
+ return IB_MAD_RESULT_FAILURE;
+ }
+
+ if (!out_mad->mad_hdr.status)
+ smp_snoop(ibdev, port_num, in_mad);
+
+ /* set return bit in status of directed route responses */
+ if (in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)
+ out_mad->mad_hdr.status |= cpu_to_be16(1 << 15);
+
+ if (in_mad->mad_hdr.method == IB_MGMT_METHOD_TRAP_REPRESS)
+ /* no response for trap repress */
+ return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED;
+
+ return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY;
+}
+
+static void send_handler(struct ib_mad_agent *agent,
+ struct ib_mad_send_wc *mad_send_wc)
+{
+ struct mthca_trap_mad *tmad =
+ (void *) (unsigned long) mad_send_wc->wr_id;
+
+ dma_unmap_single(agent->device->dma_device,
+ pci_unmap_addr(tmad, mapping),
+ sizeof *tmad->mad,
+ DMA_TO_DEVICE);
+ kfree(tmad->mad);
+ kfree(tmad);
+}
+
+int mthca_create_agents(struct mthca_dev *dev)
+{
+ struct ib_mad_agent *agent;
+ int p, q;
+
+ spin_lock_init(&dev->sm_lock);
+
+ for (p = 0; p < dev->limits.num_ports; ++p)
+ for (q = 0; q <= 1; ++q) {
+ agent = ib_register_mad_agent(&dev->ib_dev, p + 1,
+ q ? IB_QPT_GSI : IB_QPT_SMI,
+ NULL, 0, send_handler,
+ NULL, NULL);
+ if (IS_ERR(agent))
+ goto err;
+ dev->send_agent[p][q] = agent;
+ }
+
+ return 0;
+
+err:
+ for (p = 0; p < dev->limits.num_ports; ++p)
+ for (q = 0; q <= 1; ++q)
+ if (dev->send_agent[p][q])
+ ib_unregister_mad_agent(dev->send_agent[p][q]);
+
+ return PTR_ERR(agent);
+}
+
+void mthca_free_agents(struct mthca_dev *dev)
+{
+ struct ib_mad_agent *agent;
+ int p, q;
+
+ for (p = 0; p < dev->limits.num_ports; ++p) {
+ for (q = 0; q <= 1; ++q) {
+ agent = dev->send_agent[p][q];
+ dev->send_agent[p][q] = NULL;
+ ib_unregister_mad_agent(agent);
+ }
+
+ if (dev->sm_ah[p])
+ ib_destroy_ah(dev->sm_ah[p]);
+ }
+}
diff --git a/drivers/infiniband/hw/mthca/mthca_main.c b/drivers/infiniband/hw/mthca/mthca_main.c
new file mode 100644
index 000000000000..9e782bc1c38d
--- /dev/null
+++ b/drivers/infiniband/hw/mthca/mthca_main.c
@@ -0,0 +1,1123 @@
+/*
+ * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * $Id: mthca_main.c 1396 2004-12-28 04:10:27Z roland $
+ */
+
+#include <linux/config.h>
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/errno.h>
+#include <linux/pci.h>
+#include <linux/interrupt.h>
+
+#include "mthca_dev.h"
+#include "mthca_config_reg.h"
+#include "mthca_cmd.h"
+#include "mthca_profile.h"
+#include "mthca_memfree.h"
+
+MODULE_AUTHOR("Roland Dreier");
+MODULE_DESCRIPTION("Mellanox InfiniBand HCA low-level driver");
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_VERSION(DRV_VERSION);
+
+#ifdef CONFIG_PCI_MSI
+
+static int msi_x = 0;
+module_param(msi_x, int, 0444);
+MODULE_PARM_DESC(msi_x, "attempt to use MSI-X if nonzero");
+
+static int msi = 0;
+module_param(msi, int, 0444);
+MODULE_PARM_DESC(msi, "attempt to use MSI if nonzero");
+
+#else /* CONFIG_PCI_MSI */
+
+#define msi_x (0)
+#define msi (0)
+
+#endif /* CONFIG_PCI_MSI */
+
+static const char mthca_version[] __devinitdata =
+ "ib_mthca: Mellanox InfiniBand HCA driver v"
+ DRV_VERSION " (" DRV_RELDATE ")\n";
+
+static struct mthca_profile default_profile = {
+ .num_qp = 1 << 16,
+ .rdb_per_qp = 4,
+ .num_cq = 1 << 16,
+ .num_mcg = 1 << 13,
+ .num_mpt = 1 << 17,
+ .num_mtt = 1 << 20,
+ .num_udav = 1 << 15, /* Tavor only */
+ .uarc_size = 1 << 18, /* Arbel only */
+};
+
+static int __devinit mthca_tune_pci(struct mthca_dev *mdev)
+{
+ int cap;
+ u16 val;
+
+ /* First try to max out Read Byte Count */
+ cap = pci_find_capability(mdev->pdev, PCI_CAP_ID_PCIX);
+ if (cap) {
+ if (pci_read_config_word(mdev->pdev, cap + PCI_X_CMD, &val)) {
+ mthca_err(mdev, "Couldn't read PCI-X command register, "
+ "aborting.\n");
+ return -ENODEV;
+ }
+ val = (val & ~PCI_X_CMD_MAX_READ) | (3 << 2);
+ if (pci_write_config_word(mdev->pdev, cap + PCI_X_CMD, val)) {
+ mthca_err(mdev, "Couldn't write PCI-X command register, "
+ "aborting.\n");
+ return -ENODEV;
+ }
+ } else if (mdev->hca_type == TAVOR)
+ mthca_info(mdev, "No PCI-X capability, not setting RBC.\n");
+
+ cap = pci_find_capability(mdev->pdev, PCI_CAP_ID_EXP);
+ if (cap) {
+ if (pci_read_config_word(mdev->pdev, cap + PCI_EXP_DEVCTL, &val)) {
+ mthca_err(mdev, "Couldn't read PCI Express device control "
+ "register, aborting.\n");
+ return -ENODEV;
+ }
+ val = (val & ~PCI_EXP_DEVCTL_READRQ) | (5 << 12);
+ if (pci_write_config_word(mdev->pdev, cap + PCI_EXP_DEVCTL, val)) {
+ mthca_err(mdev, "Couldn't write PCI Express device control "
+ "register, aborting.\n");
+ return -ENODEV;
+ }
+ } else if (mdev->hca_type == ARBEL_NATIVE ||
+ mdev->hca_type == ARBEL_COMPAT)
+ mthca_info(mdev, "No PCI Express capability, "
+ "not setting Max Read Request Size.\n");
+
+ return 0;
+}
+
+static int __devinit mthca_dev_lim(struct mthca_dev *mdev, struct mthca_dev_lim *dev_lim)
+{
+ int err;
+ u8 status;
+
+ err = mthca_QUERY_DEV_LIM(mdev, dev_lim, &status);
+ if (err) {
+ mthca_err(mdev, "QUERY_DEV_LIM command failed, aborting.\n");
+ return err;
+ }
+ if (status) {
+ mthca_err(mdev, "QUERY_DEV_LIM returned status 0x%02x, "
+ "aborting.\n", status);
+ return -EINVAL;
+ }
+ if (dev_lim->min_page_sz > PAGE_SIZE) {
+ mthca_err(mdev, "HCA minimum page size of %d bigger than "
+ "kernel PAGE_SIZE of %ld, aborting.\n",
+ dev_lim->min_page_sz, PAGE_SIZE);
+ return -ENODEV;
+ }
+ if (dev_lim->num_ports > MTHCA_MAX_PORTS) {
+ mthca_err(mdev, "HCA has %d ports, but we only support %d, "
+ "aborting.\n",
+ dev_lim->num_ports, MTHCA_MAX_PORTS);
+ return -ENODEV;
+ }
+
+ mdev->limits.num_ports = dev_lim->num_ports;
+ mdev->limits.vl_cap = dev_lim->max_vl;
+ mdev->limits.mtu_cap = dev_lim->max_mtu;
+ mdev->limits.gid_table_len = dev_lim->max_gids;
+ mdev->limits.pkey_table_len = dev_lim->max_pkeys;
+ mdev->limits.local_ca_ack_delay = dev_lim->local_ca_ack_delay;
+ mdev->limits.max_sg = dev_lim->max_sg;
+ mdev->limits.reserved_qps = dev_lim->reserved_qps;
+ mdev->limits.reserved_srqs = dev_lim->reserved_srqs;
+ mdev->limits.reserved_eecs = dev_lim->reserved_eecs;
+ mdev->limits.reserved_cqs = dev_lim->reserved_cqs;
+ mdev->limits.reserved_eqs = dev_lim->reserved_eqs;
+ mdev->limits.reserved_mtts = dev_lim->reserved_mtts;
+ mdev->limits.reserved_mrws = dev_lim->reserved_mrws;
+ mdev->limits.reserved_uars = dev_lim->reserved_uars;
+ mdev->limits.reserved_pds = dev_lim->reserved_pds;
+
+ /* IB_DEVICE_RESIZE_MAX_WR not supported by driver.
+ May be doable since hardware supports it for SRQ.
+
+ IB_DEVICE_N_NOTIFY_CQ is supported by hardware but not by driver.
+
+ IB_DEVICE_SRQ_RESIZE is supported by hardware but SRQ is not
+ supported by driver. */
+ mdev->device_cap_flags = IB_DEVICE_CHANGE_PHY_PORT |
+ IB_DEVICE_PORT_ACTIVE_EVENT |
+ IB_DEVICE_SYS_IMAGE_GUID |
+ IB_DEVICE_RC_RNR_NAK_GEN;
+
+ if (dev_lim->flags & DEV_LIM_FLAG_BAD_PKEY_CNTR)
+ mdev->device_cap_flags |= IB_DEVICE_BAD_PKEY_CNTR;
+
+ if (dev_lim->flags & DEV_LIM_FLAG_BAD_QKEY_CNTR)
+ mdev->device_cap_flags |= IB_DEVICE_BAD_QKEY_CNTR;
+
+ if (dev_lim->flags & DEV_LIM_FLAG_RAW_MULTI)
+ mdev->device_cap_flags |= IB_DEVICE_RAW_MULTI;
+
+ if (dev_lim->flags & DEV_LIM_FLAG_AUTO_PATH_MIG)
+ mdev->device_cap_flags |= IB_DEVICE_AUTO_PATH_MIG;
+
+ if (dev_lim->flags & DEV_LIM_FLAG_UD_AV_PORT_ENFORCE)
+ mdev->device_cap_flags |= IB_DEVICE_UD_AV_PORT_ENFORCE;
+
+ if (dev_lim->flags & DEV_LIM_FLAG_SRQ)
+ mdev->mthca_flags |= MTHCA_FLAG_SRQ;
+
+ return 0;
+}
+
+static int __devinit mthca_init_tavor(struct mthca_dev *mdev)
+{
+ u8 status;
+ int err;
+ struct mthca_dev_lim dev_lim;
+ struct mthca_profile profile;
+ struct mthca_init_hca_param init_hca;
+ struct mthca_adapter adapter;
+
+ err = mthca_SYS_EN(mdev, &status);
+ if (err) {
+ mthca_err(mdev, "SYS_EN command failed, aborting.\n");
+ return err;
+ }
+ if (status) {
+ mthca_err(mdev, "SYS_EN returned status 0x%02x, "
+ "aborting.\n", status);
+ return -EINVAL;
+ }
+
+ err = mthca_QUERY_FW(mdev, &status);
+ if (err) {
+ mthca_err(mdev, "QUERY_FW command failed, aborting.\n");
+ goto err_disable;
+ }
+ if (status) {
+ mthca_err(mdev, "QUERY_FW returned status 0x%02x, "
+ "aborting.\n", status);
+ err = -EINVAL;
+ goto err_disable;
+ }
+ err = mthca_QUERY_DDR(mdev, &status);
+ if (err) {
+ mthca_err(mdev, "QUERY_DDR command failed, aborting.\n");
+ goto err_disable;
+ }
+ if (status) {
+ mthca_err(mdev, "QUERY_DDR returned status 0x%02x, "
+ "aborting.\n", status);
+ err = -EINVAL;
+ goto err_disable;
+ }
+
+ err = mthca_dev_lim(mdev, &dev_lim);
+
+ profile = default_profile;
+ profile.num_uar = dev_lim.uar_size / PAGE_SIZE;
+ profile.uarc_size = 0;
+
+ err = mthca_make_profile(mdev, &profile, &dev_lim, &init_hca);
+ if (err < 0)
+ goto err_disable;
+
+ err = mthca_INIT_HCA(mdev, &init_hca, &status);
+ if (err) {
+ mthca_err(mdev, "INIT_HCA command failed, aborting.\n");
+ goto err_disable;
+ }
+ if (status) {
+ mthca_err(mdev, "INIT_HCA returned status 0x%02x, "
+ "aborting.\n", status);
+ err = -EINVAL;
+ goto err_disable;
+ }
+
+ err = mthca_QUERY_ADAPTER(mdev, &adapter, &status);
+ if (err) {
+ mthca_err(mdev, "QUERY_ADAPTER command failed, aborting.\n");
+ goto err_close;
+ }
+ if (status) {
+ mthca_err(mdev, "QUERY_ADAPTER returned status 0x%02x, "
+ "aborting.\n", status);
+ err = -EINVAL;
+ goto err_close;
+ }
+
+ mdev->eq_table.inta_pin = adapter.inta_pin;
+ mdev->rev_id = adapter.revision_id;
+
+ return 0;
+
+err_close:
+ mthca_CLOSE_HCA(mdev, 0, &status);
+
+err_disable:
+ mthca_SYS_DIS(mdev, &status);
+
+ return err;
+}
+
+static int __devinit mthca_load_fw(struct mthca_dev *mdev)
+{
+ u8 status;
+ int err;
+
+ /* FIXME: use HCA-attached memory for FW if present */
+
+ mdev->fw.arbel.fw_icm =
+ mthca_alloc_icm(mdev, mdev->fw.arbel.fw_pages,
+ GFP_HIGHUSER | __GFP_NOWARN);
+ if (!mdev->fw.arbel.fw_icm) {
+ mthca_err(mdev, "Couldn't allocate FW area, aborting.\n");
+ return -ENOMEM;
+ }
+
+ err = mthca_MAP_FA(mdev, mdev->fw.arbel.fw_icm, &status);
+ if (err) {
+ mthca_err(mdev, "MAP_FA command failed, aborting.\n");
+ goto err_free;
+ }
+ if (status) {
+ mthca_err(mdev, "MAP_FA returned status 0x%02x, aborting.\n", status);
+ err = -EINVAL;
+ goto err_free;
+ }
+ err = mthca_RUN_FW(mdev, &status);
+ if (err) {
+ mthca_err(mdev, "RUN_FW command failed, aborting.\n");
+ goto err_unmap_fa;
+ }
+ if (status) {
+ mthca_err(mdev, "RUN_FW returned status 0x%02x, aborting.\n", status);
+ err = -EINVAL;
+ goto err_unmap_fa;
+ }
+
+ return 0;
+
+err_unmap_fa:
+ mthca_UNMAP_FA(mdev, &status);
+
+err_free:
+ mthca_free_icm(mdev, mdev->fw.arbel.fw_icm);
+ return err;
+}
+
+static int __devinit mthca_init_icm(struct mthca_dev *mdev,
+ struct mthca_dev_lim *dev_lim,
+ struct mthca_init_hca_param *init_hca,
+ u64 icm_size)
+{
+ u64 aux_pages;
+ u8 status;
+ int err;
+
+ err = mthca_SET_ICM_SIZE(mdev, icm_size, &aux_pages, &status);
+ if (err) {
+ mthca_err(mdev, "SET_ICM_SIZE command failed, aborting.\n");
+ return err;
+ }
+ if (status) {
+ mthca_err(mdev, "SET_ICM_SIZE returned status 0x%02x, "
+ "aborting.\n", status);
+ return -EINVAL;
+ }
+
+ mthca_dbg(mdev, "%lld KB of HCA context requires %lld KB aux memory.\n",
+ (unsigned long long) icm_size >> 10,
+ (unsigned long long) aux_pages << 2);
+
+ mdev->fw.arbel.aux_icm = mthca_alloc_icm(mdev, aux_pages,
+ GFP_HIGHUSER | __GFP_NOWARN);
+ if (!mdev->fw.arbel.aux_icm) {
+ mthca_err(mdev, "Couldn't allocate aux memory, aborting.\n");
+ return -ENOMEM;
+ }
+
+ err = mthca_MAP_ICM_AUX(mdev, mdev->fw.arbel.aux_icm, &status);
+ if (err) {
+ mthca_err(mdev, "MAP_ICM_AUX command failed, aborting.\n");
+ goto err_free_aux;
+ }
+ if (status) {
+ mthca_err(mdev, "MAP_ICM_AUX returned status 0x%02x, aborting.\n", status);
+ err = -EINVAL;
+ goto err_free_aux;
+ }
+
+ err = mthca_map_eq_icm(mdev, init_hca->eqc_base);
+ if (err) {
+ mthca_err(mdev, "Failed to map EQ context memory, aborting.\n");
+ goto err_unmap_aux;
+ }
+
+ mdev->mr_table.mtt_table = mthca_alloc_icm_table(mdev, init_hca->mtt_base,
+ init_hca->mtt_seg_sz,
+ mdev->limits.num_mtt_segs,
+ mdev->limits.reserved_mtts, 1);
+ if (!mdev->mr_table.mtt_table) {
+ mthca_err(mdev, "Failed to map MTT context memory, aborting.\n");
+ err = -ENOMEM;
+ goto err_unmap_eq;
+ }
+
+ mdev->mr_table.mpt_table = mthca_alloc_icm_table(mdev, init_hca->mpt_base,
+ dev_lim->mpt_entry_sz,
+ mdev->limits.num_mpts,
+ mdev->limits.reserved_mrws, 1);
+ if (!mdev->mr_table.mpt_table) {
+ mthca_err(mdev, "Failed to map MPT context memory, aborting.\n");
+ err = -ENOMEM;
+ goto err_unmap_mtt;
+ }
+
+ mdev->qp_table.qp_table = mthca_alloc_icm_table(mdev, init_hca->qpc_base,
+ dev_lim->qpc_entry_sz,
+ mdev->limits.num_qps,
+ mdev->limits.reserved_qps, 0);
+ if (!mdev->qp_table.qp_table) {
+ mthca_err(mdev, "Failed to map QP context memory, aborting.\n");
+ err = -ENOMEM;
+ goto err_unmap_mpt;
+ }
+
+ mdev->qp_table.eqp_table = mthca_alloc_icm_table(mdev, init_hca->eqpc_base,
+ dev_lim->eqpc_entry_sz,
+ mdev->limits.num_qps,
+ mdev->limits.reserved_qps, 0);
+ if (!mdev->qp_table.eqp_table) {
+ mthca_err(mdev, "Failed to map EQP context memory, aborting.\n");
+ err = -ENOMEM;
+ goto err_unmap_qp;
+ }
+
+ mdev->cq_table.table = mthca_alloc_icm_table(mdev, init_hca->cqc_base,
+ dev_lim->cqc_entry_sz,
+ mdev->limits.num_cqs,
+ mdev->limits.reserved_cqs, 0);
+ if (!mdev->cq_table.table) {
+ mthca_err(mdev, "Failed to map CQ context memory, aborting.\n");
+ err = -ENOMEM;
+ goto err_unmap_eqp;
+ }
+
+ /*
+ * It's not strictly required, but for simplicity just map the
+ * whole multicast group table now. The table isn't very big
+ * and it's a lot easier than trying to track ref counts.
+ */
+ mdev->mcg_table.table = mthca_alloc_icm_table(mdev, init_hca->mc_base,
+ MTHCA_MGM_ENTRY_SIZE,
+ mdev->limits.num_mgms +
+ mdev->limits.num_amgms,
+ mdev->limits.num_mgms +
+ mdev->limits.num_amgms,
+ 0);
+ if (!mdev->mcg_table.table) {
+ mthca_err(mdev, "Failed to map MCG context memory, aborting.\n");
+ err = -ENOMEM;
+ goto err_unmap_cq;
+ }
+
+ return 0;
+
+err_unmap_cq:
+ mthca_free_icm_table(mdev, mdev->cq_table.table);
+
+err_unmap_eqp:
+ mthca_free_icm_table(mdev, mdev->qp_table.eqp_table);
+
+err_unmap_qp:
+ mthca_free_icm_table(mdev, mdev->qp_table.qp_table);
+
+err_unmap_mpt:
+ mthca_free_icm_table(mdev, mdev->mr_table.mpt_table);
+
+err_unmap_mtt:
+ mthca_free_icm_table(mdev, mdev->mr_table.mtt_table);
+
+err_unmap_eq:
+ mthca_unmap_eq_icm(mdev);
+
+err_unmap_aux:
+ mthca_UNMAP_ICM_AUX(mdev, &status);
+
+err_free_aux:
+ mthca_free_icm(mdev, mdev->fw.arbel.aux_icm);
+
+ return err;
+}
+
+static int __devinit mthca_init_arbel(struct mthca_dev *mdev)
+{
+ struct mthca_dev_lim dev_lim;
+ struct mthca_profile profile;
+ struct mthca_init_hca_param init_hca;
+ struct mthca_adapter adapter;
+ u64 icm_size;
+ u8 status;
+ int err;
+
+ err = mthca_QUERY_FW(mdev, &status);
+ if (err) {
+ mthca_err(mdev, "QUERY_FW command failed, aborting.\n");
+ return err;
+ }
+ if (status) {
+ mthca_err(mdev, "QUERY_FW returned status 0x%02x, "
+ "aborting.\n", status);
+ return -EINVAL;
+ }
+
+ err = mthca_ENABLE_LAM(mdev, &status);
+ if (err) {
+ mthca_err(mdev, "ENABLE_LAM command failed, aborting.\n");
+ return err;
+ }
+ if (status == MTHCA_CMD_STAT_LAM_NOT_PRE) {
+ mthca_dbg(mdev, "No HCA-attached memory (running in MemFree mode)\n");
+ mdev->mthca_flags |= MTHCA_FLAG_NO_LAM;
+ } else if (status) {
+ mthca_err(mdev, "ENABLE_LAM returned status 0x%02x, "
+ "aborting.\n", status);
+ return -EINVAL;
+ }
+
+ err = mthca_load_fw(mdev);
+ if (err) {
+ mthca_err(mdev, "Failed to start FW, aborting.\n");
+ goto err_disable;
+ }
+
+ err = mthca_dev_lim(mdev, &dev_lim);
+ if (err) {
+ mthca_err(mdev, "QUERY_DEV_LIM command failed, aborting.\n");
+ goto err_stop_fw;
+ }
+
+ profile = default_profile;
+ profile.num_uar = dev_lim.uar_size / PAGE_SIZE;
+ profile.num_udav = 0;
+
+ icm_size = mthca_make_profile(mdev, &profile, &dev_lim, &init_hca);
+ if ((int) icm_size < 0) {
+ err = icm_size;
+ goto err_stop_fw;
+ }
+
+ err = mthca_init_icm(mdev, &dev_lim, &init_hca, icm_size);
+ if (err)
+ goto err_stop_fw;
+
+ err = mthca_INIT_HCA(mdev, &init_hca, &status);
+ if (err) {
+ mthca_err(mdev, "INIT_HCA command failed, aborting.\n");
+ goto err_free_icm;
+ }
+ if (status) {
+ mthca_err(mdev, "INIT_HCA returned status 0x%02x, "
+ "aborting.\n", status);
+ err = -EINVAL;
+ goto err_free_icm;
+ }
+
+ err = mthca_QUERY_ADAPTER(mdev, &adapter, &status);
+ if (err) {
+ mthca_err(mdev, "QUERY_ADAPTER command failed, aborting.\n");
+ goto err_free_icm;
+ }
+ if (status) {
+ mthca_err(mdev, "QUERY_ADAPTER returned status 0x%02x, "
+ "aborting.\n", status);
+ err = -EINVAL;
+ goto err_free_icm;
+ }
+
+ mdev->eq_table.inta_pin = adapter.inta_pin;
+ mdev->rev_id = adapter.revision_id;
+
+ return 0;
+
+err_free_icm:
+ mthca_free_icm_table(mdev, mdev->cq_table.table);
+ mthca_free_icm_table(mdev, mdev->qp_table.eqp_table);
+ mthca_free_icm_table(mdev, mdev->qp_table.qp_table);
+ mthca_free_icm_table(mdev, mdev->mr_table.mpt_table);
+ mthca_free_icm_table(mdev, mdev->mr_table.mtt_table);
+ mthca_unmap_eq_icm(mdev);
+
+ mthca_UNMAP_ICM_AUX(mdev, &status);
+ mthca_free_icm(mdev, mdev->fw.arbel.aux_icm);
+
+err_stop_fw:
+ mthca_UNMAP_FA(mdev, &status);
+ mthca_free_icm(mdev, mdev->fw.arbel.fw_icm);
+
+err_disable:
+ if (!(mdev->mthca_flags & MTHCA_FLAG_NO_LAM))
+ mthca_DISABLE_LAM(mdev, &status);
+
+ return err;
+}
+
+static int __devinit mthca_init_hca(struct mthca_dev *mdev)
+{
+ if (mdev->hca_type == ARBEL_NATIVE)
+ return mthca_init_arbel(mdev);
+ else
+ return mthca_init_tavor(mdev);
+}
+
+static int __devinit mthca_setup_hca(struct mthca_dev *dev)
+{
+ int err;
+ u8 status;
+
+ MTHCA_INIT_DOORBELL_LOCK(&dev->doorbell_lock);
+
+ err = mthca_init_uar_table(dev);
+ if (err) {
+ mthca_err(dev, "Failed to initialize "
+ "user access region table, aborting.\n");
+ return err;
+ }
+
+ err = mthca_uar_alloc(dev, &dev->driver_uar);
+ if (err) {
+ mthca_err(dev, "Failed to allocate driver access region, "
+ "aborting.\n");
+ goto err_uar_table_free;
+ }
+
+ dev->kar = ioremap(dev->driver_uar.pfn << PAGE_SHIFT, PAGE_SIZE);
+ if (!dev->kar) {
+ mthca_err(dev, "Couldn't map kernel access region, "
+ "aborting.\n");
+ err = -ENOMEM;
+ goto err_uar_free;
+ }
+
+ err = mthca_init_pd_table(dev);
+ if (err) {
+ mthca_err(dev, "Failed to initialize "
+ "protection domain table, aborting.\n");
+ goto err_kar_unmap;
+ }
+
+ err = mthca_init_mr_table(dev);
+ if (err) {
+ mthca_err(dev, "Failed to initialize "
+ "memory region table, aborting.\n");
+ goto err_pd_table_free;
+ }
+
+ err = mthca_pd_alloc(dev, &dev->driver_pd);
+ if (err) {
+ mthca_err(dev, "Failed to create driver PD, "
+ "aborting.\n");
+ goto err_mr_table_free;
+ }
+
+ err = mthca_init_eq_table(dev);
+ if (err) {
+ mthca_err(dev, "Failed to initialize "
+ "event queue table, aborting.\n");
+ goto err_pd_free;
+ }
+
+ err = mthca_cmd_use_events(dev);
+ if (err) {
+ mthca_err(dev, "Failed to switch to event-driven "
+ "firmware commands, aborting.\n");
+ goto err_eq_table_free;
+ }
+
+ err = mthca_NOP(dev, &status);
+ if (err || status) {
+ mthca_err(dev, "NOP command failed to generate interrupt, aborting.\n");
+ if (dev->mthca_flags & (MTHCA_FLAG_MSI | MTHCA_FLAG_MSI_X))
+ mthca_err(dev, "Try again with MSI/MSI-X disabled.\n");
+ else
+ mthca_err(dev, "BIOS or ACPI interrupt routing problem?\n");
+
+ goto err_cmd_poll;
+ }
+
+ mthca_dbg(dev, "NOP command IRQ test passed\n");
+
+ err = mthca_init_cq_table(dev);
+ if (err) {
+ mthca_err(dev, "Failed to initialize "
+ "completion queue table, aborting.\n");
+ goto err_cmd_poll;
+ }
+
+ err = mthca_init_qp_table(dev);
+ if (err) {
+ mthca_err(dev, "Failed to initialize "
+ "queue pair table, aborting.\n");
+ goto err_cq_table_free;
+ }
+
+ err = mthca_init_av_table(dev);
+ if (err) {
+ mthca_err(dev, "Failed to initialize "
+ "address vector table, aborting.\n");
+ goto err_qp_table_free;
+ }
+
+ err = mthca_init_mcg_table(dev);
+ if (err) {
+ mthca_err(dev, "Failed to initialize "
+ "multicast group table, aborting.\n");
+ goto err_av_table_free;
+ }
+
+ return 0;
+
+err_av_table_free:
+ mthca_cleanup_av_table(dev);
+
+err_qp_table_free:
+ mthca_cleanup_qp_table(dev);
+
+err_cq_table_free:
+ mthca_cleanup_cq_table(dev);
+
+err_cmd_poll:
+ mthca_cmd_use_polling(dev);
+
+err_eq_table_free:
+ mthca_cleanup_eq_table(dev);
+
+err_pd_free:
+ mthca_pd_free(dev, &dev->driver_pd);
+
+err_mr_table_free:
+ mthca_cleanup_mr_table(dev);
+
+err_pd_table_free:
+ mthca_cleanup_pd_table(dev);
+
+err_kar_unmap:
+ iounmap(dev->kar);
+
+err_uar_free:
+ mthca_uar_free(dev, &dev->driver_uar);
+
+err_uar_table_free:
+ mthca_cleanup_uar_table(dev);
+ return err;
+}
+
+static int __devinit mthca_request_regions(struct pci_dev *pdev,
+ int ddr_hidden)
+{
+ int err;
+
+ /*
+ * We can't just use pci_request_regions() because the MSI-X
+ * table is right in the middle of the first BAR. If we did
+ * pci_request_region and grab all of the first BAR, then
+ * setting up MSI-X would fail, since the PCI core wants to do
+ * request_mem_region on the MSI-X vector table.
+ *
+ * So just request what we need right now, and request any
+ * other regions we need when setting up EQs.
+ */
+ if (!request_mem_region(pci_resource_start(pdev, 0) + MTHCA_HCR_BASE,
+ MTHCA_HCR_SIZE, DRV_NAME))
+ return -EBUSY;
+
+ err = pci_request_region(pdev, 2, DRV_NAME);
+ if (err)
+ goto err_bar2_failed;
+
+ if (!ddr_hidden) {
+ err = pci_request_region(pdev, 4, DRV_NAME);
+ if (err)
+ goto err_bar4_failed;
+ }
+
+ return 0;
+
+err_bar4_failed:
+ pci_release_region(pdev, 2);
+
+err_bar2_failed:
+ release_mem_region(pci_resource_start(pdev, 0) + MTHCA_HCR_BASE,
+ MTHCA_HCR_SIZE);
+
+ return err;
+}
+
+static void mthca_release_regions(struct pci_dev *pdev,
+ int ddr_hidden)
+{
+ if (!ddr_hidden)
+ pci_release_region(pdev, 4);
+
+ pci_release_region(pdev, 2);
+
+ release_mem_region(pci_resource_start(pdev, 0) + MTHCA_HCR_BASE,
+ MTHCA_HCR_SIZE);
+}
+
+static int __devinit mthca_enable_msi_x(struct mthca_dev *mdev)
+{
+ struct msix_entry entries[3];
+ int err;
+
+ entries[0].entry = 0;
+ entries[1].entry = 1;
+ entries[2].entry = 2;
+
+ err = pci_enable_msix(mdev->pdev, entries, ARRAY_SIZE(entries));
+ if (err) {
+ if (err > 0)
+ mthca_info(mdev, "Only %d MSI-X vectors available, "
+ "not using MSI-X\n", err);
+ return err;
+ }
+
+ mdev->eq_table.eq[MTHCA_EQ_COMP ].msi_x_vector = entries[0].vector;
+ mdev->eq_table.eq[MTHCA_EQ_ASYNC].msi_x_vector = entries[1].vector;
+ mdev->eq_table.eq[MTHCA_EQ_CMD ].msi_x_vector = entries[2].vector;
+
+ return 0;
+}
+
+static void mthca_close_hca(struct mthca_dev *mdev)
+{
+ u8 status;
+
+ mthca_CLOSE_HCA(mdev, 0, &status);
+
+ if (mdev->hca_type == ARBEL_NATIVE) {
+ mthca_free_icm_table(mdev, mdev->cq_table.table);
+ mthca_free_icm_table(mdev, mdev->qp_table.eqp_table);
+ mthca_free_icm_table(mdev, mdev->qp_table.qp_table);
+ mthca_free_icm_table(mdev, mdev->mr_table.mpt_table);
+ mthca_free_icm_table(mdev, mdev->mr_table.mtt_table);
+ mthca_unmap_eq_icm(mdev);
+
+ mthca_UNMAP_ICM_AUX(mdev, &status);
+ mthca_free_icm(mdev, mdev->fw.arbel.aux_icm);
+
+ mthca_UNMAP_FA(mdev, &status);
+ mthca_free_icm(mdev, mdev->fw.arbel.fw_icm);
+
+ if (!(mdev->mthca_flags & MTHCA_FLAG_NO_LAM))
+ mthca_DISABLE_LAM(mdev, &status);
+ } else
+ mthca_SYS_DIS(mdev, &status);
+}
+
+static int __devinit mthca_init_one(struct pci_dev *pdev,
+ const struct pci_device_id *id)
+{
+ static int mthca_version_printed = 0;
+ static int mthca_memfree_warned = 0;
+ int ddr_hidden = 0;
+ int err;
+ struct mthca_dev *mdev;
+
+ if (!mthca_version_printed) {
+ printk(KERN_INFO "%s", mthca_version);
+ ++mthca_version_printed;
+ }
+
+ printk(KERN_INFO PFX "Initializing %s (%s)\n",
+ pci_pretty_name(pdev), pci_name(pdev));
+
+ err = pci_enable_device(pdev);
+ if (err) {
+ dev_err(&pdev->dev, "Cannot enable PCI device, "
+ "aborting.\n");
+ return err;
+ }
+
+ /*
+ * Check for BARs. We expect 0: 1MB, 2: 8MB, 4: DDR (may not
+ * be present)
+ */
+ if (!(pci_resource_flags(pdev, 0) & IORESOURCE_MEM) ||
+ pci_resource_len(pdev, 0) != 1 << 20) {
+ dev_err(&pdev->dev, "Missing DCS, aborting.");
+ err = -ENODEV;
+ goto err_disable_pdev;
+ }
+ if (!(pci_resource_flags(pdev, 2) & IORESOURCE_MEM) ||
+ pci_resource_len(pdev, 2) != 1 << 23) {
+ dev_err(&pdev->dev, "Missing UAR, aborting.");
+ err = -ENODEV;
+ goto err_disable_pdev;
+ }
+ if (!(pci_resource_flags(pdev, 4) & IORESOURCE_MEM))
+ ddr_hidden = 1;
+
+ err = mthca_request_regions(pdev, ddr_hidden);
+ if (err) {
+ dev_err(&pdev->dev, "Cannot obtain PCI resources, "
+ "aborting.\n");
+ goto err_disable_pdev;
+ }
+
+ pci_set_master(pdev);
+
+ err = pci_set_dma_mask(pdev, DMA_64BIT_MASK);
+ if (err) {
+ dev_warn(&pdev->dev, "Warning: couldn't set 64-bit PCI DMA mask.\n");
+ err = pci_set_dma_mask(pdev, DMA_32BIT_MASK);
+ if (err) {
+ dev_err(&pdev->dev, "Can't set PCI DMA mask, aborting.\n");
+ goto err_free_res;
+ }
+ }
+ err = pci_set_consistent_dma_mask(pdev, DMA_64BIT_MASK);
+ if (err) {
+ dev_warn(&pdev->dev, "Warning: couldn't set 64-bit "
+ "consistent PCI DMA mask.\n");
+ err = pci_set_consistent_dma_mask(pdev, DMA_32BIT_MASK);
+ if (err) {
+ dev_err(&pdev->dev, "Can't set consistent PCI DMA mask, "
+ "aborting.\n");
+ goto err_free_res;
+ }
+ }
+
+ mdev = (struct mthca_dev *) ib_alloc_device(sizeof *mdev);
+ if (!mdev) {
+ dev_err(&pdev->dev, "Device struct alloc failed, "
+ "aborting.\n");
+ err = -ENOMEM;
+ goto err_free_res;
+ }
+
+ mdev->pdev = pdev;
+ mdev->hca_type = id->driver_data;
+
+ if (mdev->hca_type == ARBEL_NATIVE && !mthca_memfree_warned++)
+ mthca_warn(mdev, "Warning: native MT25208 mode support is incomplete. "
+ "Your HCA may not work properly.\n");
+
+ if (ddr_hidden)
+ mdev->mthca_flags |= MTHCA_FLAG_DDR_HIDDEN;
+
+ /*
+ * Now reset the HCA before we touch the PCI capabilities or
+ * attempt a firmware command, since a boot ROM may have left
+ * the HCA in an undefined state.
+ */
+ err = mthca_reset(mdev);
+ if (err) {
+ mthca_err(mdev, "Failed to reset HCA, aborting.\n");
+ goto err_free_dev;
+ }
+
+ if (msi_x && !mthca_enable_msi_x(mdev))
+ mdev->mthca_flags |= MTHCA_FLAG_MSI_X;
+ if (msi && !(mdev->mthca_flags & MTHCA_FLAG_MSI_X) &&
+ !pci_enable_msi(pdev))
+ mdev->mthca_flags |= MTHCA_FLAG_MSI;
+
+ sema_init(&mdev->cmd.hcr_sem, 1);
+ sema_init(&mdev->cmd.poll_sem, 1);
+ mdev->cmd.use_events = 0;
+
+ mdev->hcr = ioremap(pci_resource_start(pdev, 0) + MTHCA_HCR_BASE, MTHCA_HCR_SIZE);
+ if (!mdev->hcr) {
+ mthca_err(mdev, "Couldn't map command register, "
+ "aborting.\n");
+ err = -ENOMEM;
+ goto err_free_dev;
+ }
+
+ err = mthca_tune_pci(mdev);
+ if (err)
+ goto err_iounmap;
+
+ err = mthca_init_hca(mdev);
+ if (err)
+ goto err_iounmap;
+
+ err = mthca_setup_hca(mdev);
+ if (err)
+ goto err_close;
+
+ err = mthca_register_device(mdev);
+ if (err)
+ goto err_cleanup;
+
+ err = mthca_create_agents(mdev);
+ if (err)
+ goto err_unregister;
+
+ pci_set_drvdata(pdev, mdev);
+
+ return 0;
+
+err_unregister:
+ mthca_unregister_device(mdev);
+
+err_cleanup:
+ mthca_cleanup_mcg_table(mdev);
+ mthca_cleanup_av_table(mdev);
+ mthca_cleanup_qp_table(mdev);
+ mthca_cleanup_cq_table(mdev);
+ mthca_cmd_use_polling(mdev);
+ mthca_cleanup_eq_table(mdev);
+
+ mthca_pd_free(mdev, &mdev->driver_pd);
+
+ mthca_cleanup_mr_table(mdev);
+ mthca_cleanup_pd_table(mdev);
+ mthca_cleanup_uar_table(mdev);
+
+err_close:
+ mthca_close_hca(mdev);
+
+err_iounmap:
+ iounmap(mdev->hcr);
+
+err_free_dev:
+ if (mdev->mthca_flags & MTHCA_FLAG_MSI_X)
+ pci_disable_msix(pdev);
+ if (mdev->mthca_flags & MTHCA_FLAG_MSI)
+ pci_disable_msi(pdev);
+
+ ib_dealloc_device(&mdev->ib_dev);
+
+err_free_res:
+ mthca_release_regions(pdev, ddr_hidden);
+
+err_disable_pdev:
+ pci_disable_device(pdev);
+ pci_set_drvdata(pdev, NULL);
+ return err;
+}
+
+static void __devexit mthca_remove_one(struct pci_dev *pdev)
+{
+ struct mthca_dev *mdev = pci_get_drvdata(pdev);
+ u8 status;
+ int p;
+
+ if (mdev) {
+ mthca_free_agents(mdev);
+ mthca_unregister_device(mdev);
+
+ for (p = 1; p <= mdev->limits.num_ports; ++p)
+ mthca_CLOSE_IB(mdev, p, &status);
+
+ mthca_cleanup_mcg_table(mdev);
+ mthca_cleanup_av_table(mdev);
+ mthca_cleanup_qp_table(mdev);
+ mthca_cleanup_cq_table(mdev);
+ mthca_cmd_use_polling(mdev);
+ mthca_cleanup_eq_table(mdev);
+
+ mthca_pd_free(mdev, &mdev->driver_pd);
+
+ mthca_cleanup_mr_table(mdev);
+ mthca_cleanup_pd_table(mdev);
+
+ iounmap(mdev->kar);
+ mthca_uar_free(mdev, &mdev->driver_uar);
+ mthca_cleanup_uar_table(mdev);
+
+ mthca_close_hca(mdev);
+
+ iounmap(mdev->hcr);
+
+ if (mdev->mthca_flags & MTHCA_FLAG_MSI_X)
+ pci_disable_msix(pdev);
+ if (mdev->mthca_flags & MTHCA_FLAG_MSI)
+ pci_disable_msi(pdev);
+
+ ib_dealloc_device(&mdev->ib_dev);
+ mthca_release_regions(pdev, mdev->mthca_flags &
+ MTHCA_FLAG_DDR_HIDDEN);
+ pci_disable_device(pdev);
+ pci_set_drvdata(pdev, NULL);
+ }
+}
+
+static struct pci_device_id mthca_pci_table[] = {
+ { PCI_DEVICE(PCI_VENDOR_ID_MELLANOX, PCI_DEVICE_ID_MELLANOX_TAVOR),
+ .driver_data = TAVOR },
+ { PCI_DEVICE(PCI_VENDOR_ID_TOPSPIN, PCI_DEVICE_ID_MELLANOX_TAVOR),
+ .driver_data = TAVOR },
+ { PCI_DEVICE(PCI_VENDOR_ID_MELLANOX, PCI_DEVICE_ID_MELLANOX_ARBEL_COMPAT),
+ .driver_data = ARBEL_COMPAT },
+ { PCI_DEVICE(PCI_VENDOR_ID_TOPSPIN, PCI_DEVICE_ID_MELLANOX_ARBEL_COMPAT),
+ .driver_data = ARBEL_COMPAT },
+ { PCI_DEVICE(PCI_VENDOR_ID_MELLANOX, PCI_DEVICE_ID_MELLANOX_ARBEL),
+ .driver_data = ARBEL_NATIVE },
+ { PCI_DEVICE(PCI_VENDOR_ID_TOPSPIN, PCI_DEVICE_ID_MELLANOX_ARBEL),
+ .driver_data = ARBEL_NATIVE },
+ { 0, }
+};
+
+MODULE_DEVICE_TABLE(pci, mthca_pci_table);
+
+static struct pci_driver mthca_driver = {
+ .name = "ib_mthca",
+ .id_table = mthca_pci_table,
+ .probe = mthca_init_one,
+ .remove = __devexit_p(mthca_remove_one)
+};
+
+static int __init mthca_init(void)
+{
+ int ret;
+
+ ret = pci_register_driver(&mthca_driver);
+ return ret < 0 ? ret : 0;
+}
+
+static void __exit mthca_cleanup(void)
+{
+ pci_unregister_driver(&mthca_driver);
+}
+
+module_init(mthca_init);
+module_exit(mthca_cleanup);
diff --git a/drivers/infiniband/hw/mthca/mthca_mcg.c b/drivers/infiniband/hw/mthca/mthca_mcg.c
new file mode 100644
index 000000000000..70a6553a588e
--- /dev/null
+++ b/drivers/infiniband/hw/mthca/mthca_mcg.c
@@ -0,0 +1,376 @@
+/*
+ * Copyright (c) 2004 Topspin Communications. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * $Id: mthca_mcg.c 1349 2004-12-16 21:09:43Z roland $
+ */
+
+#include <linux/init.h>
+
+#include "mthca_dev.h"
+#include "mthca_cmd.h"
+
+enum {
+ MTHCA_QP_PER_MGM = 4 * (MTHCA_MGM_ENTRY_SIZE / 16 - 2)
+};
+
+struct mthca_mgm {
+ u32 next_gid_index;
+ u32 reserved[3];
+ u8 gid[16];
+ u32 qp[MTHCA_QP_PER_MGM];
+};
+
+static const u8 zero_gid[16]; /* automatically initialized to 0 */
+
+/*
+ * Caller must hold MCG table semaphore. gid and mgm parameters must
+ * be properly aligned for command interface.
+ *
+ * Returns 0 unless a firmware command error occurs.
+ *
+ * If GID is found in MGM or MGM is empty, *index = *hash, *prev = -1
+ * and *mgm holds MGM entry.
+ *
+ * if GID is found in AMGM, *index = index in AMGM, *prev = index of
+ * previous entry in hash chain and *mgm holds AMGM entry.
+ *
+ * If no AMGM exists for given gid, *index = -1, *prev = index of last
+ * entry in hash chain and *mgm holds end of hash chain.
+ */
+static int find_mgm(struct mthca_dev *dev,
+ u8 *gid, struct mthca_mgm *mgm,
+ u16 *hash, int *prev, int *index)
+{
+ void *mailbox;
+ u8 *mgid;
+ int err;
+ u8 status;
+
+ mailbox = kmalloc(16 + MTHCA_CMD_MAILBOX_EXTRA, GFP_KERNEL);
+ if (!mailbox)
+ return -ENOMEM;
+ mgid = MAILBOX_ALIGN(mailbox);
+
+ memcpy(mgid, gid, 16);
+
+ err = mthca_MGID_HASH(dev, mgid, hash, &status);
+ if (err)
+ goto out;
+ if (status) {
+ mthca_err(dev, "MGID_HASH returned status %02x\n", status);
+ err = -EINVAL;
+ goto out;
+ }
+
+ if (0)
+ mthca_dbg(dev, "Hash for %04x:%04x:%04x:%04x:"
+ "%04x:%04x:%04x:%04x is %04x\n",
+ be16_to_cpu(((u16 *) gid)[0]), be16_to_cpu(((u16 *) gid)[1]),
+ be16_to_cpu(((u16 *) gid)[2]), be16_to_cpu(((u16 *) gid)[3]),
+ be16_to_cpu(((u16 *) gid)[4]), be16_to_cpu(((u16 *) gid)[5]),
+ be16_to_cpu(((u16 *) gid)[6]), be16_to_cpu(((u16 *) gid)[7]),
+ *hash);
+
+ *index = *hash;
+ *prev = -1;
+
+ do {
+ err = mthca_READ_MGM(dev, *index, mgm, &status);
+ if (err)
+ goto out;
+ if (status) {
+ mthca_err(dev, "READ_MGM returned status %02x\n", status);
+ return -EINVAL;
+ }
+
+ if (!memcmp(mgm->gid, zero_gid, 16)) {
+ if (*index != *hash) {
+ mthca_err(dev, "Found zero MGID in AMGM.\n");
+ err = -EINVAL;
+ }
+ goto out;
+ }
+
+ if (!memcmp(mgm->gid, gid, 16))
+ goto out;
+
+ *prev = *index;
+ *index = be32_to_cpu(mgm->next_gid_index) >> 5;
+ } while (*index);
+
+ *index = -1;
+
+ out:
+ kfree(mailbox);
+ return err;
+}
+
+int mthca_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
+{
+ struct mthca_dev *dev = to_mdev(ibqp->device);
+ void *mailbox;
+ struct mthca_mgm *mgm;
+ u16 hash;
+ int index, prev;
+ int link = 0;
+ int i;
+ int err;
+ u8 status;
+
+ mailbox = kmalloc(sizeof *mgm + MTHCA_CMD_MAILBOX_EXTRA, GFP_KERNEL);
+ if (!mailbox)
+ return -ENOMEM;
+ mgm = MAILBOX_ALIGN(mailbox);
+
+ if (down_interruptible(&dev->mcg_table.sem))
+ return -EINTR;
+
+ err = find_mgm(dev, gid->raw, mgm, &hash, &prev, &index);
+ if (err)
+ goto out;
+
+ if (index != -1) {
+ if (!memcmp(mgm->gid, zero_gid, 16))
+ memcpy(mgm->gid, gid->raw, 16);
+ } else {
+ link = 1;
+
+ index = mthca_alloc(&dev->mcg_table.alloc);
+ if (index == -1) {
+ mthca_err(dev, "No AMGM entries left\n");
+ err = -ENOMEM;
+ goto out;
+ }
+
+ err = mthca_READ_MGM(dev, index, mgm, &status);
+ if (err)
+ goto out;
+ if (status) {
+ mthca_err(dev, "READ_MGM returned status %02x\n", status);
+ err = -EINVAL;
+ goto out;
+ }
+
+ memcpy(mgm->gid, gid->raw, 16);
+ mgm->next_gid_index = 0;
+ }
+
+ for (i = 0; i < MTHCA_QP_PER_MGM; ++i)
+ if (!(mgm->qp[i] & cpu_to_be32(1 << 31))) {
+ mgm->qp[i] = cpu_to_be32(ibqp->qp_num | (1 << 31));
+ break;
+ }
+
+ if (i == MTHCA_QP_PER_MGM) {
+ mthca_err(dev, "MGM at index %x is full.\n", index);
+ err = -ENOMEM;
+ goto out;
+ }
+
+ err = mthca_WRITE_MGM(dev, index, mgm, &status);
+ if (err)
+ goto out;
+ if (status) {
+ mthca_err(dev, "WRITE_MGM returned status %02x\n", status);
+ err = -EINVAL;
+ }
+
+ if (!link)
+ goto out;
+
+ err = mthca_READ_MGM(dev, prev, mgm, &status);
+ if (err)
+ goto out;
+ if (status) {
+ mthca_err(dev, "READ_MGM returned status %02x\n", status);
+ err = -EINVAL;
+ goto out;
+ }
+
+ mgm->next_gid_index = cpu_to_be32(index << 5);
+
+ err = mthca_WRITE_MGM(dev, prev, mgm, &status);
+ if (err)
+ goto out;
+ if (status) {
+ mthca_err(dev, "WRITE_MGM returned status %02x\n", status);
+ err = -EINVAL;
+ }
+
+ out:
+ up(&dev->mcg_table.sem);
+ kfree(mailbox);
+ return err;
+}
+
+int mthca_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
+{
+ struct mthca_dev *dev = to_mdev(ibqp->device);
+ void *mailbox;
+ struct mthca_mgm *mgm;
+ u16 hash;
+ int prev, index;
+ int i, loc;
+ int err;
+ u8 status;
+
+ mailbox = kmalloc(sizeof *mgm + MTHCA_CMD_MAILBOX_EXTRA, GFP_KERNEL);
+ if (!mailbox)
+ return -ENOMEM;
+ mgm = MAILBOX_ALIGN(mailbox);
+
+ if (down_interruptible(&dev->mcg_table.sem))
+ return -EINTR;
+
+ err = find_mgm(dev, gid->raw, mgm, &hash, &prev, &index);
+ if (err)
+ goto out;
+
+ if (index == -1) {
+ mthca_err(dev, "MGID %04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x "
+ "not found\n",
+ be16_to_cpu(((u16 *) gid->raw)[0]),
+ be16_to_cpu(((u16 *) gid->raw)[1]),
+ be16_to_cpu(((u16 *) gid->raw)[2]),
+ be16_to_cpu(((u16 *) gid->raw)[3]),
+ be16_to_cpu(((u16 *) gid->raw)[4]),
+ be16_to_cpu(((u16 *) gid->raw)[5]),
+ be16_to_cpu(((u16 *) gid->raw)[6]),
+ be16_to_cpu(((u16 *) gid->raw)[7]));
+ err = -EINVAL;
+ goto out;
+ }
+
+ for (loc = -1, i = 0; i < MTHCA_QP_PER_MGM; ++i) {
+ if (mgm->qp[i] == cpu_to_be32(ibqp->qp_num | (1 << 31)))
+ loc = i;
+ if (!(mgm->qp[i] & cpu_to_be32(1 << 31)))
+ break;
+ }
+
+ if (loc == -1) {
+ mthca_err(dev, "QP %06x not found in MGM\n", ibqp->qp_num);
+ err = -EINVAL;
+ goto out;
+ }
+
+ mgm->qp[loc] = mgm->qp[i - 1];
+ mgm->qp[i - 1] = 0;
+
+ err = mthca_WRITE_MGM(dev, index, mgm, &status);
+ if (err)
+ goto out;
+ if (status) {
+ mthca_err(dev, "WRITE_MGM returned status %02x\n", status);
+ err = -EINVAL;
+ goto out;
+ }
+
+ if (i != 1)
+ goto out;
+
+ goto out;
+
+ if (prev == -1) {
+ /* Remove entry from MGM */
+ if (be32_to_cpu(mgm->next_gid_index) >> 5) {
+ err = mthca_READ_MGM(dev,
+ be32_to_cpu(mgm->next_gid_index) >> 5,
+ mgm, &status);
+ if (err)
+ goto out;
+ if (status) {
+ mthca_err(dev, "READ_MGM returned status %02x\n",
+ status);
+ err = -EINVAL;
+ goto out;
+ }
+ } else
+ memset(mgm->gid, 0, 16);
+
+ err = mthca_WRITE_MGM(dev, index, mgm, &status);
+ if (err)
+ goto out;
+ if (status) {
+ mthca_err(dev, "WRITE_MGM returned status %02x\n", status);
+ err = -EINVAL;
+ goto out;
+ }
+ } else {
+ /* Remove entry from AMGM */
+ index = be32_to_cpu(mgm->next_gid_index) >> 5;
+ err = mthca_READ_MGM(dev, prev, mgm, &status);
+ if (err)
+ goto out;
+ if (status) {
+ mthca_err(dev, "READ_MGM returned status %02x\n", status);
+ err = -EINVAL;
+ goto out;
+ }
+
+ mgm->next_gid_index = cpu_to_be32(index << 5);
+
+ err = mthca_WRITE_MGM(dev, prev, mgm, &status);
+ if (err)
+ goto out;
+ if (status) {
+ mthca_err(dev, "WRITE_MGM returned status %02x\n", status);
+ err = -EINVAL;
+ goto out;
+ }
+ }
+
+ out:
+ up(&dev->mcg_table.sem);
+ kfree(mailbox);
+ return err;
+}
+
+int __devinit mthca_init_mcg_table(struct mthca_dev *dev)
+{
+ int err;
+
+ err = mthca_alloc_init(&dev->mcg_table.alloc,
+ dev->limits.num_amgms,
+ dev->limits.num_amgms - 1,
+ 0);
+ if (err)
+ return err;
+
+ init_MUTEX(&dev->mcg_table.sem);
+
+ return 0;
+}
+
+void __devexit mthca_cleanup_mcg_table(struct mthca_dev *dev)
+{
+ mthca_alloc_cleanup(&dev->mcg_table.alloc);
+}
diff --git a/drivers/infiniband/hw/mthca/mthca_memfree.c b/drivers/infiniband/hw/mthca/mthca_memfree.c
new file mode 100644
index 000000000000..7730b5960616
--- /dev/null
+++ b/drivers/infiniband/hw/mthca/mthca_memfree.c
@@ -0,0 +1,465 @@
+/*
+ * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * $Id$
+ */
+
+#include "mthca_memfree.h"
+#include "mthca_dev.h"
+#include "mthca_cmd.h"
+
+/*
+ * We allocate in as big chunks as we can, up to a maximum of 256 KB
+ * per chunk.
+ */
+enum {
+ MTHCA_ICM_ALLOC_SIZE = 1 << 18,
+ MTHCA_TABLE_CHUNK_SIZE = 1 << 18
+};
+
+void mthca_free_icm(struct mthca_dev *dev, struct mthca_icm *icm)
+{
+ struct mthca_icm_chunk *chunk, *tmp;
+ int i;
+
+ if (!icm)
+ return;
+
+ list_for_each_entry_safe(chunk, tmp, &icm->chunk_list, list) {
+ if (chunk->nsg > 0)
+ pci_unmap_sg(dev->pdev, chunk->mem, chunk->npages,
+ PCI_DMA_BIDIRECTIONAL);
+
+ for (i = 0; i < chunk->npages; ++i)
+ __free_pages(chunk->mem[i].page,
+ get_order(chunk->mem[i].length));
+
+ kfree(chunk);
+ }
+
+ kfree(icm);
+}
+
+struct mthca_icm *mthca_alloc_icm(struct mthca_dev *dev, int npages,
+ unsigned int gfp_mask)
+{
+ struct mthca_icm *icm;
+ struct mthca_icm_chunk *chunk = NULL;
+ int cur_order;
+
+ icm = kmalloc(sizeof *icm, gfp_mask & ~(__GFP_HIGHMEM | __GFP_NOWARN));
+ if (!icm)
+ return icm;
+
+ icm->refcount = 0;
+ INIT_LIST_HEAD(&icm->chunk_list);
+
+ cur_order = get_order(MTHCA_ICM_ALLOC_SIZE);
+
+ while (npages > 0) {
+ if (!chunk) {
+ chunk = kmalloc(sizeof *chunk,
+ gfp_mask & ~(__GFP_HIGHMEM | __GFP_NOWARN));
+ if (!chunk)
+ goto fail;
+
+ chunk->npages = 0;
+ chunk->nsg = 0;
+ list_add_tail(&chunk->list, &icm->chunk_list);
+ }
+
+ while (1 << cur_order > npages)
+ --cur_order;
+
+ chunk->mem[chunk->npages].page = alloc_pages(gfp_mask, cur_order);
+ if (chunk->mem[chunk->npages].page) {
+ chunk->mem[chunk->npages].length = PAGE_SIZE << cur_order;
+ chunk->mem[chunk->npages].offset = 0;
+
+ if (++chunk->npages == MTHCA_ICM_CHUNK_LEN) {
+ chunk->nsg = pci_map_sg(dev->pdev, chunk->mem,
+ chunk->npages,
+ PCI_DMA_BIDIRECTIONAL);
+
+ if (chunk->nsg <= 0)
+ goto fail;
+
+ chunk = NULL;
+ }
+
+ npages -= 1 << cur_order;
+ } else {
+ --cur_order;
+ if (cur_order < 0)
+ goto fail;
+ }
+ }
+
+ if (chunk) {
+ chunk->nsg = pci_map_sg(dev->pdev, chunk->mem,
+ chunk->npages,
+ PCI_DMA_BIDIRECTIONAL);
+
+ if (chunk->nsg <= 0)
+ goto fail;
+ }
+
+ return icm;
+
+fail:
+ mthca_free_icm(dev, icm);
+ return NULL;
+}
+
+int mthca_table_get(struct mthca_dev *dev, struct mthca_icm_table *table, int obj)
+{
+ int i = (obj & (table->num_obj - 1)) * table->obj_size / MTHCA_TABLE_CHUNK_SIZE;
+ int ret = 0;
+ u8 status;
+
+ down(&table->mutex);
+
+ if (table->icm[i]) {
+ ++table->icm[i]->refcount;
+ goto out;
+ }
+
+ table->icm[i] = mthca_alloc_icm(dev, MTHCA_TABLE_CHUNK_SIZE >> PAGE_SHIFT,
+ (table->lowmem ? GFP_KERNEL : GFP_HIGHUSER) |
+ __GFP_NOWARN);
+ if (!table->icm[i]) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ if (mthca_MAP_ICM(dev, table->icm[i], table->virt + i * MTHCA_TABLE_CHUNK_SIZE,
+ &status) || status) {
+ mthca_free_icm(dev, table->icm[i]);
+ table->icm[i] = NULL;
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ ++table->icm[i]->refcount;
+
+out:
+ up(&table->mutex);
+ return ret;
+}
+
+void mthca_table_put(struct mthca_dev *dev, struct mthca_icm_table *table, int obj)
+{
+ int i = (obj & (table->num_obj - 1)) * table->obj_size / MTHCA_TABLE_CHUNK_SIZE;
+ u8 status;
+
+ down(&table->mutex);
+
+ if (--table->icm[i]->refcount == 0) {
+ mthca_UNMAP_ICM(dev, table->virt + i * MTHCA_TABLE_CHUNK_SIZE,
+ MTHCA_TABLE_CHUNK_SIZE >> 12, &status);
+ mthca_free_icm(dev, table->icm[i]);
+ table->icm[i] = NULL;
+ }
+
+ up(&table->mutex);
+}
+
+struct mthca_icm_table *mthca_alloc_icm_table(struct mthca_dev *dev,
+ u64 virt, int obj_size,
+ int nobj, int reserved,
+ int use_lowmem)
+{
+ struct mthca_icm_table *table;
+ int num_icm;
+ int i;
+ u8 status;
+
+ num_icm = obj_size * nobj / MTHCA_TABLE_CHUNK_SIZE;
+
+ table = kmalloc(sizeof *table + num_icm * sizeof *table->icm, GFP_KERNEL);
+ if (!table)
+ return NULL;
+
+ table->virt = virt;
+ table->num_icm = num_icm;
+ table->num_obj = nobj;
+ table->obj_size = obj_size;
+ table->lowmem = use_lowmem;
+ init_MUTEX(&table->mutex);
+
+ for (i = 0; i < num_icm; ++i)
+ table->icm[i] = NULL;
+
+ for (i = 0; i * MTHCA_TABLE_CHUNK_SIZE < reserved * obj_size; ++i) {
+ table->icm[i] = mthca_alloc_icm(dev, MTHCA_TABLE_CHUNK_SIZE >> PAGE_SHIFT,
+ (use_lowmem ? GFP_KERNEL : GFP_HIGHUSER) |
+ __GFP_NOWARN);
+ if (!table->icm[i])
+ goto err;
+ if (mthca_MAP_ICM(dev, table->icm[i], virt + i * MTHCA_TABLE_CHUNK_SIZE,
+ &status) || status) {
+ mthca_free_icm(dev, table->icm[i]);
+ table->icm[i] = NULL;
+ goto err;
+ }
+
+ /*
+ * Add a reference to this ICM chunk so that it never
+ * gets freed (since it contains reserved firmware objects).
+ */
+ ++table->icm[i]->refcount;
+ }
+
+ return table;
+
+err:
+ for (i = 0; i < num_icm; ++i)
+ if (table->icm[i]) {
+ mthca_UNMAP_ICM(dev, virt + i * MTHCA_TABLE_CHUNK_SIZE,
+ MTHCA_TABLE_CHUNK_SIZE >> 12, &status);
+ mthca_free_icm(dev, table->icm[i]);
+ }
+
+ kfree(table);
+
+ return NULL;
+}
+
+void mthca_free_icm_table(struct mthca_dev *dev, struct mthca_icm_table *table)
+{
+ int i;
+ u8 status;
+
+ for (i = 0; i < table->num_icm; ++i)
+ if (table->icm[i]) {
+ mthca_UNMAP_ICM(dev, table->virt + i * MTHCA_TABLE_CHUNK_SIZE,
+ MTHCA_TABLE_CHUNK_SIZE >> 12, &status);
+ mthca_free_icm(dev, table->icm[i]);
+ }
+
+ kfree(table);
+}
+
+static u64 mthca_uarc_virt(struct mthca_dev *dev, int page)
+{
+ return dev->uar_table.uarc_base +
+ dev->driver_uar.index * dev->uar_table.uarc_size +
+ page * 4096;
+}
+
+int mthca_alloc_db(struct mthca_dev *dev, int type, u32 qn, u32 **db)
+{
+ int group;
+ int start, end, dir;
+ int i, j;
+ struct mthca_db_page *page;
+ int ret = 0;
+ u8 status;
+
+ down(&dev->db_tab->mutex);
+
+ switch (type) {
+ case MTHCA_DB_TYPE_CQ_ARM:
+ case MTHCA_DB_TYPE_SQ:
+ group = 0;
+ start = 0;
+ end = dev->db_tab->max_group1;
+ dir = 1;
+ break;
+
+ case MTHCA_DB_TYPE_CQ_SET_CI:
+ case MTHCA_DB_TYPE_RQ:
+ case MTHCA_DB_TYPE_SRQ:
+ group = 1;
+ start = dev->db_tab->npages - 1;
+ end = dev->db_tab->min_group2;
+ dir = -1;
+ break;
+
+ default:
+ return -1;
+ }
+
+ for (i = start; i != end; i += dir)
+ if (dev->db_tab->page[i].db_rec &&
+ !bitmap_full(dev->db_tab->page[i].used,
+ MTHCA_DB_REC_PER_PAGE)) {
+ page = dev->db_tab->page + i;
+ goto found;
+ }
+
+ if (dev->db_tab->max_group1 >= dev->db_tab->min_group2 - 1) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ page = dev->db_tab->page + end;
+ page->db_rec = dma_alloc_coherent(&dev->pdev->dev, 4096,
+ &page->mapping, GFP_KERNEL);
+ if (!page->db_rec) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ memset(page->db_rec, 0, 4096);
+
+ ret = mthca_MAP_ICM_page(dev, page->mapping, mthca_uarc_virt(dev, i), &status);
+ if (!ret && status)
+ ret = -EINVAL;
+ if (ret) {
+ dma_free_coherent(&dev->pdev->dev, 4096,
+ page->db_rec, page->mapping);
+ goto out;
+ }
+
+ bitmap_zero(page->used, MTHCA_DB_REC_PER_PAGE);
+ if (group == 0)
+ ++dev->db_tab->max_group1;
+ else
+ --dev->db_tab->min_group2;
+
+found:
+ j = find_first_zero_bit(page->used, MTHCA_DB_REC_PER_PAGE);
+ set_bit(j, page->used);
+
+ if (group == 1)
+ j = MTHCA_DB_REC_PER_PAGE - 1 - j;
+
+ ret = i * MTHCA_DB_REC_PER_PAGE + j;
+
+ page->db_rec[j] = cpu_to_be64((qn << 8) | (type << 5));
+
+ *db = (u32 *) &page->db_rec[j];
+
+out:
+ up(&dev->db_tab->mutex);
+
+ return ret;
+}
+
+void mthca_free_db(struct mthca_dev *dev, int type, int db_index)
+{
+ int i, j;
+ struct mthca_db_page *page;
+ u8 status;
+
+ i = db_index / MTHCA_DB_REC_PER_PAGE;
+ j = db_index % MTHCA_DB_REC_PER_PAGE;
+
+ page = dev->db_tab->page + i;
+
+ down(&dev->db_tab->mutex);
+
+ page->db_rec[j] = 0;
+ if (i >= dev->db_tab->min_group2)
+ j = MTHCA_DB_REC_PER_PAGE - 1 - j;
+ clear_bit(j, page->used);
+
+ if (bitmap_empty(page->used, MTHCA_DB_REC_PER_PAGE) &&
+ i >= dev->db_tab->max_group1 - 1) {
+ mthca_UNMAP_ICM(dev, mthca_uarc_virt(dev, i), 1, &status);
+
+ dma_free_coherent(&dev->pdev->dev, 4096,
+ page->db_rec, page->mapping);
+ page->db_rec = NULL;
+
+ if (i == dev->db_tab->max_group1) {
+ --dev->db_tab->max_group1;
+ /* XXX may be able to unmap more pages now */
+ }
+ if (i == dev->db_tab->min_group2)
+ ++dev->db_tab->min_group2;
+ }
+
+ up(&dev->db_tab->mutex);
+}
+
+int mthca_init_db_tab(struct mthca_dev *dev)
+{
+ int i;
+
+ if (dev->hca_type != ARBEL_NATIVE)
+ return 0;
+
+ dev->db_tab = kmalloc(sizeof *dev->db_tab, GFP_KERNEL);
+ if (!dev->db_tab)
+ return -ENOMEM;
+
+ init_MUTEX(&dev->db_tab->mutex);
+
+ dev->db_tab->npages = dev->uar_table.uarc_size / PAGE_SIZE;
+ dev->db_tab->max_group1 = 0;
+ dev->db_tab->min_group2 = dev->db_tab->npages - 1;
+
+ dev->db_tab->page = kmalloc(dev->db_tab->npages *
+ sizeof *dev->db_tab->page,
+ GFP_KERNEL);
+ if (!dev->db_tab->page) {
+ kfree(dev->db_tab);
+ return -ENOMEM;
+ }
+
+ for (i = 0; i < dev->db_tab->npages; ++i)
+ dev->db_tab->page[i].db_rec = NULL;
+
+ return 0;
+}
+
+void mthca_cleanup_db_tab(struct mthca_dev *dev)
+{
+ int i;
+ u8 status;
+
+ if (dev->hca_type != ARBEL_NATIVE)
+ return;
+
+ /*
+ * Because we don't always free our UARC pages when they
+ * become empty to make mthca_free_db() simpler we need to
+ * make a sweep through the doorbell pages and free any
+ * leftover pages now.
+ */
+ for (i = 0; i < dev->db_tab->npages; ++i) {
+ if (!dev->db_tab->page[i].db_rec)
+ continue;
+
+ if (!bitmap_empty(dev->db_tab->page[i].used, MTHCA_DB_REC_PER_PAGE))
+ mthca_warn(dev, "Kernel UARC page %d not empty\n", i);
+
+ mthca_UNMAP_ICM(dev, mthca_uarc_virt(dev, i), 1, &status);
+
+ dma_free_coherent(&dev->pdev->dev, 4096,
+ dev->db_tab->page[i].db_rec,
+ dev->db_tab->page[i].mapping);
+ }
+
+ kfree(dev->db_tab->page);
+ kfree(dev->db_tab);
+}
diff --git a/drivers/infiniband/hw/mthca/mthca_memfree.h b/drivers/infiniband/hw/mthca/mthca_memfree.h
new file mode 100644
index 000000000000..a8fa97e140f5
--- /dev/null
+++ b/drivers/infiniband/hw/mthca/mthca_memfree.h
@@ -0,0 +1,161 @@
+/*
+ * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * $Id$
+ */
+
+#ifndef MTHCA_MEMFREE_H
+#define MTHCA_MEMFREE_H
+
+#include <linux/list.h>
+#include <linux/pci.h>
+
+#include <asm/semaphore.h>
+
+#define MTHCA_ICM_CHUNK_LEN \
+ ((256 - sizeof (struct list_head) - 2 * sizeof (int)) / \
+ (sizeof (struct scatterlist)))
+
+struct mthca_icm_chunk {
+ struct list_head list;
+ int npages;
+ int nsg;
+ struct scatterlist mem[MTHCA_ICM_CHUNK_LEN];
+};
+
+struct mthca_icm {
+ struct list_head chunk_list;
+ int refcount;
+};
+
+struct mthca_icm_table {
+ u64 virt;
+ int num_icm;
+ int num_obj;
+ int obj_size;
+ int lowmem;
+ struct semaphore mutex;
+ struct mthca_icm *icm[0];
+};
+
+struct mthca_icm_iter {
+ struct mthca_icm *icm;
+ struct mthca_icm_chunk *chunk;
+ int page_idx;
+};
+
+struct mthca_dev;
+
+struct mthca_icm *mthca_alloc_icm(struct mthca_dev *dev, int npages,
+ unsigned int gfp_mask);
+void mthca_free_icm(struct mthca_dev *dev, struct mthca_icm *icm);
+
+struct mthca_icm_table *mthca_alloc_icm_table(struct mthca_dev *dev,
+ u64 virt, int obj_size,
+ int nobj, int reserved,
+ int use_lowmem);
+void mthca_free_icm_table(struct mthca_dev *dev, struct mthca_icm_table *table);
+int mthca_table_get(struct mthca_dev *dev, struct mthca_icm_table *table, int obj);
+void mthca_table_put(struct mthca_dev *dev, struct mthca_icm_table *table, int obj);
+
+static inline void mthca_icm_first(struct mthca_icm *icm,
+ struct mthca_icm_iter *iter)
+{
+ iter->icm = icm;
+ iter->chunk = list_empty(&icm->chunk_list) ?
+ NULL : list_entry(icm->chunk_list.next,
+ struct mthca_icm_chunk, list);
+ iter->page_idx = 0;
+}
+
+static inline int mthca_icm_last(struct mthca_icm_iter *iter)
+{
+ return !iter->chunk;
+}
+
+static inline void mthca_icm_next(struct mthca_icm_iter *iter)
+{
+ if (++iter->page_idx >= iter->chunk->nsg) {
+ if (iter->chunk->list.next == &iter->icm->chunk_list) {
+ iter->chunk = NULL;
+ return;
+ }
+
+ iter->chunk = list_entry(iter->chunk->list.next,
+ struct mthca_icm_chunk, list);
+ iter->page_idx = 0;
+ }
+}
+
+static inline dma_addr_t mthca_icm_addr(struct mthca_icm_iter *iter)
+{
+ return sg_dma_address(&iter->chunk->mem[iter->page_idx]);
+}
+
+static inline unsigned long mthca_icm_size(struct mthca_icm_iter *iter)
+{
+ return sg_dma_len(&iter->chunk->mem[iter->page_idx]);
+}
+
+enum {
+ MTHCA_DB_REC_PER_PAGE = 4096 / 8
+};
+
+struct mthca_db_page {
+ DECLARE_BITMAP(used, MTHCA_DB_REC_PER_PAGE);
+ u64 *db_rec;
+ dma_addr_t mapping;
+};
+
+struct mthca_db_table {
+ int npages;
+ int max_group1;
+ int min_group2;
+ struct mthca_db_page *page;
+ struct semaphore mutex;
+};
+
+enum {
+ MTHCA_DB_TYPE_INVALID = 0x0,
+ MTHCA_DB_TYPE_CQ_SET_CI = 0x1,
+ MTHCA_DB_TYPE_CQ_ARM = 0x2,
+ MTHCA_DB_TYPE_SQ = 0x3,
+ MTHCA_DB_TYPE_RQ = 0x4,
+ MTHCA_DB_TYPE_SRQ = 0x5,
+ MTHCA_DB_TYPE_GROUP_SEP = 0x7
+};
+
+int mthca_init_db_tab(struct mthca_dev *dev);
+void mthca_cleanup_db_tab(struct mthca_dev *dev);
+int mthca_alloc_db(struct mthca_dev *dev, int type, u32 qn, u32 **db);
+void mthca_free_db(struct mthca_dev *dev, int type, int db_index);
+
+#endif /* MTHCA_MEMFREE_H */
diff --git a/drivers/infiniband/hw/mthca/mthca_mr.c b/drivers/infiniband/hw/mthca/mthca_mr.c
new file mode 100644
index 000000000000..80a0cd97881b
--- /dev/null
+++ b/drivers/infiniband/hw/mthca/mthca_mr.c
@@ -0,0 +1,416 @@
+/*
+ * Copyright (c) 2004 Topspin Communications. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * $Id: mthca_mr.c 1349 2004-12-16 21:09:43Z roland $
+ */
+
+#include <linux/slab.h>
+#include <linux/init.h>
+#include <linux/errno.h>
+
+#include "mthca_dev.h"
+#include "mthca_cmd.h"
+
+/*
+ * Must be packed because mtt_seg is 64 bits but only aligned to 32 bits.
+ */
+struct mthca_mpt_entry {
+ u32 flags;
+ u32 page_size;
+ u32 key;
+ u32 pd;
+ u64 start;
+ u64 length;
+ u32 lkey;
+ u32 window_count;
+ u32 window_count_limit;
+ u64 mtt_seg;
+ u32 mtt_sz; /* Arbel only */
+ u32 reserved[2];
+} __attribute__((packed));
+
+#define MTHCA_MPT_FLAG_SW_OWNS (0xfUL << 28)
+#define MTHCA_MPT_FLAG_MIO (1 << 17)
+#define MTHCA_MPT_FLAG_BIND_ENABLE (1 << 15)
+#define MTHCA_MPT_FLAG_PHYSICAL (1 << 9)
+#define MTHCA_MPT_FLAG_REGION (1 << 8)
+
+#define MTHCA_MTT_FLAG_PRESENT 1
+
+/*
+ * Buddy allocator for MTT segments (currently not very efficient
+ * since it doesn't keep a free list and just searches linearly
+ * through the bitmaps)
+ */
+
+static u32 mthca_alloc_mtt(struct mthca_dev *dev, int order)
+{
+ int o;
+ int m;
+ u32 seg;
+
+ spin_lock(&dev->mr_table.mpt_alloc.lock);
+
+ for (o = order; o <= dev->mr_table.max_mtt_order; ++o) {
+ m = 1 << (dev->mr_table.max_mtt_order - o);
+ seg = find_first_bit(dev->mr_table.mtt_buddy[o], m);
+ if (seg < m)
+ goto found;
+ }
+
+ spin_unlock(&dev->mr_table.mpt_alloc.lock);
+ return -1;
+
+ found:
+ clear_bit(seg, dev->mr_table.mtt_buddy[o]);
+
+ while (o > order) {
+ --o;
+ seg <<= 1;
+ set_bit(seg ^ 1, dev->mr_table.mtt_buddy[o]);
+ }
+
+ spin_unlock(&dev->mr_table.mpt_alloc.lock);
+
+ seg <<= order;
+
+ return seg;
+}
+
+static void mthca_free_mtt(struct mthca_dev *dev, u32 seg, int order)
+{
+ seg >>= order;
+
+ spin_lock(&dev->mr_table.mpt_alloc.lock);
+
+ while (test_bit(seg ^ 1, dev->mr_table.mtt_buddy[order])) {
+ clear_bit(seg ^ 1, dev->mr_table.mtt_buddy[order]);
+ seg >>= 1;
+ ++order;
+ }
+
+ set_bit(seg, dev->mr_table.mtt_buddy[order]);
+
+ spin_unlock(&dev->mr_table.mpt_alloc.lock);
+}
+
+static inline u32 hw_index_to_key(struct mthca_dev *dev, u32 ind)
+{
+ if (dev->hca_type == ARBEL_NATIVE)
+ return (ind >> 24) | (ind << 8);
+ else
+ return ind;
+}
+
+static inline u32 key_to_hw_index(struct mthca_dev *dev, u32 key)
+{
+ if (dev->hca_type == ARBEL_NATIVE)
+ return (key << 24) | (key >> 8);
+ else
+ return key;
+}
+
+int mthca_mr_alloc_notrans(struct mthca_dev *dev, u32 pd,
+ u32 access, struct mthca_mr *mr)
+{
+ void *mailbox;
+ struct mthca_mpt_entry *mpt_entry;
+ u32 key;
+ int err;
+ u8 status;
+
+ might_sleep();
+
+ mr->order = -1;
+ key = mthca_alloc(&dev->mr_table.mpt_alloc);
+ if (key == -1)
+ return -ENOMEM;
+ mr->ibmr.rkey = mr->ibmr.lkey = hw_index_to_key(dev, key);
+
+ mailbox = kmalloc(sizeof *mpt_entry + MTHCA_CMD_MAILBOX_EXTRA,
+ GFP_KERNEL);
+ if (!mailbox) {
+ mthca_free(&dev->mr_table.mpt_alloc, mr->ibmr.lkey);
+ return -ENOMEM;
+ }
+ mpt_entry = MAILBOX_ALIGN(mailbox);
+
+ mpt_entry->flags = cpu_to_be32(MTHCA_MPT_FLAG_SW_OWNS |
+ MTHCA_MPT_FLAG_MIO |
+ MTHCA_MPT_FLAG_PHYSICAL |
+ MTHCA_MPT_FLAG_REGION |
+ access);
+ mpt_entry->page_size = 0;
+ mpt_entry->key = cpu_to_be32(key);
+ mpt_entry->pd = cpu_to_be32(pd);
+ mpt_entry->start = 0;
+ mpt_entry->length = ~0ULL;
+
+ memset(&mpt_entry->lkey, 0,
+ sizeof *mpt_entry - offsetof(struct mthca_mpt_entry, lkey));
+
+ err = mthca_SW2HW_MPT(dev, mpt_entry,
+ key & (dev->limits.num_mpts - 1),
+ &status);
+ if (err)
+ mthca_warn(dev, "SW2HW_MPT failed (%d)\n", err);
+ else if (status) {
+ mthca_warn(dev, "SW2HW_MPT returned status 0x%02x\n",
+ status);
+ err = -EINVAL;
+ }
+
+ kfree(mailbox);
+ return err;
+}
+
+int mthca_mr_alloc_phys(struct mthca_dev *dev, u32 pd,
+ u64 *buffer_list, int buffer_size_shift,
+ int list_len, u64 iova, u64 total_size,
+ u32 access, struct mthca_mr *mr)
+{
+ void *mailbox;
+ u64 *mtt_entry;
+ struct mthca_mpt_entry *mpt_entry;
+ u32 key;
+ int err = -ENOMEM;
+ u8 status;
+ int i;
+
+ might_sleep();
+ WARN_ON(buffer_size_shift >= 32);
+
+ key = mthca_alloc(&dev->mr_table.mpt_alloc);
+ if (key == -1)
+ return -ENOMEM;
+ mr->ibmr.rkey = mr->ibmr.lkey = hw_index_to_key(dev, key);
+
+ for (i = dev->limits.mtt_seg_size / 8, mr->order = 0;
+ i < list_len;
+ i <<= 1, ++mr->order)
+ ; /* nothing */
+
+ mr->first_seg = mthca_alloc_mtt(dev, mr->order);
+ if (mr->first_seg == -1)
+ goto err_out_mpt_free;
+
+ /*
+ * If list_len is odd, we add one more dummy entry for
+ * firmware efficiency.
+ */
+ mailbox = kmalloc(max(sizeof *mpt_entry,
+ (size_t) 8 * (list_len + (list_len & 1) + 2)) +
+ MTHCA_CMD_MAILBOX_EXTRA,
+ GFP_KERNEL);
+ if (!mailbox)
+ goto err_out_free_mtt;
+
+ mtt_entry = MAILBOX_ALIGN(mailbox);
+
+ mtt_entry[0] = cpu_to_be64(dev->mr_table.mtt_base +
+ mr->first_seg * dev->limits.mtt_seg_size);
+ mtt_entry[1] = 0;
+ for (i = 0; i < list_len; ++i)
+ mtt_entry[i + 2] = cpu_to_be64(buffer_list[i] |
+ MTHCA_MTT_FLAG_PRESENT);
+ if (list_len & 1) {
+ mtt_entry[i + 2] = 0;
+ ++list_len;
+ }
+
+ if (0) {
+ mthca_dbg(dev, "Dumping MPT entry\n");
+ for (i = 0; i < list_len + 2; ++i)
+ printk(KERN_ERR "[%2d] %016llx\n",
+ i, (unsigned long long) be64_to_cpu(mtt_entry[i]));
+ }
+
+ err = mthca_WRITE_MTT(dev, mtt_entry, list_len, &status);
+ if (err) {
+ mthca_warn(dev, "WRITE_MTT failed (%d)\n", err);
+ goto err_out_mailbox_free;
+ }
+ if (status) {
+ mthca_warn(dev, "WRITE_MTT returned status 0x%02x\n",
+ status);
+ err = -EINVAL;
+ goto err_out_mailbox_free;
+ }
+
+ mpt_entry = MAILBOX_ALIGN(mailbox);
+
+ mpt_entry->flags = cpu_to_be32(MTHCA_MPT_FLAG_SW_OWNS |
+ MTHCA_MPT_FLAG_MIO |
+ MTHCA_MPT_FLAG_REGION |
+ access);
+
+ mpt_entry->page_size = cpu_to_be32(buffer_size_shift - 12);
+ mpt_entry->key = cpu_to_be32(key);
+ mpt_entry->pd = cpu_to_be32(pd);
+ mpt_entry->start = cpu_to_be64(iova);
+ mpt_entry->length = cpu_to_be64(total_size);
+ memset(&mpt_entry->lkey, 0,
+ sizeof *mpt_entry - offsetof(struct mthca_mpt_entry, lkey));
+ mpt_entry->mtt_seg = cpu_to_be64(dev->mr_table.mtt_base +
+ mr->first_seg * dev->limits.mtt_seg_size);
+
+ if (0) {
+ mthca_dbg(dev, "Dumping MPT entry %08x:\n", mr->ibmr.lkey);
+ for (i = 0; i < sizeof (struct mthca_mpt_entry) / 4; ++i) {
+ if (i % 4 == 0)
+ printk("[%02x] ", i * 4);
+ printk(" %08x", be32_to_cpu(((u32 *) mpt_entry)[i]));
+ if ((i + 1) % 4 == 0)
+ printk("\n");
+ }
+ }
+
+ err = mthca_SW2HW_MPT(dev, mpt_entry,
+ key & (dev->limits.num_mpts - 1),
+ &status);
+ if (err)
+ mthca_warn(dev, "SW2HW_MPT failed (%d)\n", err);
+ else if (status) {
+ mthca_warn(dev, "SW2HW_MPT returned status 0x%02x\n",
+ status);
+ err = -EINVAL;
+ }
+
+ kfree(mailbox);
+ return err;
+
+ err_out_mailbox_free:
+ kfree(mailbox);
+
+ err_out_free_mtt:
+ mthca_free_mtt(dev, mr->first_seg, mr->order);
+
+ err_out_mpt_free:
+ mthca_free(&dev->mr_table.mpt_alloc, mr->ibmr.lkey);
+ return err;
+}
+
+void mthca_free_mr(struct mthca_dev *dev, struct mthca_mr *mr)
+{
+ int err;
+ u8 status;
+
+ might_sleep();
+
+ err = mthca_HW2SW_MPT(dev, NULL,
+ key_to_hw_index(dev, mr->ibmr.lkey) &
+ (dev->limits.num_mpts - 1),
+ &status);
+ if (err)
+ mthca_warn(dev, "HW2SW_MPT failed (%d)\n", err);
+ else if (status)
+ mthca_warn(dev, "HW2SW_MPT returned status 0x%02x\n",
+ status);
+
+ if (mr->order >= 0)
+ mthca_free_mtt(dev, mr->first_seg, mr->order);
+
+ mthca_free(&dev->mr_table.mpt_alloc, key_to_hw_index(dev, mr->ibmr.lkey));
+}
+
+int __devinit mthca_init_mr_table(struct mthca_dev *dev)
+{
+ int err;
+ int i, s;
+
+ err = mthca_alloc_init(&dev->mr_table.mpt_alloc,
+ dev->limits.num_mpts,
+ ~0, dev->limits.reserved_mrws);
+ if (err)
+ return err;
+
+ err = -ENOMEM;
+
+ for (i = 1, dev->mr_table.max_mtt_order = 0;
+ i < dev->limits.num_mtt_segs;
+ i <<= 1, ++dev->mr_table.max_mtt_order)
+ ; /* nothing */
+
+ dev->mr_table.mtt_buddy = kmalloc((dev->mr_table.max_mtt_order + 1) *
+ sizeof (long *),
+ GFP_KERNEL);
+ if (!dev->mr_table.mtt_buddy)
+ goto err_out;
+
+ for (i = 0; i <= dev->mr_table.max_mtt_order; ++i)
+ dev->mr_table.mtt_buddy[i] = NULL;
+
+ for (i = 0; i <= dev->mr_table.max_mtt_order; ++i) {
+ s = BITS_TO_LONGS(1 << (dev->mr_table.max_mtt_order - i));
+ dev->mr_table.mtt_buddy[i] = kmalloc(s * sizeof (long),
+ GFP_KERNEL);
+ if (!dev->mr_table.mtt_buddy[i])
+ goto err_out_free;
+ bitmap_zero(dev->mr_table.mtt_buddy[i],
+ 1 << (dev->mr_table.max_mtt_order - i));
+ }
+
+ set_bit(0, dev->mr_table.mtt_buddy[dev->mr_table.max_mtt_order]);
+
+ for (i = 0; i < dev->mr_table.max_mtt_order; ++i)
+ if (1 << i >= dev->limits.reserved_mtts)
+ break;
+
+ if (i == dev->mr_table.max_mtt_order) {
+ mthca_err(dev, "MTT table of order %d is "
+ "too small.\n", i);
+ goto err_out_free;
+ }
+
+ (void) mthca_alloc_mtt(dev, i);
+
+ return 0;
+
+ err_out_free:
+ for (i = 0; i <= dev->mr_table.max_mtt_order; ++i)
+ kfree(dev->mr_table.mtt_buddy[i]);
+
+ err_out:
+ mthca_alloc_cleanup(&dev->mr_table.mpt_alloc);
+
+ return err;
+}
+
+void __devexit mthca_cleanup_mr_table(struct mthca_dev *dev)
+{
+ int i;
+
+ /* XXX check if any MRs are still allocated? */
+ for (i = 0; i <= dev->mr_table.max_mtt_order; ++i)
+ kfree(dev->mr_table.mtt_buddy[i]);
+ kfree(dev->mr_table.mtt_buddy);
+ mthca_alloc_cleanup(&dev->mr_table.mpt_alloc);
+}
diff --git a/drivers/infiniband/hw/mthca/mthca_pd.c b/drivers/infiniband/hw/mthca/mthca_pd.c
new file mode 100644
index 000000000000..ea66847e4ea3
--- /dev/null
+++ b/drivers/infiniband/hw/mthca/mthca_pd.c
@@ -0,0 +1,80 @@
+/*
+ * Copyright (c) 2004 Topspin Communications. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * $Id: mthca_pd.c 1349 2004-12-16 21:09:43Z roland $
+ */
+
+#include <linux/init.h>
+#include <linux/errno.h>
+
+#include "mthca_dev.h"
+
+int mthca_pd_alloc(struct mthca_dev *dev, struct mthca_pd *pd)
+{
+ int err;
+
+ might_sleep();
+
+ atomic_set(&pd->sqp_count, 0);
+ pd->pd_num = mthca_alloc(&dev->pd_table.alloc);
+ if (pd->pd_num == -1)
+ return -ENOMEM;
+
+ err = mthca_mr_alloc_notrans(dev, pd->pd_num,
+ MTHCA_MPT_FLAG_LOCAL_READ |
+ MTHCA_MPT_FLAG_LOCAL_WRITE,
+ &pd->ntmr);
+ if (err)
+ mthca_free(&dev->pd_table.alloc, pd->pd_num);
+
+ return err;
+}
+
+void mthca_pd_free(struct mthca_dev *dev, struct mthca_pd *pd)
+{
+ might_sleep();
+ mthca_free_mr(dev, &pd->ntmr);
+ mthca_free(&dev->pd_table.alloc, pd->pd_num);
+}
+
+int __devinit mthca_init_pd_table(struct mthca_dev *dev)
+{
+ return mthca_alloc_init(&dev->pd_table.alloc,
+ dev->limits.num_pds,
+ (1 << 24) - 1,
+ dev->limits.reserved_pds);
+}
+
+void __devexit mthca_cleanup_pd_table(struct mthca_dev *dev)
+{
+ /* XXX check if any PDs are still allocated? */
+ mthca_alloc_cleanup(&dev->pd_table.alloc);
+}
diff --git a/drivers/infiniband/hw/mthca/mthca_profile.c b/drivers/infiniband/hw/mthca/mthca_profile.c
new file mode 100644
index 000000000000..7881a8a919ca
--- /dev/null
+++ b/drivers/infiniband/hw/mthca/mthca_profile.c
@@ -0,0 +1,266 @@
+/*
+ * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * $Id: mthca_profile.c 1349 2004-12-16 21:09:43Z roland $
+ */
+
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+
+#include "mthca_profile.h"
+
+enum {
+ MTHCA_RES_QP,
+ MTHCA_RES_EEC,
+ MTHCA_RES_SRQ,
+ MTHCA_RES_CQ,
+ MTHCA_RES_EQP,
+ MTHCA_RES_EEEC,
+ MTHCA_RES_EQ,
+ MTHCA_RES_RDB,
+ MTHCA_RES_MCG,
+ MTHCA_RES_MPT,
+ MTHCA_RES_MTT,
+ MTHCA_RES_UAR,
+ MTHCA_RES_UDAV,
+ MTHCA_RES_UARC,
+ MTHCA_RES_NUM
+};
+
+enum {
+ MTHCA_NUM_EQS = 32,
+ MTHCA_NUM_PDS = 1 << 15
+};
+
+u64 mthca_make_profile(struct mthca_dev *dev,
+ struct mthca_profile *request,
+ struct mthca_dev_lim *dev_lim,
+ struct mthca_init_hca_param *init_hca)
+{
+ struct mthca_resource {
+ u64 size;
+ u64 start;
+ int type;
+ int num;
+ int log_num;
+ };
+
+ u64 mem_base, mem_avail;
+ u64 total_size = 0;
+ struct mthca_resource *profile;
+ struct mthca_resource tmp;
+ int i, j;
+
+ profile = kmalloc(MTHCA_RES_NUM * sizeof *profile, GFP_KERNEL);
+ if (!profile)
+ return -ENOMEM;
+
+ memset(profile, 0, MTHCA_RES_NUM * sizeof *profile);
+
+ profile[MTHCA_RES_QP].size = dev_lim->qpc_entry_sz;
+ profile[MTHCA_RES_EEC].size = dev_lim->eec_entry_sz;
+ profile[MTHCA_RES_SRQ].size = dev_lim->srq_entry_sz;
+ profile[MTHCA_RES_CQ].size = dev_lim->cqc_entry_sz;
+ profile[MTHCA_RES_EQP].size = dev_lim->eqpc_entry_sz;
+ profile[MTHCA_RES_EEEC].size = dev_lim->eeec_entry_sz;
+ profile[MTHCA_RES_EQ].size = dev_lim->eqc_entry_sz;
+ profile[MTHCA_RES_RDB].size = MTHCA_RDB_ENTRY_SIZE;
+ profile[MTHCA_RES_MCG].size = MTHCA_MGM_ENTRY_SIZE;
+ profile[MTHCA_RES_MPT].size = dev_lim->mpt_entry_sz;
+ profile[MTHCA_RES_MTT].size = dev_lim->mtt_seg_sz;
+ profile[MTHCA_RES_UAR].size = dev_lim->uar_scratch_entry_sz;
+ profile[MTHCA_RES_UDAV].size = MTHCA_AV_SIZE;
+ profile[MTHCA_RES_UARC].size = request->uarc_size;
+
+ profile[MTHCA_RES_QP].num = request->num_qp;
+ profile[MTHCA_RES_EQP].num = request->num_qp;
+ profile[MTHCA_RES_RDB].num = request->num_qp * request->rdb_per_qp;
+ profile[MTHCA_RES_CQ].num = request->num_cq;
+ profile[MTHCA_RES_EQ].num = MTHCA_NUM_EQS;
+ profile[MTHCA_RES_MCG].num = request->num_mcg;
+ profile[MTHCA_RES_MPT].num = request->num_mpt;
+ profile[MTHCA_RES_MTT].num = request->num_mtt;
+ profile[MTHCA_RES_UAR].num = request->num_uar;
+ profile[MTHCA_RES_UARC].num = request->num_uar;
+ profile[MTHCA_RES_UDAV].num = request->num_udav;
+
+ for (i = 0; i < MTHCA_RES_NUM; ++i) {
+ profile[i].type = i;
+ profile[i].log_num = max(ffs(profile[i].num) - 1, 0);
+ profile[i].size *= profile[i].num;
+ if (dev->hca_type == ARBEL_NATIVE)
+ profile[i].size = max(profile[i].size, (u64) PAGE_SIZE);
+ }
+
+ if (dev->hca_type == ARBEL_NATIVE) {
+ mem_base = 0;
+ mem_avail = dev_lim->hca.arbel.max_icm_sz;
+ } else {
+ mem_base = dev->ddr_start;
+ mem_avail = dev->fw.tavor.fw_start - dev->ddr_start;
+ }
+
+ /*
+ * Sort the resources in decreasing order of size. Since they
+ * all have sizes that are powers of 2, we'll be able to keep
+ * resources aligned to their size and pack them without gaps
+ * using the sorted order.
+ */
+ for (i = MTHCA_RES_NUM; i > 0; --i)
+ for (j = 1; j < i; ++j) {
+ if (profile[j].size > profile[j - 1].size) {
+ tmp = profile[j];
+ profile[j] = profile[j - 1];
+ profile[j - 1] = tmp;
+ }
+ }
+
+ for (i = 0; i < MTHCA_RES_NUM; ++i) {
+ if (profile[i].size) {
+ profile[i].start = mem_base + total_size;
+ total_size += profile[i].size;
+ }
+ if (total_size > mem_avail) {
+ mthca_err(dev, "Profile requires 0x%llx bytes; "
+ "won't in 0x%llx bytes of context memory.\n",
+ (unsigned long long) total_size,
+ (unsigned long long) mem_avail);
+ kfree(profile);
+ return -ENOMEM;
+ }
+
+ if (profile[i].size)
+ mthca_dbg(dev, "profile[%2d]--%2d/%2d @ 0x%16llx "
+ "(size 0x%8llx)\n",
+ i, profile[i].type, profile[i].log_num,
+ (unsigned long long) profile[i].start,
+ (unsigned long long) profile[i].size);
+ }
+
+ if (dev->hca_type == ARBEL_NATIVE)
+ mthca_dbg(dev, "HCA context memory: reserving %d KB\n",
+ (int) (total_size >> 10));
+ else
+ mthca_dbg(dev, "HCA memory: allocated %d KB/%d KB (%d KB free)\n",
+ (int) (total_size >> 10), (int) (mem_avail >> 10),
+ (int) ((mem_avail - total_size) >> 10));
+
+ for (i = 0; i < MTHCA_RES_NUM; ++i) {
+ switch (profile[i].type) {
+ case MTHCA_RES_QP:
+ dev->limits.num_qps = profile[i].num;
+ init_hca->qpc_base = profile[i].start;
+ init_hca->log_num_qps = profile[i].log_num;
+ break;
+ case MTHCA_RES_EEC:
+ dev->limits.num_eecs = profile[i].num;
+ init_hca->eec_base = profile[i].start;
+ init_hca->log_num_eecs = profile[i].log_num;
+ break;
+ case MTHCA_RES_SRQ:
+ dev->limits.num_srqs = profile[i].num;
+ init_hca->srqc_base = profile[i].start;
+ init_hca->log_num_srqs = profile[i].log_num;
+ break;
+ case MTHCA_RES_CQ:
+ dev->limits.num_cqs = profile[i].num;
+ init_hca->cqc_base = profile[i].start;
+ init_hca->log_num_cqs = profile[i].log_num;
+ break;
+ case MTHCA_RES_EQP:
+ init_hca->eqpc_base = profile[i].start;
+ break;
+ case MTHCA_RES_EEEC:
+ init_hca->eeec_base = profile[i].start;
+ break;
+ case MTHCA_RES_EQ:
+ dev->limits.num_eqs = profile[i].num;
+ init_hca->eqc_base = profile[i].start;
+ init_hca->log_num_eqs = profile[i].log_num;
+ break;
+ case MTHCA_RES_RDB:
+ for (dev->qp_table.rdb_shift = 0;
+ profile[MTHCA_RES_QP].num << dev->qp_table.rdb_shift <
+ profile[i].num;
+ ++dev->qp_table.rdb_shift)
+ ; /* nothing */
+ dev->qp_table.rdb_base = (u32) profile[i].start;
+ init_hca->rdb_base = profile[i].start;
+ break;
+ case MTHCA_RES_MCG:
+ dev->limits.num_mgms = profile[i].num >> 1;
+ dev->limits.num_amgms = profile[i].num >> 1;
+ init_hca->mc_base = profile[i].start;
+ init_hca->log_mc_entry_sz = ffs(MTHCA_MGM_ENTRY_SIZE) - 1;
+ init_hca->log_mc_table_sz = profile[i].log_num;
+ init_hca->mc_hash_sz = 1 << (profile[i].log_num - 1);
+ break;
+ case MTHCA_RES_MPT:
+ dev->limits.num_mpts = profile[i].num;
+ init_hca->mpt_base = profile[i].start;
+ init_hca->log_mpt_sz = profile[i].log_num;
+ break;
+ case MTHCA_RES_MTT:
+ dev->limits.num_mtt_segs = profile[i].num;
+ dev->limits.mtt_seg_size = dev_lim->mtt_seg_sz;
+ dev->mr_table.mtt_base = profile[i].start;
+ init_hca->mtt_base = profile[i].start;
+ init_hca->mtt_seg_sz = ffs(dev_lim->mtt_seg_sz) - 7;
+ break;
+ case MTHCA_RES_UAR:
+ dev->limits.num_uars = profile[i].num;
+ init_hca->uar_scratch_base = profile[i].start;
+ break;
+ case MTHCA_RES_UDAV:
+ dev->av_table.ddr_av_base = profile[i].start;
+ dev->av_table.num_ddr_avs = profile[i].num;
+ break;
+ case MTHCA_RES_UARC:
+ dev->uar_table.uarc_size = request->uarc_size;
+ dev->uar_table.uarc_base = profile[i].start;
+ init_hca->uarc_base = profile[i].start;
+ init_hca->log_uarc_sz = ffs(request->uarc_size) - 13;
+ init_hca->log_uar_sz = ffs(request->num_uar) - 1;
+ break;
+ default:
+ break;
+ }
+ }
+
+ /*
+ * PDs don't take any HCA memory, but we assign them as part
+ * of the HCA profile anyway.
+ */
+ dev->limits.num_pds = MTHCA_NUM_PDS;
+
+ kfree(profile);
+ return total_size;
+}
diff --git a/drivers/infiniband/hw/mthca/mthca_profile.h b/drivers/infiniband/hw/mthca/mthca_profile.h
new file mode 100644
index 000000000000..daaf7999486c
--- /dev/null
+++ b/drivers/infiniband/hw/mthca/mthca_profile.h
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * $Id: mthca_profile.h 1349 2004-12-16 21:09:43Z roland $
+ */
+
+#ifndef MTHCA_PROFILE_H
+#define MTHCA_PROFILE_H
+
+#include "mthca_dev.h"
+#include "mthca_cmd.h"
+
+struct mthca_profile {
+ int num_qp;
+ int rdb_per_qp;
+ int num_cq;
+ int num_mcg;
+ int num_mpt;
+ int num_mtt;
+ int num_udav;
+ int num_uar;
+ int uarc_size;
+};
+
+u64 mthca_make_profile(struct mthca_dev *mdev,
+ struct mthca_profile *request,
+ struct mthca_dev_lim *dev_lim,
+ struct mthca_init_hca_param *init_hca);
+
+#endif /* MTHCA_PROFILE_H */
diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c
new file mode 100644
index 000000000000..bbf74cf43343
--- /dev/null
+++ b/drivers/infiniband/hw/mthca/mthca_provider.c
@@ -0,0 +1,660 @@
+/*
+ * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * $Id: mthca_provider.c 1397 2004-12-28 05:09:00Z roland $
+ */
+
+#include <ib_smi.h>
+
+#include "mthca_dev.h"
+#include "mthca_cmd.h"
+
+static int mthca_query_device(struct ib_device *ibdev,
+ struct ib_device_attr *props)
+{
+ struct ib_smp *in_mad = NULL;
+ struct ib_smp *out_mad = NULL;
+ int err = -ENOMEM;
+ struct mthca_dev* mdev = to_mdev(ibdev);
+
+ u8 status;
+
+ in_mad = kmalloc(sizeof *in_mad, GFP_KERNEL);
+ out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
+ if (!in_mad || !out_mad)
+ goto out;
+
+ props->fw_ver = mdev->fw_ver;
+
+ memset(in_mad, 0, sizeof *in_mad);
+ in_mad->base_version = 1;
+ in_mad->mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED;
+ in_mad->class_version = 1;
+ in_mad->method = IB_MGMT_METHOD_GET;
+ in_mad->attr_id = IB_SMP_ATTR_NODE_INFO;
+
+ err = mthca_MAD_IFC(mdev, 1, 1,
+ 1, NULL, NULL, in_mad, out_mad,
+ &status);
+ if (err)
+ goto out;
+ if (status) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ props->device_cap_flags = mdev->device_cap_flags;
+ props->vendor_id = be32_to_cpup((u32 *) (out_mad->data + 36)) &
+ 0xffffff;
+ props->vendor_part_id = be16_to_cpup((u16 *) (out_mad->data + 30));
+ props->hw_ver = be16_to_cpup((u16 *) (out_mad->data + 32));
+ memcpy(&props->sys_image_guid, out_mad->data + 4, 8);
+ memcpy(&props->node_guid, out_mad->data + 12, 8);
+
+ err = 0;
+ out:
+ kfree(in_mad);
+ kfree(out_mad);
+ return err;
+}
+
+static int mthca_query_port(struct ib_device *ibdev,
+ u8 port, struct ib_port_attr *props)
+{
+ struct ib_smp *in_mad = NULL;
+ struct ib_smp *out_mad = NULL;
+ int err = -ENOMEM;
+ u8 status;
+
+ in_mad = kmalloc(sizeof *in_mad, GFP_KERNEL);
+ out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
+ if (!in_mad || !out_mad)
+ goto out;
+
+ memset(in_mad, 0, sizeof *in_mad);
+ in_mad->base_version = 1;
+ in_mad->mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED;
+ in_mad->class_version = 1;
+ in_mad->method = IB_MGMT_METHOD_GET;
+ in_mad->attr_id = IB_SMP_ATTR_PORT_INFO;
+ in_mad->attr_mod = cpu_to_be32(port);
+
+ err = mthca_MAD_IFC(to_mdev(ibdev), 1, 1,
+ port, NULL, NULL, in_mad, out_mad,
+ &status);
+ if (err)
+ goto out;
+ if (status) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ props->lid = be16_to_cpup((u16 *) (out_mad->data + 16));
+ props->lmc = out_mad->data[34] & 0x7;
+ props->sm_lid = be16_to_cpup((u16 *) (out_mad->data + 18));
+ props->sm_sl = out_mad->data[36] & 0xf;
+ props->state = out_mad->data[32] & 0xf;
+ props->phys_state = out_mad->data[33] >> 4;
+ props->port_cap_flags = be32_to_cpup((u32 *) (out_mad->data + 20));
+ props->gid_tbl_len = to_mdev(ibdev)->limits.gid_table_len;
+ props->pkey_tbl_len = to_mdev(ibdev)->limits.pkey_table_len;
+ props->qkey_viol_cntr = be16_to_cpup((u16 *) (out_mad->data + 48));
+ props->active_width = out_mad->data[31] & 0xf;
+ props->active_speed = out_mad->data[35] >> 4;
+
+ out:
+ kfree(in_mad);
+ kfree(out_mad);
+ return err;
+}
+
+static int mthca_modify_port(struct ib_device *ibdev,
+ u8 port, int port_modify_mask,
+ struct ib_port_modify *props)
+{
+ struct mthca_set_ib_param set_ib;
+ struct ib_port_attr attr;
+ int err;
+ u8 status;
+
+ if (down_interruptible(&to_mdev(ibdev)->cap_mask_mutex))
+ return -ERESTARTSYS;
+
+ err = mthca_query_port(ibdev, port, &attr);
+ if (err)
+ goto out;
+
+ set_ib.set_si_guid = 0;
+ set_ib.reset_qkey_viol = !!(port_modify_mask & IB_PORT_RESET_QKEY_CNTR);
+
+ set_ib.cap_mask = (attr.port_cap_flags | props->set_port_cap_mask) &
+ ~props->clr_port_cap_mask;
+
+ err = mthca_SET_IB(to_mdev(ibdev), &set_ib, port, &status);
+ if (err)
+ goto out;
+ if (status) {
+ err = -EINVAL;
+ goto out;
+ }
+
+out:
+ up(&to_mdev(ibdev)->cap_mask_mutex);
+ return err;
+}
+
+static int mthca_query_pkey(struct ib_device *ibdev,
+ u8 port, u16 index, u16 *pkey)
+{
+ struct ib_smp *in_mad = NULL;
+ struct ib_smp *out_mad = NULL;
+ int err = -ENOMEM;
+ u8 status;
+
+ in_mad = kmalloc(sizeof *in_mad, GFP_KERNEL);
+ out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
+ if (!in_mad || !out_mad)
+ goto out;
+
+ memset(in_mad, 0, sizeof *in_mad);
+ in_mad->base_version = 1;
+ in_mad->mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED;
+ in_mad->class_version = 1;
+ in_mad->method = IB_MGMT_METHOD_GET;
+ in_mad->attr_id = IB_SMP_ATTR_PKEY_TABLE;
+ in_mad->attr_mod = cpu_to_be32(index / 32);
+
+ err = mthca_MAD_IFC(to_mdev(ibdev), 1, 1,
+ port, NULL, NULL, in_mad, out_mad,
+ &status);
+ if (err)
+ goto out;
+ if (status) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ *pkey = be16_to_cpu(((u16 *) out_mad->data)[index % 32]);
+
+ out:
+ kfree(in_mad);
+ kfree(out_mad);
+ return err;
+}
+
+static int mthca_query_gid(struct ib_device *ibdev, u8 port,
+ int index, union ib_gid *gid)
+{
+ struct ib_smp *in_mad = NULL;
+ struct ib_smp *out_mad = NULL;
+ int err = -ENOMEM;
+ u8 status;
+
+ in_mad = kmalloc(sizeof *in_mad, GFP_KERNEL);
+ out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
+ if (!in_mad || !out_mad)
+ goto out;
+
+ memset(in_mad, 0, sizeof *in_mad);
+ in_mad->base_version = 1;
+ in_mad->mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED;
+ in_mad->class_version = 1;
+ in_mad->method = IB_MGMT_METHOD_GET;
+ in_mad->attr_id = IB_SMP_ATTR_PORT_INFO;
+ in_mad->attr_mod = cpu_to_be32(port);
+
+ err = mthca_MAD_IFC(to_mdev(ibdev), 1, 1,
+ port, NULL, NULL, in_mad, out_mad,
+ &status);
+ if (err)
+ goto out;
+ if (status) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ memcpy(gid->raw, out_mad->data + 8, 8);
+
+ memset(in_mad, 0, sizeof *in_mad);
+ in_mad->base_version = 1;
+ in_mad->mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED;
+ in_mad->class_version = 1;
+ in_mad->method = IB_MGMT_METHOD_GET;
+ in_mad->attr_id = IB_SMP_ATTR_GUID_INFO;
+ in_mad->attr_mod = cpu_to_be32(index / 8);
+
+ err = mthca_MAD_IFC(to_mdev(ibdev), 1, 1,
+ port, NULL, NULL, in_mad, out_mad,
+ &status);
+ if (err)
+ goto out;
+ if (status) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ memcpy(gid->raw + 8, out_mad->data + (index % 8) * 16, 8);
+
+ out:
+ kfree(in_mad);
+ kfree(out_mad);
+ return err;
+}
+
+static struct ib_pd *mthca_alloc_pd(struct ib_device *ibdev)
+{
+ struct mthca_pd *pd;
+ int err;
+
+ pd = kmalloc(sizeof *pd, GFP_KERNEL);
+ if (!pd)
+ return ERR_PTR(-ENOMEM);
+
+ err = mthca_pd_alloc(to_mdev(ibdev), pd);
+ if (err) {
+ kfree(pd);
+ return ERR_PTR(err);
+ }
+
+ return &pd->ibpd;
+}
+
+static int mthca_dealloc_pd(struct ib_pd *pd)
+{
+ mthca_pd_free(to_mdev(pd->device), to_mpd(pd));
+ kfree(pd);
+
+ return 0;
+}
+
+static struct ib_ah *mthca_ah_create(struct ib_pd *pd,
+ struct ib_ah_attr *ah_attr)
+{
+ int err;
+ struct mthca_ah *ah;
+
+ ah = kmalloc(sizeof *ah, GFP_KERNEL);
+ if (!ah)
+ return ERR_PTR(-ENOMEM);
+
+ err = mthca_create_ah(to_mdev(pd->device), to_mpd(pd), ah_attr, ah);
+ if (err) {
+ kfree(ah);
+ return ERR_PTR(err);
+ }
+
+ return &ah->ibah;
+}
+
+static int mthca_ah_destroy(struct ib_ah *ah)
+{
+ mthca_destroy_ah(to_mdev(ah->device), to_mah(ah));
+ kfree(ah);
+
+ return 0;
+}
+
+static struct ib_qp *mthca_create_qp(struct ib_pd *pd,
+ struct ib_qp_init_attr *init_attr)
+{
+ struct mthca_qp *qp;
+ int err;
+
+ switch (init_attr->qp_type) {
+ case IB_QPT_RC:
+ case IB_QPT_UC:
+ case IB_QPT_UD:
+ {
+ qp = kmalloc(sizeof *qp, GFP_KERNEL);
+ if (!qp)
+ return ERR_PTR(-ENOMEM);
+
+ qp->sq.max = init_attr->cap.max_send_wr;
+ qp->rq.max = init_attr->cap.max_recv_wr;
+ qp->sq.max_gs = init_attr->cap.max_send_sge;
+ qp->rq.max_gs = init_attr->cap.max_recv_sge;
+
+ err = mthca_alloc_qp(to_mdev(pd->device), to_mpd(pd),
+ to_mcq(init_attr->send_cq),
+ to_mcq(init_attr->recv_cq),
+ init_attr->qp_type, init_attr->sq_sig_type,
+ qp);
+ qp->ibqp.qp_num = qp->qpn;
+ break;
+ }
+ case IB_QPT_SMI:
+ case IB_QPT_GSI:
+ {
+ qp = kmalloc(sizeof (struct mthca_sqp), GFP_KERNEL);
+ if (!qp)
+ return ERR_PTR(-ENOMEM);
+
+ qp->sq.max = init_attr->cap.max_send_wr;
+ qp->rq.max = init_attr->cap.max_recv_wr;
+ qp->sq.max_gs = init_attr->cap.max_send_sge;
+ qp->rq.max_gs = init_attr->cap.max_recv_sge;
+
+ qp->ibqp.qp_num = init_attr->qp_type == IB_QPT_SMI ? 0 : 1;
+
+ err = mthca_alloc_sqp(to_mdev(pd->device), to_mpd(pd),
+ to_mcq(init_attr->send_cq),
+ to_mcq(init_attr->recv_cq),
+ init_attr->sq_sig_type,
+ qp->ibqp.qp_num, init_attr->port_num,
+ to_msqp(qp));
+ break;
+ }
+ default:
+ /* Don't support raw QPs */
+ return ERR_PTR(-ENOSYS);
+ }
+
+ if (err) {
+ kfree(qp);
+ return ERR_PTR(err);
+ }
+
+ init_attr->cap.max_inline_data = 0;
+
+ return &qp->ibqp;
+}
+
+static int mthca_destroy_qp(struct ib_qp *qp)
+{
+ mthca_free_qp(to_mdev(qp->device), to_mqp(qp));
+ kfree(qp);
+ return 0;
+}
+
+static struct ib_cq *mthca_create_cq(struct ib_device *ibdev, int entries)
+{
+ struct mthca_cq *cq;
+ int nent;
+ int err;
+
+ cq = kmalloc(sizeof *cq, GFP_KERNEL);
+ if (!cq)
+ return ERR_PTR(-ENOMEM);
+
+ for (nent = 1; nent <= entries; nent <<= 1)
+ ; /* nothing */
+
+ err = mthca_init_cq(to_mdev(ibdev), nent, cq);
+ if (err) {
+ kfree(cq);
+ cq = ERR_PTR(err);
+ }
+
+ return &cq->ibcq;
+}
+
+static int mthca_destroy_cq(struct ib_cq *cq)
+{
+ mthca_free_cq(to_mdev(cq->device), to_mcq(cq));
+ kfree(cq);
+
+ return 0;
+}
+
+static inline u32 convert_access(int acc)
+{
+ return (acc & IB_ACCESS_REMOTE_ATOMIC ? MTHCA_MPT_FLAG_ATOMIC : 0) |
+ (acc & IB_ACCESS_REMOTE_WRITE ? MTHCA_MPT_FLAG_REMOTE_WRITE : 0) |
+ (acc & IB_ACCESS_REMOTE_READ ? MTHCA_MPT_FLAG_REMOTE_READ : 0) |
+ (acc & IB_ACCESS_LOCAL_WRITE ? MTHCA_MPT_FLAG_LOCAL_WRITE : 0) |
+ MTHCA_MPT_FLAG_LOCAL_READ;
+}
+
+static struct ib_mr *mthca_get_dma_mr(struct ib_pd *pd, int acc)
+{
+ struct mthca_mr *mr;
+ int err;
+
+ mr = kmalloc(sizeof *mr, GFP_KERNEL);
+ if (!mr)
+ return ERR_PTR(-ENOMEM);
+
+ err = mthca_mr_alloc_notrans(to_mdev(pd->device),
+ to_mpd(pd)->pd_num,
+ convert_access(acc), mr);
+
+ if (err) {
+ kfree(mr);
+ return ERR_PTR(err);
+ }
+
+ return &mr->ibmr;
+}
+
+static struct ib_mr *mthca_reg_phys_mr(struct ib_pd *pd,
+ struct ib_phys_buf *buffer_list,
+ int num_phys_buf,
+ int acc,
+ u64 *iova_start)
+{
+ struct mthca_mr *mr;
+ u64 *page_list;
+ u64 total_size;
+ u64 mask;
+ int shift;
+ int npages;
+ int err;
+ int i, j, n;
+
+ /* First check that we have enough alignment */
+ if ((*iova_start & ~PAGE_MASK) != (buffer_list[0].addr & ~PAGE_MASK))
+ return ERR_PTR(-EINVAL);
+
+ if (num_phys_buf > 1 &&
+ ((buffer_list[0].addr + buffer_list[0].size) & ~PAGE_MASK))
+ return ERR_PTR(-EINVAL);
+
+ mask = 0;
+ total_size = 0;
+ for (i = 0; i < num_phys_buf; ++i) {
+ if (buffer_list[i].addr & ~PAGE_MASK)
+ return ERR_PTR(-EINVAL);
+ if (i != 0 && i != num_phys_buf - 1 &&
+ (buffer_list[i].size & ~PAGE_MASK))
+ return ERR_PTR(-EINVAL);
+
+ total_size += buffer_list[i].size;
+ if (i > 0)
+ mask |= buffer_list[i].addr;
+ }
+
+ /* Find largest page shift we can use to cover buffers */
+ for (shift = PAGE_SHIFT; shift < 31; ++shift)
+ if (num_phys_buf > 1) {
+ if ((1ULL << shift) & mask)
+ break;
+ } else {
+ if (1ULL << shift >=
+ buffer_list[0].size +
+ (buffer_list[0].addr & ((1ULL << shift) - 1)))
+ break;
+ }
+
+ buffer_list[0].size += buffer_list[0].addr & ((1ULL << shift) - 1);
+ buffer_list[0].addr &= ~0ull << shift;
+
+ mr = kmalloc(sizeof *mr, GFP_KERNEL);
+ if (!mr)
+ return ERR_PTR(-ENOMEM);
+
+ npages = 0;
+ for (i = 0; i < num_phys_buf; ++i)
+ npages += (buffer_list[i].size + (1ULL << shift) - 1) >> shift;
+
+ if (!npages)
+ return &mr->ibmr;
+
+ page_list = kmalloc(npages * sizeof *page_list, GFP_KERNEL);
+ if (!page_list) {
+ kfree(mr);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ n = 0;
+ for (i = 0; i < num_phys_buf; ++i)
+ for (j = 0;
+ j < (buffer_list[i].size + (1ULL << shift) - 1) >> shift;
+ ++j)
+ page_list[n++] = buffer_list[i].addr + ((u64) j << shift);
+
+ mthca_dbg(to_mdev(pd->device), "Registering memory at %llx (iova %llx) "
+ "in PD %x; shift %d, npages %d.\n",
+ (unsigned long long) buffer_list[0].addr,
+ (unsigned long long) *iova_start,
+ to_mpd(pd)->pd_num,
+ shift, npages);
+
+ err = mthca_mr_alloc_phys(to_mdev(pd->device),
+ to_mpd(pd)->pd_num,
+ page_list, shift, npages,
+ *iova_start, total_size,
+ convert_access(acc), mr);
+
+ if (err) {
+ kfree(mr);
+ return ERR_PTR(err);
+ }
+
+ kfree(page_list);
+ return &mr->ibmr;
+}
+
+static int mthca_dereg_mr(struct ib_mr *mr)
+{
+ mthca_free_mr(to_mdev(mr->device), to_mmr(mr));
+ kfree(mr);
+ return 0;
+}
+
+static ssize_t show_rev(struct class_device *cdev, char *buf)
+{
+ struct mthca_dev *dev = container_of(cdev, struct mthca_dev, ib_dev.class_dev);
+ return sprintf(buf, "%x\n", dev->rev_id);
+}
+
+static ssize_t show_fw_ver(struct class_device *cdev, char *buf)
+{
+ struct mthca_dev *dev = container_of(cdev, struct mthca_dev, ib_dev.class_dev);
+ return sprintf(buf, "%x.%x.%x\n", (int) (dev->fw_ver >> 32),
+ (int) (dev->fw_ver >> 16) & 0xffff,
+ (int) dev->fw_ver & 0xffff);
+}
+
+static ssize_t show_hca(struct class_device *cdev, char *buf)
+{
+ struct mthca_dev *dev = container_of(cdev, struct mthca_dev, ib_dev.class_dev);
+ switch (dev->hca_type) {
+ case TAVOR: return sprintf(buf, "MT23108\n");
+ case ARBEL_COMPAT: return sprintf(buf, "MT25208 (MT23108 compat mode)\n");
+ case ARBEL_NATIVE: return sprintf(buf, "MT25208\n");
+ default: return sprintf(buf, "unknown\n");
+ }
+}
+
+static CLASS_DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL);
+static CLASS_DEVICE_ATTR(fw_ver, S_IRUGO, show_fw_ver, NULL);
+static CLASS_DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL);
+
+static struct class_device_attribute *mthca_class_attributes[] = {
+ &class_device_attr_hw_rev,
+ &class_device_attr_fw_ver,
+ &class_device_attr_hca_type
+};
+
+int mthca_register_device(struct mthca_dev *dev)
+{
+ int ret;
+ int i;
+
+ strlcpy(dev->ib_dev.name, "mthca%d", IB_DEVICE_NAME_MAX);
+ dev->ib_dev.node_type = IB_NODE_CA;
+ dev->ib_dev.phys_port_cnt = dev->limits.num_ports;
+ dev->ib_dev.dma_device = &dev->pdev->dev;
+ dev->ib_dev.class_dev.dev = &dev->pdev->dev;
+ dev->ib_dev.query_device = mthca_query_device;
+ dev->ib_dev.query_port = mthca_query_port;
+ dev->ib_dev.modify_port = mthca_modify_port;
+ dev->ib_dev.query_pkey = mthca_query_pkey;
+ dev->ib_dev.query_gid = mthca_query_gid;
+ dev->ib_dev.alloc_pd = mthca_alloc_pd;
+ dev->ib_dev.dealloc_pd = mthca_dealloc_pd;
+ dev->ib_dev.create_ah = mthca_ah_create;
+ dev->ib_dev.destroy_ah = mthca_ah_destroy;
+ dev->ib_dev.create_qp = mthca_create_qp;
+ dev->ib_dev.modify_qp = mthca_modify_qp;
+ dev->ib_dev.destroy_qp = mthca_destroy_qp;
+ dev->ib_dev.create_cq = mthca_create_cq;
+ dev->ib_dev.destroy_cq = mthca_destroy_cq;
+ dev->ib_dev.poll_cq = mthca_poll_cq;
+ dev->ib_dev.get_dma_mr = mthca_get_dma_mr;
+ dev->ib_dev.reg_phys_mr = mthca_reg_phys_mr;
+ dev->ib_dev.dereg_mr = mthca_dereg_mr;
+ dev->ib_dev.attach_mcast = mthca_multicast_attach;
+ dev->ib_dev.detach_mcast = mthca_multicast_detach;
+ dev->ib_dev.process_mad = mthca_process_mad;
+
+ if (dev->hca_type == ARBEL_NATIVE) {
+ dev->ib_dev.req_notify_cq = mthca_arbel_arm_cq;
+ dev->ib_dev.post_send = mthca_arbel_post_send;
+ dev->ib_dev.post_recv = mthca_arbel_post_receive;
+ } else {
+ dev->ib_dev.req_notify_cq = mthca_tavor_arm_cq;
+ dev->ib_dev.post_send = mthca_tavor_post_send;
+ dev->ib_dev.post_recv = mthca_tavor_post_receive;
+ }
+
+ init_MUTEX(&dev->cap_mask_mutex);
+
+ ret = ib_register_device(&dev->ib_dev);
+ if (ret)
+ return ret;
+
+ for (i = 0; i < ARRAY_SIZE(mthca_class_attributes); ++i) {
+ ret = class_device_create_file(&dev->ib_dev.class_dev,
+ mthca_class_attributes[i]);
+ if (ret) {
+ ib_unregister_device(&dev->ib_dev);
+ return ret;
+ }
+ }
+
+ return 0;
+}
+
+void mthca_unregister_device(struct mthca_dev *dev)
+{
+ ib_unregister_device(&dev->ib_dev);
+}
diff --git a/drivers/infiniband/hw/mthca/mthca_provider.h b/drivers/infiniband/hw/mthca/mthca_provider.h
new file mode 100644
index 000000000000..0598f3905d9a
--- /dev/null
+++ b/drivers/infiniband/hw/mthca/mthca_provider.h
@@ -0,0 +1,251 @@
+/*
+ * Copyright (c) 2004 Topspin Communications. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * $Id: mthca_provider.h 1349 2004-12-16 21:09:43Z roland $
+ */
+
+#ifndef MTHCA_PROVIDER_H
+#define MTHCA_PROVIDER_H
+
+#include <ib_verbs.h>
+#include <ib_pack.h>
+
+#define MTHCA_MPT_FLAG_ATOMIC (1 << 14)
+#define MTHCA_MPT_FLAG_REMOTE_WRITE (1 << 13)
+#define MTHCA_MPT_FLAG_REMOTE_READ (1 << 12)
+#define MTHCA_MPT_FLAG_LOCAL_WRITE (1 << 11)
+#define MTHCA_MPT_FLAG_LOCAL_READ (1 << 10)
+
+struct mthca_buf_list {
+ void *buf;
+ DECLARE_PCI_UNMAP_ADDR(mapping)
+};
+
+struct mthca_uar {
+ unsigned long pfn;
+ int index;
+};
+
+struct mthca_mr {
+ struct ib_mr ibmr;
+ int order;
+ u32 first_seg;
+};
+
+struct mthca_pd {
+ struct ib_pd ibpd;
+ u32 pd_num;
+ atomic_t sqp_count;
+ struct mthca_mr ntmr;
+};
+
+struct mthca_eq {
+ struct mthca_dev *dev;
+ int eqn;
+ u32 eqn_mask;
+ u32 cons_index;
+ u16 msi_x_vector;
+ u16 msi_x_entry;
+ int have_irq;
+ int nent;
+ struct mthca_buf_list *page_list;
+ struct mthca_mr mr;
+};
+
+struct mthca_av;
+
+enum mthca_ah_type {
+ MTHCA_AH_ON_HCA,
+ MTHCA_AH_PCI_POOL,
+ MTHCA_AH_KMALLOC
+};
+
+struct mthca_ah {
+ struct ib_ah ibah;
+ enum mthca_ah_type type;
+ u32 key;
+ struct mthca_av *av;
+ dma_addr_t avdma;
+};
+
+/*
+ * Quick description of our CQ/QP locking scheme:
+ *
+ * We have one global lock that protects dev->cq/qp_table. Each
+ * struct mthca_cq/qp also has its own lock. An individual qp lock
+ * may be taken inside of an individual cq lock. Both cqs attached to
+ * a qp may be locked, with the send cq locked first. No other
+ * nesting should be done.
+ *
+ * Each struct mthca_cq/qp also has an atomic_t ref count. The
+ * pointer from the cq/qp_table to the struct counts as one reference.
+ * This reference also is good for access through the consumer API, so
+ * modifying the CQ/QP etc doesn't need to take another reference.
+ * Access because of a completion being polled does need a reference.
+ *
+ * Finally, each struct mthca_cq/qp has a wait_queue_head_t for the
+ * destroy function to sleep on.
+ *
+ * This means that access from the consumer API requires nothing but
+ * taking the struct's lock.
+ *
+ * Access because of a completion event should go as follows:
+ * - lock cq/qp_table and look up struct
+ * - increment ref count in struct
+ * - drop cq/qp_table lock
+ * - lock struct, do your thing, and unlock struct
+ * - decrement ref count; if zero, wake up waiters
+ *
+ * To destroy a CQ/QP, we can do the following:
+ * - lock cq/qp_table, remove pointer, unlock cq/qp_table lock
+ * - decrement ref count
+ * - wait_event until ref count is zero
+ *
+ * It is the consumer's responsibilty to make sure that no QP
+ * operations (WQE posting or state modification) are pending when the
+ * QP is destroyed. Also, the consumer must make sure that calls to
+ * qp_modify are serialized.
+ *
+ * Possible optimizations (wait for profile data to see if/where we
+ * have locks bouncing between CPUs):
+ * - split cq/qp table lock into n separate (cache-aligned) locks,
+ * indexed (say) by the page in the table
+ * - split QP struct lock into three (one for common info, one for the
+ * send queue and one for the receive queue)
+ */
+
+struct mthca_cq {
+ struct ib_cq ibcq;
+ spinlock_t lock;
+ atomic_t refcount;
+ int cqn;
+ u32 cons_index;
+ int is_direct;
+
+ /* Next fields are Arbel only */
+ int set_ci_db_index;
+ u32 *set_ci_db;
+ int arm_db_index;
+ u32 *arm_db;
+ int arm_sn;
+
+ union {
+ struct mthca_buf_list direct;
+ struct mthca_buf_list *page_list;
+ } queue;
+ struct mthca_mr mr;
+ wait_queue_head_t wait;
+};
+
+struct mthca_wq {
+ spinlock_t lock;
+ int max;
+ unsigned next_ind;
+ unsigned last_comp;
+ unsigned head;
+ unsigned tail;
+ void *last;
+ int max_gs;
+ int wqe_shift;
+
+ int db_index; /* Arbel only */
+ u32 *db;
+};
+
+struct mthca_qp {
+ struct ib_qp ibqp;
+ atomic_t refcount;
+ u32 qpn;
+ int is_direct;
+ u8 transport;
+ u8 state;
+ u8 atomic_rd_en;
+ u8 resp_depth;
+
+ struct mthca_mr mr;
+
+ struct mthca_wq rq;
+ struct mthca_wq sq;
+ enum ib_sig_type sq_policy;
+ int send_wqe_offset;
+
+ u64 *wrid;
+ union {
+ struct mthca_buf_list direct;
+ struct mthca_buf_list *page_list;
+ } queue;
+
+ wait_queue_head_t wait;
+};
+
+struct mthca_sqp {
+ struct mthca_qp qp;
+ int port;
+ int pkey_index;
+ u32 qkey;
+ u32 send_psn;
+ struct ib_ud_header ud_header;
+ int header_buf_size;
+ void *header_buf;
+ dma_addr_t header_dma;
+};
+
+static inline struct mthca_mr *to_mmr(struct ib_mr *ibmr)
+{
+ return container_of(ibmr, struct mthca_mr, ibmr);
+}
+
+static inline struct mthca_pd *to_mpd(struct ib_pd *ibpd)
+{
+ return container_of(ibpd, struct mthca_pd, ibpd);
+}
+
+static inline struct mthca_ah *to_mah(struct ib_ah *ibah)
+{
+ return container_of(ibah, struct mthca_ah, ibah);
+}
+
+static inline struct mthca_cq *to_mcq(struct ib_cq *ibcq)
+{
+ return container_of(ibcq, struct mthca_cq, ibcq);
+}
+
+static inline struct mthca_qp *to_mqp(struct ib_qp *ibqp)
+{
+ return container_of(ibqp, struct mthca_qp, ibqp);
+}
+
+static inline struct mthca_sqp *to_msqp(struct mthca_qp *qp)
+{
+ return container_of(qp, struct mthca_sqp, qp);
+}
+
+#endif /* MTHCA_PROVIDER_H */
diff --git a/drivers/infiniband/hw/mthca/mthca_qp.c b/drivers/infiniband/hw/mthca/mthca_qp.c
new file mode 100644
index 000000000000..7e4bbbd31f07
--- /dev/null
+++ b/drivers/infiniband/hw/mthca/mthca_qp.c
@@ -0,0 +1,2056 @@
+/*
+ * Copyright (c) 2004 Topspin Communications. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * $Id: mthca_qp.c 1355 2004-12-17 15:23:43Z roland $
+ */
+
+#include <linux/init.h>
+
+#include <ib_verbs.h>
+#include <ib_cache.h>
+#include <ib_pack.h>
+
+#include "mthca_dev.h"
+#include "mthca_cmd.h"
+#include "mthca_memfree.h"
+
+enum {
+ MTHCA_MAX_DIRECT_QP_SIZE = 4 * PAGE_SIZE,
+ MTHCA_ACK_REQ_FREQ = 10,
+ MTHCA_FLIGHT_LIMIT = 9,
+ MTHCA_UD_HEADER_SIZE = 72 /* largest UD header possible */
+};
+
+enum {
+ MTHCA_QP_STATE_RST = 0,
+ MTHCA_QP_STATE_INIT = 1,
+ MTHCA_QP_STATE_RTR = 2,
+ MTHCA_QP_STATE_RTS = 3,
+ MTHCA_QP_STATE_SQE = 4,
+ MTHCA_QP_STATE_SQD = 5,
+ MTHCA_QP_STATE_ERR = 6,
+ MTHCA_QP_STATE_DRAINING = 7
+};
+
+enum {
+ MTHCA_QP_ST_RC = 0x0,
+ MTHCA_QP_ST_UC = 0x1,
+ MTHCA_QP_ST_RD = 0x2,
+ MTHCA_QP_ST_UD = 0x3,
+ MTHCA_QP_ST_MLX = 0x7
+};
+
+enum {
+ MTHCA_QP_PM_MIGRATED = 0x3,
+ MTHCA_QP_PM_ARMED = 0x0,
+ MTHCA_QP_PM_REARM = 0x1
+};
+
+enum {
+ /* qp_context flags */
+ MTHCA_QP_BIT_DE = 1 << 8,
+ /* params1 */
+ MTHCA_QP_BIT_SRE = 1 << 15,
+ MTHCA_QP_BIT_SWE = 1 << 14,
+ MTHCA_QP_BIT_SAE = 1 << 13,
+ MTHCA_QP_BIT_SIC = 1 << 4,
+ MTHCA_QP_BIT_SSC = 1 << 3,
+ /* params2 */
+ MTHCA_QP_BIT_RRE = 1 << 15,
+ MTHCA_QP_BIT_RWE = 1 << 14,
+ MTHCA_QP_BIT_RAE = 1 << 13,
+ MTHCA_QP_BIT_RIC = 1 << 4,
+ MTHCA_QP_BIT_RSC = 1 << 3
+};
+
+struct mthca_qp_path {
+ u32 port_pkey;
+ u8 rnr_retry;
+ u8 g_mylmc;
+ u16 rlid;
+ u8 ackto;
+ u8 mgid_index;
+ u8 static_rate;
+ u8 hop_limit;
+ u32 sl_tclass_flowlabel;
+ u8 rgid[16];
+} __attribute__((packed));
+
+struct mthca_qp_context {
+ u32 flags;
+ u32 tavor_sched_queue; /* Reserved on Arbel */
+ u8 mtu_msgmax;
+ u8 rq_size_stride; /* Reserved on Tavor */
+ u8 sq_size_stride; /* Reserved on Tavor */
+ u8 rlkey_arbel_sched_queue; /* Reserved on Tavor */
+ u32 usr_page;
+ u32 local_qpn;
+ u32 remote_qpn;
+ u32 reserved1[2];
+ struct mthca_qp_path pri_path;
+ struct mthca_qp_path alt_path;
+ u32 rdd;
+ u32 pd;
+ u32 wqe_base;
+ u32 wqe_lkey;
+ u32 params1;
+ u32 reserved2;
+ u32 next_send_psn;
+ u32 cqn_snd;
+ u32 snd_wqe_base_l; /* Next send WQE on Tavor */
+ u32 snd_db_index; /* (debugging only entries) */
+ u32 last_acked_psn;
+ u32 ssn;
+ u32 params2;
+ u32 rnr_nextrecvpsn;
+ u32 ra_buff_indx;
+ u32 cqn_rcv;
+ u32 rcv_wqe_base_l; /* Next recv WQE on Tavor */
+ u32 rcv_db_index; /* (debugging only entries) */
+ u32 qkey;
+ u32 srqn;
+ u32 rmsn;
+ u16 rq_wqe_counter; /* reserved on Tavor */
+ u16 sq_wqe_counter; /* reserved on Tavor */
+ u32 reserved3[18];
+} __attribute__((packed));
+
+struct mthca_qp_param {
+ u32 opt_param_mask;
+ u32 reserved1;
+ struct mthca_qp_context context;
+ u32 reserved2[62];
+} __attribute__((packed));
+
+enum {
+ MTHCA_QP_OPTPAR_ALT_ADDR_PATH = 1 << 0,
+ MTHCA_QP_OPTPAR_RRE = 1 << 1,
+ MTHCA_QP_OPTPAR_RAE = 1 << 2,
+ MTHCA_QP_OPTPAR_RWE = 1 << 3,
+ MTHCA_QP_OPTPAR_PKEY_INDEX = 1 << 4,
+ MTHCA_QP_OPTPAR_Q_KEY = 1 << 5,
+ MTHCA_QP_OPTPAR_RNR_TIMEOUT = 1 << 6,
+ MTHCA_QP_OPTPAR_PRIMARY_ADDR_PATH = 1 << 7,
+ MTHCA_QP_OPTPAR_SRA_MAX = 1 << 8,
+ MTHCA_QP_OPTPAR_RRA_MAX = 1 << 9,
+ MTHCA_QP_OPTPAR_PM_STATE = 1 << 10,
+ MTHCA_QP_OPTPAR_PORT_NUM = 1 << 11,
+ MTHCA_QP_OPTPAR_RETRY_COUNT = 1 << 12,
+ MTHCA_QP_OPTPAR_ALT_RNR_RETRY = 1 << 13,
+ MTHCA_QP_OPTPAR_ACK_TIMEOUT = 1 << 14,
+ MTHCA_QP_OPTPAR_RNR_RETRY = 1 << 15,
+ MTHCA_QP_OPTPAR_SCHED_QUEUE = 1 << 16
+};
+
+enum {
+ MTHCA_OPCODE_NOP = 0x00,
+ MTHCA_OPCODE_RDMA_WRITE = 0x08,
+ MTHCA_OPCODE_RDMA_WRITE_IMM = 0x09,
+ MTHCA_OPCODE_SEND = 0x0a,
+ MTHCA_OPCODE_SEND_IMM = 0x0b,
+ MTHCA_OPCODE_RDMA_READ = 0x10,
+ MTHCA_OPCODE_ATOMIC_CS = 0x11,
+ MTHCA_OPCODE_ATOMIC_FA = 0x12,
+ MTHCA_OPCODE_BIND_MW = 0x18,
+ MTHCA_OPCODE_INVALID = 0xff
+};
+
+enum {
+ MTHCA_NEXT_DBD = 1 << 7,
+ MTHCA_NEXT_FENCE = 1 << 6,
+ MTHCA_NEXT_CQ_UPDATE = 1 << 3,
+ MTHCA_NEXT_EVENT_GEN = 1 << 2,
+ MTHCA_NEXT_SOLICIT = 1 << 1,
+
+ MTHCA_MLX_VL15 = 1 << 17,
+ MTHCA_MLX_SLR = 1 << 16
+};
+
+struct mthca_next_seg {
+ u32 nda_op; /* [31:6] next WQE [4:0] next opcode */
+ u32 ee_nds; /* [31:8] next EE [7] DBD [6] F [5:0] next WQE size */
+ u32 flags; /* [3] CQ [2] Event [1] Solicit */
+ u32 imm; /* immediate data */
+};
+
+struct mthca_tavor_ud_seg {
+ u32 reserved1;
+ u32 lkey;
+ u64 av_addr;
+ u32 reserved2[4];
+ u32 dqpn;
+ u32 qkey;
+ u32 reserved3[2];
+};
+
+struct mthca_arbel_ud_seg {
+ u32 av[8];
+ u32 dqpn;
+ u32 qkey;
+ u32 reserved[2];
+};
+
+struct mthca_bind_seg {
+ u32 flags; /* [31] Atomic [30] rem write [29] rem read */
+ u32 reserved;
+ u32 new_rkey;
+ u32 lkey;
+ u64 addr;
+ u64 length;
+};
+
+struct mthca_raddr_seg {
+ u64 raddr;
+ u32 rkey;
+ u32 reserved;
+};
+
+struct mthca_atomic_seg {
+ u64 swap_add;
+ u64 compare;
+};
+
+struct mthca_data_seg {
+ u32 byte_count;
+ u32 lkey;
+ u64 addr;
+};
+
+struct mthca_mlx_seg {
+ u32 nda_op;
+ u32 nds;
+ u32 flags; /* [17] VL15 [16] SLR [14:12] static rate
+ [11:8] SL [3] C [2] E */
+ u16 rlid;
+ u16 vcrc;
+};
+
+static const u8 mthca_opcode[] = {
+ [IB_WR_SEND] = MTHCA_OPCODE_SEND,
+ [IB_WR_SEND_WITH_IMM] = MTHCA_OPCODE_SEND_IMM,
+ [IB_WR_RDMA_WRITE] = MTHCA_OPCODE_RDMA_WRITE,
+ [IB_WR_RDMA_WRITE_WITH_IMM] = MTHCA_OPCODE_RDMA_WRITE_IMM,
+ [IB_WR_RDMA_READ] = MTHCA_OPCODE_RDMA_READ,
+ [IB_WR_ATOMIC_CMP_AND_SWP] = MTHCA_OPCODE_ATOMIC_CS,
+ [IB_WR_ATOMIC_FETCH_AND_ADD] = MTHCA_OPCODE_ATOMIC_FA,
+};
+
+static int is_sqp(struct mthca_dev *dev, struct mthca_qp *qp)
+{
+ return qp->qpn >= dev->qp_table.sqp_start &&
+ qp->qpn <= dev->qp_table.sqp_start + 3;
+}
+
+static int is_qp0(struct mthca_dev *dev, struct mthca_qp *qp)
+{
+ return qp->qpn >= dev->qp_table.sqp_start &&
+ qp->qpn <= dev->qp_table.sqp_start + 1;
+}
+
+static void *get_recv_wqe(struct mthca_qp *qp, int n)
+{
+ if (qp->is_direct)
+ return qp->queue.direct.buf + (n << qp->rq.wqe_shift);
+ else
+ return qp->queue.page_list[(n << qp->rq.wqe_shift) >> PAGE_SHIFT].buf +
+ ((n << qp->rq.wqe_shift) & (PAGE_SIZE - 1));
+}
+
+static void *get_send_wqe(struct mthca_qp *qp, int n)
+{
+ if (qp->is_direct)
+ return qp->queue.direct.buf + qp->send_wqe_offset +
+ (n << qp->sq.wqe_shift);
+ else
+ return qp->queue.page_list[(qp->send_wqe_offset +
+ (n << qp->sq.wqe_shift)) >>
+ PAGE_SHIFT].buf +
+ ((qp->send_wqe_offset + (n << qp->sq.wqe_shift)) &
+ (PAGE_SIZE - 1));
+}
+
+void mthca_qp_event(struct mthca_dev *dev, u32 qpn,
+ enum ib_event_type event_type)
+{
+ struct mthca_qp *qp;
+ struct ib_event event;
+
+ spin_lock(&dev->qp_table.lock);
+ qp = mthca_array_get(&dev->qp_table.qp, qpn & (dev->limits.num_qps - 1));
+ if (qp)
+ atomic_inc(&qp->refcount);
+ spin_unlock(&dev->qp_table.lock);
+
+ if (!qp) {
+ mthca_warn(dev, "Async event for bogus QP %08x\n", qpn);
+ return;
+ }
+
+ event.device = &dev->ib_dev;
+ event.event = event_type;
+ event.element.qp = &qp->ibqp;
+ if (qp->ibqp.event_handler)
+ qp->ibqp.event_handler(&event, qp->ibqp.qp_context);
+
+ if (atomic_dec_and_test(&qp->refcount))
+ wake_up(&qp->wait);
+}
+
+static int to_mthca_state(enum ib_qp_state ib_state)
+{
+ switch (ib_state) {
+ case IB_QPS_RESET: return MTHCA_QP_STATE_RST;
+ case IB_QPS_INIT: return MTHCA_QP_STATE_INIT;
+ case IB_QPS_RTR: return MTHCA_QP_STATE_RTR;
+ case IB_QPS_RTS: return MTHCA_QP_STATE_RTS;
+ case IB_QPS_SQD: return MTHCA_QP_STATE_SQD;
+ case IB_QPS_SQE: return MTHCA_QP_STATE_SQE;
+ case IB_QPS_ERR: return MTHCA_QP_STATE_ERR;
+ default: return -1;
+ }
+}
+
+enum { RC, UC, UD, RD, RDEE, MLX, NUM_TRANS };
+
+static int to_mthca_st(int transport)
+{
+ switch (transport) {
+ case RC: return MTHCA_QP_ST_RC;
+ case UC: return MTHCA_QP_ST_UC;
+ case UD: return MTHCA_QP_ST_UD;
+ case RD: return MTHCA_QP_ST_RD;
+ case MLX: return MTHCA_QP_ST_MLX;
+ default: return -1;
+ }
+}
+
+static const struct {
+ int trans;
+ u32 req_param[NUM_TRANS];
+ u32 opt_param[NUM_TRANS];
+} state_table[IB_QPS_ERR + 1][IB_QPS_ERR + 1] = {
+ [IB_QPS_RESET] = {
+ [IB_QPS_RESET] = { .trans = MTHCA_TRANS_ANY2RST },
+ [IB_QPS_ERR] = { .trans = MTHCA_TRANS_ANY2ERR },
+ [IB_QPS_INIT] = {
+ .trans = MTHCA_TRANS_RST2INIT,
+ .req_param = {
+ [UD] = (IB_QP_PKEY_INDEX |
+ IB_QP_PORT |
+ IB_QP_QKEY),
+ [RC] = (IB_QP_PKEY_INDEX |
+ IB_QP_PORT |
+ IB_QP_ACCESS_FLAGS),
+ [MLX] = (IB_QP_PKEY_INDEX |
+ IB_QP_QKEY),
+ },
+ /* bug-for-bug compatibility with VAPI: */
+ .opt_param = {
+ [MLX] = IB_QP_PORT
+ }
+ },
+ },
+ [IB_QPS_INIT] = {
+ [IB_QPS_RESET] = { .trans = MTHCA_TRANS_ANY2RST },
+ [IB_QPS_ERR] = { .trans = MTHCA_TRANS_ANY2ERR },
+ [IB_QPS_INIT] = {
+ .trans = MTHCA_TRANS_INIT2INIT,
+ .opt_param = {
+ [UD] = (IB_QP_PKEY_INDEX |
+ IB_QP_PORT |
+ IB_QP_QKEY),
+ [RC] = (IB_QP_PKEY_INDEX |
+ IB_QP_PORT |
+ IB_QP_ACCESS_FLAGS),
+ [MLX] = (IB_QP_PKEY_INDEX |
+ IB_QP_QKEY),
+ }
+ },
+ [IB_QPS_RTR] = {
+ .trans = MTHCA_TRANS_INIT2RTR,
+ .req_param = {
+ [RC] = (IB_QP_AV |
+ IB_QP_PATH_MTU |
+ IB_QP_DEST_QPN |
+ IB_QP_RQ_PSN |
+ IB_QP_MAX_DEST_RD_ATOMIC |
+ IB_QP_MIN_RNR_TIMER),
+ },
+ .opt_param = {
+ [UD] = (IB_QP_PKEY_INDEX |
+ IB_QP_QKEY),
+ [RC] = (IB_QP_ALT_PATH |
+ IB_QP_ACCESS_FLAGS |
+ IB_QP_PKEY_INDEX),
+ [MLX] = (IB_QP_PKEY_INDEX |
+ IB_QP_QKEY),
+ }
+ }
+ },
+ [IB_QPS_RTR] = {
+ [IB_QPS_RESET] = { .trans = MTHCA_TRANS_ANY2RST },
+ [IB_QPS_ERR] = { .trans = MTHCA_TRANS_ANY2ERR },
+ [IB_QPS_RTS] = {
+ .trans = MTHCA_TRANS_RTR2RTS,
+ .req_param = {
+ [UD] = IB_QP_SQ_PSN,
+ [RC] = (IB_QP_TIMEOUT |
+ IB_QP_RETRY_CNT |
+ IB_QP_RNR_RETRY |
+ IB_QP_SQ_PSN |
+ IB_QP_MAX_QP_RD_ATOMIC),
+ [MLX] = IB_QP_SQ_PSN,
+ },
+ .opt_param = {
+ [UD] = (IB_QP_CUR_STATE |
+ IB_QP_QKEY),
+ [RC] = (IB_QP_CUR_STATE |
+ IB_QP_ALT_PATH |
+ IB_QP_ACCESS_FLAGS |
+ IB_QP_PKEY_INDEX |
+ IB_QP_MIN_RNR_TIMER |
+ IB_QP_PATH_MIG_STATE),
+ [MLX] = (IB_QP_CUR_STATE |
+ IB_QP_QKEY),
+ }
+ }
+ },
+ [IB_QPS_RTS] = {
+ [IB_QPS_RESET] = { .trans = MTHCA_TRANS_ANY2RST },
+ [IB_QPS_ERR] = { .trans = MTHCA_TRANS_ANY2ERR },
+ [IB_QPS_RTS] = {
+ .trans = MTHCA_TRANS_RTS2RTS,
+ .opt_param = {
+ [UD] = (IB_QP_CUR_STATE |
+ IB_QP_QKEY),
+ [RC] = (IB_QP_ACCESS_FLAGS |
+ IB_QP_ALT_PATH |
+ IB_QP_PATH_MIG_STATE |
+ IB_QP_MIN_RNR_TIMER),
+ [MLX] = (IB_QP_CUR_STATE |
+ IB_QP_QKEY),
+ }
+ },
+ [IB_QPS_SQD] = {
+ .trans = MTHCA_TRANS_RTS2SQD,
+ },
+ },
+ [IB_QPS_SQD] = {
+ [IB_QPS_RESET] = { .trans = MTHCA_TRANS_ANY2RST },
+ [IB_QPS_ERR] = { .trans = MTHCA_TRANS_ANY2ERR },
+ [IB_QPS_RTS] = {
+ .trans = MTHCA_TRANS_SQD2RTS,
+ .opt_param = {
+ [UD] = (IB_QP_CUR_STATE |
+ IB_QP_QKEY),
+ [RC] = (IB_QP_CUR_STATE |
+ IB_QP_ALT_PATH |
+ IB_QP_ACCESS_FLAGS |
+ IB_QP_MIN_RNR_TIMER |
+ IB_QP_PATH_MIG_STATE),
+ [MLX] = (IB_QP_CUR_STATE |
+ IB_QP_QKEY),
+ }
+ },
+ [IB_QPS_SQD] = {
+ .trans = MTHCA_TRANS_SQD2SQD,
+ .opt_param = {
+ [UD] = (IB_QP_PKEY_INDEX |
+ IB_QP_QKEY),
+ [RC] = (IB_QP_AV |
+ IB_QP_TIMEOUT |
+ IB_QP_RETRY_CNT |
+ IB_QP_RNR_RETRY |
+ IB_QP_MAX_QP_RD_ATOMIC |
+ IB_QP_MAX_DEST_RD_ATOMIC |
+ IB_QP_CUR_STATE |
+ IB_QP_ALT_PATH |
+ IB_QP_ACCESS_FLAGS |
+ IB_QP_PKEY_INDEX |
+ IB_QP_MIN_RNR_TIMER |
+ IB_QP_PATH_MIG_STATE),
+ [MLX] = (IB_QP_PKEY_INDEX |
+ IB_QP_QKEY),
+ }
+ }
+ },
+ [IB_QPS_SQE] = {
+ [IB_QPS_RESET] = { .trans = MTHCA_TRANS_ANY2RST },
+ [IB_QPS_ERR] = { .trans = MTHCA_TRANS_ANY2ERR },
+ [IB_QPS_RTS] = {
+ .trans = MTHCA_TRANS_SQERR2RTS,
+ .opt_param = {
+ [UD] = (IB_QP_CUR_STATE |
+ IB_QP_QKEY),
+ [RC] = (IB_QP_CUR_STATE |
+ IB_QP_MIN_RNR_TIMER),
+ [MLX] = (IB_QP_CUR_STATE |
+ IB_QP_QKEY),
+ }
+ }
+ },
+ [IB_QPS_ERR] = {
+ [IB_QPS_RESET] = { .trans = MTHCA_TRANS_ANY2RST },
+ [IB_QPS_ERR] = { .trans = MTHCA_TRANS_ANY2ERR }
+ }
+};
+
+static void store_attrs(struct mthca_sqp *sqp, struct ib_qp_attr *attr,
+ int attr_mask)
+{
+ if (attr_mask & IB_QP_PKEY_INDEX)
+ sqp->pkey_index = attr->pkey_index;
+ if (attr_mask & IB_QP_QKEY)
+ sqp->qkey = attr->qkey;
+ if (attr_mask & IB_QP_SQ_PSN)
+ sqp->send_psn = attr->sq_psn;
+}
+
+static void init_port(struct mthca_dev *dev, int port)
+{
+ int err;
+ u8 status;
+ struct mthca_init_ib_param param;
+
+ memset(&param, 0, sizeof param);
+
+ param.enable_1x = 1;
+ param.enable_4x = 1;
+ param.vl_cap = dev->limits.vl_cap;
+ param.mtu_cap = dev->limits.mtu_cap;
+ param.gid_cap = dev->limits.gid_table_len;
+ param.pkey_cap = dev->limits.pkey_table_len;
+
+ err = mthca_INIT_IB(dev, &param, port, &status);
+ if (err)
+ mthca_warn(dev, "INIT_IB failed, return code %d.\n", err);
+ if (status)
+ mthca_warn(dev, "INIT_IB returned status %02x.\n", status);
+}
+
+int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask)
+{
+ struct mthca_dev *dev = to_mdev(ibqp->device);
+ struct mthca_qp *qp = to_mqp(ibqp);
+ enum ib_qp_state cur_state, new_state;
+ void *mailbox = NULL;
+ struct mthca_qp_param *qp_param;
+ struct mthca_qp_context *qp_context;
+ u32 req_param, opt_param;
+ u8 status;
+ int err;
+
+ if (attr_mask & IB_QP_CUR_STATE) {
+ if (attr->cur_qp_state != IB_QPS_RTR &&
+ attr->cur_qp_state != IB_QPS_RTS &&
+ attr->cur_qp_state != IB_QPS_SQD &&
+ attr->cur_qp_state != IB_QPS_SQE)
+ return -EINVAL;
+ else
+ cur_state = attr->cur_qp_state;
+ } else {
+ spin_lock_irq(&qp->sq.lock);
+ spin_lock(&qp->rq.lock);
+ cur_state = qp->state;
+ spin_unlock(&qp->rq.lock);
+ spin_unlock_irq(&qp->sq.lock);
+ }
+
+ if (attr_mask & IB_QP_STATE) {
+ if (attr->qp_state < 0 || attr->qp_state > IB_QPS_ERR)
+ return -EINVAL;
+ new_state = attr->qp_state;
+ } else
+ new_state = cur_state;
+
+ if (state_table[cur_state][new_state].trans == MTHCA_TRANS_INVALID) {
+ mthca_dbg(dev, "Illegal QP transition "
+ "%d->%d\n", cur_state, new_state);
+ return -EINVAL;
+ }
+
+ req_param = state_table[cur_state][new_state].req_param[qp->transport];
+ opt_param = state_table[cur_state][new_state].opt_param[qp->transport];
+
+ if ((req_param & attr_mask) != req_param) {
+ mthca_dbg(dev, "QP transition "
+ "%d->%d missing req attr 0x%08x\n",
+ cur_state, new_state,
+ req_param & ~attr_mask);
+ return -EINVAL;
+ }
+
+ if (attr_mask & ~(req_param | opt_param | IB_QP_STATE)) {
+ mthca_dbg(dev, "QP transition (transport %d) "
+ "%d->%d has extra attr 0x%08x\n",
+ qp->transport,
+ cur_state, new_state,
+ attr_mask & ~(req_param | opt_param |
+ IB_QP_STATE));
+ return -EINVAL;
+ }
+
+ mailbox = kmalloc(sizeof (*qp_param) + MTHCA_CMD_MAILBOX_EXTRA, GFP_KERNEL);
+ if (!mailbox)
+ return -ENOMEM;
+ qp_param = MAILBOX_ALIGN(mailbox);
+ qp_context = &qp_param->context;
+ memset(qp_param, 0, sizeof *qp_param);
+
+ qp_context->flags = cpu_to_be32((to_mthca_state(new_state) << 28) |
+ (to_mthca_st(qp->transport) << 16));
+ qp_context->flags |= cpu_to_be32(MTHCA_QP_BIT_DE);
+ if (!(attr_mask & IB_QP_PATH_MIG_STATE))
+ qp_context->flags |= cpu_to_be32(MTHCA_QP_PM_MIGRATED << 11);
+ else {
+ qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_PM_STATE);
+ switch (attr->path_mig_state) {
+ case IB_MIG_MIGRATED:
+ qp_context->flags |= cpu_to_be32(MTHCA_QP_PM_MIGRATED << 11);
+ break;
+ case IB_MIG_REARM:
+ qp_context->flags |= cpu_to_be32(MTHCA_QP_PM_REARM << 11);
+ break;
+ case IB_MIG_ARMED:
+ qp_context->flags |= cpu_to_be32(MTHCA_QP_PM_ARMED << 11);
+ break;
+ }
+ }
+
+ /* leave tavor_sched_queue as 0 */
+
+ if (qp->transport == MLX || qp->transport == UD)
+ qp_context->mtu_msgmax = (IB_MTU_2048 << 5) | 11;
+ else if (attr_mask & IB_QP_PATH_MTU)
+ qp_context->mtu_msgmax = (attr->path_mtu << 5) | 31;
+
+ if (dev->hca_type == ARBEL_NATIVE) {
+ qp_context->rq_size_stride =
+ ((ffs(qp->rq.max) - 1) << 3) | (qp->rq.wqe_shift - 4);
+ qp_context->sq_size_stride =
+ ((ffs(qp->sq.max) - 1) << 3) | (qp->sq.wqe_shift - 4);
+ }
+
+ /* leave arbel_sched_queue as 0 */
+
+ qp_context->usr_page = cpu_to_be32(dev->driver_uar.index);
+ qp_context->local_qpn = cpu_to_be32(qp->qpn);
+ if (attr_mask & IB_QP_DEST_QPN) {
+ qp_context->remote_qpn = cpu_to_be32(attr->dest_qp_num);
+ }
+
+ if (qp->transport == MLX)
+ qp_context->pri_path.port_pkey |=
+ cpu_to_be32(to_msqp(qp)->port << 24);
+ else {
+ if (attr_mask & IB_QP_PORT) {
+ qp_context->pri_path.port_pkey |=
+ cpu_to_be32(attr->port_num << 24);
+ qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_PORT_NUM);
+ }
+ }
+
+ if (attr_mask & IB_QP_PKEY_INDEX) {
+ qp_context->pri_path.port_pkey |=
+ cpu_to_be32(attr->pkey_index);
+ qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_PKEY_INDEX);
+ }
+
+ if (attr_mask & IB_QP_RNR_RETRY) {
+ qp_context->pri_path.rnr_retry = attr->rnr_retry << 5;
+ qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_RNR_RETRY);
+ }
+
+ if (attr_mask & IB_QP_AV) {
+ qp_context->pri_path.g_mylmc = attr->ah_attr.src_path_bits & 0x7f;
+ qp_context->pri_path.rlid = cpu_to_be16(attr->ah_attr.dlid);
+ qp_context->pri_path.static_rate = (!!attr->ah_attr.static_rate) << 3;
+ if (attr->ah_attr.ah_flags & IB_AH_GRH) {
+ qp_context->pri_path.g_mylmc |= 1 << 7;
+ qp_context->pri_path.mgid_index = attr->ah_attr.grh.sgid_index;
+ qp_context->pri_path.hop_limit = attr->ah_attr.grh.hop_limit;
+ qp_context->pri_path.sl_tclass_flowlabel =
+ cpu_to_be32((attr->ah_attr.sl << 28) |
+ (attr->ah_attr.grh.traffic_class << 20) |
+ (attr->ah_attr.grh.flow_label));
+ memcpy(qp_context->pri_path.rgid,
+ attr->ah_attr.grh.dgid.raw, 16);
+ } else {
+ qp_context->pri_path.sl_tclass_flowlabel =
+ cpu_to_be32(attr->ah_attr.sl << 28);
+ }
+ qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_PRIMARY_ADDR_PATH);
+ }
+
+ if (attr_mask & IB_QP_TIMEOUT) {
+ qp_context->pri_path.ackto = attr->timeout;
+ qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_ACK_TIMEOUT);
+ }
+
+ /* XXX alt_path */
+
+ /* leave rdd as 0 */
+ qp_context->pd = cpu_to_be32(to_mpd(ibqp->pd)->pd_num);
+ /* leave wqe_base as 0 (we always create an MR based at 0 for WQs) */
+ qp_context->wqe_lkey = cpu_to_be32(qp->mr.ibmr.lkey);
+ qp_context->params1 = cpu_to_be32((MTHCA_ACK_REQ_FREQ << 28) |
+ (MTHCA_FLIGHT_LIMIT << 24) |
+ MTHCA_QP_BIT_SRE |
+ MTHCA_QP_BIT_SWE |
+ MTHCA_QP_BIT_SAE);
+ if (qp->sq_policy == IB_SIGNAL_ALL_WR)
+ qp_context->params1 |= cpu_to_be32(MTHCA_QP_BIT_SSC);
+ if (attr_mask & IB_QP_RETRY_CNT) {
+ qp_context->params1 |= cpu_to_be32(attr->retry_cnt << 16);
+ qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_RETRY_COUNT);
+ }
+
+ if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) {
+ qp_context->params1 |= cpu_to_be32(min(attr->max_dest_rd_atomic ?
+ ffs(attr->max_dest_rd_atomic) - 1 : 0,
+ 7) << 21);
+ qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_SRA_MAX);
+ }
+
+ if (attr_mask & IB_QP_SQ_PSN)
+ qp_context->next_send_psn = cpu_to_be32(attr->sq_psn);
+ qp_context->cqn_snd = cpu_to_be32(to_mcq(ibqp->send_cq)->cqn);
+
+ if (dev->hca_type == ARBEL_NATIVE) {
+ qp_context->snd_wqe_base_l = cpu_to_be32(qp->send_wqe_offset);
+ qp_context->snd_db_index = cpu_to_be32(qp->sq.db_index);
+ }
+
+ if (attr_mask & IB_QP_ACCESS_FLAGS) {
+ /*
+ * Only enable RDMA/atomics if we have responder
+ * resources set to a non-zero value.
+ */
+ if (qp->resp_depth) {
+ qp_context->params2 |=
+ cpu_to_be32(attr->qp_access_flags & IB_ACCESS_REMOTE_WRITE ?
+ MTHCA_QP_BIT_RWE : 0);
+ qp_context->params2 |=
+ cpu_to_be32(attr->qp_access_flags & IB_ACCESS_REMOTE_READ ?
+ MTHCA_QP_BIT_RRE : 0);
+ qp_context->params2 |=
+ cpu_to_be32(attr->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC ?
+ MTHCA_QP_BIT_RAE : 0);
+ }
+
+ qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_RWE |
+ MTHCA_QP_OPTPAR_RRE |
+ MTHCA_QP_OPTPAR_RAE);
+
+ qp->atomic_rd_en = attr->qp_access_flags;
+ }
+
+ if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC) {
+ u8 rra_max;
+
+ if (qp->resp_depth && !attr->max_rd_atomic) {
+ /*
+ * Lowering our responder resources to zero.
+ * Turn off RDMA/atomics as responder.
+ * (RWE/RRE/RAE in params2 already zero)
+ */
+ qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_RWE |
+ MTHCA_QP_OPTPAR_RRE |
+ MTHCA_QP_OPTPAR_RAE);
+ }
+
+ if (!qp->resp_depth && attr->max_rd_atomic) {
+ /*
+ * Increasing our responder resources from
+ * zero. Turn on RDMA/atomics as appropriate.
+ */
+ qp_context->params2 |=
+ cpu_to_be32(qp->atomic_rd_en & IB_ACCESS_REMOTE_WRITE ?
+ MTHCA_QP_BIT_RWE : 0);
+ qp_context->params2 |=
+ cpu_to_be32(qp->atomic_rd_en & IB_ACCESS_REMOTE_READ ?
+ MTHCA_QP_BIT_RRE : 0);
+ qp_context->params2 |=
+ cpu_to_be32(qp->atomic_rd_en & IB_ACCESS_REMOTE_ATOMIC ?
+ MTHCA_QP_BIT_RAE : 0);
+
+ qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_RWE |
+ MTHCA_QP_OPTPAR_RRE |
+ MTHCA_QP_OPTPAR_RAE);
+ }
+
+ for (rra_max = 0;
+ 1 << rra_max < attr->max_rd_atomic &&
+ rra_max < dev->qp_table.rdb_shift;
+ ++rra_max)
+ ; /* nothing */
+
+ qp_context->params2 |= cpu_to_be32(rra_max << 21);
+ qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_RRA_MAX);
+
+ qp->resp_depth = attr->max_rd_atomic;
+ }
+
+ qp_context->params2 |= cpu_to_be32(MTHCA_QP_BIT_RSC);
+
+ if (attr_mask & IB_QP_MIN_RNR_TIMER) {
+ qp_context->rnr_nextrecvpsn |= cpu_to_be32(attr->min_rnr_timer << 24);
+ qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_RNR_TIMEOUT);
+ }
+ if (attr_mask & IB_QP_RQ_PSN)
+ qp_context->rnr_nextrecvpsn |= cpu_to_be32(attr->rq_psn);
+
+ qp_context->ra_buff_indx =
+ cpu_to_be32(dev->qp_table.rdb_base +
+ ((qp->qpn & (dev->limits.num_qps - 1)) * MTHCA_RDB_ENTRY_SIZE <<
+ dev->qp_table.rdb_shift));
+
+ qp_context->cqn_rcv = cpu_to_be32(to_mcq(ibqp->recv_cq)->cqn);
+
+ if (dev->hca_type == ARBEL_NATIVE)
+ qp_context->rcv_db_index = cpu_to_be32(qp->rq.db_index);
+
+ if (attr_mask & IB_QP_QKEY) {
+ qp_context->qkey = cpu_to_be32(attr->qkey);
+ qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_Q_KEY);
+ }
+
+ err = mthca_MODIFY_QP(dev, state_table[cur_state][new_state].trans,
+ qp->qpn, 0, qp_param, 0, &status);
+ if (status) {
+ mthca_warn(dev, "modify QP %d returned status %02x.\n",
+ state_table[cur_state][new_state].trans, status);
+ err = -EINVAL;
+ }
+
+ if (!err)
+ qp->state = new_state;
+
+ kfree(mailbox);
+
+ if (is_sqp(dev, qp))
+ store_attrs(to_msqp(qp), attr, attr_mask);
+
+ /*
+ * If we are moving QP0 to RTR, bring the IB link up; if we
+ * are moving QP0 to RESET or ERROR, bring the link back down.
+ */
+ if (is_qp0(dev, qp)) {
+ if (cur_state != IB_QPS_RTR &&
+ new_state == IB_QPS_RTR)
+ init_port(dev, to_msqp(qp)->port);
+
+ if (cur_state != IB_QPS_RESET &&
+ cur_state != IB_QPS_ERR &&
+ (new_state == IB_QPS_RESET ||
+ new_state == IB_QPS_ERR))
+ mthca_CLOSE_IB(dev, to_msqp(qp)->port, &status);
+ }
+
+ return err;
+}
+
+/*
+ * Allocate and register buffer for WQEs. qp->rq.max, sq.max,
+ * rq.max_gs and sq.max_gs must all be assigned.
+ * mthca_alloc_wqe_buf will calculate rq.wqe_shift and
+ * sq.wqe_shift (as well as send_wqe_offset, is_direct, and
+ * queue)
+ */
+static int mthca_alloc_wqe_buf(struct mthca_dev *dev,
+ struct mthca_pd *pd,
+ struct mthca_qp *qp)
+{
+ int size;
+ int i;
+ int npages, shift;
+ dma_addr_t t;
+ u64 *dma_list = NULL;
+ int err = -ENOMEM;
+
+ size = sizeof (struct mthca_next_seg) +
+ qp->rq.max_gs * sizeof (struct mthca_data_seg);
+
+ for (qp->rq.wqe_shift = 6; 1 << qp->rq.wqe_shift < size;
+ qp->rq.wqe_shift++)
+ ; /* nothing */
+
+ size = sizeof (struct mthca_next_seg) +
+ qp->sq.max_gs * sizeof (struct mthca_data_seg);
+ switch (qp->transport) {
+ case MLX:
+ size += 2 * sizeof (struct mthca_data_seg);
+ break;
+ case UD:
+ if (dev->hca_type == ARBEL_NATIVE)
+ size += sizeof (struct mthca_arbel_ud_seg);
+ else
+ size += sizeof (struct mthca_tavor_ud_seg);
+ break;
+ default:
+ /* bind seg is as big as atomic + raddr segs */
+ size += sizeof (struct mthca_bind_seg);
+ }
+
+ for (qp->sq.wqe_shift = 6; 1 << qp->sq.wqe_shift < size;
+ qp->sq.wqe_shift++)
+ ; /* nothing */
+
+ qp->send_wqe_offset = ALIGN(qp->rq.max << qp->rq.wqe_shift,
+ 1 << qp->sq.wqe_shift);
+ size = PAGE_ALIGN(qp->send_wqe_offset +
+ (qp->sq.max << qp->sq.wqe_shift));
+
+ qp->wrid = kmalloc((qp->rq.max + qp->sq.max) * sizeof (u64),
+ GFP_KERNEL);
+ if (!qp->wrid)
+ goto err_out;
+
+ if (size <= MTHCA_MAX_DIRECT_QP_SIZE) {
+ qp->is_direct = 1;
+ npages = 1;
+ shift = get_order(size) + PAGE_SHIFT;
+
+ if (0)
+ mthca_dbg(dev, "Creating direct QP of size %d (shift %d)\n",
+ size, shift);
+
+ qp->queue.direct.buf = pci_alloc_consistent(dev->pdev, size, &t);
+ if (!qp->queue.direct.buf)
+ goto err_out;
+
+ pci_unmap_addr_set(&qp->queue.direct, mapping, t);
+
+ memset(qp->queue.direct.buf, 0, size);
+
+ while (t & ((1 << shift) - 1)) {
+ --shift;
+ npages *= 2;
+ }
+
+ dma_list = kmalloc(npages * sizeof *dma_list, GFP_KERNEL);
+ if (!dma_list)
+ goto err_out_free;
+
+ for (i = 0; i < npages; ++i)
+ dma_list[i] = t + i * (1 << shift);
+ } else {
+ qp->is_direct = 0;
+ npages = size / PAGE_SIZE;
+ shift = PAGE_SHIFT;
+
+ if (0)
+ mthca_dbg(dev, "Creating indirect QP with %d pages\n", npages);
+
+ dma_list = kmalloc(npages * sizeof *dma_list, GFP_KERNEL);
+ if (!dma_list)
+ goto err_out;
+
+ qp->queue.page_list = kmalloc(npages *
+ sizeof *qp->queue.page_list,
+ GFP_KERNEL);
+ if (!qp->queue.page_list)
+ goto err_out;
+
+ for (i = 0; i < npages; ++i) {
+ qp->queue.page_list[i].buf =
+ pci_alloc_consistent(dev->pdev, PAGE_SIZE, &t);
+ if (!qp->queue.page_list[i].buf)
+ goto err_out_free;
+
+ memset(qp->queue.page_list[i].buf, 0, PAGE_SIZE);
+
+ pci_unmap_addr_set(&qp->queue.page_list[i], mapping, t);
+ dma_list[i] = t;
+ }
+ }
+
+ err = mthca_mr_alloc_phys(dev, pd->pd_num, dma_list, shift,
+ npages, 0, size,
+ MTHCA_MPT_FLAG_LOCAL_READ,
+ &qp->mr);
+ if (err)
+ goto err_out_free;
+
+ kfree(dma_list);
+ return 0;
+
+ err_out_free:
+ if (qp->is_direct) {
+ pci_free_consistent(dev->pdev, size,
+ qp->queue.direct.buf,
+ pci_unmap_addr(&qp->queue.direct, mapping));
+ } else
+ for (i = 0; i < npages; ++i) {
+ if (qp->queue.page_list[i].buf)
+ pci_free_consistent(dev->pdev, PAGE_SIZE,
+ qp->queue.page_list[i].buf,
+ pci_unmap_addr(&qp->queue.page_list[i],
+ mapping));
+
+ }
+
+ err_out:
+ kfree(qp->wrid);
+ kfree(dma_list);
+ return err;
+}
+
+static int mthca_alloc_memfree(struct mthca_dev *dev,
+ struct mthca_qp *qp)
+{
+ int ret = 0;
+
+ if (dev->hca_type == ARBEL_NATIVE) {
+ ret = mthca_table_get(dev, dev->qp_table.qp_table, qp->qpn);
+ if (ret)
+ return ret;
+
+ ret = mthca_table_get(dev, dev->qp_table.eqp_table, qp->qpn);
+ if (ret)
+ goto err_qpc;
+
+ qp->rq.db_index = mthca_alloc_db(dev, MTHCA_DB_TYPE_RQ,
+ qp->qpn, &qp->rq.db);
+ if (qp->rq.db_index < 0) {
+ ret = -ENOMEM;
+ goto err_eqpc;
+ }
+
+ qp->sq.db_index = mthca_alloc_db(dev, MTHCA_DB_TYPE_SQ,
+ qp->qpn, &qp->sq.db);
+ if (qp->sq.db_index < 0) {
+ ret = -ENOMEM;
+ goto err_rq_db;
+ }
+ }
+
+ return 0;
+
+err_rq_db:
+ mthca_free_db(dev, MTHCA_DB_TYPE_RQ, qp->rq.db_index);
+
+err_eqpc:
+ mthca_table_put(dev, dev->qp_table.eqp_table, qp->qpn);
+
+err_qpc:
+ mthca_table_put(dev, dev->qp_table.qp_table, qp->qpn);
+
+ return ret;
+}
+
+static void mthca_free_memfree(struct mthca_dev *dev,
+ struct mthca_qp *qp)
+{
+ if (dev->hca_type == ARBEL_NATIVE) {
+ mthca_free_db(dev, MTHCA_DB_TYPE_SQ, qp->sq.db_index);
+ mthca_free_db(dev, MTHCA_DB_TYPE_RQ, qp->rq.db_index);
+ mthca_table_put(dev, dev->qp_table.eqp_table, qp->qpn);
+ mthca_table_put(dev, dev->qp_table.qp_table, qp->qpn);
+ }
+}
+
+static void mthca_wq_init(struct mthca_wq* wq)
+{
+ spin_lock_init(&wq->lock);
+ wq->next_ind = 0;
+ wq->last_comp = wq->max - 1;
+ wq->head = 0;
+ wq->tail = 0;
+ wq->last = NULL;
+}
+
+static int mthca_alloc_qp_common(struct mthca_dev *dev,
+ struct mthca_pd *pd,
+ struct mthca_cq *send_cq,
+ struct mthca_cq *recv_cq,
+ enum ib_sig_type send_policy,
+ struct mthca_qp *qp)
+{
+ struct mthca_next_seg *wqe;
+ int ret;
+ int i;
+
+ atomic_set(&qp->refcount, 1);
+ qp->state = IB_QPS_RESET;
+ qp->atomic_rd_en = 0;
+ qp->resp_depth = 0;
+ qp->sq_policy = send_policy;
+ mthca_wq_init(&qp->sq);
+ mthca_wq_init(&qp->rq);
+
+ ret = mthca_alloc_memfree(dev, qp);
+ if (ret)
+ return ret;
+
+ ret = mthca_alloc_wqe_buf(dev, pd, qp);
+ if (ret) {
+ mthca_free_memfree(dev, qp);
+ return ret;
+ }
+
+ if (dev->hca_type == ARBEL_NATIVE) {
+ for (i = 0; i < qp->rq.max; ++i) {
+ wqe = get_recv_wqe(qp, i);
+ wqe->nda_op = cpu_to_be32(((i + 1) & (qp->rq.max - 1)) <<
+ qp->rq.wqe_shift);
+ wqe->ee_nds = cpu_to_be32(1 << (qp->rq.wqe_shift - 4));
+ }
+
+ for (i = 0; i < qp->sq.max; ++i) {
+ wqe = get_send_wqe(qp, i);
+ wqe->nda_op = cpu_to_be32((((i + 1) & (qp->sq.max - 1)) <<
+ qp->sq.wqe_shift) +
+ qp->send_wqe_offset);
+ }
+ }
+
+ return 0;
+}
+
+static void mthca_align_qp_size(struct mthca_dev *dev, struct mthca_qp *qp)
+{
+ int i;
+
+ if (dev->hca_type != ARBEL_NATIVE)
+ return;
+
+ for (i = 0; 1 << i < qp->rq.max; ++i)
+ ; /* nothing */
+
+ qp->rq.max = 1 << i;
+
+ for (i = 0; 1 << i < qp->sq.max; ++i)
+ ; /* nothing */
+
+ qp->sq.max = 1 << i;
+}
+
+int mthca_alloc_qp(struct mthca_dev *dev,
+ struct mthca_pd *pd,
+ struct mthca_cq *send_cq,
+ struct mthca_cq *recv_cq,
+ enum ib_qp_type type,
+ enum ib_sig_type send_policy,
+ struct mthca_qp *qp)
+{
+ int err;
+
+ mthca_align_qp_size(dev, qp);
+
+ switch (type) {
+ case IB_QPT_RC: qp->transport = RC; break;
+ case IB_QPT_UC: qp->transport = UC; break;
+ case IB_QPT_UD: qp->transport = UD; break;
+ default: return -EINVAL;
+ }
+
+ qp->qpn = mthca_alloc(&dev->qp_table.alloc);
+ if (qp->qpn == -1)
+ return -ENOMEM;
+
+ err = mthca_alloc_qp_common(dev, pd, send_cq, recv_cq,
+ send_policy, qp);
+ if (err) {
+ mthca_free(&dev->qp_table.alloc, qp->qpn);
+ return err;
+ }
+
+ spin_lock_irq(&dev->qp_table.lock);
+ mthca_array_set(&dev->qp_table.qp,
+ qp->qpn & (dev->limits.num_qps - 1), qp);
+ spin_unlock_irq(&dev->qp_table.lock);
+
+ return 0;
+}
+
+int mthca_alloc_sqp(struct mthca_dev *dev,
+ struct mthca_pd *pd,
+ struct mthca_cq *send_cq,
+ struct mthca_cq *recv_cq,
+ enum ib_sig_type send_policy,
+ int qpn,
+ int port,
+ struct mthca_sqp *sqp)
+{
+ int err = 0;
+ u32 mqpn = qpn * 2 + dev->qp_table.sqp_start + port - 1;
+
+ mthca_align_qp_size(dev, &sqp->qp);
+
+ sqp->header_buf_size = sqp->qp.sq.max * MTHCA_UD_HEADER_SIZE;
+ sqp->header_buf = dma_alloc_coherent(&dev->pdev->dev, sqp->header_buf_size,
+ &sqp->header_dma, GFP_KERNEL);
+ if (!sqp->header_buf)
+ return -ENOMEM;
+
+ spin_lock_irq(&dev->qp_table.lock);
+ if (mthca_array_get(&dev->qp_table.qp, mqpn))
+ err = -EBUSY;
+ else
+ mthca_array_set(&dev->qp_table.qp, mqpn, sqp);
+ spin_unlock_irq(&dev->qp_table.lock);
+
+ if (err)
+ goto err_out;
+
+ sqp->port = port;
+ sqp->qp.qpn = mqpn;
+ sqp->qp.transport = MLX;
+
+ err = mthca_alloc_qp_common(dev, pd, send_cq, recv_cq,
+ send_policy, &sqp->qp);
+ if (err)
+ goto err_out_free;
+
+ atomic_inc(&pd->sqp_count);
+
+ return 0;
+
+ err_out_free:
+ /*
+ * Lock CQs here, so that CQ polling code can do QP lookup
+ * without taking a lock.
+ */
+ spin_lock_irq(&send_cq->lock);
+ if (send_cq != recv_cq)
+ spin_lock(&recv_cq->lock);
+
+ spin_lock(&dev->qp_table.lock);
+ mthca_array_clear(&dev->qp_table.qp, mqpn);
+ spin_unlock(&dev->qp_table.lock);
+
+ if (send_cq != recv_cq)
+ spin_unlock(&recv_cq->lock);
+ spin_unlock_irq(&send_cq->lock);
+
+ err_out:
+ dma_free_coherent(&dev->pdev->dev, sqp->header_buf_size,
+ sqp->header_buf, sqp->header_dma);
+
+ return err;
+}
+
+void mthca_free_qp(struct mthca_dev *dev,
+ struct mthca_qp *qp)
+{
+ u8 status;
+ int size;
+ int i;
+ struct mthca_cq *send_cq;
+ struct mthca_cq *recv_cq;
+
+ send_cq = to_mcq(qp->ibqp.send_cq);
+ recv_cq = to_mcq(qp->ibqp.recv_cq);
+
+ /*
+ * Lock CQs here, so that CQ polling code can do QP lookup
+ * without taking a lock.
+ */
+ spin_lock_irq(&send_cq->lock);
+ if (send_cq != recv_cq)
+ spin_lock(&recv_cq->lock);
+
+ spin_lock(&dev->qp_table.lock);
+ mthca_array_clear(&dev->qp_table.qp,
+ qp->qpn & (dev->limits.num_qps - 1));
+ spin_unlock(&dev->qp_table.lock);
+
+ if (send_cq != recv_cq)
+ spin_unlock(&recv_cq->lock);
+ spin_unlock_irq(&send_cq->lock);
+
+ atomic_dec(&qp->refcount);
+ wait_event(qp->wait, !atomic_read(&qp->refcount));
+
+ if (qp->state != IB_QPS_RESET)
+ mthca_MODIFY_QP(dev, MTHCA_TRANS_ANY2RST, qp->qpn, 0, NULL, 0, &status);
+
+ mthca_cq_clean(dev, to_mcq(qp->ibqp.send_cq)->cqn, qp->qpn);
+ if (qp->ibqp.send_cq != qp->ibqp.recv_cq)
+ mthca_cq_clean(dev, to_mcq(qp->ibqp.recv_cq)->cqn, qp->qpn);
+
+ mthca_free_mr(dev, &qp->mr);
+
+ size = PAGE_ALIGN(qp->send_wqe_offset +
+ (qp->sq.max << qp->sq.wqe_shift));
+
+ if (qp->is_direct) {
+ pci_free_consistent(dev->pdev, size,
+ qp->queue.direct.buf,
+ pci_unmap_addr(&qp->queue.direct, mapping));
+ } else {
+ for (i = 0; i < size / PAGE_SIZE; ++i) {
+ pci_free_consistent(dev->pdev, PAGE_SIZE,
+ qp->queue.page_list[i].buf,
+ pci_unmap_addr(&qp->queue.page_list[i],
+ mapping));
+ }
+ }
+
+ kfree(qp->wrid);
+
+ mthca_free_memfree(dev, qp);
+
+ if (is_sqp(dev, qp)) {
+ atomic_dec(&(to_mpd(qp->ibqp.pd)->sqp_count));
+ dma_free_coherent(&dev->pdev->dev,
+ to_msqp(qp)->header_buf_size,
+ to_msqp(qp)->header_buf,
+ to_msqp(qp)->header_dma);
+ } else
+ mthca_free(&dev->qp_table.alloc, qp->qpn);
+}
+
+/* Create UD header for an MLX send and build a data segment for it */
+static int build_mlx_header(struct mthca_dev *dev, struct mthca_sqp *sqp,
+ int ind, struct ib_send_wr *wr,
+ struct mthca_mlx_seg *mlx,
+ struct mthca_data_seg *data)
+{
+ int header_size;
+ int err;
+
+ ib_ud_header_init(256, /* assume a MAD */
+ sqp->ud_header.grh_present,
+ &sqp->ud_header);
+
+ err = mthca_read_ah(dev, to_mah(wr->wr.ud.ah), &sqp->ud_header);
+ if (err)
+ return err;
+ mlx->flags &= ~cpu_to_be32(MTHCA_NEXT_SOLICIT | 1);
+ mlx->flags |= cpu_to_be32((!sqp->qp.ibqp.qp_num ? MTHCA_MLX_VL15 : 0) |
+ (sqp->ud_header.lrh.destination_lid == 0xffff ?
+ MTHCA_MLX_SLR : 0) |
+ (sqp->ud_header.lrh.service_level << 8));
+ mlx->rlid = sqp->ud_header.lrh.destination_lid;
+ mlx->vcrc = 0;
+
+ switch (wr->opcode) {
+ case IB_WR_SEND:
+ sqp->ud_header.bth.opcode = IB_OPCODE_UD_SEND_ONLY;
+ sqp->ud_header.immediate_present = 0;
+ break;
+ case IB_WR_SEND_WITH_IMM:
+ sqp->ud_header.bth.opcode = IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE;
+ sqp->ud_header.immediate_present = 1;
+ sqp->ud_header.immediate_data = wr->imm_data;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ sqp->ud_header.lrh.virtual_lane = !sqp->qp.ibqp.qp_num ? 15 : 0;
+ if (sqp->ud_header.lrh.destination_lid == 0xffff)
+ sqp->ud_header.lrh.source_lid = 0xffff;
+ sqp->ud_header.bth.solicited_event = !!(wr->send_flags & IB_SEND_SOLICITED);
+ if (!sqp->qp.ibqp.qp_num)
+ ib_get_cached_pkey(&dev->ib_dev, sqp->port,
+ sqp->pkey_index,
+ &sqp->ud_header.bth.pkey);
+ else
+ ib_get_cached_pkey(&dev->ib_dev, sqp->port,
+ wr->wr.ud.pkey_index,
+ &sqp->ud_header.bth.pkey);
+ cpu_to_be16s(&sqp->ud_header.bth.pkey);
+ sqp->ud_header.bth.destination_qpn = cpu_to_be32(wr->wr.ud.remote_qpn);
+ sqp->ud_header.bth.psn = cpu_to_be32((sqp->send_psn++) & ((1 << 24) - 1));
+ sqp->ud_header.deth.qkey = cpu_to_be32(wr->wr.ud.remote_qkey & 0x80000000 ?
+ sqp->qkey : wr->wr.ud.remote_qkey);
+ sqp->ud_header.deth.source_qpn = cpu_to_be32(sqp->qp.ibqp.qp_num);
+
+ header_size = ib_ud_header_pack(&sqp->ud_header,
+ sqp->header_buf +
+ ind * MTHCA_UD_HEADER_SIZE);
+
+ data->byte_count = cpu_to_be32(header_size);
+ data->lkey = cpu_to_be32(to_mpd(sqp->qp.ibqp.pd)->ntmr.ibmr.lkey);
+ data->addr = cpu_to_be64(sqp->header_dma +
+ ind * MTHCA_UD_HEADER_SIZE);
+
+ return 0;
+}
+
+static inline int mthca_wq_overflow(struct mthca_wq *wq, int nreq,
+ struct ib_cq *ib_cq)
+{
+ unsigned cur;
+ struct mthca_cq *cq;
+
+ cur = wq->head - wq->tail;
+ if (likely(cur + nreq < wq->max))
+ return 0;
+
+ cq = to_mcq(ib_cq);
+ spin_lock(&cq->lock);
+ cur = wq->head - wq->tail;
+ spin_unlock(&cq->lock);
+
+ return cur + nreq >= wq->max;
+}
+
+int mthca_tavor_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
+ struct ib_send_wr **bad_wr)
+{
+ struct mthca_dev *dev = to_mdev(ibqp->device);
+ struct mthca_qp *qp = to_mqp(ibqp);
+ void *wqe;
+ void *prev_wqe;
+ unsigned long flags;
+ int err = 0;
+ int nreq;
+ int i;
+ int size;
+ int size0 = 0;
+ u32 f0 = 0;
+ int ind;
+ u8 op0 = 0;
+
+ spin_lock_irqsave(&qp->sq.lock, flags);
+
+ /* XXX check that state is OK to post send */
+
+ ind = qp->sq.next_ind;
+
+ for (nreq = 0; wr; ++nreq, wr = wr->next) {
+ if (mthca_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq)) {
+ mthca_err(dev, "SQ %06x full (%u head, %u tail,"
+ " %d max, %d nreq)\n", qp->qpn,
+ qp->sq.head, qp->sq.tail,
+ qp->sq.max, nreq);
+ err = -ENOMEM;
+ *bad_wr = wr;
+ goto out;
+ }
+
+ wqe = get_send_wqe(qp, ind);
+ prev_wqe = qp->sq.last;
+ qp->sq.last = wqe;
+
+ ((struct mthca_next_seg *) wqe)->nda_op = 0;
+ ((struct mthca_next_seg *) wqe)->ee_nds = 0;
+ ((struct mthca_next_seg *) wqe)->flags =
+ ((wr->send_flags & IB_SEND_SIGNALED) ?
+ cpu_to_be32(MTHCA_NEXT_CQ_UPDATE) : 0) |
+ ((wr->send_flags & IB_SEND_SOLICITED) ?
+ cpu_to_be32(MTHCA_NEXT_SOLICIT) : 0) |
+ cpu_to_be32(1);
+ if (wr->opcode == IB_WR_SEND_WITH_IMM ||
+ wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM)
+ ((struct mthca_next_seg *) wqe)->flags = wr->imm_data;
+
+ wqe += sizeof (struct mthca_next_seg);
+ size = sizeof (struct mthca_next_seg) / 16;
+
+ switch (qp->transport) {
+ case RC:
+ switch (wr->opcode) {
+ case IB_WR_ATOMIC_CMP_AND_SWP:
+ case IB_WR_ATOMIC_FETCH_AND_ADD:
+ ((struct mthca_raddr_seg *) wqe)->raddr =
+ cpu_to_be64(wr->wr.atomic.remote_addr);
+ ((struct mthca_raddr_seg *) wqe)->rkey =
+ cpu_to_be32(wr->wr.atomic.rkey);
+ ((struct mthca_raddr_seg *) wqe)->reserved = 0;
+
+ wqe += sizeof (struct mthca_raddr_seg);
+
+ if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP) {
+ ((struct mthca_atomic_seg *) wqe)->swap_add =
+ cpu_to_be64(wr->wr.atomic.swap);
+ ((struct mthca_atomic_seg *) wqe)->compare =
+ cpu_to_be64(wr->wr.atomic.compare_add);
+ } else {
+ ((struct mthca_atomic_seg *) wqe)->swap_add =
+ cpu_to_be64(wr->wr.atomic.compare_add);
+ ((struct mthca_atomic_seg *) wqe)->compare = 0;
+ }
+
+ wqe += sizeof (struct mthca_atomic_seg);
+ size += sizeof (struct mthca_raddr_seg) / 16 +
+ sizeof (struct mthca_atomic_seg);
+ break;
+
+ case IB_WR_RDMA_WRITE:
+ case IB_WR_RDMA_WRITE_WITH_IMM:
+ case IB_WR_RDMA_READ:
+ ((struct mthca_raddr_seg *) wqe)->raddr =
+ cpu_to_be64(wr->wr.rdma.remote_addr);
+ ((struct mthca_raddr_seg *) wqe)->rkey =
+ cpu_to_be32(wr->wr.rdma.rkey);
+ ((struct mthca_raddr_seg *) wqe)->reserved = 0;
+ wqe += sizeof (struct mthca_raddr_seg);
+ size += sizeof (struct mthca_raddr_seg) / 16;
+ break;
+
+ default:
+ /* No extra segments required for sends */
+ break;
+ }
+
+ break;
+
+ case UD:
+ ((struct mthca_tavor_ud_seg *) wqe)->lkey =
+ cpu_to_be32(to_mah(wr->wr.ud.ah)->key);
+ ((struct mthca_tavor_ud_seg *) wqe)->av_addr =
+ cpu_to_be64(to_mah(wr->wr.ud.ah)->avdma);
+ ((struct mthca_tavor_ud_seg *) wqe)->dqpn =
+ cpu_to_be32(wr->wr.ud.remote_qpn);
+ ((struct mthca_tavor_ud_seg *) wqe)->qkey =
+ cpu_to_be32(wr->wr.ud.remote_qkey);
+
+ wqe += sizeof (struct mthca_tavor_ud_seg);
+ size += sizeof (struct mthca_tavor_ud_seg) / 16;
+ break;
+
+ case MLX:
+ err = build_mlx_header(dev, to_msqp(qp), ind, wr,
+ wqe - sizeof (struct mthca_next_seg),
+ wqe);
+ if (err) {
+ *bad_wr = wr;
+ goto out;
+ }
+ wqe += sizeof (struct mthca_data_seg);
+ size += sizeof (struct mthca_data_seg) / 16;
+ break;
+ }
+
+ if (wr->num_sge > qp->sq.max_gs) {
+ mthca_err(dev, "too many gathers\n");
+ err = -EINVAL;
+ *bad_wr = wr;
+ goto out;
+ }
+
+ for (i = 0; i < wr->num_sge; ++i) {
+ ((struct mthca_data_seg *) wqe)->byte_count =
+ cpu_to_be32(wr->sg_list[i].length);
+ ((struct mthca_data_seg *) wqe)->lkey =
+ cpu_to_be32(wr->sg_list[i].lkey);
+ ((struct mthca_data_seg *) wqe)->addr =
+ cpu_to_be64(wr->sg_list[i].addr);
+ wqe += sizeof (struct mthca_data_seg);
+ size += sizeof (struct mthca_data_seg) / 16;
+ }
+
+ /* Add one more inline data segment for ICRC */
+ if (qp->transport == MLX) {
+ ((struct mthca_data_seg *) wqe)->byte_count =
+ cpu_to_be32((1 << 31) | 4);
+ ((u32 *) wqe)[1] = 0;
+ wqe += sizeof (struct mthca_data_seg);
+ size += sizeof (struct mthca_data_seg) / 16;
+ }
+
+ qp->wrid[ind + qp->rq.max] = wr->wr_id;
+
+ if (wr->opcode >= ARRAY_SIZE(mthca_opcode)) {
+ mthca_err(dev, "opcode invalid\n");
+ err = -EINVAL;
+ *bad_wr = wr;
+ goto out;
+ }
+
+ if (prev_wqe) {
+ ((struct mthca_next_seg *) prev_wqe)->nda_op =
+ cpu_to_be32(((ind << qp->sq.wqe_shift) +
+ qp->send_wqe_offset) |
+ mthca_opcode[wr->opcode]);
+ wmb();
+ ((struct mthca_next_seg *) prev_wqe)->ee_nds =
+ cpu_to_be32((size0 ? 0 : MTHCA_NEXT_DBD) | size);
+ }
+
+ if (!size0) {
+ size0 = size;
+ op0 = mthca_opcode[wr->opcode];
+ }
+
+ ++ind;
+ if (unlikely(ind >= qp->sq.max))
+ ind -= qp->sq.max;
+ }
+
+out:
+ if (likely(nreq)) {
+ u32 doorbell[2];
+
+ doorbell[0] = cpu_to_be32(((qp->sq.next_ind << qp->sq.wqe_shift) +
+ qp->send_wqe_offset) | f0 | op0);
+ doorbell[1] = cpu_to_be32((qp->qpn << 8) | size0);
+
+ wmb();
+
+ mthca_write64(doorbell,
+ dev->kar + MTHCA_SEND_DOORBELL,
+ MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock));
+ }
+
+ qp->sq.next_ind = ind;
+ qp->sq.head += nreq;
+
+ spin_unlock_irqrestore(&qp->sq.lock, flags);
+ return err;
+}
+
+int mthca_tavor_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
+ struct ib_recv_wr **bad_wr)
+{
+ struct mthca_dev *dev = to_mdev(ibqp->device);
+ struct mthca_qp *qp = to_mqp(ibqp);
+ unsigned long flags;
+ int err = 0;
+ int nreq;
+ int i;
+ int size;
+ int size0 = 0;
+ int ind;
+ void *wqe;
+ void *prev_wqe;
+
+ spin_lock_irqsave(&qp->rq.lock, flags);
+
+ /* XXX check that state is OK to post receive */
+
+ ind = qp->rq.next_ind;
+
+ for (nreq = 0; wr; ++nreq, wr = wr->next) {
+ if (mthca_wq_overflow(&qp->rq, nreq, qp->ibqp.recv_cq)) {
+ mthca_err(dev, "RQ %06x full (%u head, %u tail,"
+ " %d max, %d nreq)\n", qp->qpn,
+ qp->rq.head, qp->rq.tail,
+ qp->rq.max, nreq);
+ err = -ENOMEM;
+ *bad_wr = wr;
+ goto out;
+ }
+
+ wqe = get_recv_wqe(qp, ind);
+ prev_wqe = qp->rq.last;
+ qp->rq.last = wqe;
+
+ ((struct mthca_next_seg *) wqe)->nda_op = 0;
+ ((struct mthca_next_seg *) wqe)->ee_nds =
+ cpu_to_be32(MTHCA_NEXT_DBD);
+ ((struct mthca_next_seg *) wqe)->flags = 0;
+
+ wqe += sizeof (struct mthca_next_seg);
+ size = sizeof (struct mthca_next_seg) / 16;
+
+ if (unlikely(wr->num_sge > qp->rq.max_gs)) {
+ err = -EINVAL;
+ *bad_wr = wr;
+ goto out;
+ }
+
+ for (i = 0; i < wr->num_sge; ++i) {
+ ((struct mthca_data_seg *) wqe)->byte_count =
+ cpu_to_be32(wr->sg_list[i].length);
+ ((struct mthca_data_seg *) wqe)->lkey =
+ cpu_to_be32(wr->sg_list[i].lkey);
+ ((struct mthca_data_seg *) wqe)->addr =
+ cpu_to_be64(wr->sg_list[i].addr);
+ wqe += sizeof (struct mthca_data_seg);
+ size += sizeof (struct mthca_data_seg) / 16;
+ }
+
+ qp->wrid[ind] = wr->wr_id;
+
+ if (likely(prev_wqe)) {
+ ((struct mthca_next_seg *) prev_wqe)->nda_op =
+ cpu_to_be32((ind << qp->rq.wqe_shift) | 1);
+ wmb();
+ ((struct mthca_next_seg *) prev_wqe)->ee_nds =
+ cpu_to_be32(MTHCA_NEXT_DBD | size);
+ }
+
+ if (!size0)
+ size0 = size;
+
+ ++ind;
+ if (unlikely(ind >= qp->rq.max))
+ ind -= qp->rq.max;
+ }
+
+out:
+ if (likely(nreq)) {
+ u32 doorbell[2];
+
+ doorbell[0] = cpu_to_be32((qp->rq.next_ind << qp->rq.wqe_shift) | size0);
+ doorbell[1] = cpu_to_be32((qp->qpn << 8) | nreq);
+
+ wmb();
+
+ mthca_write64(doorbell,
+ dev->kar + MTHCA_RECEIVE_DOORBELL,
+ MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock));
+ }
+
+ qp->rq.next_ind = ind;
+ qp->rq.head += nreq;
+
+ spin_unlock_irqrestore(&qp->rq.lock, flags);
+ return err;
+}
+
+int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
+ struct ib_send_wr **bad_wr)
+{
+ struct mthca_dev *dev = to_mdev(ibqp->device);
+ struct mthca_qp *qp = to_mqp(ibqp);
+ void *wqe;
+ void *prev_wqe;
+ unsigned long flags;
+ int err = 0;
+ int nreq;
+ int i;
+ int size;
+ int size0 = 0;
+ u32 f0 = 0;
+ int ind;
+ u8 op0 = 0;
+
+ spin_lock_irqsave(&qp->sq.lock, flags);
+
+ /* XXX check that state is OK to post send */
+
+ ind = qp->sq.head & (qp->sq.max - 1);
+
+ for (nreq = 0; wr; ++nreq, wr = wr->next) {
+ if (mthca_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq)) {
+ mthca_err(dev, "SQ %06x full (%u head, %u tail,"
+ " %d max, %d nreq)\n", qp->qpn,
+ qp->sq.head, qp->sq.tail,
+ qp->sq.max, nreq);
+ err = -ENOMEM;
+ *bad_wr = wr;
+ goto out;
+ }
+
+ wqe = get_send_wqe(qp, ind);
+ prev_wqe = qp->sq.last;
+ qp->sq.last = wqe;
+
+ ((struct mthca_next_seg *) wqe)->flags =
+ ((wr->send_flags & IB_SEND_SIGNALED) ?
+ cpu_to_be32(MTHCA_NEXT_CQ_UPDATE) : 0) |
+ ((wr->send_flags & IB_SEND_SOLICITED) ?
+ cpu_to_be32(MTHCA_NEXT_SOLICIT) : 0) |
+ cpu_to_be32(1);
+ if (wr->opcode == IB_WR_SEND_WITH_IMM ||
+ wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM)
+ ((struct mthca_next_seg *) wqe)->flags = wr->imm_data;
+
+ wqe += sizeof (struct mthca_next_seg);
+ size = sizeof (struct mthca_next_seg) / 16;
+
+ switch (qp->transport) {
+ case UD:
+ memcpy(((struct mthca_arbel_ud_seg *) wqe)->av,
+ to_mah(wr->wr.ud.ah)->av, MTHCA_AV_SIZE);
+ ((struct mthca_arbel_ud_seg *) wqe)->dqpn =
+ cpu_to_be32(wr->wr.ud.remote_qpn);
+ ((struct mthca_arbel_ud_seg *) wqe)->qkey =
+ cpu_to_be32(wr->wr.ud.remote_qkey);
+
+ wqe += sizeof (struct mthca_arbel_ud_seg);
+ size += sizeof (struct mthca_arbel_ud_seg) / 16;
+ break;
+
+ case MLX:
+ err = build_mlx_header(dev, to_msqp(qp), ind, wr,
+ wqe - sizeof (struct mthca_next_seg),
+ wqe);
+ if (err) {
+ *bad_wr = wr;
+ goto out;
+ }
+ wqe += sizeof (struct mthca_data_seg);
+ size += sizeof (struct mthca_data_seg) / 16;
+ break;
+ }
+
+ if (wr->num_sge > qp->sq.max_gs) {
+ mthca_err(dev, "too many gathers\n");
+ err = -EINVAL;
+ *bad_wr = wr;
+ goto out;
+ }
+
+ for (i = 0; i < wr->num_sge; ++i) {
+ ((struct mthca_data_seg *) wqe)->byte_count =
+ cpu_to_be32(wr->sg_list[i].length);
+ ((struct mthca_data_seg *) wqe)->lkey =
+ cpu_to_be32(wr->sg_list[i].lkey);
+ ((struct mthca_data_seg *) wqe)->addr =
+ cpu_to_be64(wr->sg_list[i].addr);
+ wqe += sizeof (struct mthca_data_seg);
+ size += sizeof (struct mthca_data_seg) / 16;
+ }
+
+ /* Add one more inline data segment for ICRC */
+ if (qp->transport == MLX) {
+ ((struct mthca_data_seg *) wqe)->byte_count =
+ cpu_to_be32((1 << 31) | 4);
+ ((u32 *) wqe)[1] = 0;
+ wqe += sizeof (struct mthca_data_seg);
+ size += sizeof (struct mthca_data_seg) / 16;
+ }
+
+ qp->wrid[ind + qp->rq.max] = wr->wr_id;
+
+ if (wr->opcode >= ARRAY_SIZE(mthca_opcode)) {
+ mthca_err(dev, "opcode invalid\n");
+ err = -EINVAL;
+ *bad_wr = wr;
+ goto out;
+ }
+
+ if (likely(prev_wqe)) {
+ ((struct mthca_next_seg *) prev_wqe)->nda_op =
+ cpu_to_be32(((ind << qp->sq.wqe_shift) +
+ qp->send_wqe_offset) |
+ mthca_opcode[wr->opcode]);
+ wmb();
+ ((struct mthca_next_seg *) prev_wqe)->ee_nds =
+ cpu_to_be32(MTHCA_NEXT_DBD | size);
+ }
+
+ if (!size0) {
+ size0 = size;
+ op0 = mthca_opcode[wr->opcode];
+ }
+
+ ++ind;
+ if (unlikely(ind >= qp->sq.max))
+ ind -= qp->sq.max;
+ }
+
+out:
+ if (likely(nreq)) {
+ u32 doorbell[2];
+
+ doorbell[0] = cpu_to_be32((nreq << 24) |
+ ((qp->sq.head & 0xffff) << 8) |
+ f0 | op0);
+ doorbell[1] = cpu_to_be32((qp->qpn << 8) | size0);
+
+ qp->sq.head += nreq;
+
+ /*
+ * Make sure that descriptors are written before
+ * doorbell record.
+ */
+ wmb();
+ *qp->sq.db = cpu_to_be32(qp->sq.head & 0xffff);
+
+ /*
+ * Make sure doorbell record is written before we
+ * write MMIO send doorbell.
+ */
+ wmb();
+ mthca_write64(doorbell,
+ dev->kar + MTHCA_SEND_DOORBELL,
+ MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock));
+ }
+
+ spin_unlock_irqrestore(&qp->sq.lock, flags);
+ return err;
+}
+
+int mthca_arbel_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
+ struct ib_recv_wr **bad_wr)
+{
+ struct mthca_dev *dev = to_mdev(ibqp->device);
+ struct mthca_qp *qp = to_mqp(ibqp);
+ unsigned long flags;
+ int err = 0;
+ int nreq;
+ int ind;
+ int i;
+ void *wqe;
+
+ spin_lock_irqsave(&qp->rq.lock, flags);
+
+ /* XXX check that state is OK to post receive */
+
+ ind = qp->rq.head & (qp->rq.max - 1);
+
+ for (nreq = 0; wr; ++nreq, wr = wr->next) {
+ if (mthca_wq_overflow(&qp->rq, nreq, qp->ibqp.recv_cq)) {
+ mthca_err(dev, "RQ %06x full (%u head, %u tail,"
+ " %d max, %d nreq)\n", qp->qpn,
+ qp->rq.head, qp->rq.tail,
+ qp->rq.max, nreq);
+ err = -ENOMEM;
+ *bad_wr = wr;
+ goto out;
+ }
+
+ wqe = get_recv_wqe(qp, ind);
+
+ ((struct mthca_next_seg *) wqe)->flags = 0;
+
+ wqe += sizeof (struct mthca_next_seg);
+
+ if (unlikely(wr->num_sge > qp->rq.max_gs)) {
+ err = -EINVAL;
+ *bad_wr = wr;
+ goto out;
+ }
+
+ for (i = 0; i < wr->num_sge; ++i) {
+ ((struct mthca_data_seg *) wqe)->byte_count =
+ cpu_to_be32(wr->sg_list[i].length);
+ ((struct mthca_data_seg *) wqe)->lkey =
+ cpu_to_be32(wr->sg_list[i].lkey);
+ ((struct mthca_data_seg *) wqe)->addr =
+ cpu_to_be64(wr->sg_list[i].addr);
+ wqe += sizeof (struct mthca_data_seg);
+ }
+
+ if (i < qp->rq.max_gs) {
+ ((struct mthca_data_seg *) wqe)->byte_count = 0;
+ ((struct mthca_data_seg *) wqe)->lkey = cpu_to_be32(0x100);
+ ((struct mthca_data_seg *) wqe)->addr = 0;
+ }
+
+ qp->wrid[ind] = wr->wr_id;
+
+ ++ind;
+ if (unlikely(ind >= qp->rq.max))
+ ind -= qp->rq.max;
+ }
+out:
+ if (likely(nreq)) {
+ qp->rq.head += nreq;
+
+ /*
+ * Make sure that descriptors are written before
+ * doorbell record.
+ */
+ wmb();
+ *qp->rq.db = cpu_to_be32(qp->rq.head & 0xffff);
+ }
+
+ spin_unlock_irqrestore(&qp->rq.lock, flags);
+ return err;
+}
+
+int mthca_free_err_wqe(struct mthca_dev *dev, struct mthca_qp *qp, int is_send,
+ int index, int *dbd, u32 *new_wqe)
+{
+ struct mthca_next_seg *next;
+
+ if (is_send)
+ next = get_send_wqe(qp, index);
+ else
+ next = get_recv_wqe(qp, index);
+
+ if (dev->hca_type == ARBEL_NATIVE)
+ *dbd = 1;
+ else
+ *dbd = !!(next->ee_nds & cpu_to_be32(MTHCA_NEXT_DBD));
+ if (next->ee_nds & cpu_to_be32(0x3f))
+ *new_wqe = (next->nda_op & cpu_to_be32(~0x3f)) |
+ (next->ee_nds & cpu_to_be32(0x3f));
+ else
+ *new_wqe = 0;
+
+ return 0;
+}
+
+int __devinit mthca_init_qp_table(struct mthca_dev *dev)
+{
+ int err;
+ u8 status;
+ int i;
+
+ spin_lock_init(&dev->qp_table.lock);
+
+ /*
+ * We reserve 2 extra QPs per port for the special QPs. The
+ * special QP for port 1 has to be even, so round up.
+ */
+ dev->qp_table.sqp_start = (dev->limits.reserved_qps + 1) & ~1UL;
+ err = mthca_alloc_init(&dev->qp_table.alloc,
+ dev->limits.num_qps,
+ (1 << 24) - 1,
+ dev->qp_table.sqp_start +
+ MTHCA_MAX_PORTS * 2);
+ if (err)
+ return err;
+
+ err = mthca_array_init(&dev->qp_table.qp,
+ dev->limits.num_qps);
+ if (err) {
+ mthca_alloc_cleanup(&dev->qp_table.alloc);
+ return err;
+ }
+
+ for (i = 0; i < 2; ++i) {
+ err = mthca_CONF_SPECIAL_QP(dev, i ? IB_QPT_GSI : IB_QPT_SMI,
+ dev->qp_table.sqp_start + i * 2,
+ &status);
+ if (err)
+ goto err_out;
+ if (status) {
+ mthca_warn(dev, "CONF_SPECIAL_QP returned "
+ "status %02x, aborting.\n",
+ status);
+ err = -EINVAL;
+ goto err_out;
+ }
+ }
+ return 0;
+
+ err_out:
+ for (i = 0; i < 2; ++i)
+ mthca_CONF_SPECIAL_QP(dev, i, 0, &status);
+
+ mthca_array_cleanup(&dev->qp_table.qp, dev->limits.num_qps);
+ mthca_alloc_cleanup(&dev->qp_table.alloc);
+
+ return err;
+}
+
+void __devexit mthca_cleanup_qp_table(struct mthca_dev *dev)
+{
+ int i;
+ u8 status;
+
+ for (i = 0; i < 2; ++i)
+ mthca_CONF_SPECIAL_QP(dev, i, 0, &status);
+
+ mthca_alloc_cleanup(&dev->qp_table.alloc);
+}
diff --git a/drivers/infiniband/hw/mthca/mthca_reset.c b/drivers/infiniband/hw/mthca/mthca_reset.c
new file mode 100644
index 000000000000..ce3fff7d02b7
--- /dev/null
+++ b/drivers/infiniband/hw/mthca/mthca_reset.c
@@ -0,0 +1,232 @@
+/*
+ * Copyright (c) 2004 Topspin Communications. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * $Id: mthca_reset.c 1349 2004-12-16 21:09:43Z roland $
+ */
+
+#include <linux/config.h>
+#include <linux/init.h>
+#include <linux/errno.h>
+#include <linux/pci.h>
+#include <linux/delay.h>
+
+#include "mthca_dev.h"
+#include "mthca_cmd.h"
+
+int mthca_reset(struct mthca_dev *mdev)
+{
+ int i;
+ int err = 0;
+ u32 *hca_header = NULL;
+ u32 *bridge_header = NULL;
+ struct pci_dev *bridge = NULL;
+
+#define MTHCA_RESET_OFFSET 0xf0010
+#define MTHCA_RESET_VALUE swab32(1)
+
+ /*
+ * Reset the chip. This is somewhat ugly because we have to
+ * save off the PCI header before reset and then restore it
+ * after the chip reboots. We skip config space offsets 22
+ * and 23 since those have a special meaning.
+ *
+ * To make matters worse, for Tavor (PCI-X HCA) we have to
+ * find the associated bridge device and save off its PCI
+ * header as well.
+ */
+
+ if (mdev->hca_type == TAVOR) {
+ /* Look for the bridge -- its device ID will be 2 more
+ than HCA's device ID. */
+ while ((bridge = pci_get_device(mdev->pdev->vendor,
+ mdev->pdev->device + 2,
+ bridge)) != NULL) {
+ if (bridge->hdr_type == PCI_HEADER_TYPE_BRIDGE &&
+ bridge->subordinate == mdev->pdev->bus) {
+ mthca_dbg(mdev, "Found bridge: %s (%s)\n",
+ pci_pretty_name(bridge), pci_name(bridge));
+ break;
+ }
+ }
+
+ if (!bridge) {
+ /*
+ * Didn't find a bridge for a Tavor device --
+ * assume we're in no-bridge mode and hope for
+ * the best.
+ */
+ mthca_warn(mdev, "No bridge found for %s (%s)\n",
+ pci_pretty_name(mdev->pdev), pci_name(mdev->pdev));
+ }
+
+ }
+
+ /* For Arbel do we need to save off the full 4K PCI Express header?? */
+ hca_header = kmalloc(256, GFP_KERNEL);
+ if (!hca_header) {
+ err = -ENOMEM;
+ mthca_err(mdev, "Couldn't allocate memory to save HCA "
+ "PCI header, aborting.\n");
+ goto out;
+ }
+
+ for (i = 0; i < 64; ++i) {
+ if (i == 22 || i == 23)
+ continue;
+ if (pci_read_config_dword(mdev->pdev, i * 4, hca_header + i)) {
+ err = -ENODEV;
+ mthca_err(mdev, "Couldn't save HCA "
+ "PCI header, aborting.\n");
+ goto out;
+ }
+ }
+
+ if (bridge) {
+ bridge_header = kmalloc(256, GFP_KERNEL);
+ if (!bridge_header) {
+ err = -ENOMEM;
+ mthca_err(mdev, "Couldn't allocate memory to save HCA "
+ "bridge PCI header, aborting.\n");
+ goto out;
+ }
+
+ for (i = 0; i < 64; ++i) {
+ if (i == 22 || i == 23)
+ continue;
+ if (pci_read_config_dword(bridge, i * 4, bridge_header + i)) {
+ err = -ENODEV;
+ mthca_err(mdev, "Couldn't save HCA bridge "
+ "PCI header, aborting.\n");
+ goto out;
+ }
+ }
+ }
+
+ /* actually hit reset */
+ {
+ void __iomem *reset = ioremap(pci_resource_start(mdev->pdev, 0) +
+ MTHCA_RESET_OFFSET, 4);
+
+ if (!reset) {
+ err = -ENOMEM;
+ mthca_err(mdev, "Couldn't map HCA reset register, "
+ "aborting.\n");
+ goto out;
+ }
+
+ writel(MTHCA_RESET_VALUE, reset);
+ iounmap(reset);
+ }
+
+ /* Docs say to wait one second before accessing device */
+ msleep(1000);
+
+ /* Now wait for PCI device to start responding again */
+ {
+ u32 v;
+ int c = 0;
+
+ for (c = 0; c < 100; ++c) {
+ if (pci_read_config_dword(bridge ? bridge : mdev->pdev, 0, &v)) {
+ err = -ENODEV;
+ mthca_err(mdev, "Couldn't access HCA after reset, "
+ "aborting.\n");
+ goto out;
+ }
+
+ if (v != 0xffffffff)
+ goto good;
+
+ msleep(100);
+ }
+
+ err = -ENODEV;
+ mthca_err(mdev, "PCI device did not come back after reset, "
+ "aborting.\n");
+ goto out;
+ }
+
+good:
+ /* Now restore the PCI headers */
+ if (bridge) {
+ /*
+ * Bridge control register is at 0x3e, so we'll
+ * naturally restore it last in this loop.
+ */
+ for (i = 0; i < 16; ++i) {
+ if (i * 4 == PCI_COMMAND)
+ continue;
+
+ if (pci_write_config_dword(bridge, i * 4, bridge_header[i])) {
+ err = -ENODEV;
+ mthca_err(mdev, "Couldn't restore HCA bridge reg %x, "
+ "aborting.\n", i);
+ goto out;
+ }
+ }
+
+ if (pci_write_config_dword(bridge, PCI_COMMAND,
+ bridge_header[PCI_COMMAND / 4])) {
+ err = -ENODEV;
+ mthca_err(mdev, "Couldn't restore HCA bridge COMMAND, "
+ "aborting.\n");
+ goto out;
+ }
+ }
+
+ for (i = 0; i < 16; ++i) {
+ if (i * 4 == PCI_COMMAND)
+ continue;
+
+ if (pci_write_config_dword(mdev->pdev, i * 4, hca_header[i])) {
+ err = -ENODEV;
+ mthca_err(mdev, "Couldn't restore HCA reg %x, "
+ "aborting.\n", i);
+ goto out;
+ }
+ }
+
+ if (pci_write_config_dword(mdev->pdev, PCI_COMMAND,
+ hca_header[PCI_COMMAND / 4])) {
+ err = -ENODEV;
+ mthca_err(mdev, "Couldn't restore HCA COMMAND, "
+ "aborting.\n");
+ goto out;
+ }
+
+out:
+ if (bridge)
+ pci_dev_put(bridge);
+ kfree(bridge_header);
+ kfree(hca_header);
+
+ return err;
+}
diff --git a/drivers/infiniband/hw/mthca/mthca_uar.c b/drivers/infiniband/hw/mthca/mthca_uar.c
new file mode 100644
index 000000000000..1c8791ded6ff
--- /dev/null
+++ b/drivers/infiniband/hw/mthca/mthca_uar.c
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 2005 Topspin Communications. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * $Id$
+ */
+
+#include "mthca_dev.h"
+#include "mthca_memfree.h"
+
+int mthca_uar_alloc(struct mthca_dev *dev, struct mthca_uar *uar)
+{
+ uar->index = mthca_alloc(&dev->uar_table.alloc);
+ if (uar->index == -1)
+ return -ENOMEM;
+
+ uar->pfn = (pci_resource_start(dev->pdev, 2) >> PAGE_SHIFT) + uar->index;
+
+ return 0;
+}
+
+void mthca_uar_free(struct mthca_dev *dev, struct mthca_uar *uar)
+{
+ mthca_free(&dev->uar_table.alloc, uar->index);
+}
+
+int mthca_init_uar_table(struct mthca_dev *dev)
+{
+ int ret;
+
+ ret = mthca_alloc_init(&dev->uar_table.alloc,
+ dev->limits.num_uars,
+ dev->limits.num_uars - 1,
+ dev->limits.reserved_uars);
+ if (ret)
+ return ret;
+
+ ret = mthca_init_db_tab(dev);
+ if (ret)
+ mthca_alloc_cleanup(&dev->uar_table.alloc);
+
+ return ret;
+}
+
+void mthca_cleanup_uar_table(struct mthca_dev *dev)
+{
+ mthca_cleanup_db_tab(dev);
+
+ /* XXX check if any UARs are still allocated? */
+ mthca_alloc_cleanup(&dev->uar_table.alloc);
+}