summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--drivers/infiniband/hw/mlx4/alias_GUID.c2
-rw-r--r--drivers/infiniband/hw/mlx4/mad.c3
-rw-r--r--drivers/infiniband/hw/mlx4/main.c17
-rw-r--r--drivers/infiniband/hw/mlx4/mr.c6
-rw-r--r--drivers/infiniband/hw/mlx4/sysfs.c6
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/alloc.c15
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/catas.c294
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/cmd.c407
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/en_cq.c4
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/en_ethtool.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/en_main.c4
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/en_netdev.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/en_rx.c4
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/en_tx.c4
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/eq.c52
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/icm.c11
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/intf.c8
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/main.c392
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/mcg.c6
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/mlx4.h27
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/mr.c8
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/pd.c6
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/port.c17
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/reset.c23
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/resource_tracker.c36
-rw-r--r--include/linux/mlx4/cmd.h3
-rw-r--r--include/linux/mlx4/device.h34
27 files changed, 1046 insertions, 347 deletions
diff --git a/drivers/infiniband/hw/mlx4/alias_GUID.c b/drivers/infiniband/hw/mlx4/alias_GUID.c
index 0eb141c41416..a31e031afd87 100644
--- a/drivers/infiniband/hw/mlx4/alias_GUID.c
+++ b/drivers/infiniband/hw/mlx4/alias_GUID.c
@@ -154,7 +154,7 @@ void mlx4_ib_notify_slaves_on_guid_change(struct mlx4_ib_dev *dev,
continue;
slave_id = (block_num * NUM_ALIAS_GUID_IN_REC) + i ;
- if (slave_id >= dev->dev->num_vfs + 1)
+ if (slave_id >= dev->dev->persist->num_vfs + 1)
return;
tmp_cur_ag = *(__be64 *)&p_data[i * GUID_REC_SIZE];
form_cache_ag = get_cached_alias_guid(dev, port_num,
diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c
index 82a7dd87089b..c7619716c31d 100644
--- a/drivers/infiniband/hw/mlx4/mad.c
+++ b/drivers/infiniband/hw/mlx4/mad.c
@@ -1951,7 +1951,8 @@ static int mlx4_ib_alloc_demux_ctx(struct mlx4_ib_dev *dev,
ctx->ib_dev = &dev->ib_dev;
for (i = 0;
- i < min(dev->dev->caps.sqp_demux, (u16)(dev->dev->num_vfs + 1));
+ i < min(dev->dev->caps.sqp_demux,
+ (u16)(dev->dev->persist->num_vfs + 1));
i++) {
struct mlx4_active_ports actv_ports =
mlx4_get_active_ports(dev->dev, i);
diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index 57ecc5b204f3..b4fa6f658800 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -198,7 +198,7 @@ static int mlx4_ib_query_device(struct ib_device *ibdev,
props->vendor_id = be32_to_cpup((__be32 *) (out_mad->data + 36)) &
0xffffff;
- props->vendor_part_id = dev->dev->pdev->device;
+ props->vendor_part_id = dev->dev->persist->pdev->device;
props->hw_ver = be32_to_cpup((__be32 *) (out_mad->data + 32));
memcpy(&props->sys_image_guid, out_mad->data + 4, 8);
@@ -1375,7 +1375,7 @@ static ssize_t show_hca(struct device *device, struct device_attribute *attr,
{
struct mlx4_ib_dev *dev =
container_of(device, struct mlx4_ib_dev, ib_dev.dev);
- return sprintf(buf, "MT%d\n", dev->dev->pdev->device);
+ return sprintf(buf, "MT%d\n", dev->dev->persist->pdev->device);
}
static ssize_t show_fw_ver(struct device *device, struct device_attribute *attr,
@@ -1937,7 +1937,8 @@ static void init_pkeys(struct mlx4_ib_dev *ibdev)
int i;
if (mlx4_is_master(ibdev->dev)) {
- for (slave = 0; slave <= ibdev->dev->num_vfs; ++slave) {
+ for (slave = 0; slave <= ibdev->dev->persist->num_vfs;
+ ++slave) {
for (port = 1; port <= ibdev->dev->caps.num_ports; ++port) {
for (i = 0;
i < ibdev->dev->phys_caps.pkey_phys_table_len[port];
@@ -1994,7 +1995,7 @@ static void mlx4_ib_alloc_eqs(struct mlx4_dev *dev, struct mlx4_ib_dev *ibdev)
mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB) {
for (j = 0; j < eq_per_port; j++) {
snprintf(name, sizeof(name), "mlx4-ib-%d-%d@%s",
- i, j, dev->pdev->bus->name);
+ i, j, dev->persist->pdev->bus->name);
/* Set IRQ for specific name (per ring) */
if (mlx4_assign_eq(dev, name, NULL,
&ibdev->eq_table[eq])) {
@@ -2058,7 +2059,8 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
ibdev = (struct mlx4_ib_dev *) ib_alloc_device(sizeof *ibdev);
if (!ibdev) {
- dev_err(&dev->pdev->dev, "Device struct alloc failed\n");
+ dev_err(&dev->persist->pdev->dev,
+ "Device struct alloc failed\n");
return NULL;
}
@@ -2085,7 +2087,7 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
ibdev->num_ports = num_ports;
ibdev->ib_dev.phys_port_cnt = ibdev->num_ports;
ibdev->ib_dev.num_comp_vectors = dev->caps.num_comp_vectors;
- ibdev->ib_dev.dma_device = &dev->pdev->dev;
+ ibdev->ib_dev.dma_device = &dev->persist->pdev->dev;
if (dev->caps.userspace_caps)
ibdev->ib_dev.uverbs_abi_ver = MLX4_IB_UVERBS_ABI_VERSION;
@@ -2236,7 +2238,8 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
sizeof(long),
GFP_KERNEL);
if (!ibdev->ib_uc_qpns_bitmap) {
- dev_err(&dev->pdev->dev, "bit map alloc failed\n");
+ dev_err(&dev->persist->pdev->dev,
+ "bit map alloc failed\n");
goto err_steer_qp_release;
}
diff --git a/drivers/infiniband/hw/mlx4/mr.c b/drivers/infiniband/hw/mlx4/mr.c
index c36ccbd9a644..e0d271782d0a 100644
--- a/drivers/infiniband/hw/mlx4/mr.c
+++ b/drivers/infiniband/hw/mlx4/mr.c
@@ -401,7 +401,8 @@ struct ib_fast_reg_page_list *mlx4_ib_alloc_fast_reg_page_list(struct ib_device
if (!mfrpl->ibfrpl.page_list)
goto err_free;
- mfrpl->mapped_page_list = dma_alloc_coherent(&dev->dev->pdev->dev,
+ mfrpl->mapped_page_list = dma_alloc_coherent(&dev->dev->persist->
+ pdev->dev,
size, &mfrpl->map,
GFP_KERNEL);
if (!mfrpl->mapped_page_list)
@@ -423,7 +424,8 @@ void mlx4_ib_free_fast_reg_page_list(struct ib_fast_reg_page_list *page_list)
struct mlx4_ib_fast_reg_page_list *mfrpl = to_mfrpl(page_list);
int size = page_list->max_page_list_len * sizeof (u64);
- dma_free_coherent(&dev->dev->pdev->dev, size, mfrpl->mapped_page_list,
+ dma_free_coherent(&dev->dev->persist->pdev->dev, size,
+ mfrpl->mapped_page_list,
mfrpl->map);
kfree(mfrpl->ibfrpl.page_list);
kfree(mfrpl);
diff --git a/drivers/infiniband/hw/mlx4/sysfs.c b/drivers/infiniband/hw/mlx4/sysfs.c
index cb4c66e723b5..d10c2b8a5dad 100644
--- a/drivers/infiniband/hw/mlx4/sysfs.c
+++ b/drivers/infiniband/hw/mlx4/sysfs.c
@@ -375,7 +375,7 @@ static void get_name(struct mlx4_ib_dev *dev, char *name, int i, int max)
char base_name[9];
/* pci_name format is: bus:dev:func -> xxxx:yy:zz.n */
- strlcpy(name, pci_name(dev->dev->pdev), max);
+ strlcpy(name, pci_name(dev->dev->persist->pdev), max);
strncpy(base_name, name, 8); /*till xxxx:yy:*/
base_name[8] = '\0';
/* with no ARI only 3 last bits are used so when the fn is higher than 8
@@ -792,7 +792,7 @@ static int register_pkey_tree(struct mlx4_ib_dev *device)
if (!mlx4_is_master(device->dev))
return 0;
- for (i = 0; i <= device->dev->num_vfs; ++i)
+ for (i = 0; i <= device->dev->persist->num_vfs; ++i)
register_one_pkey_tree(device, i);
return 0;
@@ -807,7 +807,7 @@ static void unregister_pkey_tree(struct mlx4_ib_dev *device)
if (!mlx4_is_master(device->dev))
return;
- for (slave = device->dev->num_vfs; slave >= 0; --slave) {
+ for (slave = device->dev->persist->num_vfs; slave >= 0; --slave) {
list_for_each_entry_safe(p, t,
&device->pkeys.pkey_port_list[slave],
entry) {
diff --git a/drivers/net/ethernet/mellanox/mlx4/alloc.c b/drivers/net/ethernet/mellanox/mlx4/alloc.c
index 963dd7e6d547..a716c26e0d99 100644
--- a/drivers/net/ethernet/mellanox/mlx4/alloc.c
+++ b/drivers/net/ethernet/mellanox/mlx4/alloc.c
@@ -592,7 +592,7 @@ int mlx4_buf_alloc(struct mlx4_dev *dev, int size, int max_direct,
buf->nbufs = 1;
buf->npages = 1;
buf->page_shift = get_order(size) + PAGE_SHIFT;
- buf->direct.buf = dma_alloc_coherent(&dev->pdev->dev,
+ buf->direct.buf = dma_alloc_coherent(&dev->persist->pdev->dev,
size, &t, gfp);
if (!buf->direct.buf)
return -ENOMEM;
@@ -619,7 +619,8 @@ int mlx4_buf_alloc(struct mlx4_dev *dev, int size, int max_direct,
for (i = 0; i < buf->nbufs; ++i) {
buf->page_list[i].buf =
- dma_alloc_coherent(&dev->pdev->dev, PAGE_SIZE,
+ dma_alloc_coherent(&dev->persist->pdev->dev,
+ PAGE_SIZE,
&t, gfp);
if (!buf->page_list[i].buf)
goto err_free;
@@ -657,7 +658,8 @@ void mlx4_buf_free(struct mlx4_dev *dev, int size, struct mlx4_buf *buf)
int i;
if (buf->nbufs == 1)
- dma_free_coherent(&dev->pdev->dev, size, buf->direct.buf,
+ dma_free_coherent(&dev->persist->pdev->dev, size,
+ buf->direct.buf,
buf->direct.map);
else {
if (BITS_PER_LONG == 64 && buf->direct.buf)
@@ -665,7 +667,8 @@ void mlx4_buf_free(struct mlx4_dev *dev, int size, struct mlx4_buf *buf)
for (i = 0; i < buf->nbufs; ++i)
if (buf->page_list[i].buf)
- dma_free_coherent(&dev->pdev->dev, PAGE_SIZE,
+ dma_free_coherent(&dev->persist->pdev->dev,
+ PAGE_SIZE,
buf->page_list[i].buf,
buf->page_list[i].map);
kfree(buf->page_list);
@@ -738,7 +741,7 @@ int mlx4_db_alloc(struct mlx4_dev *dev, struct mlx4_db *db, int order, gfp_t gfp
if (!mlx4_alloc_db_from_pgdir(pgdir, db, order))
goto out;
- pgdir = mlx4_alloc_db_pgdir(&(dev->pdev->dev), gfp);
+ pgdir = mlx4_alloc_db_pgdir(&dev->persist->pdev->dev, gfp);
if (!pgdir) {
ret = -ENOMEM;
goto out;
@@ -775,7 +778,7 @@ void mlx4_db_free(struct mlx4_dev *dev, struct mlx4_db *db)
set_bit(i, db->u.pgdir->bits[o]);
if (bitmap_full(db->u.pgdir->order1, MLX4_DB_PER_PAGE / 2)) {
- dma_free_coherent(&(dev->pdev->dev), PAGE_SIZE,
+ dma_free_coherent(&dev->persist->pdev->dev, PAGE_SIZE,
db->u.pgdir->db_page, db->u.pgdir->db_dma);
list_del(&db->u.pgdir->list);
kfree(db->u.pgdir);
diff --git a/drivers/net/ethernet/mellanox/mlx4/catas.c b/drivers/net/ethernet/mellanox/mlx4/catas.c
index 9c656fe4983d..715de8affcc9 100644
--- a/drivers/net/ethernet/mellanox/mlx4/catas.c
+++ b/drivers/net/ethernet/mellanox/mlx4/catas.c
@@ -40,16 +40,177 @@ enum {
MLX4_CATAS_POLL_INTERVAL = 5 * HZ,
};
-static DEFINE_SPINLOCK(catas_lock);
-static LIST_HEAD(catas_list);
-static struct work_struct catas_work;
-static int internal_err_reset = 1;
-module_param(internal_err_reset, int, 0644);
+int mlx4_internal_err_reset = 1;
+module_param_named(internal_err_reset, mlx4_internal_err_reset, int, 0644);
MODULE_PARM_DESC(internal_err_reset,
- "Reset device on internal errors if non-zero"
- " (default 1, in SRIOV mode default is 0)");
+ "Reset device on internal errors if non-zero (default 1)");
+
+static int read_vendor_id(struct mlx4_dev *dev)
+{
+ u16 vendor_id = 0;
+ int ret;
+
+ ret = pci_read_config_word(dev->persist->pdev, 0, &vendor_id);
+ if (ret) {
+ mlx4_err(dev, "Failed to read vendor ID, ret=%d\n", ret);
+ return ret;
+ }
+
+ if (vendor_id == 0xffff) {
+ mlx4_err(dev, "PCI can't be accessed to read vendor id\n");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int mlx4_reset_master(struct mlx4_dev *dev)
+{
+ int err = 0;
+
+ if (mlx4_is_master(dev))
+ mlx4_report_internal_err_comm_event(dev);
+
+ if (!pci_channel_offline(dev->persist->pdev)) {
+ err = read_vendor_id(dev);
+ /* If PCI can't be accessed to read vendor ID we assume that its
+ * link was disabled and chip was already reset.
+ */
+ if (err)
+ return 0;
+
+ err = mlx4_reset(dev);
+ if (err)
+ mlx4_err(dev, "Fail to reset HCA\n");
+ }
+
+ return err;
+}
+
+static int mlx4_reset_slave(struct mlx4_dev *dev)
+{
+#define COM_CHAN_RST_REQ_OFFSET 0x10
+#define COM_CHAN_RST_ACK_OFFSET 0x08
+
+ u32 comm_flags;
+ u32 rst_req;
+ u32 rst_ack;
+ unsigned long end;
+ struct mlx4_priv *priv = mlx4_priv(dev);
+
+ if (pci_channel_offline(dev->persist->pdev))
+ return 0;
+
+ comm_flags = swab32(readl((__iomem char *)priv->mfunc.comm +
+ MLX4_COMM_CHAN_FLAGS));
+ if (comm_flags == 0xffffffff) {
+ mlx4_err(dev, "VF reset is not needed\n");
+ return 0;
+ }
+
+ if (!(dev->caps.vf_caps & MLX4_VF_CAP_FLAG_RESET)) {
+ mlx4_err(dev, "VF reset is not supported\n");
+ return -EOPNOTSUPP;
+ }
+
+ rst_req = (comm_flags & (u32)(1 << COM_CHAN_RST_REQ_OFFSET)) >>
+ COM_CHAN_RST_REQ_OFFSET;
+ rst_ack = (comm_flags & (u32)(1 << COM_CHAN_RST_ACK_OFFSET)) >>
+ COM_CHAN_RST_ACK_OFFSET;
+ if (rst_req != rst_ack) {
+ mlx4_err(dev, "Communication channel isn't sync, fail to send reset\n");
+ return -EIO;
+ }
+
+ rst_req ^= 1;
+ mlx4_warn(dev, "VF is sending reset request to Firmware\n");
+ comm_flags = rst_req << COM_CHAN_RST_REQ_OFFSET;
+ __raw_writel((__force u32)cpu_to_be32(comm_flags),
+ (__iomem char *)priv->mfunc.comm + MLX4_COMM_CHAN_FLAGS);
+ /* Make sure that our comm channel write doesn't
+ * get mixed in with writes from another CPU.
+ */
+ mmiowb();
+
+ end = msecs_to_jiffies(MLX4_COMM_TIME) + jiffies;
+ while (time_before(jiffies, end)) {
+ comm_flags = swab32(readl((__iomem char *)priv->mfunc.comm +
+ MLX4_COMM_CHAN_FLAGS));
+ rst_ack = (comm_flags & (u32)(1 << COM_CHAN_RST_ACK_OFFSET)) >>
+ COM_CHAN_RST_ACK_OFFSET;
+
+ /* Reading rst_req again since the communication channel can
+ * be reset at any time by the PF and all its bits will be
+ * set to zero.
+ */
+ rst_req = (comm_flags & (u32)(1 << COM_CHAN_RST_REQ_OFFSET)) >>
+ COM_CHAN_RST_REQ_OFFSET;
+
+ if (rst_ack == rst_req) {
+ mlx4_warn(dev, "VF Reset succeed\n");
+ return 0;
+ }
+ cond_resched();
+ }
+ mlx4_err(dev, "Fail to send reset over the communication channel\n");
+ return -ETIMEDOUT;
+}
+
+static int mlx4_comm_internal_err(u32 slave_read)
+{
+ return (u32)COMM_CHAN_EVENT_INTERNAL_ERR ==
+ (slave_read & (u32)COMM_CHAN_EVENT_INTERNAL_ERR) ? 1 : 0;
+}
+
+void mlx4_enter_error_state(struct mlx4_dev_persistent *persist)
+{
+ int err;
+ struct mlx4_dev *dev;
+
+ if (!mlx4_internal_err_reset)
+ return;
+
+ mutex_lock(&persist->device_state_mutex);
+ if (persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR)
+ goto out;
+
+ dev = persist->dev;
+ mlx4_err(dev, "device is going to be reset\n");
+ if (mlx4_is_slave(dev))
+ err = mlx4_reset_slave(dev);
+ else
+ err = mlx4_reset_master(dev);
+ BUG_ON(err != 0);
+
+ dev->persist->state |= MLX4_DEVICE_STATE_INTERNAL_ERROR;
+ mlx4_err(dev, "device was reset successfully\n");
+ mutex_unlock(&persist->device_state_mutex);
+
+ /* At that step HW was already reset, now notify clients */
+ mlx4_dispatch_event(dev, MLX4_DEV_EVENT_CATASTROPHIC_ERROR, 0);
+ mlx4_cmd_wake_completions(dev);
+ return;
+
+out:
+ mutex_unlock(&persist->device_state_mutex);
+}
+
+static void mlx4_handle_error_state(struct mlx4_dev_persistent *persist)
+{
+ int err = 0;
+
+ mlx4_enter_error_state(persist);
+ mutex_lock(&persist->interface_state_mutex);
+ if (persist->interface_state & MLX4_INTERFACE_STATE_UP &&
+ !(persist->interface_state & MLX4_INTERFACE_STATE_DELETION)) {
+ err = mlx4_restart_one(persist->pdev);
+ mlx4_info(persist->dev, "mlx4_restart_one was ended, ret=%d\n",
+ err);
+ }
+ mutex_unlock(&persist->interface_state_mutex);
+}
static void dump_err_buf(struct mlx4_dev *dev)
{
@@ -67,58 +228,40 @@ static void poll_catas(unsigned long dev_ptr)
{
struct mlx4_dev *dev = (struct mlx4_dev *) dev_ptr;
struct mlx4_priv *priv = mlx4_priv(dev);
+ u32 slave_read;
- if (readl(priv->catas_err.map)) {
- /* If the device is off-line, we cannot try to recover it */
- if (pci_channel_offline(dev->pdev))
- mod_timer(&priv->catas_err.timer,
- round_jiffies(jiffies + MLX4_CATAS_POLL_INTERVAL));
- else {
- dump_err_buf(dev);
- mlx4_dispatch_event(dev, MLX4_DEV_EVENT_CATASTROPHIC_ERROR, 0);
-
- if (internal_err_reset) {
- spin_lock(&catas_lock);
- list_add(&priv->catas_err.list, &catas_list);
- spin_unlock(&catas_lock);
-
- queue_work(mlx4_wq, &catas_work);
- }
+ if (mlx4_is_slave(dev)) {
+ slave_read = swab32(readl(&priv->mfunc.comm->slave_read));
+ if (mlx4_comm_internal_err(slave_read)) {
+ mlx4_warn(dev, "Internal error detected on the communication channel\n");
+ goto internal_err;
}
- } else
- mod_timer(&priv->catas_err.timer,
- round_jiffies(jiffies + MLX4_CATAS_POLL_INTERVAL));
+ } else if (readl(priv->catas_err.map)) {
+ dump_err_buf(dev);
+ goto internal_err;
+ }
+
+ if (dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR) {
+ mlx4_warn(dev, "Internal error mark was detected on device\n");
+ goto internal_err;
+ }
+
+ mod_timer(&priv->catas_err.timer,
+ round_jiffies(jiffies + MLX4_CATAS_POLL_INTERVAL));
+ return;
+
+internal_err:
+ if (mlx4_internal_err_reset)
+ queue_work(dev->persist->catas_wq, &dev->persist->catas_work);
}
static void catas_reset(struct work_struct *work)
{
- struct mlx4_priv *priv, *tmppriv;
- struct mlx4_dev *dev;
+ struct mlx4_dev_persistent *persist =
+ container_of(work, struct mlx4_dev_persistent,
+ catas_work);
- LIST_HEAD(tlist);
- int ret;
-
- spin_lock_irq(&catas_lock);
- list_splice_init(&catas_list, &tlist);
- spin_unlock_irq(&catas_lock);
-
- list_for_each_entry_safe(priv, tmppriv, &tlist, catas_err.list) {
- struct pci_dev *pdev = priv->dev.pdev;
-
- /* If the device is off-line, we cannot reset it */
- if (pci_channel_offline(pdev))
- continue;
-
- ret = mlx4_restart_one(priv->dev.pdev);
- /* 'priv' now is not valid */
- if (ret)
- pr_err("mlx4 %s: Reset failed (%d)\n",
- pci_name(pdev), ret);
- else {
- dev = pci_get_drvdata(pdev);
- mlx4_dbg(dev, "Reset succeeded\n");
- }
- }
+ mlx4_handle_error_state(persist);
}
void mlx4_start_catas_poll(struct mlx4_dev *dev)
@@ -126,22 +269,21 @@ void mlx4_start_catas_poll(struct mlx4_dev *dev)
struct mlx4_priv *priv = mlx4_priv(dev);
phys_addr_t addr;
- /*If we are in SRIOV the default of the module param must be 0*/
- if (mlx4_is_mfunc(dev))
- internal_err_reset = 0;
-
INIT_LIST_HEAD(&priv->catas_err.list);
init_timer(&priv->catas_err.timer);
priv->catas_err.map = NULL;
- addr = pci_resource_start(dev->pdev, priv->fw.catas_bar) +
- priv->fw.catas_offset;
+ if (!mlx4_is_slave(dev)) {
+ addr = pci_resource_start(dev->persist->pdev,
+ priv->fw.catas_bar) +
+ priv->fw.catas_offset;
- priv->catas_err.map = ioremap(addr, priv->fw.catas_size * 4);
- if (!priv->catas_err.map) {
- mlx4_warn(dev, "Failed to map internal error buffer at 0x%llx\n",
- (unsigned long long) addr);
- return;
+ priv->catas_err.map = ioremap(addr, priv->fw.catas_size * 4);
+ if (!priv->catas_err.map) {
+ mlx4_warn(dev, "Failed to map internal error buffer at 0x%llx\n",
+ (unsigned long long)addr);
+ return;
+ }
}
priv->catas_err.timer.data = (unsigned long) dev;
@@ -157,15 +299,29 @@ void mlx4_stop_catas_poll(struct mlx4_dev *dev)
del_timer_sync(&priv->catas_err.timer);
- if (priv->catas_err.map)
+ if (priv->catas_err.map) {
iounmap(priv->catas_err.map);
+ priv->catas_err.map = NULL;
+ }
- spin_lock_irq(&catas_lock);
- list_del(&priv->catas_err.list);
- spin_unlock_irq(&catas_lock);
+ if (dev->persist->interface_state & MLX4_INTERFACE_STATE_DELETION)
+ flush_workqueue(dev->persist->catas_wq);
}
-void __init mlx4_catas_init(void)
+int mlx4_catas_init(struct mlx4_dev *dev)
{
- INIT_WORK(&catas_work, catas_reset);
+ INIT_WORK(&dev->persist->catas_work, catas_reset);
+ dev->persist->catas_wq = create_singlethread_workqueue("mlx4_health");
+ if (!dev->persist->catas_wq)
+ return -ENOMEM;
+
+ return 0;
+}
+
+void mlx4_catas_end(struct mlx4_dev *dev)
+{
+ if (dev->persist->catas_wq) {
+ destroy_workqueue(dev->persist->catas_wq);
+ dev->persist->catas_wq = NULL;
+ }
}
diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c
index 5c93d1451c44..2b48932855e7 100644
--- a/drivers/net/ethernet/mellanox/mlx4/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c
@@ -42,6 +42,7 @@
#include <linux/mlx4/device.h>
#include <linux/semaphore.h>
#include <rdma/ib_smi.h>
+#include <linux/delay.h>
#include <asm/io.h>
@@ -182,6 +183,72 @@ static u8 mlx4_errno_to_status(int errno)
}
}
+static int mlx4_internal_err_ret_value(struct mlx4_dev *dev, u16 op,
+ u8 op_modifier)
+{
+ switch (op) {
+ case MLX4_CMD_UNMAP_ICM:
+ case MLX4_CMD_UNMAP_ICM_AUX:
+ case MLX4_CMD_UNMAP_FA:
+ case MLX4_CMD_2RST_QP:
+ case MLX4_CMD_HW2SW_EQ:
+ case MLX4_CMD_HW2SW_CQ:
+ case MLX4_CMD_HW2SW_SRQ:
+ case MLX4_CMD_HW2SW_MPT:
+ case MLX4_CMD_CLOSE_HCA:
+ case MLX4_QP_FLOW_STEERING_DETACH:
+ case MLX4_CMD_FREE_RES:
+ case MLX4_CMD_CLOSE_PORT:
+ return CMD_STAT_OK;
+
+ case MLX4_CMD_QP_ATTACH:
+ /* On Detach case return success */
+ if (op_modifier == 0)
+ return CMD_STAT_OK;
+ return mlx4_status_to_errno(CMD_STAT_INTERNAL_ERR);
+
+ default:
+ return mlx4_status_to_errno(CMD_STAT_INTERNAL_ERR);
+ }
+}
+
+static int mlx4_closing_cmd_fatal_error(u16 op, u8 fw_status)
+{
+ /* Any error during the closing commands below is considered fatal */
+ if (op == MLX4_CMD_CLOSE_HCA ||
+ op == MLX4_CMD_HW2SW_EQ ||
+ op == MLX4_CMD_HW2SW_CQ ||
+ op == MLX4_CMD_2RST_QP ||
+ op == MLX4_CMD_HW2SW_SRQ ||
+ op == MLX4_CMD_SYNC_TPT ||
+ op == MLX4_CMD_UNMAP_ICM ||
+ op == MLX4_CMD_UNMAP_ICM_AUX ||
+ op == MLX4_CMD_UNMAP_FA)
+ return 1;
+ /* Error on MLX4_CMD_HW2SW_MPT is fatal except when fw status equals
+ * CMD_STAT_REG_BOUND.
+ * This status indicates that memory region has memory windows bound to it
+ * which may result from invalid user space usage and is not fatal.
+ */
+ if (op == MLX4_CMD_HW2SW_MPT && fw_status != CMD_STAT_REG_BOUND)
+ return 1;
+ return 0;
+}
+
+static int mlx4_cmd_reset_flow(struct mlx4_dev *dev, u16 op, u8 op_modifier,
+ int err)
+{
+ /* Only if reset flow is really active return code is based on
+ * command, otherwise current error code is returned.
+ */
+ if (mlx4_internal_err_reset) {
+ mlx4_enter_error_state(dev->persist);
+ err = mlx4_internal_err_ret_value(dev, op, op_modifier);
+ }
+
+ return err;
+}
+
static int comm_pending(struct mlx4_dev *dev)
{
struct mlx4_priv *priv = mlx4_priv(dev);
@@ -190,16 +257,30 @@ static int comm_pending(struct mlx4_dev *dev)
return (swab32(status) >> 31) != priv->cmd.comm_toggle;
}
-static void mlx4_comm_cmd_post(struct mlx4_dev *dev, u8 cmd, u16 param)
+static int mlx4_comm_cmd_post(struct mlx4_dev *dev, u8 cmd, u16 param)
{
struct mlx4_priv *priv = mlx4_priv(dev);
u32 val;
+ /* To avoid writing to unknown addresses after the device state was
+ * changed to internal error and the function was rest,
+ * check the INTERNAL_ERROR flag which is updated under
+ * device_state_mutex lock.
+ */
+ mutex_lock(&dev->persist->device_state_mutex);
+
+ if (dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR) {
+ mutex_unlock(&dev->persist->device_state_mutex);
+ return -EIO;
+ }
+
priv->cmd.comm_toggle ^= 1;
val = param | (cmd << 16) | (priv->cmd.comm_toggle << 31);
__raw_writel((__force u32) cpu_to_be32(val),
&priv->mfunc.comm->slave_write);
mmiowb();
+ mutex_unlock(&dev->persist->device_state_mutex);
+ return 0;
}
static int mlx4_comm_cmd_poll(struct mlx4_dev *dev, u8 cmd, u16 param,
@@ -219,7 +300,13 @@ static int mlx4_comm_cmd_poll(struct mlx4_dev *dev, u8 cmd, u16 param,
/* Write command */
down(&priv->cmd.poll_sem);
- mlx4_comm_cmd_post(dev, cmd, param);
+ if (mlx4_comm_cmd_post(dev, cmd, param)) {
+ /* Only in case the device state is INTERNAL_ERROR,
+ * mlx4_comm_cmd_post returns with an error
+ */
+ err = mlx4_status_to_errno(CMD_STAT_INTERNAL_ERR);
+ goto out;
+ }
end = msecs_to_jiffies(timeout) + jiffies;
while (comm_pending(dev) && time_before(jiffies, end))
@@ -231,18 +318,23 @@ static int mlx4_comm_cmd_poll(struct mlx4_dev *dev, u8 cmd, u16 param,
* is MLX4_DELAY_RESET_SLAVE*/
if ((MLX4_COMM_CMD_RESET == cmd)) {
err = MLX4_DELAY_RESET_SLAVE;
+ goto out;
} else {
- mlx4_warn(dev, "Communication channel timed out\n");
- err = -ETIMEDOUT;
+ mlx4_warn(dev, "Communication channel command 0x%x timed out\n",
+ cmd);
+ err = mlx4_status_to_errno(CMD_STAT_INTERNAL_ERR);
}
}
+ if (err)
+ mlx4_enter_error_state(dev->persist);
+out:
up(&priv->cmd.poll_sem);
return err;
}
-static int mlx4_comm_cmd_wait(struct mlx4_dev *dev, u8 op,
- u16 param, unsigned long timeout)
+static int mlx4_comm_cmd_wait(struct mlx4_dev *dev, u8 vhcr_cmd,
+ u16 param, u16 op, unsigned long timeout)
{
struct mlx4_cmd *cmd = &mlx4_priv(dev)->cmd;
struct mlx4_cmd_context *context;
@@ -258,34 +350,49 @@ static int mlx4_comm_cmd_wait(struct mlx4_dev *dev, u8 op,
cmd->free_head = context->next;
spin_unlock(&cmd->context_lock);
- init_completion(&context->done);
+ reinit_completion(&context->done);
- mlx4_comm_cmd_post(dev, op, param);
+ if (mlx4_comm_cmd_post(dev, vhcr_cmd, param)) {
+ /* Only in case the device state is INTERNAL_ERROR,
+ * mlx4_comm_cmd_post returns with an error
+ */
+ err = mlx4_status_to_errno(CMD_STAT_INTERNAL_ERR);
+ goto out;
+ }
if (!wait_for_completion_timeout(&context->done,
msecs_to_jiffies(timeout))) {
- mlx4_warn(dev, "communication channel command 0x%x timed out\n",
- op);
- err = -EBUSY;
- goto out;
+ mlx4_warn(dev, "communication channel command 0x%x (op=0x%x) timed out\n",
+ vhcr_cmd, op);
+ goto out_reset;
}
err = context->result;
if (err && context->fw_status != CMD_STAT_MULTI_FUNC_REQ) {
mlx4_err(dev, "command 0x%x failed: fw status = 0x%x\n",
- op, context->fw_status);
- goto out;
+ vhcr_cmd, context->fw_status);
+ if (mlx4_closing_cmd_fatal_error(op, context->fw_status))
+ goto out_reset;
}
-out:
/* wait for comm channel ready
* this is necessary for prevention the race
* when switching between event to polling mode
+ * Skipping this section in case the device is in FATAL_ERROR state,
+ * In this state, no commands are sent via the comm channel until
+ * the device has returned from reset.
*/
- end = msecs_to_jiffies(timeout) + jiffies;
- while (comm_pending(dev) && time_before(jiffies, end))
- cond_resched();
+ if (!(dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR)) {
+ end = msecs_to_jiffies(timeout) + jiffies;
+ while (comm_pending(dev) && time_before(jiffies, end))
+ cond_resched();
+ }
+ goto out;
+out_reset:
+ err = mlx4_status_to_errno(CMD_STAT_INTERNAL_ERR);
+ mlx4_enter_error_state(dev->persist);
+out:
spin_lock(&cmd->context_lock);
context->next = cmd->free_head;
cmd->free_head = context - cmd->context;
@@ -296,10 +403,13 @@ out:
}
int mlx4_comm_cmd(struct mlx4_dev *dev, u8 cmd, u16 param,
- unsigned long timeout)
+ u16 op, unsigned long timeout)
{
+ if (dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR)
+ return mlx4_status_to_errno(CMD_STAT_INTERNAL_ERR);
+
if (mlx4_priv(dev)->cmd.use_events)
- return mlx4_comm_cmd_wait(dev, cmd, param, timeout);
+ return mlx4_comm_cmd_wait(dev, cmd, param, op, timeout);
return mlx4_comm_cmd_poll(dev, cmd, param, timeout);
}
@@ -307,7 +417,7 @@ static int cmd_pending(struct mlx4_dev *dev)
{
u32 status;
- if (pci_channel_offline(dev->pdev))
+ if (pci_channel_offline(dev->persist->pdev))
return -EIO;
status = readl(mlx4_priv(dev)->cmd.hcr + HCR_STATUS_OFFSET);
@@ -323,17 +433,21 @@ static int mlx4_cmd_post(struct mlx4_dev *dev, u64 in_param, u64 out_param,
{
struct mlx4_cmd *cmd = &mlx4_priv(dev)->cmd;
u32 __iomem *hcr = cmd->hcr;
- int ret = -EAGAIN;
+ int ret = -EIO;
unsigned long end;
- mutex_lock(&cmd->hcr_mutex);
-
- if (pci_channel_offline(dev->pdev)) {
+ mutex_lock(&dev->persist->device_state_mutex);
+ /* To avoid writing to unknown addresses after the device state was
+ * changed to internal error and the chip was reset,
+ * check the INTERNAL_ERROR flag which is updated under
+ * device_state_mutex lock.
+ */
+ if (pci_channel_offline(dev->persist->pdev) ||
+ (dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR)) {
/*
* Device is going through error recovery
* and cannot accept commands.
*/
- ret = -EIO;
goto out;
}
@@ -342,12 +456,11 @@ static int mlx4_cmd_post(struct mlx4_dev *dev, u64 in_param, u64 out_param,
end += msecs_to_jiffies(GO_BIT_TIMEOUT_MSECS);
while (cmd_pending(dev)) {
- if (pci_channel_offline(dev->pdev)) {
+ if (pci_channel_offline(dev->persist->pdev)) {
/*
* Device is going through error recovery
* and cannot accept commands.
*/
- ret = -EIO;
goto out;
}
@@ -391,7 +504,11 @@ static int mlx4_cmd_post(struct mlx4_dev *dev, u64 in_param, u64 out_param,
ret = 0;
out:
- mutex_unlock(&cmd->hcr_mutex);
+ if (ret)
+ mlx4_warn(dev, "Could not post command 0x%x: ret=%d, in_param=0x%llx, in_mod=0x%x, op_mod=0x%x\n",
+ op, ret, in_param, in_modifier, op_modifier);
+ mutex_unlock(&dev->persist->device_state_mutex);
+
return ret;
}
@@ -428,8 +545,11 @@ static int mlx4_slave_cmd(struct mlx4_dev *dev, u64 in_param, u64 *out_param,
}
ret = mlx4_status_to_errno(vhcr->status);
}
+ if (ret &&
+ dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR)
+ ret = mlx4_internal_err_ret_value(dev, op, op_modifier);
} else {
- ret = mlx4_comm_cmd(dev, MLX4_COMM_CMD_VHCR_POST, 0,
+ ret = mlx4_comm_cmd(dev, MLX4_COMM_CMD_VHCR_POST, 0, op,
MLX4_COMM_TIME + timeout);
if (!ret) {
if (out_is_imm) {
@@ -443,9 +563,14 @@ static int mlx4_slave_cmd(struct mlx4_dev *dev, u64 in_param, u64 *out_param,
}
}
ret = mlx4_status_to_errno(vhcr->status);
- } else
- mlx4_err(dev, "failed execution of VHCR_POST command opcode 0x%x\n",
- op);
+ } else {
+ if (dev->persist->state &
+ MLX4_DEVICE_STATE_INTERNAL_ERROR)
+ ret = mlx4_internal_err_ret_value(dev, op,
+ op_modifier);
+ else
+ mlx4_err(dev, "failed execution of VHCR_POST command opcode 0x%x\n", op);
+ }
}
mutex_unlock(&priv->cmd.slave_cmd_mutex);
@@ -464,12 +589,12 @@ static int mlx4_cmd_poll(struct mlx4_dev *dev, u64 in_param, u64 *out_param,
down(&priv->cmd.poll_sem);
- if (pci_channel_offline(dev->pdev)) {
+ if (dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR) {
/*
* Device is going through error recovery
* and cannot accept commands.
*/
- err = -EIO;
+ err = mlx4_internal_err_ret_value(dev, op, op_modifier);
goto out;
}
@@ -483,16 +608,21 @@ static int mlx4_cmd_poll(struct mlx4_dev *dev, u64 in_param, u64 *out_param,
err = mlx4_cmd_post(dev, in_param, out_param ? *out_param : 0,
in_modifier, op_modifier, op, CMD_POLL_TOKEN, 0);
if (err)
- goto out;
+ goto out_reset;
end = msecs_to_jiffies(timeout) + jiffies;
while (cmd_pending(dev) && time_before(jiffies, end)) {
- if (pci_channel_offline(dev->pdev)) {
+ if (pci_channel_offline(dev->persist->pdev)) {
/*
* Device is going through error recovery
* and cannot accept commands.
*/
err = -EIO;
+ goto out_reset;
+ }
+
+ if (dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR) {
+ err = mlx4_internal_err_ret_value(dev, op, op_modifier);
goto out;
}
@@ -502,8 +632,8 @@ static int mlx4_cmd_poll(struct mlx4_dev *dev, u64 in_param, u64 *out_param,
if (cmd_pending(dev)) {
mlx4_warn(dev, "command 0x%x timed out (go bit not cleared)\n",
op);
- err = -ETIMEDOUT;
- goto out;
+ err = -EIO;
+ goto out_reset;
}
if (out_is_imm)
@@ -515,10 +645,17 @@ static int mlx4_cmd_poll(struct mlx4_dev *dev, u64 in_param, u64 *out_param,
stat = be32_to_cpu((__force __be32)
__raw_readl(hcr + HCR_STATUS_OFFSET)) >> 24;
err = mlx4_status_to_errno(stat);
- if (err)
+ if (err) {
mlx4_err(dev, "command 0x%x failed: fw status = 0x%x\n",
op, stat);
+ if (mlx4_closing_cmd_fatal_error(op, stat))
+ goto out_reset;
+ goto out;
+ }
+out_reset:
+ if (err)
+ err = mlx4_cmd_reset_flow(dev, op, op_modifier, err);
out:
up(&priv->cmd.poll_sem);
return err;
@@ -565,17 +702,19 @@ static int mlx4_cmd_wait(struct mlx4_dev *dev, u64 in_param, u64 *out_param,
goto out;
}
- init_completion(&context->done);
+ reinit_completion(&context->done);
- mlx4_cmd_post(dev, in_param, out_param ? *out_param : 0,
- in_modifier, op_modifier, op, context->token, 1);
+ err = mlx4_cmd_post(dev, in_param, out_param ? *out_param : 0,
+ in_modifier, op_modifier, op, context->token, 1);
+ if (err)
+ goto out_reset;
if (!wait_for_completion_timeout(&context->done,
msecs_to_jiffies(timeout))) {
mlx4_warn(dev, "command 0x%x timed out (go bit not cleared)\n",
op);
- err = -EBUSY;
- goto out;
+ err = -EIO;
+ goto out_reset;
}
err = context->result;
@@ -592,12 +731,20 @@ static int mlx4_cmd_wait(struct mlx4_dev *dev, u64 in_param, u64 *out_param,
else
mlx4_err(dev, "command 0x%x failed: fw status = 0x%x\n",
op, context->fw_status);
+ if (dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR)
+ err = mlx4_internal_err_ret_value(dev, op, op_modifier);
+ else if (mlx4_closing_cmd_fatal_error(op, context->fw_status))
+ goto out_reset;
+
goto out;
}
if (out_is_imm)
*out_param = context->out_param;
+out_reset:
+ if (err)
+ err = mlx4_cmd_reset_flow(dev, op, op_modifier, err);
out:
spin_lock(&cmd->context_lock);
context->next = cmd->free_head;
@@ -612,10 +759,13 @@ int __mlx4_cmd(struct mlx4_dev *dev, u64 in_param, u64 *out_param,
int out_is_imm, u32 in_modifier, u8 op_modifier,
u16 op, unsigned long timeout, int native)
{
- if (pci_channel_offline(dev->pdev))
- return -EIO;
+ if (pci_channel_offline(dev->persist->pdev))
+ return mlx4_cmd_reset_flow(dev, op, op_modifier, -EIO);
if (!mlx4_is_mfunc(dev) || (native && mlx4_is_master(dev))) {
+ if (dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR)
+ return mlx4_internal_err_ret_value(dev, op,
+ op_modifier);
if (mlx4_priv(dev)->cmd.use_events)
return mlx4_cmd_wait(dev, in_param, out_param,
out_is_imm, in_modifier,
@@ -631,7 +781,7 @@ int __mlx4_cmd(struct mlx4_dev *dev, u64 in_param, u64 *out_param,
EXPORT_SYMBOL_GPL(__mlx4_cmd);
-static int mlx4_ARM_COMM_CHANNEL(struct mlx4_dev *dev)
+int mlx4_ARM_COMM_CHANNEL(struct mlx4_dev *dev)
{
return mlx4_cmd(dev, 0, 0, 0, MLX4_CMD_ARM_COMM_CHANNEL,
MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE);
@@ -1460,8 +1610,10 @@ static int mlx4_master_process_vhcr(struct mlx4_dev *dev, int slave,
ALIGN(sizeof(struct mlx4_vhcr_cmd),
MLX4_ACCESS_MEM_ALIGN), 1);
if (ret) {
- mlx4_err(dev, "%s: Failed reading vhcr ret: 0x%x\n",
- __func__, ret);
+ if (!(dev->persist->state &
+ MLX4_DEVICE_STATE_INTERNAL_ERROR))
+ mlx4_err(dev, "%s: Failed reading vhcr ret: 0x%x\n",
+ __func__, ret);
kfree(vhcr);
return ret;
}
@@ -1500,11 +1652,14 @@ static int mlx4_master_process_vhcr(struct mlx4_dev *dev, int slave,
goto out_status;
}
- if (mlx4_ACCESS_MEM(dev, inbox->dma, slave,
- vhcr->in_param,
- MLX4_MAILBOX_SIZE, 1)) {
- mlx4_err(dev, "%s: Failed reading inbox (cmd:0x%x)\n",
- __func__, cmd->opcode);
+ ret = mlx4_ACCESS_MEM(dev, inbox->dma, slave,
+ vhcr->in_param,
+ MLX4_MAILBOX_SIZE, 1);
+ if (ret) {
+ if (!(dev->persist->state &
+ MLX4_DEVICE_STATE_INTERNAL_ERROR))
+ mlx4_err(dev, "%s: Failed reading inbox (cmd:0x%x)\n",
+ __func__, cmd->opcode);
vhcr_cmd->status = CMD_STAT_INTERNAL_ERR;
goto out_status;
}
@@ -1552,8 +1707,9 @@ static int mlx4_master_process_vhcr(struct mlx4_dev *dev, int slave,
}
if (err) {
- mlx4_warn(dev, "vhcr command:0x%x slave:%d failed with error:%d, status %d\n",
- vhcr->op, slave, vhcr->errno, err);
+ if (!(dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR))
+ mlx4_warn(dev, "vhcr command:0x%x slave:%d failed with error:%d, status %d\n",
+ vhcr->op, slave, vhcr->errno, err);
vhcr_cmd->status = mlx4_errno_to_status(err);
goto out_status;
}
@@ -1568,7 +1724,9 @@ static int mlx4_master_process_vhcr(struct mlx4_dev *dev, int slave,
/* If we failed to write back the outbox after the
*command was successfully executed, we must fail this
* slave, as it is now in undefined state */
- mlx4_err(dev, "%s:Failed writing outbox\n", __func__);
+ if (!(dev->persist->state &
+ MLX4_DEVICE_STATE_INTERNAL_ERROR))
+ mlx4_err(dev, "%s:Failed writing outbox\n", __func__);
goto out;
}
}
@@ -1847,8 +2005,11 @@ static void mlx4_master_do_cmd(struct mlx4_dev *dev, int slave, u8 cmd,
break;
case MLX4_COMM_CMD_VHCR_POST:
if ((slave_state[slave].last_cmd != MLX4_COMM_CMD_VHCR_EN) &&
- (slave_state[slave].last_cmd != MLX4_COMM_CMD_VHCR_POST))
+ (slave_state[slave].last_cmd != MLX4_COMM_CMD_VHCR_POST)) {
+ mlx4_warn(dev, "slave:%d is out of sync, cmd=0x%x, last command=0x%x, reset is needed\n",
+ slave, cmd, slave_state[slave].last_cmd);
goto reset_slave;
+ }
mutex_lock(&priv->cmd.slave_cmd_mutex);
if (mlx4_master_process_vhcr(dev, slave, NULL)) {
@@ -1882,7 +2043,18 @@ static void mlx4_master_do_cmd(struct mlx4_dev *dev, int slave, u8 cmd,
reset_slave:
/* cleanup any slave resources */
- mlx4_delete_all_resources_for_slave(dev, slave);
+ if (dev->persist->interface_state & MLX4_INTERFACE_STATE_UP)
+ mlx4_delete_all_resources_for_slave(dev, slave);
+
+ if (cmd != MLX4_COMM_CMD_RESET) {
+ mlx4_warn(dev, "Turn on internal error to force reset, slave=%d, cmd=0x%x\n",
+ slave, cmd);
+ /* Turn on internal error letting slave reset itself immeditaly,
+ * otherwise it might take till timeout on command is passed
+ */
+ reply |= ((u32)COMM_CHAN_EVENT_INTERNAL_ERR);
+ }
+
spin_lock_irqsave(&priv->mfunc.master.slave_state_lock, flags);
if (!slave_state[slave].is_slave_going_down)
slave_state[slave].last_cmd = MLX4_COMM_CMD_RESET;
@@ -1958,17 +2130,28 @@ void mlx4_master_comm_channel(struct work_struct *work)
static int sync_toggles(struct mlx4_dev *dev)
{
struct mlx4_priv *priv = mlx4_priv(dev);
- int wr_toggle;
- int rd_toggle;
+ u32 wr_toggle;
+ u32 rd_toggle;
unsigned long end;
- wr_toggle = swab32(readl(&priv->mfunc.comm->slave_write)) >> 31;
- end = jiffies + msecs_to_jiffies(5000);
+ wr_toggle = swab32(readl(&priv->mfunc.comm->slave_write));
+ if (wr_toggle == 0xffffffff)
+ end = jiffies + msecs_to_jiffies(30000);
+ else
+ end = jiffies + msecs_to_jiffies(5000);
while (time_before(jiffies, end)) {
- rd_toggle = swab32(readl(&priv->mfunc.comm->slave_read)) >> 31;
- if (rd_toggle == wr_toggle) {
- priv->cmd.comm_toggle = rd_toggle;
+ rd_toggle = swab32(readl(&priv->mfunc.comm->slave_read));
+ if (wr_toggle == 0xffffffff || rd_toggle == 0xffffffff) {
+ /* PCI might be offline */
+ msleep(100);
+ wr_toggle = swab32(readl(&priv->mfunc.comm->
+ slave_write));
+ continue;
+ }
+
+ if (rd_toggle >> 31 == wr_toggle >> 31) {
+ priv->cmd.comm_toggle = rd_toggle >> 31;
return 0;
}
@@ -1997,11 +2180,12 @@ int mlx4_multi_func_init(struct mlx4_dev *dev)
if (mlx4_is_master(dev))
priv->mfunc.comm =
- ioremap(pci_resource_start(dev->pdev, priv->fw.comm_bar) +
+ ioremap(pci_resource_start(dev->persist->pdev,
+ priv->fw.comm_bar) +
priv->fw.comm_base, MLX4_COMM_PAGESIZE);
else
priv->mfunc.comm =
- ioremap(pci_resource_start(dev->pdev, 2) +
+ ioremap(pci_resource_start(dev->persist->pdev, 2) +
MLX4_SLAVE_COMM_BASE, MLX4_COMM_PAGESIZE);
if (!priv->mfunc.comm) {
mlx4_err(dev, "Couldn't map communication vector\n");
@@ -2073,13 +2257,6 @@ int mlx4_multi_func_init(struct mlx4_dev *dev)
if (mlx4_init_resource_tracker(dev))
goto err_thread;
- err = mlx4_ARM_COMM_CHANNEL(dev);
- if (err) {
- mlx4_err(dev, " Failed to arm comm channel eq: %x\n",
- err);
- goto err_resource;
- }
-
} else {
err = sync_toggles(dev);
if (err) {
@@ -2089,8 +2266,6 @@ int mlx4_multi_func_init(struct mlx4_dev *dev)
}
return 0;
-err_resource:
- mlx4_free_resource_tracker(dev, RES_TR_FREE_ALL);
err_thread:
flush_workqueue(priv->mfunc.master.comm_wq);
destroy_workqueue(priv->mfunc.master.comm_wq);
@@ -2107,9 +2282,9 @@ err_comm_admin:
err_comm:
iounmap(priv->mfunc.comm);
err_vhcr:
- dma_free_coherent(&(dev->pdev->dev), PAGE_SIZE,
- priv->mfunc.vhcr,
- priv->mfunc.vhcr_dma);
+ dma_free_coherent(&dev->persist->pdev->dev, PAGE_SIZE,
+ priv->mfunc.vhcr,
+ priv->mfunc.vhcr_dma);
priv->mfunc.vhcr = NULL;
return -ENOMEM;
}
@@ -2120,7 +2295,6 @@ int mlx4_cmd_init(struct mlx4_dev *dev)
int flags = 0;
if (!priv->cmd.initialized) {
- mutex_init(&priv->cmd.hcr_mutex);
mutex_init(&priv->cmd.slave_cmd_mutex);
sema_init(&priv->cmd.poll_sem, 1);
priv->cmd.use_events = 0;
@@ -2130,8 +2304,8 @@ int mlx4_cmd_init(struct mlx4_dev *dev)
}
if (!mlx4_is_slave(dev) && !priv->cmd.hcr) {
- priv->cmd.hcr = ioremap(pci_resource_start(dev->pdev, 0) +
- MLX4_HCR_BASE, MLX4_HCR_SIZE);
+ priv->cmd.hcr = ioremap(pci_resource_start(dev->persist->pdev,
+ 0) + MLX4_HCR_BASE, MLX4_HCR_SIZE);
if (!priv->cmd.hcr) {
mlx4_err(dev, "Couldn't map command register\n");
goto err;
@@ -2140,7 +2314,8 @@ int mlx4_cmd_init(struct mlx4_dev *dev)
}
if (mlx4_is_mfunc(dev) && !priv->mfunc.vhcr) {
- priv->mfunc.vhcr = dma_alloc_coherent(&(dev->pdev->dev), PAGE_SIZE,
+ priv->mfunc.vhcr = dma_alloc_coherent(&dev->persist->pdev->dev,
+ PAGE_SIZE,
&priv->mfunc.vhcr_dma,
GFP_KERNEL);
if (!priv->mfunc.vhcr)
@@ -2150,7 +2325,8 @@ int mlx4_cmd_init(struct mlx4_dev *dev)
}
if (!priv->cmd.pool) {
- priv->cmd.pool = pci_pool_create("mlx4_cmd", dev->pdev,
+ priv->cmd.pool = pci_pool_create("mlx4_cmd",
+ dev->persist->pdev,
MLX4_MAILBOX_SIZE,
MLX4_MAILBOX_SIZE, 0);
if (!priv->cmd.pool)
@@ -2166,6 +2342,27 @@ err:
return -ENOMEM;
}
+void mlx4_report_internal_err_comm_event(struct mlx4_dev *dev)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ int slave;
+ u32 slave_read;
+
+ /* Report an internal error event to all
+ * communication channels.
+ */
+ for (slave = 0; slave < dev->num_slaves; slave++) {
+ slave_read = swab32(readl(&priv->mfunc.comm[slave].slave_read));
+ slave_read |= (u32)COMM_CHAN_EVENT_INTERNAL_ERR;
+ __raw_writel((__force u32)cpu_to_be32(slave_read),
+ &priv->mfunc.comm[slave].slave_read);
+ /* Make sure that our comm channel write doesn't
+ * get mixed in with writes from another CPU.
+ */
+ mmiowb();
+ }
+}
+
void mlx4_multi_func_cleanup(struct mlx4_dev *dev)
{
struct mlx4_priv *priv = mlx4_priv(dev);
@@ -2181,6 +2378,7 @@ void mlx4_multi_func_cleanup(struct mlx4_dev *dev)
kfree(priv->mfunc.master.slave_state);
kfree(priv->mfunc.master.vf_admin);
kfree(priv->mfunc.master.vf_oper);
+ dev->num_slaves = 0;
}
iounmap(priv->mfunc.comm);
@@ -2202,7 +2400,7 @@ void mlx4_cmd_cleanup(struct mlx4_dev *dev, int cleanup_mask)
}
if (mlx4_is_mfunc(dev) && priv->mfunc.vhcr &&
(cleanup_mask & MLX4_CMD_CLEANUP_VHCR)) {
- dma_free_coherent(&(dev->pdev->dev), PAGE_SIZE,
+ dma_free_coherent(&dev->persist->pdev->dev, PAGE_SIZE,
priv->mfunc.vhcr, priv->mfunc.vhcr_dma);
priv->mfunc.vhcr = NULL;
}
@@ -2229,6 +2427,11 @@ int mlx4_cmd_use_events(struct mlx4_dev *dev)
for (i = 0; i < priv->cmd.max_cmds; ++i) {
priv->cmd.context[i].token = i;
priv->cmd.context[i].next = i + 1;
+ /* To support fatal error flow, initialize all
+ * cmd contexts to allow simulating completions
+ * with complete() at any time.
+ */
+ init_completion(&priv->cmd.context[i].done);
}
priv->cmd.context[priv->cmd.max_cmds - 1].next = -1;
@@ -2306,8 +2509,9 @@ u32 mlx4_comm_get_version(void)
static int mlx4_get_slave_indx(struct mlx4_dev *dev, int vf)
{
- if ((vf < 0) || (vf >= dev->num_vfs)) {
- mlx4_err(dev, "Bad vf number:%d (number of activated vf: %d)\n", vf, dev->num_vfs);
+ if ((vf < 0) || (vf >= dev->persist->num_vfs)) {
+ mlx4_err(dev, "Bad vf number:%d (number of activated vf: %d)\n",
+ vf, dev->persist->num_vfs);
return -EINVAL;
}
@@ -2316,7 +2520,7 @@ static int mlx4_get_slave_indx(struct mlx4_dev *dev, int vf)
int mlx4_get_vf_indx(struct mlx4_dev *dev, int slave)
{
- if (slave < 1 || slave > dev->num_vfs) {
+ if (slave < 1 || slave > dev->persist->num_vfs) {
mlx4_err(dev,
"Bad slave number:%d (number of activated slaves: %lu)\n",
slave, dev->num_slaves);
@@ -2325,6 +2529,25 @@ int mlx4_get_vf_indx(struct mlx4_dev *dev, int slave)
return slave - 1;
}
+void mlx4_cmd_wake_completions(struct mlx4_dev *dev)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_cmd_context *context;
+ int i;
+
+ spin_lock(&priv->cmd.context_lock);
+ if (priv->cmd.context) {
+ for (i = 0; i < priv->cmd.max_cmds; ++i) {
+ context = &priv->cmd.context[i];
+ context->fw_status = CMD_STAT_INTERNAL_ERR;
+ context->result =
+ mlx4_status_to_errno(CMD_STAT_INTERNAL_ERR);
+ complete(&context->done);
+ }
+ }
+ spin_unlock(&priv->cmd.context_lock);
+}
+
struct mlx4_active_ports mlx4_get_active_ports(struct mlx4_dev *dev, int slave)
{
struct mlx4_active_ports actv_ports;
@@ -2388,7 +2611,7 @@ struct mlx4_slaves_pport mlx4_phys_to_slaves_pport(struct mlx4_dev *dev,
if (port <= 0 || port > dev->caps.num_ports)
return slaves_pport;
- for (i = 0; i < dev->num_vfs + 1; i++) {
+ for (i = 0; i < dev->persist->num_vfs + 1; i++) {
struct mlx4_active_ports actv_ports =
mlx4_get_active_ports(dev, i);
if (test_bit(port - 1, actv_ports.ports))
@@ -2408,7 +2631,7 @@ struct mlx4_slaves_pport mlx4_phys_to_slaves_pport_actv(
bitmap_zero(slaves_pport.slaves, MLX4_MFUNC_MAX);
- for (i = 0; i < dev->num_vfs + 1; i++) {
+ for (i = 0; i < dev->persist->num_vfs + 1; i++) {
struct mlx4_active_ports actv_ports =
mlx4_get_active_ports(dev, i);
if (bitmap_equal(crit_ports->ports, actv_ports.ports,
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_cq.c b/drivers/net/ethernet/mellanox/mlx4/en_cq.c
index 82322b1c8411..22da4d0d0f05 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_cq.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_cq.c
@@ -70,10 +70,10 @@ int mlx4_en_create_cq(struct mlx4_en_priv *priv,
/* Allocate HW buffers on provided NUMA node.
* dev->numa_node is used in mtt range allocation flow.
*/
- set_dev_node(&mdev->dev->pdev->dev, node);
+ set_dev_node(&mdev->dev->persist->pdev->dev, node);
err = mlx4_alloc_hwq_res(mdev->dev, &cq->wqres,
cq->buf_size, 2 * PAGE_SIZE);
- set_dev_node(&mdev->dev->pdev->dev, mdev->dev->numa_node);
+ set_dev_node(&mdev->dev->persist->pdev->dev, mdev->dev->numa_node);
if (err)
goto err_cq;
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
index 90e0f045a6bc..569eda9e83d6 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
@@ -92,7 +92,7 @@ mlx4_en_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *drvinfo)
(u16) (mdev->dev->caps.fw_ver >> 32),
(u16) ((mdev->dev->caps.fw_ver >> 16) & 0xffff),
(u16) (mdev->dev->caps.fw_ver & 0xffff));
- strlcpy(drvinfo->bus_info, pci_name(mdev->dev->pdev),
+ strlcpy(drvinfo->bus_info, pci_name(mdev->dev->persist->pdev),
sizeof(drvinfo->bus_info));
drvinfo->n_stats = 0;
drvinfo->regdump_len = 0;
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_main.c b/drivers/net/ethernet/mellanox/mlx4/en_main.c
index 9f16f754137b..c643d2bbb7b9 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_main.c
@@ -241,8 +241,8 @@ static void *mlx4_en_add(struct mlx4_dev *dev)
spin_lock_init(&mdev->uar_lock);
mdev->dev = dev;
- mdev->dma_device = &(dev->pdev->dev);
- mdev->pdev = dev->pdev;
+ mdev->dma_device = &dev->persist->pdev->dev;
+ mdev->pdev = dev->persist->pdev;
mdev->device_up = false;
mdev->LSO_support = !!(dev->caps.flags & (1 << 15));
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
index d0d6dc1b8e46..43a3f9822f74 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
@@ -2457,7 +2457,7 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port,
netif_set_real_num_tx_queues(dev, prof->tx_ring_num);
netif_set_real_num_rx_queues(dev, prof->rx_ring_num);
- SET_NETDEV_DEV(dev, &mdev->dev->pdev->dev);
+ SET_NETDEV_DEV(dev, &mdev->dev->persist->pdev->dev);
dev->dev_port = port - 1;
/*
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
index a0474eb94aa3..2ba5d368edce 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
@@ -387,10 +387,10 @@ int mlx4_en_create_rx_ring(struct mlx4_en_priv *priv,
ring->rx_info, tmp);
/* Allocate HW buffers on provided NUMA node */
- set_dev_node(&mdev->dev->pdev->dev, node);
+ set_dev_node(&mdev->dev->persist->pdev->dev, node);
err = mlx4_alloc_hwq_res(mdev->dev, &ring->wqres,
ring->buf_size, 2 * PAGE_SIZE);
- set_dev_node(&mdev->dev->pdev->dev, mdev->dev->numa_node);
+ set_dev_node(&mdev->dev->persist->pdev->dev, mdev->dev->numa_node);
if (err)
goto err_info;
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
index 359bb1286eb5..55f9f5c5344e 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
@@ -91,10 +91,10 @@ int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv,
ring->buf_size = ALIGN(size * ring->stride, MLX4_EN_PAGE_SIZE);
/* Allocate HW buffers on provided NUMA node */
- set_dev_node(&mdev->dev->pdev->dev, node);
+ set_dev_node(&mdev->dev->persist->pdev->dev, node);
err = mlx4_alloc_hwq_res(mdev->dev, &ring->wqres, ring->buf_size,
2 * PAGE_SIZE);
- set_dev_node(&mdev->dev->pdev->dev, mdev->dev->numa_node);
+ set_dev_node(&mdev->dev->persist->pdev->dev, mdev->dev->numa_node);
if (err) {
en_err(priv, "Failed allocating hwq resources\n");
goto err_bounce;
diff --git a/drivers/net/ethernet/mellanox/mlx4/eq.c b/drivers/net/ethernet/mellanox/mlx4/eq.c
index 3d275fbaf0eb..2f2e6067426d 100644
--- a/drivers/net/ethernet/mellanox/mlx4/eq.c
+++ b/drivers/net/ethernet/mellanox/mlx4/eq.c
@@ -237,7 +237,7 @@ int mlx4_gen_guid_change_eqe(struct mlx4_dev *dev, int slave, u8 port)
struct mlx4_eqe eqe;
/*don't send if we don't have the that slave */
- if (dev->num_vfs < slave)
+ if (dev->persist->num_vfs < slave)
return 0;
memset(&eqe, 0, sizeof eqe);
@@ -255,7 +255,7 @@ int mlx4_gen_port_state_change_eqe(struct mlx4_dev *dev, int slave, u8 port,
struct mlx4_eqe eqe;
/*don't send if we don't have the that slave */
- if (dev->num_vfs < slave)
+ if (dev->persist->num_vfs < slave)
return 0;
memset(&eqe, 0, sizeof eqe);
@@ -310,7 +310,7 @@ static void set_all_slave_state(struct mlx4_dev *dev, u8 port, int event)
struct mlx4_slaves_pport slaves_pport = mlx4_phys_to_slaves_pport(dev,
port);
- for (i = 0; i < dev->num_vfs + 1; i++)
+ for (i = 0; i < dev->persist->num_vfs + 1; i++)
if (test_bit(i, slaves_pport.slaves))
set_and_calc_slave_port_state(dev, i, port,
event, &gen_event);
@@ -429,8 +429,14 @@ void mlx4_master_handle_slave_flr(struct work_struct *work)
if (MLX4_COMM_CMD_FLR == slave_state[i].last_cmd) {
mlx4_dbg(dev, "mlx4_handle_slave_flr: clean slave: %d\n",
i);
-
- mlx4_delete_all_resources_for_slave(dev, i);
+ /* In case of 'Reset flow' FLR can be generated for
+ * a slave before mlx4_load_one is done.
+ * make sure interface is up before trying to delete
+ * slave resources which weren't allocated yet.
+ */
+ if (dev->persist->interface_state &
+ MLX4_INTERFACE_STATE_UP)
+ mlx4_delete_all_resources_for_slave(dev, i);
/*return the slave to running mode*/
spin_lock_irqsave(&priv->mfunc.master.slave_state_lock, flags);
slave_state[i].last_cmd = MLX4_COMM_CMD_RESET;
@@ -560,7 +566,8 @@ static int mlx4_eq_int(struct mlx4_dev *dev, struct mlx4_eq *eq)
mlx4_priv(dev)->sense.do_sense_port[port] = 1;
if (!mlx4_is_master(dev))
break;
- for (i = 0; i < dev->num_vfs + 1; i++) {
+ for (i = 0; i < dev->persist->num_vfs + 1;
+ i++) {
if (!test_bit(i, slaves_port.slaves))
continue;
if (dev->caps.port_type[port] == MLX4_PORT_TYPE_ETH) {
@@ -596,7 +603,9 @@ static int mlx4_eq_int(struct mlx4_dev *dev, struct mlx4_eq *eq)
if (!mlx4_is_master(dev))
break;
if (dev->caps.port_type[port] == MLX4_PORT_TYPE_ETH)
- for (i = 0; i < dev->num_vfs + 1; i++) {
+ for (i = 0;
+ i < dev->persist->num_vfs + 1;
+ i++) {
if (!test_bit(i, slaves_port.slaves))
continue;
if (i == mlx4_master_func_num(dev))
@@ -865,7 +874,7 @@ static void __iomem *mlx4_get_eq_uar(struct mlx4_dev *dev, struct mlx4_eq *eq)
if (!priv->eq_table.uar_map[index]) {
priv->eq_table.uar_map[index] =
- ioremap(pci_resource_start(dev->pdev, 2) +
+ ioremap(pci_resource_start(dev->persist->pdev, 2) +
((eq->eqn / 4) << PAGE_SHIFT),
PAGE_SIZE);
if (!priv->eq_table.uar_map[index]) {
@@ -928,8 +937,10 @@ static int mlx4_create_eq(struct mlx4_dev *dev, int nent,
eq_context = mailbox->buf;
for (i = 0; i < npages; ++i) {
- eq->page_list[i].buf = dma_alloc_coherent(&dev->pdev->dev,
- PAGE_SIZE, &t, GFP_KERNEL);
+ eq->page_list[i].buf = dma_alloc_coherent(&dev->persist->
+ pdev->dev,
+ PAGE_SIZE, &t,
+ GFP_KERNEL);
if (!eq->page_list[i].buf)
goto err_out_free_pages;
@@ -995,7 +1006,7 @@ err_out_free_eq:
err_out_free_pages:
for (i = 0; i < npages; ++i)
if (eq->page_list[i].buf)
- dma_free_coherent(&dev->pdev->dev, PAGE_SIZE,
+ dma_free_coherent(&dev->persist->pdev->dev, PAGE_SIZE,
eq->page_list[i].buf,
eq->page_list[i].map);
@@ -1044,9 +1055,9 @@ static void mlx4_free_eq(struct mlx4_dev *dev,
mlx4_mtt_cleanup(dev, &eq->mtt);
for (i = 0; i < npages; ++i)
- dma_free_coherent(&dev->pdev->dev, PAGE_SIZE,
- eq->page_list[i].buf,
- eq->page_list[i].map);
+ dma_free_coherent(&dev->persist->pdev->dev, PAGE_SIZE,
+ eq->page_list[i].buf,
+ eq->page_list[i].map);
kfree(eq->page_list);
mlx4_bitmap_free(&priv->eq_table.bitmap, eq->eqn, MLX4_USE_RR);
@@ -1060,7 +1071,7 @@ static void mlx4_free_irqs(struct mlx4_dev *dev)
int i, vec;
if (eq_table->have_irq)
- free_irq(dev->pdev->irq, dev);
+ free_irq(dev->persist->pdev->irq, dev);
for (i = 0; i < dev->caps.num_comp_vectors + 1; ++i)
if (eq_table->eq[i].have_irq) {
@@ -1089,7 +1100,8 @@ static int mlx4_map_clr_int(struct mlx4_dev *dev)
{
struct mlx4_priv *priv = mlx4_priv(dev);
- priv->clr_base = ioremap(pci_resource_start(dev->pdev, priv->fw.clr_int_bar) +
+ priv->clr_base = ioremap(pci_resource_start(dev->persist->pdev,
+ priv->fw.clr_int_bar) +
priv->fw.clr_int_base, MLX4_CLR_INT_SIZE);
if (!priv->clr_base) {
mlx4_err(dev, "Couldn't map interrupt clear register, aborting\n");
@@ -1212,13 +1224,13 @@ int mlx4_init_eq_table(struct mlx4_dev *dev)
i * MLX4_IRQNAME_SIZE,
MLX4_IRQNAME_SIZE,
"mlx4-comp-%d@pci:%s", i,
- pci_name(dev->pdev));
+ pci_name(dev->persist->pdev));
} else {
snprintf(priv->eq_table.irq_names +
i * MLX4_IRQNAME_SIZE,
MLX4_IRQNAME_SIZE,
"mlx4-async@pci:%s",
- pci_name(dev->pdev));
+ pci_name(dev->persist->pdev));
}
eq_name = priv->eq_table.irq_names +
@@ -1235,8 +1247,8 @@ int mlx4_init_eq_table(struct mlx4_dev *dev)
snprintf(priv->eq_table.irq_names,
MLX4_IRQNAME_SIZE,
DRV_NAME "@pci:%s",
- pci_name(dev->pdev));
- err = request_irq(dev->pdev->irq, mlx4_interrupt,
+ pci_name(dev->persist->pdev));
+ err = request_irq(dev->persist->pdev->irq, mlx4_interrupt,
IRQF_SHARED, priv->eq_table.irq_names, dev);
if (err)
goto err_out_async;
diff --git a/drivers/net/ethernet/mellanox/mlx4/icm.c b/drivers/net/ethernet/mellanox/mlx4/icm.c
index 97c9b1db1d27..2a9dd460a95f 100644
--- a/drivers/net/ethernet/mellanox/mlx4/icm.c
+++ b/drivers/net/ethernet/mellanox/mlx4/icm.c
@@ -56,7 +56,7 @@ static void mlx4_free_icm_pages(struct mlx4_dev *dev, struct mlx4_icm_chunk *chu
int i;
if (chunk->nsg > 0)
- pci_unmap_sg(dev->pdev, chunk->mem, chunk->npages,
+ pci_unmap_sg(dev->persist->pdev, chunk->mem, chunk->npages,
PCI_DMA_BIDIRECTIONAL);
for (i = 0; i < chunk->npages; ++i)
@@ -69,7 +69,8 @@ static void mlx4_free_icm_coherent(struct mlx4_dev *dev, struct mlx4_icm_chunk *
int i;
for (i = 0; i < chunk->npages; ++i)
- dma_free_coherent(&dev->pdev->dev, chunk->mem[i].length,
+ dma_free_coherent(&dev->persist->pdev->dev,
+ chunk->mem[i].length,
lowmem_page_address(sg_page(&chunk->mem[i])),
sg_dma_address(&chunk->mem[i]));
}
@@ -173,7 +174,7 @@ struct mlx4_icm *mlx4_alloc_icm(struct mlx4_dev *dev, int npages,
--cur_order;
if (coherent)
- ret = mlx4_alloc_icm_coherent(&dev->pdev->dev,
+ ret = mlx4_alloc_icm_coherent(&dev->persist->pdev->dev,
&chunk->mem[chunk->npages],
cur_order, gfp_mask);
else
@@ -193,7 +194,7 @@ struct mlx4_icm *mlx4_alloc_icm(struct mlx4_dev *dev, int npages,
if (coherent)
++chunk->nsg;
else if (chunk->npages == MLX4_ICM_CHUNK_LEN) {
- chunk->nsg = pci_map_sg(dev->pdev, chunk->mem,
+ chunk->nsg = pci_map_sg(dev->persist->pdev, chunk->mem,
chunk->npages,
PCI_DMA_BIDIRECTIONAL);
@@ -208,7 +209,7 @@ struct mlx4_icm *mlx4_alloc_icm(struct mlx4_dev *dev, int npages,
}
if (!coherent && chunk) {
- chunk->nsg = pci_map_sg(dev->pdev, chunk->mem,
+ chunk->nsg = pci_map_sg(dev->persist->pdev, chunk->mem,
chunk->npages,
PCI_DMA_BIDIRECTIONAL);
diff --git a/drivers/net/ethernet/mellanox/mlx4/intf.c b/drivers/net/ethernet/mellanox/mlx4/intf.c
index 116895ac8b35..68d2bad325d5 100644
--- a/drivers/net/ethernet/mellanox/mlx4/intf.c
+++ b/drivers/net/ethernet/mellanox/mlx4/intf.c
@@ -138,13 +138,13 @@ int mlx4_register_device(struct mlx4_dev *dev)
mutex_lock(&intf_mutex);
+ dev->persist->interface_state |= MLX4_INTERFACE_STATE_UP;
list_add_tail(&priv->dev_list, &dev_list);
list_for_each_entry(intf, &intf_list, list)
mlx4_add_device(intf, priv);
mutex_unlock(&intf_mutex);
- if (!mlx4_is_slave(dev))
- mlx4_start_catas_poll(dev);
+ mlx4_start_catas_poll(dev);
return 0;
}
@@ -154,14 +154,14 @@ void mlx4_unregister_device(struct mlx4_dev *dev)
struct mlx4_priv *priv = mlx4_priv(dev);
struct mlx4_interface *intf;
- if (!mlx4_is_slave(dev))
- mlx4_stop_catas_poll(dev);
+ mlx4_stop_catas_poll(dev);
mutex_lock(&intf_mutex);
list_for_each_entry(intf, &intf_list, list)
mlx4_remove_device(intf, priv);
list_del(&priv->dev_list);
+ dev->persist->interface_state &= ~MLX4_INTERFACE_STATE_UP;
mutex_unlock(&intf_mutex);
}
diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c
index 03e9eb0dc761..9c7ef0bffb52 100644
--- a/drivers/net/ethernet/mellanox/mlx4/main.c
+++ b/drivers/net/ethernet/mellanox/mlx4/main.c
@@ -108,6 +108,8 @@ MODULE_PARM_DESC(enable_64b_cqe_eqe,
MLX4_FUNC_CAP_EQE_CQE_STRIDE | \
MLX4_FUNC_CAP_DMFS_A0_STATIC)
+#define RESET_PERSIST_MASK_FLAGS (MLX4_FLAG_SRIOV)
+
static char mlx4_version[] =
DRV_NAME ": Mellanox ConnectX core driver v"
DRV_VERSION " (" DRV_RELDATE ")\n";
@@ -318,10 +320,11 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
return -ENODEV;
}
- if (dev_cap->uar_size > pci_resource_len(dev->pdev, 2)) {
+ if (dev_cap->uar_size > pci_resource_len(dev->persist->pdev, 2)) {
mlx4_err(dev, "HCA reported UAR size of 0x%x bigger than PCI resource 2 size of 0x%llx, aborting\n",
dev_cap->uar_size,
- (unsigned long long) pci_resource_len(dev->pdev, 2));
+ (unsigned long long)
+ pci_resource_len(dev->persist->pdev, 2));
return -ENODEV;
}
@@ -541,8 +544,10 @@ static int mlx4_get_pcie_dev_link_caps(struct mlx4_dev *dev,
*speed = PCI_SPEED_UNKNOWN;
*width = PCIE_LNK_WIDTH_UNKNOWN;
- err1 = pcie_capability_read_dword(dev->pdev, PCI_EXP_LNKCAP, &lnkcap1);
- err2 = pcie_capability_read_dword(dev->pdev, PCI_EXP_LNKCAP2, &lnkcap2);
+ err1 = pcie_capability_read_dword(dev->persist->pdev, PCI_EXP_LNKCAP,
+ &lnkcap1);
+ err2 = pcie_capability_read_dword(dev->persist->pdev, PCI_EXP_LNKCAP2,
+ &lnkcap2);
if (!err2 && lnkcap2) { /* PCIe r3.0-compliant */
if (lnkcap2 & PCI_EXP_LNKCAP2_SLS_8_0GB)
*speed = PCIE_SPEED_8_0GT;
@@ -587,7 +592,7 @@ static void mlx4_check_pcie_caps(struct mlx4_dev *dev)
return;
}
- err = pcie_get_minimum_link(dev->pdev, &speed, &width);
+ err = pcie_get_minimum_link(dev->persist->pdev, &speed, &width);
if (err || speed == PCI_SPEED_UNKNOWN ||
width == PCIE_LNK_WIDTH_UNKNOWN) {
mlx4_warn(dev,
@@ -837,10 +842,12 @@ static int mlx4_slave_cap(struct mlx4_dev *dev)
if (dev->caps.uar_page_size * (dev->caps.num_uars -
dev->caps.reserved_uars) >
- pci_resource_len(dev->pdev, 2)) {
+ pci_resource_len(dev->persist->pdev,
+ 2)) {
mlx4_err(dev, "HCA reported UAR region size of 0x%x bigger than PCI resource 2 size of 0x%llx, aborting\n",
dev->caps.uar_page_size * dev->caps.num_uars,
- (unsigned long long) pci_resource_len(dev->pdev, 2));
+ (unsigned long long)
+ pci_resource_len(dev->persist->pdev, 2));
goto err_mem;
}
@@ -1477,7 +1484,8 @@ static void mlx4_slave_exit(struct mlx4_dev *dev)
struct mlx4_priv *priv = mlx4_priv(dev);
mutex_lock(&priv->cmd.slave_cmd_mutex);
- if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_RESET, 0, MLX4_COMM_TIME))
+ if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_RESET, 0, MLX4_COMM_CMD_NA_OP,
+ MLX4_COMM_TIME))
mlx4_warn(dev, "Failed to close slave function\n");
mutex_unlock(&priv->cmd.slave_cmd_mutex);
}
@@ -1492,9 +1500,9 @@ static int map_bf_area(struct mlx4_dev *dev)
if (!dev->caps.bf_reg_size)
return -ENXIO;
- bf_start = pci_resource_start(dev->pdev, 2) +
+ bf_start = pci_resource_start(dev->persist->pdev, 2) +
(dev->caps.num_uars << PAGE_SHIFT);
- bf_len = pci_resource_len(dev->pdev, 2) -
+ bf_len = pci_resource_len(dev->persist->pdev, 2) -
(dev->caps.num_uars << PAGE_SHIFT);
priv->bf_mapping = io_mapping_create_wc(bf_start, bf_len);
if (!priv->bf_mapping)
@@ -1536,7 +1544,8 @@ static int map_internal_clock(struct mlx4_dev *dev)
struct mlx4_priv *priv = mlx4_priv(dev);
priv->clock_mapping =
- ioremap(pci_resource_start(dev->pdev, priv->fw.clock_bar) +
+ ioremap(pci_resource_start(dev->persist->pdev,
+ priv->fw.clock_bar) +
priv->fw.clock_offset, MLX4_CLOCK_SIZE);
if (!priv->clock_mapping)
@@ -1573,6 +1582,50 @@ static void mlx4_close_fw(struct mlx4_dev *dev)
}
}
+static int mlx4_comm_check_offline(struct mlx4_dev *dev)
+{
+#define COMM_CHAN_OFFLINE_OFFSET 0x09
+
+ u32 comm_flags;
+ u32 offline_bit;
+ unsigned long end;
+ struct mlx4_priv *priv = mlx4_priv(dev);
+
+ end = msecs_to_jiffies(MLX4_COMM_OFFLINE_TIME_OUT) + jiffies;
+ while (time_before(jiffies, end)) {
+ comm_flags = swab32(readl((__iomem char *)priv->mfunc.comm +
+ MLX4_COMM_CHAN_FLAGS));
+ offline_bit = (comm_flags &
+ (u32)(1 << COMM_CHAN_OFFLINE_OFFSET));
+ if (!offline_bit)
+ return 0;
+ /* There are cases as part of AER/Reset flow that PF needs
+ * around 100 msec to load. We therefore sleep for 100 msec
+ * to allow other tasks to make use of that CPU during this
+ * time interval.
+ */
+ msleep(100);
+ }
+ mlx4_err(dev, "Communication channel is offline.\n");
+ return -EIO;
+}
+
+static void mlx4_reset_vf_support(struct mlx4_dev *dev)
+{
+#define COMM_CHAN_RST_OFFSET 0x1e
+
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ u32 comm_rst;
+ u32 comm_caps;
+
+ comm_caps = swab32(readl((__iomem char *)priv->mfunc.comm +
+ MLX4_COMM_CHAN_CAPS));
+ comm_rst = (comm_caps & (u32)(1 << COMM_CHAN_RST_OFFSET));
+
+ if (comm_rst)
+ dev->caps.vf_caps |= MLX4_VF_CAP_FLAG_RESET;
+}
+
static int mlx4_init_slave(struct mlx4_dev *dev)
{
struct mlx4_priv *priv = mlx4_priv(dev);
@@ -1588,9 +1641,15 @@ static int mlx4_init_slave(struct mlx4_dev *dev)
mutex_lock(&priv->cmd.slave_cmd_mutex);
priv->cmd.max_cmds = 1;
+ if (mlx4_comm_check_offline(dev)) {
+ mlx4_err(dev, "PF is not responsive, skipping initialization\n");
+ goto err_offline;
+ }
+
+ mlx4_reset_vf_support(dev);
mlx4_warn(dev, "Sending reset\n");
ret_from_reset = mlx4_comm_cmd(dev, MLX4_COMM_CMD_RESET, 0,
- MLX4_COMM_TIME);
+ MLX4_COMM_CMD_NA_OP, MLX4_COMM_TIME);
/* if we are in the middle of flr the slave will try
* NUM_OF_RESET_RETRIES times before leaving.*/
if (ret_from_reset) {
@@ -1615,22 +1674,24 @@ static int mlx4_init_slave(struct mlx4_dev *dev)
mlx4_warn(dev, "Sending vhcr0\n");
if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_VHCR0, dma >> 48,
- MLX4_COMM_TIME))
+ MLX4_COMM_CMD_NA_OP, MLX4_COMM_TIME))
goto err;
if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_VHCR1, dma >> 32,
- MLX4_COMM_TIME))
+ MLX4_COMM_CMD_NA_OP, MLX4_COMM_TIME))
goto err;
if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_VHCR2, dma >> 16,
- MLX4_COMM_TIME))
+ MLX4_COMM_CMD_NA_OP, MLX4_COMM_TIME))
goto err;
- if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_VHCR_EN, dma, MLX4_COMM_TIME))
+ if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_VHCR_EN, dma,
+ MLX4_COMM_CMD_NA_OP, MLX4_COMM_TIME))
goto err;
mutex_unlock(&priv->cmd.slave_cmd_mutex);
return 0;
err:
- mlx4_comm_cmd(dev, MLX4_COMM_CMD_RESET, 0, 0);
+ mlx4_comm_cmd(dev, MLX4_COMM_CMD_RESET, 0, MLX4_COMM_CMD_NA_OP, 0);
+err_offline:
mutex_unlock(&priv->cmd.slave_cmd_mutex);
return -EIO;
}
@@ -1705,7 +1766,8 @@ static void choose_steering_mode(struct mlx4_dev *dev,
if (mlx4_log_num_mgm_entry_size <= 0 &&
dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_FS_EN &&
(!mlx4_is_mfunc(dev) ||
- (dev_cap->fs_max_num_qp_per_entry >= (dev->num_vfs + 1))) &&
+ (dev_cap->fs_max_num_qp_per_entry >=
+ (dev->persist->num_vfs + 1))) &&
choose_log_fs_mgm_entry_size(dev_cap->fs_max_num_qp_per_entry) >=
MLX4_MIN_MGM_LOG_ENTRY_SIZE) {
dev->oper_log_mgm_entry_size =
@@ -2288,7 +2350,8 @@ static void mlx4_enable_msi_x(struct mlx4_dev *dev)
for (i = 0; i < nreq; ++i)
entries[i].entry = i;
- nreq = pci_enable_msix_range(dev->pdev, entries, 2, nreq);
+ nreq = pci_enable_msix_range(dev->persist->pdev, entries, 2,
+ nreq);
if (nreq < 0) {
kfree(entries);
@@ -2316,7 +2379,7 @@ no_msi:
dev->caps.comp_pool = 0;
for (i = 0; i < 2; ++i)
- priv->eq_table.eq[i].irq = dev->pdev->irq;
+ priv->eq_table.eq[i].irq = dev->persist->pdev->irq;
}
static int mlx4_init_port_info(struct mlx4_dev *dev, int port)
@@ -2344,7 +2407,7 @@ static int mlx4_init_port_info(struct mlx4_dev *dev, int port)
info->port_attr.show = show_port_type;
sysfs_attr_init(&info->port_attr.attr);
- err = device_create_file(&dev->pdev->dev, &info->port_attr);
+ err = device_create_file(&dev->persist->pdev->dev, &info->port_attr);
if (err) {
mlx4_err(dev, "Failed to create file for port %d\n", port);
info->port = -1;
@@ -2361,10 +2424,12 @@ static int mlx4_init_port_info(struct mlx4_dev *dev, int port)
info->port_mtu_attr.show = show_port_ib_mtu;
sysfs_attr_init(&info->port_mtu_attr.attr);
- err = device_create_file(&dev->pdev->dev, &info->port_mtu_attr);
+ err = device_create_file(&dev->persist->pdev->dev,
+ &info->port_mtu_attr);
if (err) {
mlx4_err(dev, "Failed to create mtu file for port %d\n", port);
- device_remove_file(&info->dev->pdev->dev, &info->port_attr);
+ device_remove_file(&info->dev->persist->pdev->dev,
+ &info->port_attr);
info->port = -1;
}
@@ -2376,8 +2441,9 @@ static void mlx4_cleanup_port_info(struct mlx4_port_info *info)
if (info->port < 0)
return;
- device_remove_file(&info->dev->pdev->dev, &info->port_attr);
- device_remove_file(&info->dev->pdev->dev, &info->port_mtu_attr);
+ device_remove_file(&info->dev->persist->pdev->dev, &info->port_attr);
+ device_remove_file(&info->dev->persist->pdev->dev,
+ &info->port_mtu_attr);
}
static int mlx4_init_steering(struct mlx4_dev *dev)
@@ -2444,10 +2510,11 @@ static int mlx4_get_ownership(struct mlx4_dev *dev)
void __iomem *owner;
u32 ret;
- if (pci_channel_offline(dev->pdev))
+ if (pci_channel_offline(dev->persist->pdev))
return -EIO;
- owner = ioremap(pci_resource_start(dev->pdev, 0) + MLX4_OWNER_BASE,
+ owner = ioremap(pci_resource_start(dev->persist->pdev, 0) +
+ MLX4_OWNER_BASE,
MLX4_OWNER_SIZE);
if (!owner) {
mlx4_err(dev, "Failed to obtain ownership bit\n");
@@ -2463,10 +2530,11 @@ static void mlx4_free_ownership(struct mlx4_dev *dev)
{
void __iomem *owner;
- if (pci_channel_offline(dev->pdev))
+ if (pci_channel_offline(dev->persist->pdev))
return;
- owner = ioremap(pci_resource_start(dev->pdev, 0) + MLX4_OWNER_BASE,
+ owner = ioremap(pci_resource_start(dev->persist->pdev, 0) +
+ MLX4_OWNER_BASE,
MLX4_OWNER_SIZE);
if (!owner) {
mlx4_err(dev, "Failed to obtain ownership bit\n");
@@ -2481,11 +2549,19 @@ static void mlx4_free_ownership(struct mlx4_dev *dev)
!!((flags) & MLX4_FLAG_MASTER))
static u64 mlx4_enable_sriov(struct mlx4_dev *dev, struct pci_dev *pdev,
- u8 total_vfs, int existing_vfs)
+ u8 total_vfs, int existing_vfs, int reset_flow)
{
u64 dev_flags = dev->flags;
int err = 0;
+ if (reset_flow) {
+ dev->dev_vfs = kcalloc(total_vfs, sizeof(*dev->dev_vfs),
+ GFP_KERNEL);
+ if (!dev->dev_vfs)
+ goto free_mem;
+ return dev_flags;
+ }
+
atomic_inc(&pf_loading);
if (dev->flags & MLX4_FLAG_SRIOV) {
if (existing_vfs != total_vfs) {
@@ -2514,13 +2590,14 @@ static u64 mlx4_enable_sriov(struct mlx4_dev *dev, struct pci_dev *pdev,
dev_flags |= MLX4_FLAG_SRIOV |
MLX4_FLAG_MASTER;
dev_flags &= ~MLX4_FLAG_SLAVE;
- dev->num_vfs = total_vfs;
+ dev->persist->num_vfs = total_vfs;
}
return dev_flags;
disable_sriov:
atomic_dec(&pf_loading);
- dev->num_vfs = 0;
+free_mem:
+ dev->persist->num_vfs = 0;
kfree(dev->dev_vfs);
return dev_flags & ~MLX4_FLAG_MASTER;
}
@@ -2544,7 +2621,8 @@ static int mlx4_check_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap
}
static int mlx4_load_one(struct pci_dev *pdev, int pci_dev_data,
- int total_vfs, int *nvfs, struct mlx4_priv *priv)
+ int total_vfs, int *nvfs, struct mlx4_priv *priv,
+ int reset_flow)
{
struct mlx4_dev *dev;
unsigned sum = 0;
@@ -2607,10 +2685,15 @@ static int mlx4_load_one(struct pci_dev *pdev, int pci_dev_data,
existing_vfs = pci_num_vf(pdev);
if (existing_vfs)
dev->flags |= MLX4_FLAG_SRIOV;
- dev->num_vfs = total_vfs;
+ dev->persist->num_vfs = total_vfs;
}
}
+ /* on load remove any previous indication of internal error,
+ * device is up.
+ */
+ dev->persist->state = MLX4_DEVICE_STATE_UP;
+
slave_start:
err = mlx4_cmd_init(dev);
if (err) {
@@ -2661,8 +2744,10 @@ slave_start:
goto err_fw;
if (!(dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_SYS_EQS)) {
- u64 dev_flags = mlx4_enable_sriov(dev, pdev, total_vfs,
- existing_vfs);
+ u64 dev_flags = mlx4_enable_sriov(dev, pdev,
+ total_vfs,
+ existing_vfs,
+ reset_flow);
mlx4_cmd_cleanup(dev, MLX4_CMD_CLEANUP_ALL);
dev->flags = dev_flags;
@@ -2704,7 +2789,7 @@ slave_start:
if (dev->flags & MLX4_FLAG_SRIOV) {
if (!existing_vfs)
pci_disable_sriov(pdev);
- if (mlx4_is_master(dev))
+ if (mlx4_is_master(dev) && !reset_flow)
atomic_dec(&pf_loading);
dev->flags &= ~MLX4_FLAG_SRIOV;
}
@@ -2718,7 +2803,8 @@ slave_start:
}
if (mlx4_is_master(dev) && (dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_SYS_EQS)) {
- u64 dev_flags = mlx4_enable_sriov(dev, pdev, total_vfs, existing_vfs);
+ u64 dev_flags = mlx4_enable_sriov(dev, pdev, total_vfs,
+ existing_vfs, reset_flow);
if ((dev->flags ^ dev_flags) & (MLX4_FLAG_MASTER | MLX4_FLAG_SLAVE)) {
mlx4_cmd_cleanup(dev, MLX4_CMD_CLEANUP_VHCR);
@@ -2771,12 +2857,14 @@ slave_start:
dev->caps.num_ports);
goto err_close;
}
- memcpy(dev->nvfs, nvfs, sizeof(dev->nvfs));
+ memcpy(dev->persist->nvfs, nvfs, sizeof(dev->persist->nvfs));
- for (i = 0; i < sizeof(dev->nvfs)/sizeof(dev->nvfs[0]); i++) {
+ for (i = 0;
+ i < sizeof(dev->persist->nvfs)/
+ sizeof(dev->persist->nvfs[0]); i++) {
unsigned j;
- for (j = 0; j < dev->nvfs[i]; ++sum, ++j) {
+ for (j = 0; j < dev->persist->nvfs[i]; ++sum, ++j) {
dev->dev_vfs[sum].min_port = i < 2 ? i + 1 : 1;
dev->dev_vfs[sum].n_ports = i < 2 ? 1 :
dev->caps.num_ports;
@@ -2828,6 +2916,17 @@ slave_start:
goto err_steer;
mlx4_init_quotas(dev);
+ /* When PF resources are ready arm its comm channel to enable
+ * getting commands
+ */
+ if (mlx4_is_master(dev)) {
+ err = mlx4_ARM_COMM_CHANNEL(dev);
+ if (err) {
+ mlx4_err(dev, " Failed to arm comm channel eq: %x\n",
+ err);
+ goto err_steer;
+ }
+ }
for (port = 1; port <= dev->caps.num_ports; port++) {
err = mlx4_init_port_info(dev, port);
@@ -2846,7 +2945,7 @@ slave_start:
priv->removed = 0;
- if (mlx4_is_master(dev) && dev->num_vfs)
+ if (mlx4_is_master(dev) && dev->persist->num_vfs && !reset_flow)
atomic_dec(&pf_loading);
kfree(dev_cap);
@@ -2905,10 +3004,12 @@ err_cmd:
mlx4_cmd_cleanup(dev, MLX4_CMD_CLEANUP_ALL);
err_sriov:
- if (dev->flags & MLX4_FLAG_SRIOV && !existing_vfs)
+ if (dev->flags & MLX4_FLAG_SRIOV && !existing_vfs) {
pci_disable_sriov(pdev);
+ dev->flags &= ~MLX4_FLAG_SRIOV;
+ }
- if (mlx4_is_master(dev) && dev->num_vfs)
+ if (mlx4_is_master(dev) && dev->persist->num_vfs && !reset_flow)
atomic_dec(&pf_loading);
kfree(priv->dev.dev_vfs);
@@ -3049,11 +3150,19 @@ static int __mlx4_init_one(struct pci_dev *pdev, int pci_dev_data,
}
}
- err = mlx4_load_one(pdev, pci_dev_data, total_vfs, nvfs, priv);
+ err = mlx4_catas_init(&priv->dev);
if (err)
goto err_release_regions;
+
+ err = mlx4_load_one(pdev, pci_dev_data, total_vfs, nvfs, priv, 0);
+ if (err)
+ goto err_catas;
+
return 0;
+err_catas:
+ mlx4_catas_end(&priv->dev);
+
err_release_regions:
pci_release_regions(pdev);
@@ -3076,38 +3185,60 @@ static int mlx4_init_one(struct pci_dev *pdev, const struct pci_device_id *id)
return -ENOMEM;
dev = &priv->dev;
- dev->pdev = pdev;
- pci_set_drvdata(pdev, dev);
+ dev->persist = kzalloc(sizeof(*dev->persist), GFP_KERNEL);
+ if (!dev->persist) {
+ kfree(priv);
+ return -ENOMEM;
+ }
+ dev->persist->pdev = pdev;
+ dev->persist->dev = dev;
+ pci_set_drvdata(pdev, dev->persist);
priv->pci_dev_data = id->driver_data;
+ mutex_init(&dev->persist->device_state_mutex);
+ mutex_init(&dev->persist->interface_state_mutex);
ret = __mlx4_init_one(pdev, id->driver_data, priv);
- if (ret)
+ if (ret) {
+ kfree(dev->persist);
kfree(priv);
+ } else {
+ pci_save_state(pdev);
+ }
return ret;
}
+static void mlx4_clean_dev(struct mlx4_dev *dev)
+{
+ struct mlx4_dev_persistent *persist = dev->persist;
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ unsigned long flags = (dev->flags & RESET_PERSIST_MASK_FLAGS);
+
+ memset(priv, 0, sizeof(*priv));
+ priv->dev.persist = persist;
+ priv->dev.flags = flags;
+}
+
static void mlx4_unload_one(struct pci_dev *pdev)
{
- struct mlx4_dev *dev = pci_get_drvdata(pdev);
+ struct mlx4_dev_persistent *persist = pci_get_drvdata(pdev);
+ struct mlx4_dev *dev = persist->dev;
struct mlx4_priv *priv = mlx4_priv(dev);
int pci_dev_data;
- int p;
- int active_vfs = 0;
+ int p, i;
if (priv->removed)
return;
+ /* saving current ports type for further use */
+ for (i = 0; i < dev->caps.num_ports; i++) {
+ dev->persist->curr_port_type[i] = dev->caps.port_type[i + 1];
+ dev->persist->curr_port_poss_type[i] = dev->caps.
+ possible_type[i + 1];
+ }
+
pci_dev_data = priv->pci_dev_data;
- /* Disabling SR-IOV is not allowed while there are active vf's */
- if (mlx4_is_master(dev)) {
- active_vfs = mlx4_how_many_lives_vf(dev);
- if (active_vfs) {
- pr_warn("Removing PF when there are active VF's !!\n");
- pr_warn("Will not disable SR-IOV.\n");
- }
- }
mlx4_stop_sense(dev);
mlx4_unregister_device(dev);
@@ -3151,12 +3282,6 @@ static void mlx4_unload_one(struct pci_dev *pdev)
if (dev->flags & MLX4_FLAG_MSI_X)
pci_disable_msix(pdev);
- if (dev->flags & MLX4_FLAG_SRIOV && !active_vfs) {
- mlx4_warn(dev, "Disabling SR-IOV\n");
- pci_disable_sriov(pdev);
- dev->flags &= ~MLX4_FLAG_SRIOV;
- dev->num_vfs = 0;
- }
if (!mlx4_is_slave(dev))
mlx4_free_ownership(dev);
@@ -3168,42 +3293,96 @@ static void mlx4_unload_one(struct pci_dev *pdev)
kfree(dev->caps.qp1_proxy);
kfree(dev->dev_vfs);
- memset(priv, 0, sizeof(*priv));
+ mlx4_clean_dev(dev);
priv->pci_dev_data = pci_dev_data;
priv->removed = 1;
}
static void mlx4_remove_one(struct pci_dev *pdev)
{
- struct mlx4_dev *dev = pci_get_drvdata(pdev);
+ struct mlx4_dev_persistent *persist = pci_get_drvdata(pdev);
+ struct mlx4_dev *dev = persist->dev;
struct mlx4_priv *priv = mlx4_priv(dev);
+ int active_vfs = 0;
+
+ mutex_lock(&persist->interface_state_mutex);
+ persist->interface_state |= MLX4_INTERFACE_STATE_DELETION;
+ mutex_unlock(&persist->interface_state_mutex);
+
+ /* Disabling SR-IOV is not allowed while there are active vf's */
+ if (mlx4_is_master(dev) && dev->flags & MLX4_FLAG_SRIOV) {
+ active_vfs = mlx4_how_many_lives_vf(dev);
+ if (active_vfs) {
+ pr_warn("Removing PF when there are active VF's !!\n");
+ pr_warn("Will not disable SR-IOV.\n");
+ }
+ }
+
+ /* device marked to be under deletion running now without the lock
+ * letting other tasks to be terminated
+ */
+ if (persist->interface_state & MLX4_INTERFACE_STATE_UP)
+ mlx4_unload_one(pdev);
+ else
+ mlx4_info(dev, "%s: interface is down\n", __func__);
+ mlx4_catas_end(dev);
+ if (dev->flags & MLX4_FLAG_SRIOV && !active_vfs) {
+ mlx4_warn(dev, "Disabling SR-IOV\n");
+ pci_disable_sriov(pdev);
+ }
- mlx4_unload_one(pdev);
pci_release_regions(pdev);
pci_disable_device(pdev);
+ kfree(dev->persist);
kfree(priv);
pci_set_drvdata(pdev, NULL);
}
+static int restore_current_port_types(struct mlx4_dev *dev,
+ enum mlx4_port_type *types,
+ enum mlx4_port_type *poss_types)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ int err, i;
+
+ mlx4_stop_sense(dev);
+
+ mutex_lock(&priv->port_mutex);
+ for (i = 0; i < dev->caps.num_ports; i++)
+ dev->caps.possible_type[i + 1] = poss_types[i];
+ err = mlx4_change_port_types(dev, types);
+ mlx4_start_sense(dev);
+ mutex_unlock(&priv->port_mutex);
+
+ return err;
+}
+
int mlx4_restart_one(struct pci_dev *pdev)
{
- struct mlx4_dev *dev = pci_get_drvdata(pdev);
+ struct mlx4_dev_persistent *persist = pci_get_drvdata(pdev);
+ struct mlx4_dev *dev = persist->dev;
struct mlx4_priv *priv = mlx4_priv(dev);
int nvfs[MLX4_MAX_PORTS + 1] = {0, 0, 0};
int pci_dev_data, err, total_vfs;
pci_dev_data = priv->pci_dev_data;
- total_vfs = dev->num_vfs;
- memcpy(nvfs, dev->nvfs, sizeof(dev->nvfs));
+ total_vfs = dev->persist->num_vfs;
+ memcpy(nvfs, dev->persist->nvfs, sizeof(dev->persist->nvfs));
mlx4_unload_one(pdev);
- err = mlx4_load_one(pdev, pci_dev_data, total_vfs, nvfs, priv);
+ err = mlx4_load_one(pdev, pci_dev_data, total_vfs, nvfs, priv, 1);
if (err) {
mlx4_err(dev, "%s: ERROR: mlx4_load_one failed, pci_name=%s, err=%d\n",
__func__, pci_name(pdev), err);
return err;
}
+ err = restore_current_port_types(dev, dev->persist->curr_port_type,
+ dev->persist->curr_port_poss_type);
+ if (err)
+ mlx4_err(dev, "could not restore original port types (%d)\n",
+ err);
+
return err;
}
@@ -3258,23 +3437,79 @@ MODULE_DEVICE_TABLE(pci, mlx4_pci_table);
static pci_ers_result_t mlx4_pci_err_detected(struct pci_dev *pdev,
pci_channel_state_t state)
{
- mlx4_unload_one(pdev);
+ struct mlx4_dev_persistent *persist = pci_get_drvdata(pdev);
+
+ mlx4_err(persist->dev, "mlx4_pci_err_detected was called\n");
+ mlx4_enter_error_state(persist);
- return state == pci_channel_io_perm_failure ?
- PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_NEED_RESET;
+ mutex_lock(&persist->interface_state_mutex);
+ if (persist->interface_state & MLX4_INTERFACE_STATE_UP)
+ mlx4_unload_one(pdev);
+
+ mutex_unlock(&persist->interface_state_mutex);
+ if (state == pci_channel_io_perm_failure)
+ return PCI_ERS_RESULT_DISCONNECT;
+
+ pci_disable_device(pdev);
+ return PCI_ERS_RESULT_NEED_RESET;
}
static pci_ers_result_t mlx4_pci_slot_reset(struct pci_dev *pdev)
{
- struct mlx4_dev *dev = pci_get_drvdata(pdev);
+ struct mlx4_dev_persistent *persist = pci_get_drvdata(pdev);
+ struct mlx4_dev *dev = persist->dev;
struct mlx4_priv *priv = mlx4_priv(dev);
int ret;
+ int nvfs[MLX4_MAX_PORTS + 1] = {0, 0, 0};
+ int total_vfs;
+
+ mlx4_err(dev, "mlx4_pci_slot_reset was called\n");
+ ret = pci_enable_device(pdev);
+ if (ret) {
+ mlx4_err(dev, "Can not re-enable device, ret=%d\n", ret);
+ return PCI_ERS_RESULT_DISCONNECT;
+ }
+
+ pci_set_master(pdev);
+ pci_restore_state(pdev);
+ pci_save_state(pdev);
+
+ total_vfs = dev->persist->num_vfs;
+ memcpy(nvfs, dev->persist->nvfs, sizeof(dev->persist->nvfs));
+
+ mutex_lock(&persist->interface_state_mutex);
+ if (!(persist->interface_state & MLX4_INTERFACE_STATE_UP)) {
+ ret = mlx4_load_one(pdev, priv->pci_dev_data, total_vfs, nvfs,
+ priv, 1);
+ if (ret) {
+ mlx4_err(dev, "%s: mlx4_load_one failed, ret=%d\n",
+ __func__, ret);
+ goto end;
+ }
- ret = __mlx4_init_one(pdev, priv->pci_dev_data, priv);
+ ret = restore_current_port_types(dev, dev->persist->
+ curr_port_type, dev->persist->
+ curr_port_poss_type);
+ if (ret)
+ mlx4_err(dev, "could not restore original port types (%d)\n", ret);
+ }
+end:
+ mutex_unlock(&persist->interface_state_mutex);
return ret ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED;
}
+static void mlx4_shutdown(struct pci_dev *pdev)
+{
+ struct mlx4_dev_persistent *persist = pci_get_drvdata(pdev);
+
+ mlx4_info(persist->dev, "mlx4_shutdown was called\n");
+ mutex_lock(&persist->interface_state_mutex);
+ if (persist->interface_state & MLX4_INTERFACE_STATE_UP)
+ mlx4_unload_one(pdev);
+ mutex_unlock(&persist->interface_state_mutex);
+}
+
static const struct pci_error_handlers mlx4_err_handler = {
.error_detected = mlx4_pci_err_detected,
.slot_reset = mlx4_pci_slot_reset,
@@ -3284,7 +3519,7 @@ static struct pci_driver mlx4_driver = {
.name = DRV_NAME,
.id_table = mlx4_pci_table,
.probe = mlx4_init_one,
- .shutdown = mlx4_unload_one,
+ .shutdown = mlx4_shutdown,
.remove = mlx4_remove_one,
.err_handler = &mlx4_err_handler,
};
@@ -3336,7 +3571,6 @@ static int __init mlx4_init(void)
if (mlx4_verify_params())
return -EINVAL;
- mlx4_catas_init();
mlx4_wq = create_singlethread_workqueue("mlx4");
if (!mlx4_wq)
diff --git a/drivers/net/ethernet/mellanox/mlx4/mcg.c b/drivers/net/ethernet/mellanox/mlx4/mcg.c
index a3867e7ef885..bd9ea0d01aae 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mcg.c
+++ b/drivers/net/ethernet/mellanox/mlx4/mcg.c
@@ -1318,6 +1318,9 @@ out:
mutex_unlock(&priv->mcg_table.mutex);
mlx4_free_cmd_mailbox(dev, mailbox);
+ if (err && dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR)
+ /* In case device is under an error, return success as a closing command */
+ err = 0;
return err;
}
@@ -1347,6 +1350,9 @@ static int mlx4_QP_ATTACH(struct mlx4_dev *dev, struct mlx4_qp *qp,
MLX4_CMD_WRAPPED);
mlx4_free_cmd_mailbox(dev, mailbox);
+ if (err && !attach &&
+ dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR)
+ err = 0;
return err;
}
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
index bdd4eea2247c..096a81c16a9b 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
@@ -85,7 +85,9 @@ enum {
MLX4_CLR_INT_SIZE = 0x00008,
MLX4_SLAVE_COMM_BASE = 0x0,
MLX4_COMM_PAGESIZE = 0x1000,
- MLX4_CLOCK_SIZE = 0x00008
+ MLX4_CLOCK_SIZE = 0x00008,
+ MLX4_COMM_CHAN_CAPS = 0x8,
+ MLX4_COMM_CHAN_FLAGS = 0xc
};
enum {
@@ -120,6 +122,10 @@ enum mlx4_mpt_state {
};
#define MLX4_COMM_TIME 10000
+#define MLX4_COMM_OFFLINE_TIME_OUT 30000
+#define MLX4_COMM_CMD_NA_OP 0x0
+
+
enum {
MLX4_COMM_CMD_RESET,
MLX4_COMM_CMD_VHCR0,
@@ -221,19 +227,21 @@ extern int mlx4_debug_level;
#define mlx4_dbg(mdev, format, ...) \
do { \
if (mlx4_debug_level) \
- dev_printk(KERN_DEBUG, &(mdev)->pdev->dev, format, \
+ dev_printk(KERN_DEBUG, \
+ &(mdev)->persist->pdev->dev, format, \
##__VA_ARGS__); \
} while (0)
#define mlx4_err(mdev, format, ...) \
- dev_err(&(mdev)->pdev->dev, format, ##__VA_ARGS__)
+ dev_err(&(mdev)->persist->pdev->dev, format, ##__VA_ARGS__)
#define mlx4_info(mdev, format, ...) \
- dev_info(&(mdev)->pdev->dev, format, ##__VA_ARGS__)
+ dev_info(&(mdev)->persist->pdev->dev, format, ##__VA_ARGS__)
#define mlx4_warn(mdev, format, ...) \
- dev_warn(&(mdev)->pdev->dev, format, ##__VA_ARGS__)
+ dev_warn(&(mdev)->persist->pdev->dev, format, ##__VA_ARGS__)
extern int mlx4_log_num_mgm_entry_size;
extern int log_mtts_per_seg;
+extern int mlx4_internal_err_reset;
#define MLX4_MAX_NUM_SLAVES (MLX4_MAX_NUM_PF + MLX4_MAX_NUM_VF)
#define ALL_SLAVES 0xff
@@ -606,7 +614,6 @@ struct mlx4_mgm {
struct mlx4_cmd {
struct pci_pool *pool;
void __iomem *hcr;
- struct mutex hcr_mutex;
struct mutex slave_cmd_mutex;
struct semaphore poll_sem;
struct semaphore event_sem;
@@ -994,7 +1001,8 @@ void __mlx4_xrcd_free(struct mlx4_dev *dev, u32 xrcdn);
void mlx4_start_catas_poll(struct mlx4_dev *dev);
void mlx4_stop_catas_poll(struct mlx4_dev *dev);
-void mlx4_catas_init(void);
+int mlx4_catas_init(struct mlx4_dev *dev);
+void mlx4_catas_end(struct mlx4_dev *dev);
int mlx4_restart_one(struct pci_dev *pdev);
int mlx4_register_device(struct mlx4_dev *dev);
void mlx4_unregister_device(struct mlx4_dev *dev);
@@ -1160,13 +1168,14 @@ enum {
int mlx4_cmd_init(struct mlx4_dev *dev);
void mlx4_cmd_cleanup(struct mlx4_dev *dev, int cleanup_mask);
int mlx4_multi_func_init(struct mlx4_dev *dev);
+int mlx4_ARM_COMM_CHANNEL(struct mlx4_dev *dev);
void mlx4_multi_func_cleanup(struct mlx4_dev *dev);
void mlx4_cmd_event(struct mlx4_dev *dev, u16 token, u8 status, u64 out_param);
int mlx4_cmd_use_events(struct mlx4_dev *dev);
void mlx4_cmd_use_polling(struct mlx4_dev *dev);
int mlx4_comm_cmd(struct mlx4_dev *dev, u8 cmd, u16 param,
- unsigned long timeout);
+ u16 op, unsigned long timeout);
void mlx4_cq_tasklet_cb(unsigned long data);
void mlx4_cq_completion(struct mlx4_dev *dev, u32 cqn);
@@ -1176,7 +1185,7 @@ void mlx4_qp_event(struct mlx4_dev *dev, u32 qpn, int event_type);
void mlx4_srq_event(struct mlx4_dev *dev, u32 srqn, int event_type);
-void mlx4_handle_catas_err(struct mlx4_dev *dev);
+void mlx4_enter_error_state(struct mlx4_dev_persistent *persist);
int mlx4_SENSE_PORT(struct mlx4_dev *dev, int port,
enum mlx4_port_type *type);
diff --git a/drivers/net/ethernet/mellanox/mlx4/mr.c b/drivers/net/ethernet/mellanox/mlx4/mr.c
index 7094a9c70fd5..8dbdf1d29357 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mr.c
+++ b/drivers/net/ethernet/mellanox/mlx4/mr.c
@@ -708,13 +708,13 @@ static int mlx4_write_mtt_chunk(struct mlx4_dev *dev, struct mlx4_mtt *mtt,
if (!mtts)
return -ENOMEM;
- dma_sync_single_for_cpu(&dev->pdev->dev, dma_handle,
+ dma_sync_single_for_cpu(&dev->persist->pdev->dev, dma_handle,
npages * sizeof (u64), DMA_TO_DEVICE);
for (i = 0; i < npages; ++i)
mtts[i] = cpu_to_be64(page_list[i] | MLX4_MTT_FLAG_PRESENT);
- dma_sync_single_for_device(&dev->pdev->dev, dma_handle,
+ dma_sync_single_for_device(&dev->persist->pdev->dev, dma_handle,
npages * sizeof (u64), DMA_TO_DEVICE);
return 0;
@@ -1020,13 +1020,13 @@ int mlx4_map_phys_fmr(struct mlx4_dev *dev, struct mlx4_fmr *fmr, u64 *page_list
/* Make sure MPT status is visible before writing MTT entries */
wmb();
- dma_sync_single_for_cpu(&dev->pdev->dev, fmr->dma_handle,
+ dma_sync_single_for_cpu(&dev->persist->pdev->dev, fmr->dma_handle,
npages * sizeof(u64), DMA_TO_DEVICE);
for (i = 0; i < npages; ++i)
fmr->mtts[i] = cpu_to_be64(page_list[i] | MLX4_MTT_FLAG_PRESENT);
- dma_sync_single_for_device(&dev->pdev->dev, fmr->dma_handle,
+ dma_sync_single_for_device(&dev->persist->pdev->dev, fmr->dma_handle,
npages * sizeof(u64), DMA_TO_DEVICE);
fmr->mpt->key = cpu_to_be32(key);
diff --git a/drivers/net/ethernet/mellanox/mlx4/pd.c b/drivers/net/ethernet/mellanox/mlx4/pd.c
index 74216071201f..a42b4c0a9ed9 100644
--- a/drivers/net/ethernet/mellanox/mlx4/pd.c
+++ b/drivers/net/ethernet/mellanox/mlx4/pd.c
@@ -151,11 +151,13 @@ int mlx4_uar_alloc(struct mlx4_dev *dev, struct mlx4_uar *uar)
return -ENOMEM;
if (mlx4_is_slave(dev))
- offset = uar->index % ((int) pci_resource_len(dev->pdev, 2) /
+ offset = uar->index % ((int)pci_resource_len(dev->persist->pdev,
+ 2) /
dev->caps.uar_page_size);
else
offset = uar->index;
- uar->pfn = (pci_resource_start(dev->pdev, 2) >> PAGE_SHIFT) + offset;
+ uar->pfn = (pci_resource_start(dev->persist->pdev, 2) >> PAGE_SHIFT)
+ + offset;
uar->map = NULL;
return 0;
}
diff --git a/drivers/net/ethernet/mellanox/mlx4/port.c b/drivers/net/ethernet/mellanox/mlx4/port.c
index 30eb1ead0fe6..9f268f05290a 100644
--- a/drivers/net/ethernet/mellanox/mlx4/port.c
+++ b/drivers/net/ethernet/mellanox/mlx4/port.c
@@ -553,9 +553,9 @@ int mlx4_get_slave_num_gids(struct mlx4_dev *dev, int slave, int port)
slaves_pport_actv = mlx4_phys_to_slaves_pport_actv(
dev, &exclusive_ports);
slave_gid -= bitmap_weight(slaves_pport_actv.slaves,
- dev->num_vfs + 1);
+ dev->persist->num_vfs + 1);
}
- vfs = bitmap_weight(slaves_pport.slaves, dev->num_vfs + 1) - 1;
+ vfs = bitmap_weight(slaves_pport.slaves, dev->persist->num_vfs + 1) - 1;
if (slave_gid <= ((MLX4_ROCE_MAX_GIDS - MLX4_ROCE_PF_GIDS) % vfs))
return ((MLX4_ROCE_MAX_GIDS - MLX4_ROCE_PF_GIDS) / vfs) + 1;
return (MLX4_ROCE_MAX_GIDS - MLX4_ROCE_PF_GIDS) / vfs;
@@ -590,10 +590,10 @@ int mlx4_get_base_gid_ix(struct mlx4_dev *dev, int slave, int port)
slaves_pport_actv = mlx4_phys_to_slaves_pport_actv(
dev, &exclusive_ports);
slave_gid -= bitmap_weight(slaves_pport_actv.slaves,
- dev->num_vfs + 1);
+ dev->persist->num_vfs + 1);
}
gids = MLX4_ROCE_MAX_GIDS - MLX4_ROCE_PF_GIDS;
- vfs = bitmap_weight(slaves_pport.slaves, dev->num_vfs + 1) - 1;
+ vfs = bitmap_weight(slaves_pport.slaves, dev->persist->num_vfs + 1) - 1;
if (slave_gid <= gids % vfs)
return MLX4_ROCE_PF_GIDS + ((gids / vfs) + 1) * (slave_gid - 1);
@@ -644,7 +644,7 @@ void mlx4_reset_roce_gids(struct mlx4_dev *dev, int slave)
int num_eth_ports, err;
int i;
- if (slave < 0 || slave > dev->num_vfs)
+ if (slave < 0 || slave > dev->persist->num_vfs)
return;
actv_ports = mlx4_get_active_ports(dev, slave);
@@ -1214,7 +1214,8 @@ int mlx4_get_slave_from_roce_gid(struct mlx4_dev *dev, int port, u8 *gid,
return -EINVAL;
slaves_pport = mlx4_phys_to_slaves_pport(dev, port);
- num_vfs = bitmap_weight(slaves_pport.slaves, dev->num_vfs + 1) - 1;
+ num_vfs = bitmap_weight(slaves_pport.slaves,
+ dev->persist->num_vfs + 1) - 1;
for (i = 0; i < MLX4_ROCE_MAX_GIDS; i++) {
if (!memcmp(priv->port[port].gid_table.roce_gids[i].raw, gid,
@@ -1258,7 +1259,7 @@ int mlx4_get_slave_from_roce_gid(struct mlx4_dev *dev, int port, u8 *gid,
dev, &exclusive_ports);
num_vfs_before += bitmap_weight(
slaves_pport_actv.slaves,
- dev->num_vfs + 1);
+ dev->persist->num_vfs + 1);
}
/* candidate_slave_gid isn't necessarily the correct slave, but
@@ -1288,7 +1289,7 @@ int mlx4_get_slave_from_roce_gid(struct mlx4_dev *dev, int port, u8 *gid,
dev, &exclusive_ports);
slave_gid += bitmap_weight(
slaves_pport_actv.slaves,
- dev->num_vfs + 1);
+ dev->persist->num_vfs + 1);
}
}
*slave_id = slave_gid;
diff --git a/drivers/net/ethernet/mellanox/mlx4/reset.c b/drivers/net/ethernet/mellanox/mlx4/reset.c
index ea1c6d092145..0076d88587ca 100644
--- a/drivers/net/ethernet/mellanox/mlx4/reset.c
+++ b/drivers/net/ethernet/mellanox/mlx4/reset.c
@@ -76,19 +76,21 @@ int mlx4_reset(struct mlx4_dev *dev)
goto out;
}
- pcie_cap = pci_pcie_cap(dev->pdev);
+ pcie_cap = pci_pcie_cap(dev->persist->pdev);
for (i = 0; i < 64; ++i) {
if (i == 22 || i == 23)
continue;
- if (pci_read_config_dword(dev->pdev, i * 4, hca_header + i)) {
+ if (pci_read_config_dword(dev->persist->pdev, i * 4,
+ hca_header + i)) {
err = -ENODEV;
mlx4_err(dev, "Couldn't save HCA PCI header, aborting\n");
goto out;
}
}
- reset = ioremap(pci_resource_start(dev->pdev, 0) + MLX4_RESET_BASE,
+ reset = ioremap(pci_resource_start(dev->persist->pdev, 0) +
+ MLX4_RESET_BASE,
MLX4_RESET_SIZE);
if (!reset) {
err = -ENOMEM;
@@ -122,8 +124,8 @@ int mlx4_reset(struct mlx4_dev *dev)
end = jiffies + MLX4_RESET_TIMEOUT_JIFFIES;
do {
- if (!pci_read_config_word(dev->pdev, PCI_VENDOR_ID, &vendor) &&
- vendor != 0xffff)
+ if (!pci_read_config_word(dev->persist->pdev, PCI_VENDOR_ID,
+ &vendor) && vendor != 0xffff)
break;
msleep(1);
@@ -138,14 +140,16 @@ int mlx4_reset(struct mlx4_dev *dev)
/* Now restore the PCI headers */
if (pcie_cap) {
devctl = hca_header[(pcie_cap + PCI_EXP_DEVCTL) / 4];
- if (pcie_capability_write_word(dev->pdev, PCI_EXP_DEVCTL,
+ if (pcie_capability_write_word(dev->persist->pdev,
+ PCI_EXP_DEVCTL,
devctl)) {
err = -ENODEV;
mlx4_err(dev, "Couldn't restore HCA PCI Express Device Control register, aborting\n");
goto out;
}
linkctl = hca_header[(pcie_cap + PCI_EXP_LNKCTL) / 4];
- if (pcie_capability_write_word(dev->pdev, PCI_EXP_LNKCTL,
+ if (pcie_capability_write_word(dev->persist->pdev,
+ PCI_EXP_LNKCTL,
linkctl)) {
err = -ENODEV;
mlx4_err(dev, "Couldn't restore HCA PCI Express Link control register, aborting\n");
@@ -157,7 +161,8 @@ int mlx4_reset(struct mlx4_dev *dev)
if (i * 4 == PCI_COMMAND)
continue;
- if (pci_write_config_dword(dev->pdev, i * 4, hca_header[i])) {
+ if (pci_write_config_dword(dev->persist->pdev, i * 4,
+ hca_header[i])) {
err = -ENODEV;
mlx4_err(dev, "Couldn't restore HCA reg %x, aborting\n",
i);
@@ -165,7 +170,7 @@ int mlx4_reset(struct mlx4_dev *dev)
}
}
- if (pci_write_config_dword(dev->pdev, PCI_COMMAND,
+ if (pci_write_config_dword(dev->persist->pdev, PCI_COMMAND,
hca_header[PCI_COMMAND / 4])) {
err = -ENODEV;
mlx4_err(dev, "Couldn't restore HCA COMMAND, aborting\n");
diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
index 4efbd1eca611..3e93879bccce 100644
--- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
+++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
@@ -309,12 +309,13 @@ static inline int mlx4_grant_resource(struct mlx4_dev *dev, int slave,
int allocated, free, reserved, guaranteed, from_free;
int from_rsvd;
- if (slave > dev->num_vfs)
+ if (slave > dev->persist->num_vfs)
return -EINVAL;
spin_lock(&res_alloc->alloc_lock);
allocated = (port > 0) ?
- res_alloc->allocated[(port - 1) * (dev->num_vfs + 1) + slave] :
+ res_alloc->allocated[(port - 1) *
+ (dev->persist->num_vfs + 1) + slave] :
res_alloc->allocated[slave];
free = (port > 0) ? res_alloc->res_port_free[port - 1] :
res_alloc->res_free;
@@ -352,7 +353,8 @@ static inline int mlx4_grant_resource(struct mlx4_dev *dev, int slave,
if (!err) {
/* grant the request */
if (port > 0) {
- res_alloc->allocated[(port - 1) * (dev->num_vfs + 1) + slave] += count;
+ res_alloc->allocated[(port - 1) *
+ (dev->persist->num_vfs + 1) + slave] += count;
res_alloc->res_port_free[port - 1] -= count;
res_alloc->res_port_rsvd[port - 1] -= from_rsvd;
} else {
@@ -376,13 +378,14 @@ static inline void mlx4_release_resource(struct mlx4_dev *dev, int slave,
&priv->mfunc.master.res_tracker.res_alloc[res_type];
int allocated, guaranteed, from_rsvd;
- if (slave > dev->num_vfs)
+ if (slave > dev->persist->num_vfs)
return;
spin_lock(&res_alloc->alloc_lock);
allocated = (port > 0) ?
- res_alloc->allocated[(port - 1) * (dev->num_vfs + 1) + slave] :
+ res_alloc->allocated[(port - 1) *
+ (dev->persist->num_vfs + 1) + slave] :
res_alloc->allocated[slave];
guaranteed = res_alloc->guaranteed[slave];
@@ -397,7 +400,8 @@ static inline void mlx4_release_resource(struct mlx4_dev *dev, int slave,
}
if (port > 0) {
- res_alloc->allocated[(port - 1) * (dev->num_vfs + 1) + slave] -= count;
+ res_alloc->allocated[(port - 1) *
+ (dev->persist->num_vfs + 1) + slave] -= count;
res_alloc->res_port_free[port - 1] += count;
res_alloc->res_port_rsvd[port - 1] += from_rsvd;
} else {
@@ -415,7 +419,8 @@ static inline void initialize_res_quotas(struct mlx4_dev *dev,
enum mlx4_resource res_type,
int vf, int num_instances)
{
- res_alloc->guaranteed[vf] = num_instances / (2 * (dev->num_vfs + 1));
+ res_alloc->guaranteed[vf] = num_instances /
+ (2 * (dev->persist->num_vfs + 1));
res_alloc->quota[vf] = (num_instances / 2) + res_alloc->guaranteed[vf];
if (vf == mlx4_master_func_num(dev)) {
res_alloc->res_free = num_instances;
@@ -486,21 +491,26 @@ int mlx4_init_resource_tracker(struct mlx4_dev *dev)
for (i = 0; i < MLX4_NUM_OF_RESOURCE_TYPE; i++) {
struct resource_allocator *res_alloc =
&priv->mfunc.master.res_tracker.res_alloc[i];
- res_alloc->quota = kmalloc((dev->num_vfs + 1) * sizeof(int), GFP_KERNEL);
- res_alloc->guaranteed = kmalloc((dev->num_vfs + 1) * sizeof(int), GFP_KERNEL);
+ res_alloc->quota = kmalloc((dev->persist->num_vfs + 1) *
+ sizeof(int), GFP_KERNEL);
+ res_alloc->guaranteed = kmalloc((dev->persist->num_vfs + 1) *
+ sizeof(int), GFP_KERNEL);
if (i == RES_MAC || i == RES_VLAN)
res_alloc->allocated = kzalloc(MLX4_MAX_PORTS *
- (dev->num_vfs + 1) * sizeof(int),
- GFP_KERNEL);
+ (dev->persist->num_vfs
+ + 1) *
+ sizeof(int), GFP_KERNEL);
else
- res_alloc->allocated = kzalloc((dev->num_vfs + 1) * sizeof(int), GFP_KERNEL);
+ res_alloc->allocated = kzalloc((dev->persist->
+ num_vfs + 1) *
+ sizeof(int), GFP_KERNEL);
if (!res_alloc->quota || !res_alloc->guaranteed ||
!res_alloc->allocated)
goto no_mem_err;
spin_lock_init(&res_alloc->alloc_lock);
- for (t = 0; t < dev->num_vfs + 1; t++) {
+ for (t = 0; t < dev->persist->num_vfs + 1; t++) {
struct mlx4_active_ports actv_ports =
mlx4_get_active_ports(dev, t);
switch (i) {
diff --git a/include/linux/mlx4/cmd.h b/include/linux/mlx4/cmd.h
index 64d25941b329..c989442ffc6a 100644
--- a/include/linux/mlx4/cmd.h
+++ b/include/linux/mlx4/cmd.h
@@ -279,6 +279,8 @@ int mlx4_get_vf_config(struct mlx4_dev *dev, int port, int vf, struct ifla_vf_in
int mlx4_set_vf_link_state(struct mlx4_dev *dev, int port, int vf, int link_state);
int mlx4_config_dev_retrieval(struct mlx4_dev *dev,
struct mlx4_config_dev_params *params);
+void mlx4_cmd_wake_completions(struct mlx4_dev *dev);
+void mlx4_report_internal_err_comm_event(struct mlx4_dev *dev);
/*
* mlx4_get_slave_default_vlan -
* return true if VST ( default vlan)
@@ -288,5 +290,6 @@ bool mlx4_get_slave_default_vlan(struct mlx4_dev *dev, int port, int slave,
u16 *vlan, u8 *qos);
#define MLX4_COMM_GET_IF_REV(cmd_chan_ver) (u8)((cmd_chan_ver) >> 8)
+#define COMM_CHAN_EVENT_INTERNAL_ERR (1 << 17)
#endif /* MLX4_CMD_H */
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index f1e41b33462f..5ef54e145e4d 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -208,6 +208,10 @@ enum {
MLX4_QUERY_FUNC_FLAGS_A0_RES_QP = 1LL << 1
};
+enum {
+ MLX4_VF_CAP_FLAG_RESET = 1 << 0
+};
+
/* bit enums for an 8-bit flags field indicating special use
* QPs which require special handling in qp_reserve_range.
* Currently, this only includes QPs used by the ETH interface,
@@ -411,6 +415,16 @@ enum {
MLX4_EQ_PORT_INFO_MSTR_SM_SL_CHANGE_MASK = 1 << 4,
};
+enum {
+ MLX4_DEVICE_STATE_UP = 1 << 0,
+ MLX4_DEVICE_STATE_INTERNAL_ERROR = 1 << 1,
+};
+
+enum {
+ MLX4_INTERFACE_STATE_UP = 1 << 0,
+ MLX4_INTERFACE_STATE_DELETION = 1 << 1,
+};
+
#define MSTR_SM_CHANGE_MASK (MLX4_EQ_PORT_INFO_MSTR_SM_SL_CHANGE_MASK | \
MLX4_EQ_PORT_INFO_MSTR_SM_LID_CHANGE_MASK)
@@ -535,6 +549,7 @@ struct mlx4_caps {
u8 alloc_res_qp_mask;
u32 dmfs_high_rate_qpn_base;
u32 dmfs_high_rate_qpn_range;
+ u32 vf_caps;
};
struct mlx4_buf_list {
@@ -744,8 +759,23 @@ struct mlx4_vf_dev {
u8 n_ports;
};
-struct mlx4_dev {
+struct mlx4_dev_persistent {
struct pci_dev *pdev;
+ struct mlx4_dev *dev;
+ int nvfs[MLX4_MAX_PORTS + 1];
+ int num_vfs;
+ enum mlx4_port_type curr_port_type[MLX4_MAX_PORTS + 1];
+ enum mlx4_port_type curr_port_poss_type[MLX4_MAX_PORTS + 1];
+ struct work_struct catas_work;
+ struct workqueue_struct *catas_wq;
+ struct mutex device_state_mutex; /* protect HW state */
+ u8 state;
+ struct mutex interface_state_mutex; /* protect SW state */
+ u8 interface_state;
+};
+
+struct mlx4_dev {
+ struct mlx4_dev_persistent *persist;
unsigned long flags;
unsigned long num_slaves;
struct mlx4_caps caps;
@@ -754,13 +784,11 @@ struct mlx4_dev {
struct radix_tree_root qp_table_tree;
u8 rev_id;
char board_id[MLX4_BOARD_ID_LEN];
- int num_vfs;
int numa_node;
int oper_log_mgm_entry_size;
u64 regid_promisc_array[MLX4_MAX_PORTS + 1];
u64 regid_allmulti_array[MLX4_MAX_PORTS + 1];
struct mlx4_vf_dev *dev_vfs;
- int nvfs[MLX4_MAX_PORTS + 1];
};
struct mlx4_eqe {