From 872bf2fb69d90e3619befee842fc26db39d8e475 Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Sun, 25 Jan 2015 16:59:35 +0200 Subject: net/mlx4_core: Maintain a persistent memory for mlx4 device Maintain a persistent memory that should survive reset flow/PCI error. This comes as a preparation for coming series to support above flows. Signed-off-by: Yishai Hadas Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- include/linux/mlx4/device.h | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index f1e41b33462f..1069ce65e8b4 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -744,8 +744,15 @@ struct mlx4_vf_dev { u8 n_ports; }; -struct mlx4_dev { +struct mlx4_dev_persistent { struct pci_dev *pdev; + struct mlx4_dev *dev; + int nvfs[MLX4_MAX_PORTS + 1]; + int num_vfs; +}; + +struct mlx4_dev { + struct mlx4_dev_persistent *persist; unsigned long flags; unsigned long num_slaves; struct mlx4_caps caps; @@ -754,13 +761,11 @@ struct mlx4_dev { struct radix_tree_root qp_table_tree; u8 rev_id; char board_id[MLX4_BOARD_ID_LEN]; - int num_vfs; int numa_node; int oper_log_mgm_entry_size; u64 regid_promisc_array[MLX4_MAX_PORTS + 1]; u64 regid_allmulti_array[MLX4_MAX_PORTS + 1]; struct mlx4_vf_dev *dev_vfs; - int nvfs[MLX4_MAX_PORTS + 1]; }; struct mlx4_eqe { -- cgit v1.2.3 From dd0eefe3abbf47442db296bf68f27eb2860c1cdf Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Sun, 25 Jan 2015 16:59:36 +0200 Subject: net/mlx4_core: Set device configuration data to be persistent across reset When an HCA enters an internal error state, this is detected by the driver. The driver then should reset the HCA and restart the software stack. Keep ports information and some SRIOV configuration in a persistent area to have it valid across reset. Signed-off-by: Yishai Hadas Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/main.c | 45 +++++++++++++++++++++++++++++-- include/linux/mlx4/device.h | 2 ++ 2 files changed, 45 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index abcee61f8a47..2c5a555dff89 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -3109,18 +3109,34 @@ static int mlx4_init_one(struct pci_dev *pdev, const struct pci_device_id *id) return ret; } +static void mlx4_clean_dev(struct mlx4_dev *dev) +{ + struct mlx4_dev_persistent *persist = dev->persist; + struct mlx4_priv *priv = mlx4_priv(dev); + + memset(priv, 0, sizeof(*priv)); + priv->dev.persist = persist; +} + static void mlx4_unload_one(struct pci_dev *pdev) { struct mlx4_dev_persistent *persist = pci_get_drvdata(pdev); struct mlx4_dev *dev = persist->dev; struct mlx4_priv *priv = mlx4_priv(dev); int pci_dev_data; - int p; + int p, i; int active_vfs = 0; if (priv->removed) return; + /* saving current ports type for further use */ + for (i = 0; i < dev->caps.num_ports; i++) { + dev->persist->curr_port_type[i] = dev->caps.port_type[i + 1]; + dev->persist->curr_port_poss_type[i] = dev->caps. + possible_type[i + 1]; + } + pci_dev_data = priv->pci_dev_data; /* Disabling SR-IOV is not allowed while there are active vf's */ @@ -3191,7 +3207,7 @@ static void mlx4_unload_one(struct pci_dev *pdev) kfree(dev->caps.qp1_proxy); kfree(dev->dev_vfs); - memset(priv, 0, sizeof(*priv)); + mlx4_clean_dev(dev); priv->pci_dev_data = pci_dev_data; priv->removed = 1; } @@ -3210,6 +3226,25 @@ static void mlx4_remove_one(struct pci_dev *pdev) pci_set_drvdata(pdev, NULL); } +static int restore_current_port_types(struct mlx4_dev *dev, + enum mlx4_port_type *types, + enum mlx4_port_type *poss_types) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + int err, i; + + mlx4_stop_sense(dev); + + mutex_lock(&priv->port_mutex); + for (i = 0; i < dev->caps.num_ports; i++) + dev->caps.possible_type[i + 1] = poss_types[i]; + err = mlx4_change_port_types(dev, types); + mlx4_start_sense(dev); + mutex_unlock(&priv->port_mutex); + + return err; +} + int mlx4_restart_one(struct pci_dev *pdev) { struct mlx4_dev_persistent *persist = pci_get_drvdata(pdev); @@ -3230,6 +3265,12 @@ int mlx4_restart_one(struct pci_dev *pdev) return err; } + err = restore_current_port_types(dev, dev->persist->curr_port_type, + dev->persist->curr_port_poss_type); + if (err) + mlx4_err(dev, "could not restore original port types (%d)\n", + err); + return err; } diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 1069ce65e8b4..8c3837ac1a2d 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -749,6 +749,8 @@ struct mlx4_dev_persistent { struct mlx4_dev *dev; int nvfs[MLX4_MAX_PORTS + 1]; int num_vfs; + enum mlx4_port_type curr_port_type[MLX4_MAX_PORTS + 1]; + enum mlx4_port_type curr_port_poss_type[MLX4_MAX_PORTS + 1]; }; struct mlx4_dev { -- cgit v1.2.3 From ad9a0bf08ffbf32b8f292c3bb78ca0f24bb8f6b2 Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Sun, 25 Jan 2015 16:59:37 +0200 Subject: net/mlx4_core: Refactor the catas flow to work per device Using a WQ per device instead of a single global WQ, this allows independent reset handling per device even when SRIOV is used. This comes as a pre-patch for supporting chip reset for both native and SRIOV. Signed-off-by: Yishai Hadas Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/catas.c | 75 ++++++++++++++---------------- drivers/net/ethernet/mellanox/mlx4/main.c | 12 ++++- drivers/net/ethernet/mellanox/mlx4/mlx4.h | 3 +- include/linux/mlx4/device.h | 2 + 4 files changed, 48 insertions(+), 44 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx4/catas.c b/drivers/net/ethernet/mellanox/mlx4/catas.c index 1a102c9bac99..5bb9aa6e281d 100644 --- a/drivers/net/ethernet/mellanox/mlx4/catas.c +++ b/drivers/net/ethernet/mellanox/mlx4/catas.c @@ -40,10 +40,7 @@ enum { MLX4_CATAS_POLL_INTERVAL = 5 * HZ, }; -static DEFINE_SPINLOCK(catas_lock); -static LIST_HEAD(catas_list); -static struct work_struct catas_work; static int internal_err_reset = 1; module_param(internal_err_reset, int, 0644); @@ -77,13 +74,9 @@ static void poll_catas(unsigned long dev_ptr) dump_err_buf(dev); mlx4_dispatch_event(dev, MLX4_DEV_EVENT_CATASTROPHIC_ERROR, 0); - if (internal_err_reset) { - spin_lock(&catas_lock); - list_add(&priv->catas_err.list, &catas_list); - spin_unlock(&catas_lock); - - queue_work(mlx4_wq, &catas_work); - } + if (internal_err_reset) + queue_work(dev->persist->catas_wq, + &dev->persist->catas_work); } } else mod_timer(&priv->catas_err.timer, @@ -92,34 +85,23 @@ static void poll_catas(unsigned long dev_ptr) static void catas_reset(struct work_struct *work) { - struct mlx4_priv *priv, *tmppriv; - struct mlx4_dev *dev; - struct mlx4_dev_persistent *persist; - - LIST_HEAD(tlist); + struct mlx4_dev_persistent *persist = + container_of(work, struct mlx4_dev_persistent, + catas_work); + struct pci_dev *pdev = persist->pdev; int ret; - spin_lock_irq(&catas_lock); - list_splice_init(&catas_list, &tlist); - spin_unlock_irq(&catas_lock); - - list_for_each_entry_safe(priv, tmppriv, &tlist, catas_err.list) { - struct pci_dev *pdev = priv->dev.persist->pdev; - - /* If the device is off-line, we cannot reset it */ - if (pci_channel_offline(pdev)) - continue; + /* If the device is off-line, we cannot reset it */ + if (pci_channel_offline(pdev)) + return; - ret = mlx4_restart_one(priv->dev.persist->pdev); - /* 'priv' now is not valid */ - if (ret) - pr_err("mlx4 %s: Reset failed (%d)\n", - pci_name(pdev), ret); - else { - persist = pci_get_drvdata(pdev); - mlx4_dbg(persist->dev, "Reset succeeded\n"); - } - } + ret = mlx4_restart_one(pdev); + /* 'priv' now is not valid */ + if (ret) + pr_err("mlx4 %s: Reset failed (%d)\n", + pci_name(pdev), ret); + else + mlx4_dbg(persist->dev, "Reset succeeded\n"); } void mlx4_start_catas_poll(struct mlx4_dev *dev) @@ -158,15 +140,26 @@ void mlx4_stop_catas_poll(struct mlx4_dev *dev) del_timer_sync(&priv->catas_err.timer); - if (priv->catas_err.map) + if (priv->catas_err.map) { iounmap(priv->catas_err.map); + priv->catas_err.map = NULL; + } +} - spin_lock_irq(&catas_lock); - list_del(&priv->catas_err.list); - spin_unlock_irq(&catas_lock); +int mlx4_catas_init(struct mlx4_dev *dev) +{ + INIT_WORK(&dev->persist->catas_work, catas_reset); + dev->persist->catas_wq = create_singlethread_workqueue("mlx4_health"); + if (!dev->persist->catas_wq) + return -ENOMEM; + + return 0; } -void __init mlx4_catas_init(void) +void mlx4_catas_end(struct mlx4_dev *dev) { - INIT_WORK(&catas_work, catas_reset); + if (dev->persist->catas_wq) { + destroy_workqueue(dev->persist->catas_wq); + dev->persist->catas_wq = NULL; + } } diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index 2c5a555dff89..a61694cc1476 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -3064,11 +3064,19 @@ static int __mlx4_init_one(struct pci_dev *pdev, int pci_dev_data, } } - err = mlx4_load_one(pdev, pci_dev_data, total_vfs, nvfs, priv); + err = mlx4_catas_init(&priv->dev); if (err) goto err_release_regions; + + err = mlx4_load_one(pdev, pci_dev_data, total_vfs, nvfs, priv); + if (err) + goto err_catas; + return 0; +err_catas: + mlx4_catas_end(&priv->dev); + err_release_regions: pci_release_regions(pdev); @@ -3219,6 +3227,7 @@ static void mlx4_remove_one(struct pci_dev *pdev) struct mlx4_priv *priv = mlx4_priv(dev); mlx4_unload_one(pdev); + mlx4_catas_end(dev); pci_release_regions(pdev); pci_disable_device(pdev); kfree(dev->persist); @@ -3403,7 +3412,6 @@ static int __init mlx4_init(void) if (mlx4_verify_params()) return -EINVAL; - mlx4_catas_init(); mlx4_wq = create_singlethread_workqueue("mlx4"); if (!mlx4_wq) diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h index faa37ab75a9d..d41af84f965b 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h @@ -995,7 +995,8 @@ void __mlx4_xrcd_free(struct mlx4_dev *dev, u32 xrcdn); void mlx4_start_catas_poll(struct mlx4_dev *dev); void mlx4_stop_catas_poll(struct mlx4_dev *dev); -void mlx4_catas_init(void); +int mlx4_catas_init(struct mlx4_dev *dev); +void mlx4_catas_end(struct mlx4_dev *dev); int mlx4_restart_one(struct pci_dev *pdev); int mlx4_register_device(struct mlx4_dev *dev); void mlx4_unregister_device(struct mlx4_dev *dev); diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 8c3837ac1a2d..da425d2f3708 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -751,6 +751,8 @@ struct mlx4_dev_persistent { int num_vfs; enum mlx4_port_type curr_port_type[MLX4_MAX_PORTS + 1]; enum mlx4_port_type curr_port_poss_type[MLX4_MAX_PORTS + 1]; + struct work_struct catas_work; + struct workqueue_struct *catas_wq; }; struct mlx4_dev { -- cgit v1.2.3 From f6bc11e42646e661e699a5593cbd1e9dba7191d0 Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Sun, 25 Jan 2015 16:59:38 +0200 Subject: net/mlx4_core: Enhance the catas flow to support device reset This includes: - resetting the chip when a fatal error is detected (the current code does not do this). - exposing the ability to enter error state from outside the catas code by calling its functionality. (E.g. FW Command timeout, AER error). - managing a persistent device state. This is needed to sync between reset flow cases. Signed-off-by: Yishai Hadas Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/catas.c | 122 ++++++++++++++++++++++------- drivers/net/ethernet/mellanox/mlx4/main.c | 6 ++ drivers/net/ethernet/mellanox/mlx4/mlx4.h | 2 +- include/linux/mlx4/device.h | 7 ++ 4 files changed, 108 insertions(+), 29 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx4/catas.c b/drivers/net/ethernet/mellanox/mlx4/catas.c index 5bb9aa6e281d..588d6b5e1211 100644 --- a/drivers/net/ethernet/mellanox/mlx4/catas.c +++ b/drivers/net/ethernet/mellanox/mlx4/catas.c @@ -48,6 +48,83 @@ MODULE_PARM_DESC(internal_err_reset, "Reset device on internal errors if non-zero" " (default 1, in SRIOV mode default is 0)"); +static int read_vendor_id(struct mlx4_dev *dev) +{ + u16 vendor_id = 0; + int ret; + + ret = pci_read_config_word(dev->persist->pdev, 0, &vendor_id); + if (ret) { + mlx4_err(dev, "Failed to read vendor ID, ret=%d\n", ret); + return ret; + } + + if (vendor_id == 0xffff) { + mlx4_err(dev, "PCI can't be accessed to read vendor id\n"); + return -EINVAL; + } + + return 0; +} + +static int mlx4_reset_master(struct mlx4_dev *dev) +{ + int err = 0; + + if (!pci_channel_offline(dev->persist->pdev)) { + err = read_vendor_id(dev); + /* If PCI can't be accessed to read vendor ID we assume that its + * link was disabled and chip was already reset. + */ + if (err) + return 0; + + err = mlx4_reset(dev); + if (err) + mlx4_err(dev, "Fail to reset HCA\n"); + } + + return err; +} + +void mlx4_enter_error_state(struct mlx4_dev_persistent *persist) +{ + int err; + struct mlx4_dev *dev; + + if (!internal_err_reset) + return; + + mutex_lock(&persist->device_state_mutex); + if (persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR) + goto out; + + dev = persist->dev; + mlx4_err(dev, "device is going to be reset\n"); + err = mlx4_reset_master(dev); + BUG_ON(err != 0); + + dev->persist->state |= MLX4_DEVICE_STATE_INTERNAL_ERROR; + mlx4_err(dev, "device was reset successfully\n"); + mutex_unlock(&persist->device_state_mutex); + + /* At that step HW was already reset, now notify clients */ + mlx4_dispatch_event(dev, MLX4_DEV_EVENT_CATASTROPHIC_ERROR, 0); + return; + +out: + mutex_unlock(&persist->device_state_mutex); +} + +static void mlx4_handle_error_state(struct mlx4_dev_persistent *persist) +{ + int err = 0; + + mlx4_enter_error_state(persist); + err = mlx4_restart_one(persist->pdev); + mlx4_info(persist->dev, "mlx4_restart_one was ended, ret=%d\n", err); +} + static void dump_err_buf(struct mlx4_dev *dev) { struct mlx4_priv *priv = mlx4_priv(dev); @@ -66,21 +143,22 @@ static void poll_catas(unsigned long dev_ptr) struct mlx4_priv *priv = mlx4_priv(dev); if (readl(priv->catas_err.map)) { - /* If the device is off-line, we cannot try to recover it */ - if (pci_channel_offline(dev->persist->pdev)) - mod_timer(&priv->catas_err.timer, - round_jiffies(jiffies + MLX4_CATAS_POLL_INTERVAL)); - else { - dump_err_buf(dev); - mlx4_dispatch_event(dev, MLX4_DEV_EVENT_CATASTROPHIC_ERROR, 0); - - if (internal_err_reset) - queue_work(dev->persist->catas_wq, - &dev->persist->catas_work); - } - } else - mod_timer(&priv->catas_err.timer, - round_jiffies(jiffies + MLX4_CATAS_POLL_INTERVAL)); + dump_err_buf(dev); + goto internal_err; + } + + if (dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR) { + mlx4_warn(dev, "Internal error mark was detected on device\n"); + goto internal_err; + } + + mod_timer(&priv->catas_err.timer, + round_jiffies(jiffies + MLX4_CATAS_POLL_INTERVAL)); + return; + +internal_err: + if (internal_err_reset) + queue_work(dev->persist->catas_wq, &dev->persist->catas_work); } static void catas_reset(struct work_struct *work) @@ -88,20 +166,8 @@ static void catas_reset(struct work_struct *work) struct mlx4_dev_persistent *persist = container_of(work, struct mlx4_dev_persistent, catas_work); - struct pci_dev *pdev = persist->pdev; - int ret; - - /* If the device is off-line, we cannot reset it */ - if (pci_channel_offline(pdev)) - return; - ret = mlx4_restart_one(pdev); - /* 'priv' now is not valid */ - if (ret) - pr_err("mlx4 %s: Reset failed (%d)\n", - pci_name(pdev), ret); - else - mlx4_dbg(persist->dev, "Reset succeeded\n"); + mlx4_handle_error_state(persist); } void mlx4_start_catas_poll(struct mlx4_dev *dev) diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index a61694cc1476..dc2d910fcc88 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -2624,6 +2624,11 @@ static int mlx4_load_one(struct pci_dev *pdev, int pci_dev_data, } } + /* on load remove any previous indication of internal error, + * device is up. + */ + dev->persist->state = MLX4_DEVICE_STATE_UP; + slave_start: err = mlx4_cmd_init(dev); if (err) { @@ -3108,6 +3113,7 @@ static int mlx4_init_one(struct pci_dev *pdev, const struct pci_device_id *id) dev->persist->dev = dev; pci_set_drvdata(pdev, dev->persist); priv->pci_dev_data = id->driver_data; + mutex_init(&dev->persist->device_state_mutex); ret = __mlx4_init_one(pdev, id->driver_data, priv); if (ret) { diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h index d41af84f965b..aa1ecbc5a606 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h @@ -1178,7 +1178,7 @@ void mlx4_qp_event(struct mlx4_dev *dev, u32 qpn, int event_type); void mlx4_srq_event(struct mlx4_dev *dev, u32 srqn, int event_type); -void mlx4_handle_catas_err(struct mlx4_dev *dev); +void mlx4_enter_error_state(struct mlx4_dev_persistent *persist); int mlx4_SENSE_PORT(struct mlx4_dev *dev, int port, enum mlx4_port_type *type); diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index da425d2f3708..7d5d317cb7a6 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -411,6 +411,11 @@ enum { MLX4_EQ_PORT_INFO_MSTR_SM_SL_CHANGE_MASK = 1 << 4, }; +enum { + MLX4_DEVICE_STATE_UP = 1 << 0, + MLX4_DEVICE_STATE_INTERNAL_ERROR = 1 << 1, +}; + #define MSTR_SM_CHANGE_MASK (MLX4_EQ_PORT_INFO_MSTR_SM_SL_CHANGE_MASK | \ MLX4_EQ_PORT_INFO_MSTR_SM_LID_CHANGE_MASK) @@ -753,6 +758,8 @@ struct mlx4_dev_persistent { enum mlx4_port_type curr_port_poss_type[MLX4_MAX_PORTS + 1]; struct work_struct catas_work; struct workqueue_struct *catas_wq; + struct mutex device_state_mutex; /* protect HW state */ + u8 state; }; struct mlx4_dev { -- cgit v1.2.3 From f5aef5aa35063f2b45c3605871cd525d0cb7fb7a Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Sun, 25 Jan 2015 16:59:39 +0200 Subject: net/mlx4_core: Activate reset flow upon fatal command cases We activate reset flow upon command fatal errors, when the device enters an erroneous state, and must be reset. The cases below are assumed to be fatal: FW command timed-out, an error from FW on closing commands, pci is offline when posting/pending a command. In those cases we place the device into an error state: chip is reset, pending commands are awakened and completed immediately. Subsequent commands will return immediately. The return code in the above cases will depend on the command. Commands which free and close resources will return success (because the chip was reset, so callers may safely free their kernel resources). Other commands will return -EIO. Since the device's state was marked as error, the catas poller will detect this and restart the device's software stack (as is done when a FW internal error is directly detected). The device state is protected by a persistent mutex lives on its mlx4_dev, as such no need any more for the hcr_mutex which is removed. Signed-off-by: Yishai Hadas Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/catas.c | 11 +- drivers/net/ethernet/mellanox/mlx4/cmd.c | 163 +++++++++++++++++++++++++---- drivers/net/ethernet/mellanox/mlx4/mcg.c | 3 + drivers/net/ethernet/mellanox/mlx4/mlx4.h | 2 +- include/linux/mlx4/cmd.h | 1 + 5 files changed, 153 insertions(+), 27 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx4/catas.c b/drivers/net/ethernet/mellanox/mlx4/catas.c index 588d6b5e1211..63f14ffcc906 100644 --- a/drivers/net/ethernet/mellanox/mlx4/catas.c +++ b/drivers/net/ethernet/mellanox/mlx4/catas.c @@ -42,8 +42,8 @@ enum { -static int internal_err_reset = 1; -module_param(internal_err_reset, int, 0644); +int mlx4_internal_err_reset = 1; +module_param_named(internal_err_reset, mlx4_internal_err_reset, int, 0644); MODULE_PARM_DESC(internal_err_reset, "Reset device on internal errors if non-zero" " (default 1, in SRIOV mode default is 0)"); @@ -92,7 +92,7 @@ void mlx4_enter_error_state(struct mlx4_dev_persistent *persist) int err; struct mlx4_dev *dev; - if (!internal_err_reset) + if (!mlx4_internal_err_reset) return; mutex_lock(&persist->device_state_mutex); @@ -110,6 +110,7 @@ void mlx4_enter_error_state(struct mlx4_dev_persistent *persist) /* At that step HW was already reset, now notify clients */ mlx4_dispatch_event(dev, MLX4_DEV_EVENT_CATASTROPHIC_ERROR, 0); + mlx4_cmd_wake_completions(dev); return; out: @@ -157,7 +158,7 @@ static void poll_catas(unsigned long dev_ptr) return; internal_err: - if (internal_err_reset) + if (mlx4_internal_err_reset) queue_work(dev->persist->catas_wq, &dev->persist->catas_work); } @@ -177,7 +178,7 @@ void mlx4_start_catas_poll(struct mlx4_dev *dev) /*If we are in SRIOV the default of the module param must be 0*/ if (mlx4_is_mfunc(dev)) - internal_err_reset = 0; + mlx4_internal_err_reset = 0; INIT_LIST_HEAD(&priv->catas_err.list); init_timer(&priv->catas_err.timer); diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c index 7cd90e6a4272..3895b2b5fc92 100644 --- a/drivers/net/ethernet/mellanox/mlx4/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c @@ -182,6 +182,72 @@ static u8 mlx4_errno_to_status(int errno) } } +static int mlx4_internal_err_ret_value(struct mlx4_dev *dev, u16 op, + u8 op_modifier) +{ + switch (op) { + case MLX4_CMD_UNMAP_ICM: + case MLX4_CMD_UNMAP_ICM_AUX: + case MLX4_CMD_UNMAP_FA: + case MLX4_CMD_2RST_QP: + case MLX4_CMD_HW2SW_EQ: + case MLX4_CMD_HW2SW_CQ: + case MLX4_CMD_HW2SW_SRQ: + case MLX4_CMD_HW2SW_MPT: + case MLX4_CMD_CLOSE_HCA: + case MLX4_QP_FLOW_STEERING_DETACH: + case MLX4_CMD_FREE_RES: + case MLX4_CMD_CLOSE_PORT: + return CMD_STAT_OK; + + case MLX4_CMD_QP_ATTACH: + /* On Detach case return success */ + if (op_modifier == 0) + return CMD_STAT_OK; + return mlx4_status_to_errno(CMD_STAT_INTERNAL_ERR); + + default: + return mlx4_status_to_errno(CMD_STAT_INTERNAL_ERR); + } +} + +static int mlx4_closing_cmd_fatal_error(u16 op, u8 fw_status) +{ + /* Any error during the closing commands below is considered fatal */ + if (op == MLX4_CMD_CLOSE_HCA || + op == MLX4_CMD_HW2SW_EQ || + op == MLX4_CMD_HW2SW_CQ || + op == MLX4_CMD_2RST_QP || + op == MLX4_CMD_HW2SW_SRQ || + op == MLX4_CMD_SYNC_TPT || + op == MLX4_CMD_UNMAP_ICM || + op == MLX4_CMD_UNMAP_ICM_AUX || + op == MLX4_CMD_UNMAP_FA) + return 1; + /* Error on MLX4_CMD_HW2SW_MPT is fatal except when fw status equals + * CMD_STAT_REG_BOUND. + * This status indicates that memory region has memory windows bound to it + * which may result from invalid user space usage and is not fatal. + */ + if (op == MLX4_CMD_HW2SW_MPT && fw_status != CMD_STAT_REG_BOUND) + return 1; + return 0; +} + +static int mlx4_cmd_reset_flow(struct mlx4_dev *dev, u16 op, u8 op_modifier, + int err) +{ + /* Only if reset flow is really active return code is based on + * command, otherwise current error code is returned. + */ + if (mlx4_internal_err_reset) { + mlx4_enter_error_state(dev->persist); + err = mlx4_internal_err_ret_value(dev, op, op_modifier); + } + + return err; +} + static int comm_pending(struct mlx4_dev *dev) { struct mlx4_priv *priv = mlx4_priv(dev); @@ -258,7 +324,7 @@ static int mlx4_comm_cmd_wait(struct mlx4_dev *dev, u8 op, cmd->free_head = context->next; spin_unlock(&cmd->context_lock); - init_completion(&context->done); + reinit_completion(&context->done); mlx4_comm_cmd_post(dev, op, param); @@ -323,17 +389,21 @@ static int mlx4_cmd_post(struct mlx4_dev *dev, u64 in_param, u64 out_param, { struct mlx4_cmd *cmd = &mlx4_priv(dev)->cmd; u32 __iomem *hcr = cmd->hcr; - int ret = -EAGAIN; + int ret = -EIO; unsigned long end; - mutex_lock(&cmd->hcr_mutex); - - if (pci_channel_offline(dev->persist->pdev)) { + mutex_lock(&dev->persist->device_state_mutex); + /* To avoid writing to unknown addresses after the device state was + * changed to internal error and the chip was reset, + * check the INTERNAL_ERROR flag which is updated under + * device_state_mutex lock. + */ + if (pci_channel_offline(dev->persist->pdev) || + (dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR)) { /* * Device is going through error recovery * and cannot accept commands. */ - ret = -EIO; goto out; } @@ -347,7 +417,6 @@ static int mlx4_cmd_post(struct mlx4_dev *dev, u64 in_param, u64 out_param, * Device is going through error recovery * and cannot accept commands. */ - ret = -EIO; goto out; } @@ -391,7 +460,11 @@ static int mlx4_cmd_post(struct mlx4_dev *dev, u64 in_param, u64 out_param, ret = 0; out: - mutex_unlock(&cmd->hcr_mutex); + if (ret) + mlx4_warn(dev, "Could not post command 0x%x: ret=%d, in_param=0x%llx, in_mod=0x%x, op_mod=0x%x\n", + op, ret, in_param, in_modifier, op_modifier); + mutex_unlock(&dev->persist->device_state_mutex); + return ret; } @@ -464,12 +537,12 @@ static int mlx4_cmd_poll(struct mlx4_dev *dev, u64 in_param, u64 *out_param, down(&priv->cmd.poll_sem); - if (pci_channel_offline(dev->persist->pdev)) { + if (dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR) { /* * Device is going through error recovery * and cannot accept commands. */ - err = -EIO; + err = mlx4_internal_err_ret_value(dev, op, op_modifier); goto out; } @@ -483,7 +556,7 @@ static int mlx4_cmd_poll(struct mlx4_dev *dev, u64 in_param, u64 *out_param, err = mlx4_cmd_post(dev, in_param, out_param ? *out_param : 0, in_modifier, op_modifier, op, CMD_POLL_TOKEN, 0); if (err) - goto out; + goto out_reset; end = msecs_to_jiffies(timeout) + jiffies; while (cmd_pending(dev) && time_before(jiffies, end)) { @@ -493,6 +566,11 @@ static int mlx4_cmd_poll(struct mlx4_dev *dev, u64 in_param, u64 *out_param, * and cannot accept commands. */ err = -EIO; + goto out_reset; + } + + if (dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR) { + err = mlx4_internal_err_ret_value(dev, op, op_modifier); goto out; } @@ -502,8 +580,8 @@ static int mlx4_cmd_poll(struct mlx4_dev *dev, u64 in_param, u64 *out_param, if (cmd_pending(dev)) { mlx4_warn(dev, "command 0x%x timed out (go bit not cleared)\n", op); - err = -ETIMEDOUT; - goto out; + err = -EIO; + goto out_reset; } if (out_is_imm) @@ -515,10 +593,17 @@ static int mlx4_cmd_poll(struct mlx4_dev *dev, u64 in_param, u64 *out_param, stat = be32_to_cpu((__force __be32) __raw_readl(hcr + HCR_STATUS_OFFSET)) >> 24; err = mlx4_status_to_errno(stat); - if (err) + if (err) { mlx4_err(dev, "command 0x%x failed: fw status = 0x%x\n", op, stat); + if (mlx4_closing_cmd_fatal_error(op, stat)) + goto out_reset; + goto out; + } +out_reset: + if (err) + err = mlx4_cmd_reset_flow(dev, op, op_modifier, err); out: up(&priv->cmd.poll_sem); return err; @@ -565,17 +650,19 @@ static int mlx4_cmd_wait(struct mlx4_dev *dev, u64 in_param, u64 *out_param, goto out; } - init_completion(&context->done); + reinit_completion(&context->done); - mlx4_cmd_post(dev, in_param, out_param ? *out_param : 0, - in_modifier, op_modifier, op, context->token, 1); + err = mlx4_cmd_post(dev, in_param, out_param ? *out_param : 0, + in_modifier, op_modifier, op, context->token, 1); + if (err) + goto out_reset; if (!wait_for_completion_timeout(&context->done, msecs_to_jiffies(timeout))) { mlx4_warn(dev, "command 0x%x timed out (go bit not cleared)\n", op); - err = -EBUSY; - goto out; + err = -EIO; + goto out_reset; } err = context->result; @@ -592,12 +679,20 @@ static int mlx4_cmd_wait(struct mlx4_dev *dev, u64 in_param, u64 *out_param, else mlx4_err(dev, "command 0x%x failed: fw status = 0x%x\n", op, context->fw_status); + if (dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR) + err = mlx4_internal_err_ret_value(dev, op, op_modifier); + else if (mlx4_closing_cmd_fatal_error(op, context->fw_status)) + goto out_reset; + goto out; } if (out_is_imm) *out_param = context->out_param; +out_reset: + if (err) + err = mlx4_cmd_reset_flow(dev, op, op_modifier, err); out: spin_lock(&cmd->context_lock); context->next = cmd->free_head; @@ -613,9 +708,12 @@ int __mlx4_cmd(struct mlx4_dev *dev, u64 in_param, u64 *out_param, u16 op, unsigned long timeout, int native) { if (pci_channel_offline(dev->persist->pdev)) - return -EIO; + return mlx4_cmd_reset_flow(dev, op, op_modifier, -EIO); if (!mlx4_is_mfunc(dev) || (native && mlx4_is_master(dev))) { + if (dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR) + return mlx4_internal_err_ret_value(dev, op, + op_modifier); if (mlx4_priv(dev)->cmd.use_events) return mlx4_cmd_wait(dev, in_param, out_param, out_is_imm, in_modifier, @@ -2121,7 +2219,6 @@ int mlx4_cmd_init(struct mlx4_dev *dev) int flags = 0; if (!priv->cmd.initialized) { - mutex_init(&priv->cmd.hcr_mutex); mutex_init(&priv->cmd.slave_cmd_mutex); sema_init(&priv->cmd.poll_sem, 1); priv->cmd.use_events = 0; @@ -2232,6 +2329,11 @@ int mlx4_cmd_use_events(struct mlx4_dev *dev) for (i = 0; i < priv->cmd.max_cmds; ++i) { priv->cmd.context[i].token = i; priv->cmd.context[i].next = i + 1; + /* To support fatal error flow, initialize all + * cmd contexts to allow simulating completions + * with complete() at any time. + */ + init_completion(&priv->cmd.context[i].done); } priv->cmd.context[priv->cmd.max_cmds - 1].next = -1; @@ -2329,6 +2431,25 @@ int mlx4_get_vf_indx(struct mlx4_dev *dev, int slave) return slave - 1; } +void mlx4_cmd_wake_completions(struct mlx4_dev *dev) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + struct mlx4_cmd_context *context; + int i; + + spin_lock(&priv->cmd.context_lock); + if (priv->cmd.context) { + for (i = 0; i < priv->cmd.max_cmds; ++i) { + context = &priv->cmd.context[i]; + context->fw_status = CMD_STAT_INTERNAL_ERR; + context->result = + mlx4_status_to_errno(CMD_STAT_INTERNAL_ERR); + complete(&context->done); + } + } + spin_unlock(&priv->cmd.context_lock); +} + struct mlx4_active_ports mlx4_get_active_ports(struct mlx4_dev *dev, int slave) { struct mlx4_active_ports actv_ports; diff --git a/drivers/net/ethernet/mellanox/mlx4/mcg.c b/drivers/net/ethernet/mellanox/mlx4/mcg.c index a3867e7ef885..d22d9283d2cd 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mcg.c +++ b/drivers/net/ethernet/mellanox/mlx4/mcg.c @@ -1318,6 +1318,9 @@ out: mutex_unlock(&priv->mcg_table.mutex); mlx4_free_cmd_mailbox(dev, mailbox); + if (err && dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR) + /* In case device is under an error, return success as a closing command */ + err = 0; return err; } diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h index aa1ecbc5a606..5c772ea4473b 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h @@ -235,6 +235,7 @@ do { \ extern int mlx4_log_num_mgm_entry_size; extern int log_mtts_per_seg; +extern int mlx4_internal_err_reset; #define MLX4_MAX_NUM_SLAVES (MLX4_MAX_NUM_PF + MLX4_MAX_NUM_VF) #define ALL_SLAVES 0xff @@ -607,7 +608,6 @@ struct mlx4_mgm { struct mlx4_cmd { struct pci_pool *pool; void __iomem *hcr; - struct mutex hcr_mutex; struct mutex slave_cmd_mutex; struct semaphore poll_sem; struct semaphore event_sem; diff --git a/include/linux/mlx4/cmd.h b/include/linux/mlx4/cmd.h index 64d25941b329..e7543844cc7a 100644 --- a/include/linux/mlx4/cmd.h +++ b/include/linux/mlx4/cmd.h @@ -279,6 +279,7 @@ int mlx4_get_vf_config(struct mlx4_dev *dev, int port, int vf, struct ifla_vf_in int mlx4_set_vf_link_state(struct mlx4_dev *dev, int port, int vf, int link_state); int mlx4_config_dev_retrieval(struct mlx4_dev *dev, struct mlx4_config_dev_params *params); +void mlx4_cmd_wake_completions(struct mlx4_dev *dev); /* * mlx4_get_slave_default_vlan - * return true if VST ( default vlan) -- cgit v1.2.3 From c69453e294c9f16da977b68e658a8028b854c209 Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Sun, 25 Jan 2015 16:59:40 +0200 Subject: net/mlx4_core: Manage interface state for Reset flow cases We need to manage interface state to sync between reset flow and some other relative cases such as remove_one. This has to be done to prevent certain races. For example in case software stack is down as a result of unload call, the remove_one should skip the unload phase. Implement the remove_one case, handling AER and other cases comes next. The interface can be up/down, upon remove_one, the state will include an extra bit indicating that the device is cleaned-up, forcing other tasks to finish before the final cleanup. Signed-off-by: Yishai Hadas Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/catas.c | 13 +++++++++++-- drivers/net/ethernet/mellanox/mlx4/intf.c | 2 ++ drivers/net/ethernet/mellanox/mlx4/main.c | 13 ++++++++++++- include/linux/mlx4/device.h | 7 +++++++ 4 files changed, 32 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx4/catas.c b/drivers/net/ethernet/mellanox/mlx4/catas.c index 63f14ffcc906..3fcf3cfaedfc 100644 --- a/drivers/net/ethernet/mellanox/mlx4/catas.c +++ b/drivers/net/ethernet/mellanox/mlx4/catas.c @@ -122,8 +122,14 @@ static void mlx4_handle_error_state(struct mlx4_dev_persistent *persist) int err = 0; mlx4_enter_error_state(persist); - err = mlx4_restart_one(persist->pdev); - mlx4_info(persist->dev, "mlx4_restart_one was ended, ret=%d\n", err); + mutex_lock(&persist->interface_state_mutex); + if (persist->interface_state & MLX4_INTERFACE_STATE_UP && + !(persist->interface_state & MLX4_INTERFACE_STATE_DELETION)) { + err = mlx4_restart_one(persist->pdev); + mlx4_info(persist->dev, "mlx4_restart_one was ended, ret=%d\n", + err); + } + mutex_unlock(&persist->interface_state_mutex); } static void dump_err_buf(struct mlx4_dev *dev) @@ -211,6 +217,9 @@ void mlx4_stop_catas_poll(struct mlx4_dev *dev) iounmap(priv->catas_err.map); priv->catas_err.map = NULL; } + + if (dev->persist->interface_state & MLX4_INTERFACE_STATE_DELETION) + flush_workqueue(dev->persist->catas_wq); } int mlx4_catas_init(struct mlx4_dev *dev) diff --git a/drivers/net/ethernet/mellanox/mlx4/intf.c b/drivers/net/ethernet/mellanox/mlx4/intf.c index 116895ac8b35..fba0b96a6f28 100644 --- a/drivers/net/ethernet/mellanox/mlx4/intf.c +++ b/drivers/net/ethernet/mellanox/mlx4/intf.c @@ -138,6 +138,7 @@ int mlx4_register_device(struct mlx4_dev *dev) mutex_lock(&intf_mutex); + dev->persist->interface_state |= MLX4_INTERFACE_STATE_UP; list_add_tail(&priv->dev_list, &dev_list); list_for_each_entry(intf, &intf_list, list) mlx4_add_device(intf, priv); @@ -162,6 +163,7 @@ void mlx4_unregister_device(struct mlx4_dev *dev) mlx4_remove_device(intf, priv); list_del(&priv->dev_list); + dev->persist->interface_state &= ~MLX4_INTERFACE_STATE_UP; mutex_unlock(&intf_mutex); } diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index dc2d910fcc88..d59cae5da3f0 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -3114,6 +3114,7 @@ static int mlx4_init_one(struct pci_dev *pdev, const struct pci_device_id *id) pci_set_drvdata(pdev, dev->persist); priv->pci_dev_data = id->driver_data; mutex_init(&dev->persist->device_state_mutex); + mutex_init(&dev->persist->interface_state_mutex); ret = __mlx4_init_one(pdev, id->driver_data, priv); if (ret) { @@ -3232,7 +3233,17 @@ static void mlx4_remove_one(struct pci_dev *pdev) struct mlx4_dev *dev = persist->dev; struct mlx4_priv *priv = mlx4_priv(dev); - mlx4_unload_one(pdev); + mutex_lock(&persist->interface_state_mutex); + persist->interface_state |= MLX4_INTERFACE_STATE_DELETION; + mutex_unlock(&persist->interface_state_mutex); + + /* device marked to be under deletion running now without the lock + * letting other tasks to be terminated + */ + if (persist->interface_state & MLX4_INTERFACE_STATE_UP) + mlx4_unload_one(pdev); + else + mlx4_info(dev, "%s: interface is down\n", __func__); mlx4_catas_end(dev); pci_release_regions(pdev); pci_disable_device(pdev); diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 7d5d317cb7a6..33f9ca71925c 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -416,6 +416,11 @@ enum { MLX4_DEVICE_STATE_INTERNAL_ERROR = 1 << 1, }; +enum { + MLX4_INTERFACE_STATE_UP = 1 << 0, + MLX4_INTERFACE_STATE_DELETION = 1 << 1, +}; + #define MSTR_SM_CHANGE_MASK (MLX4_EQ_PORT_INFO_MSTR_SM_SL_CHANGE_MASK | \ MLX4_EQ_PORT_INFO_MSTR_SM_LID_CHANGE_MASK) @@ -760,6 +765,8 @@ struct mlx4_dev_persistent { struct workqueue_struct *catas_wq; struct mutex device_state_mutex; /* protect HW state */ u8 state; + struct mutex interface_state_mutex; /* protect SW state */ + u8 interface_state; }; struct mlx4_dev { -- cgit v1.2.3 From 55ad359225b2232b9b8f04a0dfa169bd3a7d86d2 Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Sun, 25 Jan 2015 16:59:42 +0200 Subject: net/mlx4_core: Enable device recovery flow with SRIOV In SRIOV, both the PF and the VF may attempt device recovery whenever they assume that the device is not functioning. When the PF driver resets the device, the VF should detect this and attempt to reinitialize itself. The VF must be able to reset itself under all circumstances, even if the PF is not responsive. The VF shall reset itself in the following cases: 1. Commands are not processed within reasonable time over the communication channel. This is done considering device state and the correct return code based on the command as was done in the native mode, done in the next patch. 2. The VF driver receives an internal error event reported by the PF on the communication channel. This occurs when the PF driver resets the device or when VF is out of sync with the PF. Add 'VF reset' capability, which allows the VF to reinitialize itself even when the PF is not responsive. As PF and VF may run their reset flow simulantanisly, there are several cases that are handled: - Prevent freeing VF resources upon FLR, when PF is in its unloading stage. - Prevent PF getting VF commands before it has finished initializing its resources. - Upon VF startup, check that comm-channel is online before sending commands to the PF and getting timed-out. Signed-off-by: Yishai Hadas Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/catas.c | 120 +++++++++++++++++++++---- drivers/net/ethernet/mellanox/mlx4/cmd.c | 77 ++++++++++++---- drivers/net/ethernet/mellanox/mlx4/eq.c | 10 ++- drivers/net/ethernet/mellanox/mlx4/intf.c | 6 +- drivers/net/ethernet/mellanox/mlx4/main.c | 135 +++++++++++++++++++++++------ drivers/net/ethernet/mellanox/mlx4/mlx4.h | 7 +- include/linux/mlx4/cmd.h | 2 + include/linux/mlx4/device.h | 5 ++ 8 files changed, 292 insertions(+), 70 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx4/catas.c b/drivers/net/ethernet/mellanox/mlx4/catas.c index 3fcf3cfaedfc..715de8affcc9 100644 --- a/drivers/net/ethernet/mellanox/mlx4/catas.c +++ b/drivers/net/ethernet/mellanox/mlx4/catas.c @@ -45,8 +45,7 @@ enum { int mlx4_internal_err_reset = 1; module_param_named(internal_err_reset, mlx4_internal_err_reset, int, 0644); MODULE_PARM_DESC(internal_err_reset, - "Reset device on internal errors if non-zero" - " (default 1, in SRIOV mode default is 0)"); + "Reset device on internal errors if non-zero (default 1)"); static int read_vendor_id(struct mlx4_dev *dev) { @@ -71,6 +70,9 @@ static int mlx4_reset_master(struct mlx4_dev *dev) { int err = 0; + if (mlx4_is_master(dev)) + mlx4_report_internal_err_comm_event(dev); + if (!pci_channel_offline(dev->persist->pdev)) { err = read_vendor_id(dev); /* If PCI can't be accessed to read vendor ID we assume that its @@ -87,6 +89,81 @@ static int mlx4_reset_master(struct mlx4_dev *dev) return err; } +static int mlx4_reset_slave(struct mlx4_dev *dev) +{ +#define COM_CHAN_RST_REQ_OFFSET 0x10 +#define COM_CHAN_RST_ACK_OFFSET 0x08 + + u32 comm_flags; + u32 rst_req; + u32 rst_ack; + unsigned long end; + struct mlx4_priv *priv = mlx4_priv(dev); + + if (pci_channel_offline(dev->persist->pdev)) + return 0; + + comm_flags = swab32(readl((__iomem char *)priv->mfunc.comm + + MLX4_COMM_CHAN_FLAGS)); + if (comm_flags == 0xffffffff) { + mlx4_err(dev, "VF reset is not needed\n"); + return 0; + } + + if (!(dev->caps.vf_caps & MLX4_VF_CAP_FLAG_RESET)) { + mlx4_err(dev, "VF reset is not supported\n"); + return -EOPNOTSUPP; + } + + rst_req = (comm_flags & (u32)(1 << COM_CHAN_RST_REQ_OFFSET)) >> + COM_CHAN_RST_REQ_OFFSET; + rst_ack = (comm_flags & (u32)(1 << COM_CHAN_RST_ACK_OFFSET)) >> + COM_CHAN_RST_ACK_OFFSET; + if (rst_req != rst_ack) { + mlx4_err(dev, "Communication channel isn't sync, fail to send reset\n"); + return -EIO; + } + + rst_req ^= 1; + mlx4_warn(dev, "VF is sending reset request to Firmware\n"); + comm_flags = rst_req << COM_CHAN_RST_REQ_OFFSET; + __raw_writel((__force u32)cpu_to_be32(comm_flags), + (__iomem char *)priv->mfunc.comm + MLX4_COMM_CHAN_FLAGS); + /* Make sure that our comm channel write doesn't + * get mixed in with writes from another CPU. + */ + mmiowb(); + + end = msecs_to_jiffies(MLX4_COMM_TIME) + jiffies; + while (time_before(jiffies, end)) { + comm_flags = swab32(readl((__iomem char *)priv->mfunc.comm + + MLX4_COMM_CHAN_FLAGS)); + rst_ack = (comm_flags & (u32)(1 << COM_CHAN_RST_ACK_OFFSET)) >> + COM_CHAN_RST_ACK_OFFSET; + + /* Reading rst_req again since the communication channel can + * be reset at any time by the PF and all its bits will be + * set to zero. + */ + rst_req = (comm_flags & (u32)(1 << COM_CHAN_RST_REQ_OFFSET)) >> + COM_CHAN_RST_REQ_OFFSET; + + if (rst_ack == rst_req) { + mlx4_warn(dev, "VF Reset succeed\n"); + return 0; + } + cond_resched(); + } + mlx4_err(dev, "Fail to send reset over the communication channel\n"); + return -ETIMEDOUT; +} + +static int mlx4_comm_internal_err(u32 slave_read) +{ + return (u32)COMM_CHAN_EVENT_INTERNAL_ERR == + (slave_read & (u32)COMM_CHAN_EVENT_INTERNAL_ERR) ? 1 : 0; +} + void mlx4_enter_error_state(struct mlx4_dev_persistent *persist) { int err; @@ -101,7 +178,10 @@ void mlx4_enter_error_state(struct mlx4_dev_persistent *persist) dev = persist->dev; mlx4_err(dev, "device is going to be reset\n"); - err = mlx4_reset_master(dev); + if (mlx4_is_slave(dev)) + err = mlx4_reset_slave(dev); + else + err = mlx4_reset_master(dev); BUG_ON(err != 0); dev->persist->state |= MLX4_DEVICE_STATE_INTERNAL_ERROR; @@ -148,8 +228,15 @@ static void poll_catas(unsigned long dev_ptr) { struct mlx4_dev *dev = (struct mlx4_dev *) dev_ptr; struct mlx4_priv *priv = mlx4_priv(dev); - - if (readl(priv->catas_err.map)) { + u32 slave_read; + + if (mlx4_is_slave(dev)) { + slave_read = swab32(readl(&priv->mfunc.comm->slave_read)); + if (mlx4_comm_internal_err(slave_read)) { + mlx4_warn(dev, "Internal error detected on the communication channel\n"); + goto internal_err; + } + } else if (readl(priv->catas_err.map)) { dump_err_buf(dev); goto internal_err; } @@ -182,22 +269,21 @@ void mlx4_start_catas_poll(struct mlx4_dev *dev) struct mlx4_priv *priv = mlx4_priv(dev); phys_addr_t addr; - /*If we are in SRIOV the default of the module param must be 0*/ - if (mlx4_is_mfunc(dev)) - mlx4_internal_err_reset = 0; - INIT_LIST_HEAD(&priv->catas_err.list); init_timer(&priv->catas_err.timer); priv->catas_err.map = NULL; - addr = pci_resource_start(dev->persist->pdev, priv->fw.catas_bar) + - priv->fw.catas_offset; - - priv->catas_err.map = ioremap(addr, priv->fw.catas_size * 4); - if (!priv->catas_err.map) { - mlx4_warn(dev, "Failed to map internal error buffer at 0x%llx\n", - (unsigned long long) addr); - return; + if (!mlx4_is_slave(dev)) { + addr = pci_resource_start(dev->persist->pdev, + priv->fw.catas_bar) + + priv->fw.catas_offset; + + priv->catas_err.map = ioremap(addr, priv->fw.catas_size * 4); + if (!priv->catas_err.map) { + mlx4_warn(dev, "Failed to map internal error buffer at 0x%llx\n", + (unsigned long long)addr); + return; + } } priv->catas_err.timer.data = (unsigned long) dev; diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c index 3895b2b5fc92..7652eed4bbc8 100644 --- a/drivers/net/ethernet/mellanox/mlx4/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c @@ -42,6 +42,7 @@ #include #include #include +#include #include @@ -729,7 +730,7 @@ int __mlx4_cmd(struct mlx4_dev *dev, u64 in_param, u64 *out_param, EXPORT_SYMBOL_GPL(__mlx4_cmd); -static int mlx4_ARM_COMM_CHANNEL(struct mlx4_dev *dev) +int mlx4_ARM_COMM_CHANNEL(struct mlx4_dev *dev) { return mlx4_cmd(dev, 0, 0, 0, MLX4_CMD_ARM_COMM_CHANNEL, MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE); @@ -1945,8 +1946,11 @@ static void mlx4_master_do_cmd(struct mlx4_dev *dev, int slave, u8 cmd, break; case MLX4_COMM_CMD_VHCR_POST: if ((slave_state[slave].last_cmd != MLX4_COMM_CMD_VHCR_EN) && - (slave_state[slave].last_cmd != MLX4_COMM_CMD_VHCR_POST)) + (slave_state[slave].last_cmd != MLX4_COMM_CMD_VHCR_POST)) { + mlx4_warn(dev, "slave:%d is out of sync, cmd=0x%x, last command=0x%x, reset is needed\n", + slave, cmd, slave_state[slave].last_cmd); goto reset_slave; + } mutex_lock(&priv->cmd.slave_cmd_mutex); if (mlx4_master_process_vhcr(dev, slave, NULL)) { @@ -1980,7 +1984,18 @@ static void mlx4_master_do_cmd(struct mlx4_dev *dev, int slave, u8 cmd, reset_slave: /* cleanup any slave resources */ - mlx4_delete_all_resources_for_slave(dev, slave); + if (dev->persist->interface_state & MLX4_INTERFACE_STATE_UP) + mlx4_delete_all_resources_for_slave(dev, slave); + + if (cmd != MLX4_COMM_CMD_RESET) { + mlx4_warn(dev, "Turn on internal error to force reset, slave=%d, cmd=0x%x\n", + slave, cmd); + /* Turn on internal error letting slave reset itself immeditaly, + * otherwise it might take till timeout on command is passed + */ + reply |= ((u32)COMM_CHAN_EVENT_INTERNAL_ERR); + } + spin_lock_irqsave(&priv->mfunc.master.slave_state_lock, flags); if (!slave_state[slave].is_slave_going_down) slave_state[slave].last_cmd = MLX4_COMM_CMD_RESET; @@ -2056,17 +2071,28 @@ void mlx4_master_comm_channel(struct work_struct *work) static int sync_toggles(struct mlx4_dev *dev) { struct mlx4_priv *priv = mlx4_priv(dev); - int wr_toggle; - int rd_toggle; + u32 wr_toggle; + u32 rd_toggle; unsigned long end; - wr_toggle = swab32(readl(&priv->mfunc.comm->slave_write)) >> 31; - end = jiffies + msecs_to_jiffies(5000); + wr_toggle = swab32(readl(&priv->mfunc.comm->slave_write)); + if (wr_toggle == 0xffffffff) + end = jiffies + msecs_to_jiffies(30000); + else + end = jiffies + msecs_to_jiffies(5000); while (time_before(jiffies, end)) { - rd_toggle = swab32(readl(&priv->mfunc.comm->slave_read)) >> 31; - if (rd_toggle == wr_toggle) { - priv->cmd.comm_toggle = rd_toggle; + rd_toggle = swab32(readl(&priv->mfunc.comm->slave_read)); + if (wr_toggle == 0xffffffff || rd_toggle == 0xffffffff) { + /* PCI might be offline */ + msleep(100); + wr_toggle = swab32(readl(&priv->mfunc.comm-> + slave_write)); + continue; + } + + if (rd_toggle >> 31 == wr_toggle >> 31) { + priv->cmd.comm_toggle = rd_toggle >> 31; return 0; } @@ -2172,13 +2198,6 @@ int mlx4_multi_func_init(struct mlx4_dev *dev) if (mlx4_init_resource_tracker(dev)) goto err_thread; - err = mlx4_ARM_COMM_CHANNEL(dev); - if (err) { - mlx4_err(dev, " Failed to arm comm channel eq: %x\n", - err); - goto err_resource; - } - } else { err = sync_toggles(dev); if (err) { @@ -2188,8 +2207,6 @@ int mlx4_multi_func_init(struct mlx4_dev *dev) } return 0; -err_resource: - mlx4_free_resource_tracker(dev, RES_TR_FREE_ALL); err_thread: flush_workqueue(priv->mfunc.master.comm_wq); destroy_workqueue(priv->mfunc.master.comm_wq); @@ -2266,6 +2283,27 @@ err: return -ENOMEM; } +void mlx4_report_internal_err_comm_event(struct mlx4_dev *dev) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + int slave; + u32 slave_read; + + /* Report an internal error event to all + * communication channels. + */ + for (slave = 0; slave < dev->num_slaves; slave++) { + slave_read = swab32(readl(&priv->mfunc.comm[slave].slave_read)); + slave_read |= (u32)COMM_CHAN_EVENT_INTERNAL_ERR; + __raw_writel((__force u32)cpu_to_be32(slave_read), + &priv->mfunc.comm[slave].slave_read); + /* Make sure that our comm channel write doesn't + * get mixed in with writes from another CPU. + */ + mmiowb(); + } +} + void mlx4_multi_func_cleanup(struct mlx4_dev *dev) { struct mlx4_priv *priv = mlx4_priv(dev); @@ -2281,6 +2319,7 @@ void mlx4_multi_func_cleanup(struct mlx4_dev *dev) kfree(priv->mfunc.master.slave_state); kfree(priv->mfunc.master.vf_admin); kfree(priv->mfunc.master.vf_oper); + dev->num_slaves = 0; } iounmap(priv->mfunc.comm); diff --git a/drivers/net/ethernet/mellanox/mlx4/eq.c b/drivers/net/ethernet/mellanox/mlx4/eq.c index 7538c9ce98a9..2f2e6067426d 100644 --- a/drivers/net/ethernet/mellanox/mlx4/eq.c +++ b/drivers/net/ethernet/mellanox/mlx4/eq.c @@ -429,8 +429,14 @@ void mlx4_master_handle_slave_flr(struct work_struct *work) if (MLX4_COMM_CMD_FLR == slave_state[i].last_cmd) { mlx4_dbg(dev, "mlx4_handle_slave_flr: clean slave: %d\n", i); - - mlx4_delete_all_resources_for_slave(dev, i); + /* In case of 'Reset flow' FLR can be generated for + * a slave before mlx4_load_one is done. + * make sure interface is up before trying to delete + * slave resources which weren't allocated yet. + */ + if (dev->persist->interface_state & + MLX4_INTERFACE_STATE_UP) + mlx4_delete_all_resources_for_slave(dev, i); /*return the slave to running mode*/ spin_lock_irqsave(&priv->mfunc.master.slave_state_lock, flags); slave_state[i].last_cmd = MLX4_COMM_CMD_RESET; diff --git a/drivers/net/ethernet/mellanox/mlx4/intf.c b/drivers/net/ethernet/mellanox/mlx4/intf.c index fba0b96a6f28..68d2bad325d5 100644 --- a/drivers/net/ethernet/mellanox/mlx4/intf.c +++ b/drivers/net/ethernet/mellanox/mlx4/intf.c @@ -144,8 +144,7 @@ int mlx4_register_device(struct mlx4_dev *dev) mlx4_add_device(intf, priv); mutex_unlock(&intf_mutex); - if (!mlx4_is_slave(dev)) - mlx4_start_catas_poll(dev); + mlx4_start_catas_poll(dev); return 0; } @@ -155,8 +154,7 @@ void mlx4_unregister_device(struct mlx4_dev *dev) struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_interface *intf; - if (!mlx4_is_slave(dev)) - mlx4_stop_catas_poll(dev); + mlx4_stop_catas_poll(dev); mutex_lock(&intf_mutex); list_for_each_entry(intf, &intf_list, list) diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index 6bb0fca137cd..1baf1f1e2866 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -108,6 +108,8 @@ MODULE_PARM_DESC(enable_64b_cqe_eqe, MLX4_FUNC_CAP_EQE_CQE_STRIDE | \ MLX4_FUNC_CAP_DMFS_A0_STATIC) +#define RESET_PERSIST_MASK_FLAGS (MLX4_FLAG_SRIOV) + static char mlx4_version[] = DRV_NAME ": Mellanox ConnectX core driver v" DRV_VERSION " (" DRV_RELDATE ")\n"; @@ -1579,6 +1581,50 @@ static void mlx4_close_fw(struct mlx4_dev *dev) } } +static int mlx4_comm_check_offline(struct mlx4_dev *dev) +{ +#define COMM_CHAN_OFFLINE_OFFSET 0x09 + + u32 comm_flags; + u32 offline_bit; + unsigned long end; + struct mlx4_priv *priv = mlx4_priv(dev); + + end = msecs_to_jiffies(MLX4_COMM_OFFLINE_TIME_OUT) + jiffies; + while (time_before(jiffies, end)) { + comm_flags = swab32(readl((__iomem char *)priv->mfunc.comm + + MLX4_COMM_CHAN_FLAGS)); + offline_bit = (comm_flags & + (u32)(1 << COMM_CHAN_OFFLINE_OFFSET)); + if (!offline_bit) + return 0; + /* There are cases as part of AER/Reset flow that PF needs + * around 100 msec to load. We therefore sleep for 100 msec + * to allow other tasks to make use of that CPU during this + * time interval. + */ + msleep(100); + } + mlx4_err(dev, "Communication channel is offline.\n"); + return -EIO; +} + +static void mlx4_reset_vf_support(struct mlx4_dev *dev) +{ +#define COMM_CHAN_RST_OFFSET 0x1e + + struct mlx4_priv *priv = mlx4_priv(dev); + u32 comm_rst; + u32 comm_caps; + + comm_caps = swab32(readl((__iomem char *)priv->mfunc.comm + + MLX4_COMM_CHAN_CAPS)); + comm_rst = (comm_caps & (u32)(1 << COMM_CHAN_RST_OFFSET)); + + if (comm_rst) + dev->caps.vf_caps |= MLX4_VF_CAP_FLAG_RESET; +} + static int mlx4_init_slave(struct mlx4_dev *dev) { struct mlx4_priv *priv = mlx4_priv(dev); @@ -1594,6 +1640,12 @@ static int mlx4_init_slave(struct mlx4_dev *dev) mutex_lock(&priv->cmd.slave_cmd_mutex); priv->cmd.max_cmds = 1; + if (mlx4_comm_check_offline(dev)) { + mlx4_err(dev, "PF is not responsive, skipping initialization\n"); + goto err_offline; + } + + mlx4_reset_vf_support(dev); mlx4_warn(dev, "Sending reset\n"); ret_from_reset = mlx4_comm_cmd(dev, MLX4_COMM_CMD_RESET, 0, MLX4_COMM_TIME); @@ -1637,6 +1689,7 @@ static int mlx4_init_slave(struct mlx4_dev *dev) err: mlx4_comm_cmd(dev, MLX4_COMM_CMD_RESET, 0, 0); +err_offline: mutex_unlock(&priv->cmd.slave_cmd_mutex); return -EIO; } @@ -2494,11 +2547,19 @@ static void mlx4_free_ownership(struct mlx4_dev *dev) !!((flags) & MLX4_FLAG_MASTER)) static u64 mlx4_enable_sriov(struct mlx4_dev *dev, struct pci_dev *pdev, - u8 total_vfs, int existing_vfs) + u8 total_vfs, int existing_vfs, int reset_flow) { u64 dev_flags = dev->flags; int err = 0; + if (reset_flow) { + dev->dev_vfs = kcalloc(total_vfs, sizeof(*dev->dev_vfs), + GFP_KERNEL); + if (!dev->dev_vfs) + goto free_mem; + return dev_flags; + } + atomic_inc(&pf_loading); if (dev->flags & MLX4_FLAG_SRIOV) { if (existing_vfs != total_vfs) { @@ -2533,6 +2594,7 @@ static u64 mlx4_enable_sriov(struct mlx4_dev *dev, struct pci_dev *pdev, disable_sriov: atomic_dec(&pf_loading); +free_mem: dev->persist->num_vfs = 0; kfree(dev->dev_vfs); return dev_flags & ~MLX4_FLAG_MASTER; @@ -2557,7 +2619,8 @@ static int mlx4_check_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap } static int mlx4_load_one(struct pci_dev *pdev, int pci_dev_data, - int total_vfs, int *nvfs, struct mlx4_priv *priv) + int total_vfs, int *nvfs, struct mlx4_priv *priv, + int reset_flow) { struct mlx4_dev *dev; unsigned sum = 0; @@ -2679,8 +2742,10 @@ slave_start: goto err_fw; if (!(dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_SYS_EQS)) { - u64 dev_flags = mlx4_enable_sriov(dev, pdev, total_vfs, - existing_vfs); + u64 dev_flags = mlx4_enable_sriov(dev, pdev, + total_vfs, + existing_vfs, + reset_flow); mlx4_cmd_cleanup(dev, MLX4_CMD_CLEANUP_ALL); dev->flags = dev_flags; @@ -2722,7 +2787,7 @@ slave_start: if (dev->flags & MLX4_FLAG_SRIOV) { if (!existing_vfs) pci_disable_sriov(pdev); - if (mlx4_is_master(dev)) + if (mlx4_is_master(dev) && !reset_flow) atomic_dec(&pf_loading); dev->flags &= ~MLX4_FLAG_SRIOV; } @@ -2736,7 +2801,8 @@ slave_start: } if (mlx4_is_master(dev) && (dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_SYS_EQS)) { - u64 dev_flags = mlx4_enable_sriov(dev, pdev, total_vfs, existing_vfs); + u64 dev_flags = mlx4_enable_sriov(dev, pdev, total_vfs, + existing_vfs, reset_flow); if ((dev->flags ^ dev_flags) & (MLX4_FLAG_MASTER | MLX4_FLAG_SLAVE)) { mlx4_cmd_cleanup(dev, MLX4_CMD_CLEANUP_VHCR); @@ -2848,6 +2914,17 @@ slave_start: goto err_steer; mlx4_init_quotas(dev); + /* When PF resources are ready arm its comm channel to enable + * getting commands + */ + if (mlx4_is_master(dev)) { + err = mlx4_ARM_COMM_CHANNEL(dev); + if (err) { + mlx4_err(dev, " Failed to arm comm channel eq: %x\n", + err); + goto err_steer; + } + } for (port = 1; port <= dev->caps.num_ports; port++) { err = mlx4_init_port_info(dev, port); @@ -2866,7 +2943,7 @@ slave_start: priv->removed = 0; - if (mlx4_is_master(dev) && dev->persist->num_vfs) + if (mlx4_is_master(dev) && dev->persist->num_vfs && !reset_flow) atomic_dec(&pf_loading); kfree(dev_cap); @@ -2925,10 +3002,12 @@ err_cmd: mlx4_cmd_cleanup(dev, MLX4_CMD_CLEANUP_ALL); err_sriov: - if (dev->flags & MLX4_FLAG_SRIOV && !existing_vfs) + if (dev->flags & MLX4_FLAG_SRIOV && !existing_vfs) { pci_disable_sriov(pdev); + dev->flags &= ~MLX4_FLAG_SRIOV; + } - if (mlx4_is_master(dev) && dev->persist->num_vfs) + if (mlx4_is_master(dev) && dev->persist->num_vfs && !reset_flow) atomic_dec(&pf_loading); kfree(priv->dev.dev_vfs); @@ -3073,7 +3152,7 @@ static int __mlx4_init_one(struct pci_dev *pdev, int pci_dev_data, if (err) goto err_release_regions; - err = mlx4_load_one(pdev, pci_dev_data, total_vfs, nvfs, priv); + err = mlx4_load_one(pdev, pci_dev_data, total_vfs, nvfs, priv, 0); if (err) goto err_catas; @@ -3131,9 +3210,11 @@ static void mlx4_clean_dev(struct mlx4_dev *dev) { struct mlx4_dev_persistent *persist = dev->persist; struct mlx4_priv *priv = mlx4_priv(dev); + unsigned long flags = (dev->flags & RESET_PERSIST_MASK_FLAGS); memset(priv, 0, sizeof(*priv)); priv->dev.persist = persist; + priv->dev.flags = flags; } static void mlx4_unload_one(struct pci_dev *pdev) @@ -3143,7 +3224,6 @@ static void mlx4_unload_one(struct pci_dev *pdev) struct mlx4_priv *priv = mlx4_priv(dev); int pci_dev_data; int p, i; - int active_vfs = 0; if (priv->removed) return; @@ -3157,14 +3237,6 @@ static void mlx4_unload_one(struct pci_dev *pdev) pci_dev_data = priv->pci_dev_data; - /* Disabling SR-IOV is not allowed while there are active vf's */ - if (mlx4_is_master(dev)) { - active_vfs = mlx4_how_many_lives_vf(dev); - if (active_vfs) { - pr_warn("Removing PF when there are active VF's !!\n"); - pr_warn("Will not disable SR-IOV.\n"); - } - } mlx4_stop_sense(dev); mlx4_unregister_device(dev); @@ -3208,12 +3280,6 @@ static void mlx4_unload_one(struct pci_dev *pdev) if (dev->flags & MLX4_FLAG_MSI_X) pci_disable_msix(pdev); - if (dev->flags & MLX4_FLAG_SRIOV && !active_vfs) { - mlx4_warn(dev, "Disabling SR-IOV\n"); - pci_disable_sriov(pdev); - dev->flags &= ~MLX4_FLAG_SRIOV; - dev->persist->num_vfs = 0; - } if (!mlx4_is_slave(dev)) mlx4_free_ownership(dev); @@ -3235,11 +3301,21 @@ static void mlx4_remove_one(struct pci_dev *pdev) struct mlx4_dev_persistent *persist = pci_get_drvdata(pdev); struct mlx4_dev *dev = persist->dev; struct mlx4_priv *priv = mlx4_priv(dev); + int active_vfs = 0; mutex_lock(&persist->interface_state_mutex); persist->interface_state |= MLX4_INTERFACE_STATE_DELETION; mutex_unlock(&persist->interface_state_mutex); + /* Disabling SR-IOV is not allowed while there are active vf's */ + if (mlx4_is_master(dev) && dev->flags & MLX4_FLAG_SRIOV) { + active_vfs = mlx4_how_many_lives_vf(dev); + if (active_vfs) { + pr_warn("Removing PF when there are active VF's !!\n"); + pr_warn("Will not disable SR-IOV.\n"); + } + } + /* device marked to be under deletion running now without the lock * letting other tasks to be terminated */ @@ -3248,6 +3324,11 @@ static void mlx4_remove_one(struct pci_dev *pdev) else mlx4_info(dev, "%s: interface is down\n", __func__); mlx4_catas_end(dev); + if (dev->flags & MLX4_FLAG_SRIOV && !active_vfs) { + mlx4_warn(dev, "Disabling SR-IOV\n"); + pci_disable_sriov(pdev); + } + pci_release_regions(pdev); pci_disable_device(pdev); kfree(dev->persist); @@ -3287,7 +3368,7 @@ int mlx4_restart_one(struct pci_dev *pdev) memcpy(nvfs, dev->persist->nvfs, sizeof(dev->persist->nvfs)); mlx4_unload_one(pdev); - err = mlx4_load_one(pdev, pci_dev_data, total_vfs, nvfs, priv); + err = mlx4_load_one(pdev, pci_dev_data, total_vfs, nvfs, priv, 1); if (err) { mlx4_err(dev, "%s: ERROR: mlx4_load_one failed, pci_name=%s, err=%d\n", __func__, pci_name(pdev), err); @@ -3397,7 +3478,7 @@ static pci_ers_result_t mlx4_pci_slot_reset(struct pci_dev *pdev) mutex_lock(&persist->interface_state_mutex); if (!(persist->interface_state & MLX4_INTERFACE_STATE_UP)) { ret = mlx4_load_one(pdev, priv->pci_dev_data, total_vfs, nvfs, - priv); + priv, 1); if (ret) { mlx4_err(dev, "%s: mlx4_load_one failed, ret=%d\n", __func__, ret); diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h index 5c772ea4473b..2a15b8248e77 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h @@ -85,7 +85,9 @@ enum { MLX4_CLR_INT_SIZE = 0x00008, MLX4_SLAVE_COMM_BASE = 0x0, MLX4_COMM_PAGESIZE = 0x1000, - MLX4_CLOCK_SIZE = 0x00008 + MLX4_CLOCK_SIZE = 0x00008, + MLX4_COMM_CHAN_CAPS = 0x8, + MLX4_COMM_CHAN_FLAGS = 0xc }; enum { @@ -120,6 +122,8 @@ enum mlx4_mpt_state { }; #define MLX4_COMM_TIME 10000 +#define MLX4_COMM_OFFLINE_TIME_OUT 30000 + enum { MLX4_COMM_CMD_RESET, MLX4_COMM_CMD_VHCR0, @@ -1162,6 +1166,7 @@ enum { int mlx4_cmd_init(struct mlx4_dev *dev); void mlx4_cmd_cleanup(struct mlx4_dev *dev, int cleanup_mask); int mlx4_multi_func_init(struct mlx4_dev *dev); +int mlx4_ARM_COMM_CHANNEL(struct mlx4_dev *dev); void mlx4_multi_func_cleanup(struct mlx4_dev *dev); void mlx4_cmd_event(struct mlx4_dev *dev, u16 token, u8 status, u64 out_param); int mlx4_cmd_use_events(struct mlx4_dev *dev); diff --git a/include/linux/mlx4/cmd.h b/include/linux/mlx4/cmd.h index e7543844cc7a..c989442ffc6a 100644 --- a/include/linux/mlx4/cmd.h +++ b/include/linux/mlx4/cmd.h @@ -280,6 +280,7 @@ int mlx4_set_vf_link_state(struct mlx4_dev *dev, int port, int vf, int link_stat int mlx4_config_dev_retrieval(struct mlx4_dev *dev, struct mlx4_config_dev_params *params); void mlx4_cmd_wake_completions(struct mlx4_dev *dev); +void mlx4_report_internal_err_comm_event(struct mlx4_dev *dev); /* * mlx4_get_slave_default_vlan - * return true if VST ( default vlan) @@ -289,5 +290,6 @@ bool mlx4_get_slave_default_vlan(struct mlx4_dev *dev, int port, int slave, u16 *vlan, u8 *qos); #define MLX4_COMM_GET_IF_REV(cmd_chan_ver) (u8)((cmd_chan_ver) >> 8) +#define COMM_CHAN_EVENT_INTERNAL_ERR (1 << 17) #endif /* MLX4_CMD_H */ diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 33f9ca71925c..5ef54e145e4d 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -208,6 +208,10 @@ enum { MLX4_QUERY_FUNC_FLAGS_A0_RES_QP = 1LL << 1 }; +enum { + MLX4_VF_CAP_FLAG_RESET = 1 << 0 +}; + /* bit enums for an 8-bit flags field indicating special use * QPs which require special handling in qp_reserve_range. * Currently, this only includes QPs used by the ETH interface, @@ -545,6 +549,7 @@ struct mlx4_caps { u8 alloc_res_qp_mask; u32 dmfs_high_rate_qpn_base; u32 dmfs_high_rate_qpn_range; + u32 vf_caps; }; struct mlx4_buf_list { -- cgit v1.2.3