summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/catas.c25
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/cmd.c49
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/main.c30
3 files changed, 93 insertions, 11 deletions
diff --git a/drivers/net/ethernet/mellanox/mlx4/catas.c b/drivers/net/ethernet/mellanox/mlx4/catas.c
index 915e947b422d..9c656fe4983d 100644
--- a/drivers/net/ethernet/mellanox/mlx4/catas.c
+++ b/drivers/net/ethernet/mellanox/mlx4/catas.c
@@ -69,16 +69,21 @@ static void poll_catas(unsigned long dev_ptr)
struct mlx4_priv *priv = mlx4_priv(dev);
if (readl(priv->catas_err.map)) {
- dump_err_buf(dev);
-
- mlx4_dispatch_event(dev, MLX4_DEV_EVENT_CATASTROPHIC_ERROR, 0);
+ /* If the device is off-line, we cannot try to recover it */
+ if (pci_channel_offline(dev->pdev))
+ mod_timer(&priv->catas_err.timer,
+ round_jiffies(jiffies + MLX4_CATAS_POLL_INTERVAL));
+ else {
+ dump_err_buf(dev);
+ mlx4_dispatch_event(dev, MLX4_DEV_EVENT_CATASTROPHIC_ERROR, 0);
- if (internal_err_reset) {
- spin_lock(&catas_lock);
- list_add(&priv->catas_err.list, &catas_list);
- spin_unlock(&catas_lock);
+ if (internal_err_reset) {
+ spin_lock(&catas_lock);
+ list_add(&priv->catas_err.list, &catas_list);
+ spin_unlock(&catas_lock);
- queue_work(mlx4_wq, &catas_work);
+ queue_work(mlx4_wq, &catas_work);
+ }
}
} else
mod_timer(&priv->catas_err.timer,
@@ -100,6 +105,10 @@ static void catas_reset(struct work_struct *work)
list_for_each_entry_safe(priv, tmppriv, &tlist, catas_err.list) {
struct pci_dev *pdev = priv->dev.pdev;
+ /* If the device is off-line, we cannot reset it */
+ if (pci_channel_offline(pdev))
+ continue;
+
ret = mlx4_restart_one(priv->dev.pdev);
/* 'priv' now is not valid */
if (ret)
diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c
index 7e94987d030c..c8fef4353021 100644
--- a/drivers/net/ethernet/mellanox/mlx4/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c
@@ -296,7 +296,12 @@ int mlx4_comm_cmd(struct mlx4_dev *dev, u8 cmd, u16 param,
static int cmd_pending(struct mlx4_dev *dev)
{
- u32 status = readl(mlx4_priv(dev)->cmd.hcr + HCR_STATUS_OFFSET);
+ u32 status;
+
+ if (pci_channel_offline(dev->pdev))
+ return -EIO;
+
+ status = readl(mlx4_priv(dev)->cmd.hcr + HCR_STATUS_OFFSET);
return (status & swab32(1 << HCR_GO_BIT)) ||
(mlx4_priv(dev)->cmd.toggle ==
@@ -314,11 +319,29 @@ static int mlx4_cmd_post(struct mlx4_dev *dev, u64 in_param, u64 out_param,
mutex_lock(&cmd->hcr_mutex);
+ if (pci_channel_offline(dev->pdev)) {
+ /*
+ * Device is going through error recovery
+ * and cannot accept commands.
+ */
+ ret = -EIO;
+ goto out;
+ }
+
end = jiffies;
if (event)
end += msecs_to_jiffies(GO_BIT_TIMEOUT_MSECS);
while (cmd_pending(dev)) {
+ if (pci_channel_offline(dev->pdev)) {
+ /*
+ * Device is going through error recovery
+ * and cannot accept commands.
+ */
+ ret = -EIO;
+ goto out;
+ }
+
if (time_after_eq(jiffies, end)) {
mlx4_err(dev, "%s:cmd_pending failed\n", __func__);
goto out;
@@ -431,14 +454,33 @@ static int mlx4_cmd_poll(struct mlx4_dev *dev, u64 in_param, u64 *out_param,
down(&priv->cmd.poll_sem);
+ if (pci_channel_offline(dev->pdev)) {
+ /*
+ * Device is going through error recovery
+ * and cannot accept commands.
+ */
+ err = -EIO;
+ goto out;
+ }
+
err = mlx4_cmd_post(dev, in_param, out_param ? *out_param : 0,
in_modifier, op_modifier, op, CMD_POLL_TOKEN, 0);
if (err)
goto out;
end = msecs_to_jiffies(timeout) + jiffies;
- while (cmd_pending(dev) && time_before(jiffies, end))
+ while (cmd_pending(dev) && time_before(jiffies, end)) {
+ if (pci_channel_offline(dev->pdev)) {
+ /*
+ * Device is going through error recovery
+ * and cannot accept commands.
+ */
+ err = -EIO;
+ goto out;
+ }
+
cond_resched();
+ }
if (cmd_pending(dev)) {
err = -ETIMEDOUT;
@@ -532,6 +574,9 @@ int __mlx4_cmd(struct mlx4_dev *dev, u64 in_param, u64 *out_param,
int out_is_imm, u32 in_modifier, u8 op_modifier,
u16 op, unsigned long timeout, int native)
{
+ if (pci_channel_offline(dev->pdev))
+ return -EIO;
+
if (!mlx4_is_mfunc(dev) || (native && mlx4_is_master(dev))) {
if (mlx4_priv(dev)->cmd.use_events)
return mlx4_cmd_wait(dev, in_param, out_param,
diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c
index 42645166bae2..e717091734d0 100644
--- a/drivers/net/ethernet/mellanox/mlx4/main.c
+++ b/drivers/net/ethernet/mellanox/mlx4/main.c
@@ -1775,6 +1775,9 @@ static int mlx4_get_ownership(struct mlx4_dev *dev)
void __iomem *owner;
u32 ret;
+ if (pci_channel_offline(dev->pdev))
+ return -EIO;
+
owner = ioremap(pci_resource_start(dev->pdev, 0) + MLX4_OWNER_BASE,
MLX4_OWNER_SIZE);
if (!owner) {
@@ -1791,6 +1794,9 @@ static void mlx4_free_ownership(struct mlx4_dev *dev)
{
void __iomem *owner;
+ if (pci_channel_offline(dev->pdev))
+ return;
+
owner = ioremap(pci_resource_start(dev->pdev, 0) + MLX4_OWNER_BASE,
MLX4_OWNER_SIZE);
if (!owner) {
@@ -2237,11 +2243,33 @@ static DEFINE_PCI_DEVICE_TABLE(mlx4_pci_table) = {
MODULE_DEVICE_TABLE(pci, mlx4_pci_table);
+static pci_ers_result_t mlx4_pci_err_detected(struct pci_dev *pdev,
+ pci_channel_state_t state)
+{
+ mlx4_remove_one(pdev);
+
+ return state == pci_channel_io_perm_failure ?
+ PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_NEED_RESET;
+}
+
+static pci_ers_result_t mlx4_pci_slot_reset(struct pci_dev *pdev)
+{
+ int ret = __mlx4_init_one(pdev, NULL);
+
+ return ret ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED;
+}
+
+static struct pci_error_handlers mlx4_err_handler = {
+ .error_detected = mlx4_pci_err_detected,
+ .slot_reset = mlx4_pci_slot_reset,
+};
+
static struct pci_driver mlx4_driver = {
.name = DRV_NAME,
.id_table = mlx4_pci_table,
.probe = mlx4_init_one,
- .remove = __devexit_p(mlx4_remove_one)
+ .remove = __devexit_p(mlx4_remove_one),
+ .err_handler = &mlx4_err_handler,
};
static int __init mlx4_verify_params(void)