diff options
Diffstat (limited to 'drivers/infiniband/ulp/ipoib/ipoib_vlan.c')
-rw-r--r-- | drivers/infiniband/ulp/ipoib/ipoib_vlan.c | 261 |
1 files changed, 150 insertions, 111 deletions
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c index 55a9b71ed05a..341753fbda54 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c @@ -50,68 +50,112 @@ static ssize_t show_parent(struct device *d, struct device_attribute *attr, } static DEVICE_ATTR(parent, S_IRUGO, show_parent, NULL); +static bool is_child_unique(struct ipoib_dev_priv *ppriv, + struct ipoib_dev_priv *priv) +{ + struct ipoib_dev_priv *tpriv; + + ASSERT_RTNL(); + + /* + * Since the legacy sysfs interface uses pkey for deletion it cannot + * support more than one interface with the same pkey, it creates + * ambiguity. The RTNL interface deletes using the netdev so it does + * not have a problem to support duplicated pkeys. + */ + if (priv->child_type != IPOIB_LEGACY_CHILD) + return true; + + /* + * First ensure this isn't a duplicate. We check the parent device and + * then all of the legacy child interfaces to make sure the Pkey + * doesn't match. + */ + if (ppriv->pkey == priv->pkey) + return false; + + list_for_each_entry(tpriv, &ppriv->child_intfs, list) { + if (tpriv->pkey == priv->pkey && + tpriv->child_type == IPOIB_LEGACY_CHILD) + return false; + } + + return true; +} + +/* + * NOTE: If this function fails then the priv->dev will remain valid, however + * priv can have been freed and must not be touched by caller in the error + * case. + * + * If (ndev->reg_state == NETREG_UNINITIALIZED) then it is up to the caller to + * free the net_device (just as rtnl_newlink does) otherwise the net_device + * will be freed when the rtnl is unlocked. + */ int __ipoib_vlan_add(struct ipoib_dev_priv *ppriv, struct ipoib_dev_priv *priv, u16 pkey, int type) { + struct net_device *ndev = priv->dev; int result; - priv->max_ib_mtu = ppriv->max_ib_mtu; - /* MTU will be reset when mcast join happens */ - priv->dev->mtu = IPOIB_UD_MTU(priv->max_ib_mtu); - priv->mcast_mtu = priv->admin_mtu = priv->dev->mtu; - priv->parent = ppriv->dev; - set_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags); + ASSERT_RTNL(); + + /* + * Racing with unregister of the parent must be prevented by the + * caller. + */ + WARN_ON(ppriv->dev->reg_state != NETREG_REGISTERED); - ipoib_set_dev_features(priv, ppriv->ca); + if (pkey == 0 || pkey == 0x8000) { + result = -EINVAL; + goto out_early; + } + priv->parent = ppriv->dev; priv->pkey = pkey; + priv->child_type = type; - memcpy(priv->dev->dev_addr, ppriv->dev->dev_addr, INFINIBAND_ALEN); - memcpy(&priv->local_gid, &ppriv->local_gid, sizeof(priv->local_gid)); - set_bit(IPOIB_FLAG_DEV_ADDR_SET, &priv->flags); - priv->dev->broadcast[8] = pkey >> 8; - priv->dev->broadcast[9] = pkey & 0xff; - - result = ipoib_dev_init(priv->dev, ppriv->ca, ppriv->port); - if (result < 0) { - ipoib_warn(ppriv, "failed to initialize subinterface: " - "device %s, port %d", - ppriv->ca->name, ppriv->port); - goto err; + if (!is_child_unique(ppriv, priv)) { + result = -ENOTUNIQ; + goto out_early; } - result = register_netdevice(priv->dev); + /* We do not need to touch priv if register_netdevice fails */ + ndev->priv_destructor = ipoib_intf_free; + + result = register_netdevice(ndev); if (result) { ipoib_warn(priv, "failed to initialize; error %i", result); - goto register_failed; + + /* + * register_netdevice sometimes calls priv_destructor, + * sometimes not. Make sure it was done. + */ + goto out_early; } /* RTNL childs don't need proprietary sysfs entries */ if (type == IPOIB_LEGACY_CHILD) { - if (ipoib_cm_add_mode_attr(priv->dev)) + if (ipoib_cm_add_mode_attr(ndev)) goto sysfs_failed; - if (ipoib_add_pkey_attr(priv->dev)) + if (ipoib_add_pkey_attr(ndev)) goto sysfs_failed; - if (ipoib_add_umcast_attr(priv->dev)) + if (ipoib_add_umcast_attr(ndev)) goto sysfs_failed; - if (device_create_file(&priv->dev->dev, &dev_attr_parent)) + if (device_create_file(&ndev->dev, &dev_attr_parent)) goto sysfs_failed; } - priv->child_type = type; - list_add_tail(&priv->list, &ppriv->child_intfs); - return 0; sysfs_failed: - result = -ENOMEM; unregister_netdevice(priv->dev); + return -ENOMEM; -register_failed: - ipoib_dev_cleanup(priv->dev); - -err: +out_early: + if (ndev->priv_destructor) + ndev->priv_destructor(ndev); return result; } @@ -119,129 +163,124 @@ int ipoib_vlan_add(struct net_device *pdev, unsigned short pkey) { struct ipoib_dev_priv *ppriv, *priv; char intf_name[IFNAMSIZ]; - struct ipoib_dev_priv *tpriv; + struct net_device *ndev; int result; if (!capable(CAP_NET_ADMIN)) return -EPERM; - ppriv = ipoib_priv(pdev); - - if (test_bit(IPOIB_FLAG_GOING_DOWN, &ppriv->flags)) - return -EPERM; - - snprintf(intf_name, sizeof intf_name, "%s.%04x", - ppriv->dev->name, pkey); - - if (!mutex_trylock(&ppriv->sysfs_mutex)) + if (!rtnl_trylock()) return restart_syscall(); - if (!rtnl_trylock()) { - mutex_unlock(&ppriv->sysfs_mutex); - return restart_syscall(); - } - - if (!down_write_trylock(&ppriv->vlan_rwsem)) { + if (pdev->reg_state != NETREG_REGISTERED) { rtnl_unlock(); - mutex_unlock(&ppriv->sysfs_mutex); - return restart_syscall(); + return -EPERM; } + ppriv = ipoib_priv(pdev); + + snprintf(intf_name, sizeof(intf_name), "%s.%04x", + ppriv->dev->name, pkey); + priv = ipoib_intf_alloc(ppriv->ca, ppriv->port, intf_name); if (!priv) { result = -ENOMEM; goto out; } - - /* - * First ensure this isn't a duplicate. We check the parent device and - * then all of the legacy child interfaces to make sure the Pkey - * doesn't match. - */ - if (ppriv->pkey == pkey) { - result = -ENOTUNIQ; - goto out; - } - - list_for_each_entry(tpriv, &ppriv->child_intfs, list) { - if (tpriv->pkey == pkey && - tpriv->child_type == IPOIB_LEGACY_CHILD) { - result = -ENOTUNIQ; - goto out; - } - } + ndev = priv->dev; result = __ipoib_vlan_add(ppriv, priv, pkey, IPOIB_LEGACY_CHILD); + if (result && ndev->reg_state == NETREG_UNINITIALIZED) + free_netdev(ndev); + out: - up_write(&ppriv->vlan_rwsem); rtnl_unlock(); - mutex_unlock(&ppriv->sysfs_mutex); - if (result && priv) { - struct rdma_netdev *rn; + return result; +} + +struct ipoib_vlan_delete_work { + struct work_struct work; + struct net_device *dev; +}; + +/* + * sysfs callbacks of a netdevice cannot obtain the rtnl lock as + * unregister_netdev ultimately deletes the sysfs files while holding the rtnl + * lock. This deadlocks the system. + * + * A callback can use rtnl_trylock to avoid the deadlock but it cannot call + * unregister_netdev as that internally takes and releases the rtnl_lock. So + * instead we find the netdev to unregister and then do the actual unregister + * from the global work queue where we can obtain the rtnl_lock safely. + */ +static void ipoib_vlan_delete_task(struct work_struct *work) +{ + struct ipoib_vlan_delete_work *pwork = + container_of(work, struct ipoib_vlan_delete_work, work); + struct net_device *dev = pwork->dev; + + rtnl_lock(); + + /* Unregistering tasks can race with another task or parent removal */ + if (dev->reg_state == NETREG_REGISTERED) { + struct ipoib_dev_priv *priv = ipoib_priv(dev); + struct ipoib_dev_priv *ppriv = ipoib_priv(priv->parent); - rn = netdev_priv(priv->dev); - rn->free_rdma_netdev(priv->dev); - kfree(priv); + ipoib_dbg(ppriv, "delete child vlan %s\n", dev->name); + unregister_netdevice(dev); } - return result; + rtnl_unlock(); + + kfree(pwork); } int ipoib_vlan_delete(struct net_device *pdev, unsigned short pkey) { struct ipoib_dev_priv *ppriv, *priv, *tpriv; - struct net_device *dev = NULL; + int rc; if (!capable(CAP_NET_ADMIN)) return -EPERM; - ppriv = ipoib_priv(pdev); - - if (test_bit(IPOIB_FLAG_GOING_DOWN, &ppriv->flags)) - return -EPERM; - - if (!mutex_trylock(&ppriv->sysfs_mutex)) + if (!rtnl_trylock()) return restart_syscall(); - if (!rtnl_trylock()) { - mutex_unlock(&ppriv->sysfs_mutex); - return restart_syscall(); - } - - if (!down_write_trylock(&ppriv->vlan_rwsem)) { + if (pdev->reg_state != NETREG_REGISTERED) { rtnl_unlock(); - mutex_unlock(&ppriv->sysfs_mutex); - return restart_syscall(); + return -EPERM; } + ppriv = ipoib_priv(pdev); + + rc = -ENODEV; list_for_each_entry_safe(priv, tpriv, &ppriv->child_intfs, list) { if (priv->pkey == pkey && priv->child_type == IPOIB_LEGACY_CHILD) { - list_del(&priv->list); - dev = priv->dev; + struct ipoib_vlan_delete_work *work; + + work = kmalloc(sizeof(*work), GFP_KERNEL); + if (!work) { + rc = -ENOMEM; + goto out; + } + + down_write(&ppriv->vlan_rwsem); + list_del_init(&priv->list); + up_write(&ppriv->vlan_rwsem); + work->dev = priv->dev; + INIT_WORK(&work->work, ipoib_vlan_delete_task); + queue_work(ipoib_workqueue, &work->work); + + rc = 0; break; } } - up_write(&ppriv->vlan_rwsem); - - if (dev) { - ipoib_dbg(ppriv, "delete child vlan %s\n", dev->name); - unregister_netdevice(dev); - } +out: rtnl_unlock(); - mutex_unlock(&ppriv->sysfs_mutex); - - if (dev) { - struct rdma_netdev *rn; - - rn = netdev_priv(dev); - rn->free_rdma_netdev(priv->dev); - kfree(priv); - return 0; - } - return -ENODEV; + return rc; } |