diff options
Diffstat (limited to 'drivers/infiniband/core/device.c')
-rw-r--r-- | drivers/infiniband/core/device.c | 632 |
1 files changed, 623 insertions, 9 deletions
diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index 7421ec4883fb..78dc07c6ac4b 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -38,6 +38,8 @@ #include <linux/slab.h> #include <linux/init.h> #include <linux/netdevice.h> +#include <net/net_namespace.h> +#include <net/netns/generic.h> #include <linux/security.h> #include <linux/notifier.h> #include <linux/hashtable.h> @@ -101,6 +103,54 @@ static DECLARE_RWSEM(clients_rwsem); * be registered. */ #define CLIENT_DATA_REGISTERED XA_MARK_1 + +/** + * struct rdma_dev_net - rdma net namespace metadata for a net + * @net: Pointer to owner net namespace + * @id: xarray id to identify the net namespace. + */ +struct rdma_dev_net { + possible_net_t net; + u32 id; +}; + +static unsigned int rdma_dev_net_id; + +/* + * A list of net namespaces is maintained in an xarray. This is necessary + * because we can't get the locking right using the existing net ns list. We + * would require a init_net callback after the list is updated. + */ +static DEFINE_XARRAY_FLAGS(rdma_nets, XA_FLAGS_ALLOC); +/* + * rwsem to protect accessing the rdma_nets xarray entries. + */ +static DECLARE_RWSEM(rdma_nets_rwsem); + +bool ib_devices_shared_netns = true; +module_param_named(netns_mode, ib_devices_shared_netns, bool, 0444); +MODULE_PARM_DESC(netns_mode, + "Share device among net namespaces; default=1 (shared)"); +/** + * rdma_dev_access_netns() - Return whether a rdma device can be accessed + * from a specified net namespace or not. + * @device: Pointer to rdma device which needs to be checked + * @net: Pointer to net namesapce for which access to be checked + * + * rdma_dev_access_netns() - Return whether a rdma device can be accessed + * from a specified net namespace or not. When + * rdma device is in shared mode, it ignores the + * net namespace. When rdma device is exclusive + * to a net namespace, rdma device net namespace is + * checked against the specified one. + */ +bool rdma_dev_access_netns(const struct ib_device *dev, const struct net *net) +{ + return (ib_devices_shared_netns || + net_eq(read_pnet(&dev->coredev.rdma_net), net)); +} +EXPORT_SYMBOL(rdma_dev_access_netns); + /* * xarray has this behavior where it won't iterate over NULL values stored in * allocated arrays. So we need our own iterator to see all values stored in @@ -147,10 +197,73 @@ static int ib_security_change(struct notifier_block *nb, unsigned long event, static void ib_policy_change_task(struct work_struct *work); static DECLARE_WORK(ib_policy_change_work, ib_policy_change_task); +static void __ibdev_printk(const char *level, const struct ib_device *ibdev, + struct va_format *vaf) +{ + if (ibdev && ibdev->dev.parent) + dev_printk_emit(level[1] - '0', + ibdev->dev.parent, + "%s %s %s: %pV", + dev_driver_string(ibdev->dev.parent), + dev_name(ibdev->dev.parent), + dev_name(&ibdev->dev), + vaf); + else if (ibdev) + printk("%s%s: %pV", + level, dev_name(&ibdev->dev), vaf); + else + printk("%s(NULL ib_device): %pV", level, vaf); +} + +void ibdev_printk(const char *level, const struct ib_device *ibdev, + const char *format, ...) +{ + struct va_format vaf; + va_list args; + + va_start(args, format); + + vaf.fmt = format; + vaf.va = &args; + + __ibdev_printk(level, ibdev, &vaf); + + va_end(args); +} +EXPORT_SYMBOL(ibdev_printk); + +#define define_ibdev_printk_level(func, level) \ +void func(const struct ib_device *ibdev, const char *fmt, ...) \ +{ \ + struct va_format vaf; \ + va_list args; \ + \ + va_start(args, fmt); \ + \ + vaf.fmt = fmt; \ + vaf.va = &args; \ + \ + __ibdev_printk(level, ibdev, &vaf); \ + \ + va_end(args); \ +} \ +EXPORT_SYMBOL(func); + +define_ibdev_printk_level(ibdev_emerg, KERN_EMERG); +define_ibdev_printk_level(ibdev_alert, KERN_ALERT); +define_ibdev_printk_level(ibdev_crit, KERN_CRIT); +define_ibdev_printk_level(ibdev_err, KERN_ERR); +define_ibdev_printk_level(ibdev_warn, KERN_WARNING); +define_ibdev_printk_level(ibdev_notice, KERN_NOTICE); +define_ibdev_printk_level(ibdev_info, KERN_INFO); + static struct notifier_block ibdev_lsm_nb = { .notifier_call = ib_security_change, }; +static int rdma_dev_change_netns(struct ib_device *device, struct net *cur_net, + struct net *net); + /* Pointer to the RCU head at the start of the ib_port_data array */ struct ib_port_data_rcu { struct rcu_head rcu_head; @@ -200,16 +313,22 @@ static int ib_device_check_mandatory(struct ib_device *device) * Caller must perform ib_device_put() to return the device reference count * when ib_device_get_by_index() returns valid device pointer. */ -struct ib_device *ib_device_get_by_index(u32 index) +struct ib_device *ib_device_get_by_index(const struct net *net, u32 index) { struct ib_device *device; down_read(&devices_rwsem); device = xa_load(&devices, index); if (device) { + if (!rdma_dev_access_netns(device, net)) { + device = NULL; + goto out; + } + if (!ib_device_try_get(device)) device = NULL; } +out: up_read(&devices_rwsem); return device; } @@ -268,6 +387,26 @@ struct ib_device *ib_device_get_by_name(const char *name, } EXPORT_SYMBOL(ib_device_get_by_name); +static int rename_compat_devs(struct ib_device *device) +{ + struct ib_core_device *cdev; + unsigned long index; + int ret = 0; + + mutex_lock(&device->compat_devs_mutex); + xa_for_each (&device->compat_devs, index, cdev) { + ret = device_rename(&cdev->dev, dev_name(&device->dev)); + if (ret) { + dev_warn(&cdev->dev, + "Fail to rename compatdev to new name %s\n", + dev_name(&device->dev)); + break; + } + } + mutex_unlock(&device->compat_devs_mutex); + return ret; +} + int ib_device_rename(struct ib_device *ibdev, const char *name) { int ret; @@ -287,6 +426,7 @@ int ib_device_rename(struct ib_device *ibdev, const char *name) if (ret) goto out; strlcpy(ibdev->name, name, IB_DEVICE_NAME_MAX); + ret = rename_compat_devs(ibdev); out: up_write(&devices_rwsem); return ret; @@ -336,6 +476,7 @@ static void ib_device_release(struct device *device) WARN_ON(refcount_read(&dev->refcount)); ib_cache_release_one(dev); ib_security_release_port_pkey_list(dev); + xa_destroy(&dev->compat_devs); xa_destroy(&dev->client_data); if (dev->port_data) kfree_rcu(container_of(dev->port_data, struct ib_port_data_rcu, @@ -357,12 +498,42 @@ static int ib_device_uevent(struct device *device, return 0; } +static const void *net_namespace(struct device *d) +{ + struct ib_core_device *coredev = + container_of(d, struct ib_core_device, dev); + + return read_pnet(&coredev->rdma_net); +} + static struct class ib_class = { .name = "infiniband", .dev_release = ib_device_release, .dev_uevent = ib_device_uevent, + .ns_type = &net_ns_type_operations, + .namespace = net_namespace, }; +static void rdma_init_coredev(struct ib_core_device *coredev, + struct ib_device *dev, struct net *net) +{ + /* This BUILD_BUG_ON is intended to catch layout change + * of union of ib_core_device and device. + * dev must be the first element as ib_core and providers + * driver uses it. Adding anything in ib_core_device before + * device will break this assumption. + */ + BUILD_BUG_ON(offsetof(struct ib_device, coredev.dev) != + offsetof(struct ib_device, dev)); + + coredev->dev.class = &ib_class; + coredev->dev.groups = dev->groups; + device_initialize(&coredev->dev); + coredev->owner = dev; + INIT_LIST_HEAD(&coredev->port_list); + write_pnet(&coredev->rdma_net, net); +} + /** * _ib_alloc_device - allocate an IB device struct * @size:size of structure to allocate @@ -389,10 +560,8 @@ struct ib_device *_ib_alloc_device(size_t size) return NULL; } - device->dev.class = &ib_class; device->groups[0] = &ib_dev_attr_group; - device->dev.groups = device->groups; - device_initialize(&device->dev); + rdma_init_coredev(&device->coredev, device, &init_net); INIT_LIST_HEAD(&device->event_handler_list); spin_lock_init(&device->event_handler_lock); @@ -403,7 +572,8 @@ struct ib_device *_ib_alloc_device(size_t size) */ xa_init_flags(&device->client_data, XA_FLAGS_ALLOC); init_rwsem(&device->client_data_rwsem); - INIT_LIST_HEAD(&device->port_list); + xa_init_flags(&device->compat_devs, XA_FLAGS_ALLOC); + mutex_init(&device->compat_devs_mutex); init_completion(&device->unreg_completion); INIT_WORK(&device->unregistration_work, ib_unregister_work); @@ -436,6 +606,7 @@ void ib_dealloc_device(struct ib_device *device) /* Expedite releasing netdev references */ free_netdevs(device); + WARN_ON(!xa_empty(&device->compat_devs)); WARN_ON(!xa_empty(&device->client_data)); WARN_ON(refcount_read(&device->refcount)); rdma_restrack_clean(device); @@ -644,6 +815,283 @@ static int ib_security_change(struct notifier_block *nb, unsigned long event, return NOTIFY_OK; } +static void compatdev_release(struct device *dev) +{ + struct ib_core_device *cdev = + container_of(dev, struct ib_core_device, dev); + + kfree(cdev); +} + +static int add_one_compat_dev(struct ib_device *device, + struct rdma_dev_net *rnet) +{ + struct ib_core_device *cdev; + int ret; + + lockdep_assert_held(&rdma_nets_rwsem); + if (!ib_devices_shared_netns) + return 0; + + /* + * Create and add compat device in all namespaces other than where it + * is currently bound to. + */ + if (net_eq(read_pnet(&rnet->net), + read_pnet(&device->coredev.rdma_net))) + return 0; + + /* + * The first of init_net() or ib_register_device() to take the + * compat_devs_mutex wins and gets to add the device. Others will wait + * for completion here. + */ + mutex_lock(&device->compat_devs_mutex); + cdev = xa_load(&device->compat_devs, rnet->id); + if (cdev) { + ret = 0; + goto done; + } + ret = xa_reserve(&device->compat_devs, rnet->id, GFP_KERNEL); + if (ret) + goto done; + + cdev = kzalloc(sizeof(*cdev), GFP_KERNEL); + if (!cdev) { + ret = -ENOMEM; + goto cdev_err; + } + + cdev->dev.parent = device->dev.parent; + rdma_init_coredev(cdev, device, read_pnet(&rnet->net)); + cdev->dev.release = compatdev_release; + dev_set_name(&cdev->dev, "%s", dev_name(&device->dev)); + + ret = device_add(&cdev->dev); + if (ret) + goto add_err; + ret = ib_setup_port_attrs(cdev); + if (ret) + goto port_err; + + ret = xa_err(xa_store(&device->compat_devs, rnet->id, + cdev, GFP_KERNEL)); + if (ret) + goto insert_err; + + mutex_unlock(&device->compat_devs_mutex); + return 0; + +insert_err: + ib_free_port_attrs(cdev); +port_err: + device_del(&cdev->dev); +add_err: + put_device(&cdev->dev); +cdev_err: + xa_release(&device->compat_devs, rnet->id); +done: + mutex_unlock(&device->compat_devs_mutex); + return ret; +} + +static void remove_one_compat_dev(struct ib_device *device, u32 id) +{ + struct ib_core_device *cdev; + + mutex_lock(&device->compat_devs_mutex); + cdev = xa_erase(&device->compat_devs, id); + mutex_unlock(&device->compat_devs_mutex); + if (cdev) { + ib_free_port_attrs(cdev); + device_del(&cdev->dev); + put_device(&cdev->dev); + } +} + +static void remove_compat_devs(struct ib_device *device) +{ + struct ib_core_device *cdev; + unsigned long index; + + xa_for_each (&device->compat_devs, index, cdev) + remove_one_compat_dev(device, index); +} + +static int add_compat_devs(struct ib_device *device) +{ + struct rdma_dev_net *rnet; + unsigned long index; + int ret = 0; + + lockdep_assert_held(&devices_rwsem); + + down_read(&rdma_nets_rwsem); + xa_for_each (&rdma_nets, index, rnet) { + ret = add_one_compat_dev(device, rnet); + if (ret) + break; + } + up_read(&rdma_nets_rwsem); + return ret; +} + +static void remove_all_compat_devs(void) +{ + struct ib_compat_device *cdev; + struct ib_device *dev; + unsigned long index; + + down_read(&devices_rwsem); + xa_for_each (&devices, index, dev) { + unsigned long c_index = 0; + + /* Hold nets_rwsem so that any other thread modifying this + * system param can sync with this thread. + */ + down_read(&rdma_nets_rwsem); + xa_for_each (&dev->compat_devs, c_index, cdev) + remove_one_compat_dev(dev, c_index); + up_read(&rdma_nets_rwsem); + } + up_read(&devices_rwsem); +} + +static int add_all_compat_devs(void) +{ + struct rdma_dev_net *rnet; + struct ib_device *dev; + unsigned long index; + int ret = 0; + + down_read(&devices_rwsem); + xa_for_each_marked (&devices, index, dev, DEVICE_REGISTERED) { + unsigned long net_index = 0; + + /* Hold nets_rwsem so that any other thread modifying this + * system param can sync with this thread. + */ + down_read(&rdma_nets_rwsem); + xa_for_each (&rdma_nets, net_index, rnet) { + ret = add_one_compat_dev(dev, rnet); + if (ret) + break; + } + up_read(&rdma_nets_rwsem); + } + up_read(&devices_rwsem); + if (ret) + remove_all_compat_devs(); + return ret; +} + +int rdma_compatdev_set(u8 enable) +{ + struct rdma_dev_net *rnet; + unsigned long index; + int ret = 0; + + down_write(&rdma_nets_rwsem); + if (ib_devices_shared_netns == enable) { + up_write(&rdma_nets_rwsem); + return 0; + } + + /* enable/disable of compat devices is not supported + * when more than default init_net exists. + */ + xa_for_each (&rdma_nets, index, rnet) { + ret++; + break; + } + if (!ret) + ib_devices_shared_netns = enable; + up_write(&rdma_nets_rwsem); + if (ret) + return -EBUSY; + + if (enable) + ret = add_all_compat_devs(); + else + remove_all_compat_devs(); + return ret; +} + +static void rdma_dev_exit_net(struct net *net) +{ + struct rdma_dev_net *rnet = net_generic(net, rdma_dev_net_id); + struct ib_device *dev; + unsigned long index; + int ret; + + down_write(&rdma_nets_rwsem); + /* + * Prevent the ID from being re-used and hide the id from xa_for_each. + */ + ret = xa_err(xa_store(&rdma_nets, rnet->id, NULL, GFP_KERNEL)); + WARN_ON(ret); + up_write(&rdma_nets_rwsem); + + down_read(&devices_rwsem); + xa_for_each (&devices, index, dev) { + get_device(&dev->dev); + /* + * Release the devices_rwsem so that pontentially blocking + * device_del, doesn't hold the devices_rwsem for too long. + */ + up_read(&devices_rwsem); + + remove_one_compat_dev(dev, rnet->id); + + /* + * If the real device is in the NS then move it back to init. + */ + rdma_dev_change_netns(dev, net, &init_net); + + put_device(&dev->dev); + down_read(&devices_rwsem); + } + up_read(&devices_rwsem); + + xa_erase(&rdma_nets, rnet->id); +} + +static __net_init int rdma_dev_init_net(struct net *net) +{ + struct rdma_dev_net *rnet = net_generic(net, rdma_dev_net_id); + unsigned long index; + struct ib_device *dev; + int ret; + + /* No need to create any compat devices in default init_net. */ + if (net_eq(net, &init_net)) + return 0; + + write_pnet(&rnet->net, net); + + ret = xa_alloc(&rdma_nets, &rnet->id, rnet, xa_limit_32b, GFP_KERNEL); + if (ret) + return ret; + + down_read(&devices_rwsem); + xa_for_each_marked (&devices, index, dev, DEVICE_REGISTERED) { + /* Hold nets_rwsem so that netlink command cannot change + * system configuration for device sharing mode. + */ + down_read(&rdma_nets_rwsem); + ret = add_one_compat_dev(dev, rnet); + up_read(&rdma_nets_rwsem); + if (ret) + break; + } + up_read(&devices_rwsem); + + if (ret) + rdma_dev_exit_net(net); + + return ret; +} + /* * Assign the unique string device name and the unique device index. This is * undone by ib_dealloc_device. @@ -711,6 +1159,9 @@ static void setup_dma_device(struct ib_device *device) WARN_ON_ONCE(!parent); device->dma_device = parent; } + /* Setup default max segment size for all IB devices */ + dma_set_max_seg_size(device->dma_device, SZ_2G); + } /* @@ -765,8 +1216,12 @@ static void disable_device(struct ib_device *device) ib_device_put(device); wait_for_completion(&device->unreg_completion); - /* Expedite removing unregistered pointers from the hash table */ - free_netdevs(device); + /* + * compat devices must be removed after device refcount drops to zero. + * Otherwise init_net() may add more compatdevs after removing compat + * devices and before device is disabled. + */ + remove_compat_devs(device); } /* @@ -807,7 +1262,8 @@ static int enable_device_and_get(struct ib_device *device) break; } up_read(&clients_rwsem); - + if (!ret) + ret = add_compat_devs(device); out: up_read(&devices_rwsem); return ret; @@ -847,6 +1303,11 @@ int ib_register_device(struct ib_device *device, const char *name) ib_device_register_rdmacg(device); + /* + * Ensure that ADD uevent is not fired because it + * is too early amd device is not initialized yet. + */ + dev_set_uevent_suppress(&device->dev, true); ret = device_add(&device->dev); if (ret) goto cg_cleanup; @@ -859,6 +1320,9 @@ int ib_register_device(struct ib_device *device, const char *name) } ret = enable_device_and_get(device); + dev_set_uevent_suppress(&device->dev, false); + /* Mark for userspace that device is ready */ + kobject_uevent(&device->dev.kobj, KOBJ_ADD); if (ret) { void (*dealloc_fn)(struct ib_device *); @@ -887,6 +1351,7 @@ int ib_register_device(struct ib_device *device, const char *name) dev_cleanup: device_del(&device->dev); cg_cleanup: + dev_set_uevent_suppress(&device->dev, false); ib_device_unregister_rdmacg(device); ib_cache_cleanup_one(device); return ret; @@ -908,6 +1373,10 @@ static void __ib_unregister_device(struct ib_device *ib_dev) goto out; disable_device(ib_dev); + + /* Expedite removing unregistered pointers from the hash table */ + free_netdevs(ib_dev); + ib_device_unregister_sysfs(ib_dev); device_del(&ib_dev->dev); ib_device_unregister_rdmacg(ib_dev); @@ -1038,6 +1507,126 @@ void ib_unregister_device_queued(struct ib_device *ib_dev) } EXPORT_SYMBOL(ib_unregister_device_queued); +/* + * The caller must pass in a device that has the kref held and the refcount + * released. If the device is in cur_net and still registered then it is moved + * into net. + */ +static int rdma_dev_change_netns(struct ib_device *device, struct net *cur_net, + struct net *net) +{ + int ret2 = -EINVAL; + int ret; + + mutex_lock(&device->unregistration_lock); + + /* + * If a device not under ib_device_get() or if the unregistration_lock + * is not held, the namespace can be changed, or it can be unregistered. + * Check again under the lock. + */ + if (refcount_read(&device->refcount) == 0 || + !net_eq(cur_net, read_pnet(&device->coredev.rdma_net))) { + ret = -ENODEV; + goto out; + } + + kobject_uevent(&device->dev.kobj, KOBJ_REMOVE); + disable_device(device); + + /* + * At this point no one can be using the device, so it is safe to + * change the namespace. + */ + write_pnet(&device->coredev.rdma_net, net); + + down_read(&devices_rwsem); + /* + * Currently rdma devices are system wide unique. So the device name + * is guaranteed free in the new namespace. Publish the new namespace + * at the sysfs level. + */ + ret = device_rename(&device->dev, dev_name(&device->dev)); + up_read(&devices_rwsem); + if (ret) { + dev_warn(&device->dev, + "%s: Couldn't rename device after namespace change\n", + __func__); + /* Try and put things back and re-enable the device */ + write_pnet(&device->coredev.rdma_net, cur_net); + } + + ret2 = enable_device_and_get(device); + if (ret2) { + /* + * This shouldn't really happen, but if it does, let the user + * retry at later point. So don't disable the device. + */ + dev_warn(&device->dev, + "%s: Couldn't re-enable device after namespace change\n", + __func__); + } + kobject_uevent(&device->dev.kobj, KOBJ_ADD); + + ib_device_put(device); +out: + mutex_unlock(&device->unregistration_lock); + if (ret) + return ret; + return ret2; +} + +int ib_device_set_netns_put(struct sk_buff *skb, + struct ib_device *dev, u32 ns_fd) +{ + struct net *net; + int ret; + + net = get_net_ns_by_fd(ns_fd); + if (IS_ERR(net)) { + ret = PTR_ERR(net); + goto net_err; + } + + if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN)) { + ret = -EPERM; + goto ns_err; + } + + /* + * Currently supported only for those providers which support + * disassociation and don't do port specific sysfs init. Once a + * port_cleanup infrastructure is implemented, this limitation will be + * removed. + */ + if (!dev->ops.disassociate_ucontext || dev->ops.init_port || + ib_devices_shared_netns) { + ret = -EOPNOTSUPP; + goto ns_err; + } + + get_device(&dev->dev); + ib_device_put(dev); + ret = rdma_dev_change_netns(dev, current->nsproxy->net_ns, net); + put_device(&dev->dev); + + put_net(net); + return ret; + +ns_err: + put_net(net); +net_err: + ib_device_put(dev); + return ret; +} + +static struct pernet_operations rdma_dev_net_ops = { + .init = rdma_dev_init_net, + .exit = rdma_dev_exit_net, + .id = &rdma_dev_net_id, + .size = sizeof(struct rdma_dev_net), +}; + static int assign_client_id(struct ib_client *client) { int ret; @@ -1515,6 +2104,9 @@ int ib_enum_all_devs(nldev_callback nldev_cb, struct sk_buff *skb, down_read(&devices_rwsem); xa_for_each_marked (&devices, index, dev, DEVICE_REGISTERED) { + if (!rdma_dev_access_netns(dev, sock_net(skb->sk))) + continue; + ret = nldev_cb(dev, skb, cb, idx); if (ret) break; @@ -1787,6 +2379,14 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops) SET_DEVICE_OP(dev_ops, get_vf_config); SET_DEVICE_OP(dev_ops, get_vf_stats); SET_DEVICE_OP(dev_ops, init_port); + SET_DEVICE_OP(dev_ops, iw_accept); + SET_DEVICE_OP(dev_ops, iw_add_ref); + SET_DEVICE_OP(dev_ops, iw_connect); + SET_DEVICE_OP(dev_ops, iw_create_listen); + SET_DEVICE_OP(dev_ops, iw_destroy_listen); + SET_DEVICE_OP(dev_ops, iw_get_qp); + SET_DEVICE_OP(dev_ops, iw_reject); + SET_DEVICE_OP(dev_ops, iw_rem_ref); SET_DEVICE_OP(dev_ops, map_mr_sg); SET_DEVICE_OP(dev_ops, map_phys_fmr); SET_DEVICE_OP(dev_ops, mmap); @@ -1823,7 +2423,9 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops) SET_DEVICE_OP(dev_ops, set_vf_link_state); SET_DEVICE_OP(dev_ops, unmap_fmr); + SET_OBJ_SIZE(dev_ops, ib_ah); SET_OBJ_SIZE(dev_ops, ib_pd); + SET_OBJ_SIZE(dev_ops, ib_srq); SET_OBJ_SIZE(dev_ops, ib_ucontext); } EXPORT_SYMBOL(ib_set_device_ops); @@ -1903,12 +2505,20 @@ static int __init ib_core_init(void) goto err_sa; } + ret = register_pernet_device(&rdma_dev_net_ops); + if (ret) { + pr_warn("Couldn't init compat dev. ret %d\n", ret); + goto err_compat; + } + nldev_init(); rdma_nl_register(RDMA_NL_LS, ibnl_ls_cb_table); roce_gid_mgmt_init(); return 0; +err_compat: + unregister_lsm_notifier(&ibdev_lsm_nb); err_sa: ib_sa_cleanup(); err_mad: @@ -1933,6 +2543,7 @@ static void __exit ib_core_cleanup(void) roce_gid_mgmt_cleanup(); nldev_exit(); rdma_nl_unregister(RDMA_NL_LS); + unregister_pernet_device(&rdma_dev_net_ops); unregister_lsm_notifier(&ibdev_lsm_nb); ib_sa_cleanup(); ib_mad_cleanup(); @@ -1950,5 +2561,8 @@ static void __exit ib_core_cleanup(void) MODULE_ALIAS_RDMA_NETLINK(RDMA_NL_LS, 4); -subsys_initcall(ib_core_init); +/* ib core relies on netdev stack to first register net_ns_type_operations + * ns kobject type before ib_core initialization. + */ +fs_initcall(ib_core_init); module_exit(ib_core_cleanup); |