summaryrefslogtreecommitdiff
path: root/drivers/infiniband/core
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/infiniband/core')
-rw-r--r-- drivers/infiniband/core/cache.c 3
-rw-r--r-- drivers/infiniband/core/cma.c 36
-rw-r--r-- drivers/infiniband/core/cma_priv.h 4
-rw-r--r-- drivers/infiniband/core/core_priv.h 9
-rw-r--r-- drivers/infiniband/core/device.c 731
-rw-r--r-- drivers/infiniband/core/security.c 61
-rw-r--r-- drivers/infiniband/core/ucma.c 7
-rw-r--r-- drivers/infiniband/core/uverbs_cmd.c 15
-rw-r--r-- drivers/infiniband/core/uverbs_std_types.c 2
-rw-r--r-- drivers/infiniband/core/verbs.c 27
10 files changed, 534 insertions, 361 deletions
diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c
index 7b04590f307f..2338d0b3a0ca 100644
--- a/drivers/infiniband/core/cache.c
+++ b/drivers/infiniband/core/cache.c
@@ -1460,6 +1460,9 @@ void ib_cache_release_one(struct ib_device *device)
{
int p;
+ if (!device->cache.ports)
+ return;
+
/*
* The release function frees all the cache elements.
* This function should be called as part of freeing
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index e15546ae4d0f..c43512752b8a 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -888,6 +888,7 @@ struct rdma_cm_id *__rdma_create_id(struct net *net,
id_priv->id.ps = ps;
id_priv->id.qp_type = qp_type;
id_priv->tos_set = false;
+ id_priv->timeout_set = false;
id_priv->gid_type = IB_GID_TYPE_IB;
spin_lock_init(&id_priv->lock);
mutex_init(&id_priv->qp_mutex);
@@ -1130,6 +1131,9 @@ int rdma_init_qp_attr(struct rdma_cm_id *id, struct ib_qp_attr *qp_attr,
} else
ret = -ENOSYS;
+ if ((*qp_attr_mask & IB_QP_TIMEOUT) && id_priv->timeout_set)
+ qp_attr->timeout = id_priv->timeout;
+
return ret;
}
EXPORT_SYMBOL(rdma_init_qp_attr);
@@ -2410,6 +2414,7 @@ static int cma_iw_listen(struct rdma_id_private *id_priv, int backlog)
return PTR_ERR(id);
id->tos = id_priv->tos;
+ id->tos_set = id_priv->tos_set;
id_priv->cm_id.iw = id;
memcpy(&id_priv->cm_id.iw->local_addr, cma_src_addr(id_priv),
@@ -2462,6 +2467,8 @@ static void cma_listen_on_dev(struct rdma_id_private *id_priv,
atomic_inc(&id_priv->refcount);
dev_id_priv->internal_id = 1;
dev_id_priv->afonly = id_priv->afonly;
+ dev_id_priv->tos_set = id_priv->tos_set;
+ dev_id_priv->tos = id_priv->tos;
ret = rdma_listen(id, id_priv->backlog);
if (ret)
@@ -2490,6 +2497,34 @@ void rdma_set_service_type(struct rdma_cm_id *id, int tos)
}
EXPORT_SYMBOL(rdma_set_service_type);
+/**
+ * rdma_set_ack_timeout() - Set the ack timeout of QP associated
+ * with a connection identifier.
+ * @id: Communication identifier to associate with the ack timeout.
+ * @timeout: Ack timeout to set on the QP, expressed as 4.096 * 2^(timeout) usec.
+ *
+ * This function should be called before rdma_connect() on the active side,
+ * and before rdma_accept() on the passive side. It is applicable to the
+ * primary path only. The timeout affects only the local side of the QP; it
+ * is not negotiated with the remote side. A timeout of zero disables the timer.
+ *
+ * Return: 0 on success, -EINVAL if the QP type is not IB_QPT_RC
+ */
+int rdma_set_ack_timeout(struct rdma_cm_id *id, u8 timeout)
+{
+ struct rdma_id_private *id_priv;
+
+ if (id->qp_type != IB_QPT_RC)
+ return -EINVAL;
+
+ id_priv = container_of(id, struct rdma_id_private, id);
+ id_priv->timeout = timeout;
+ id_priv->timeout_set = true;
+
+ return 0;
+}
+EXPORT_SYMBOL(rdma_set_ack_timeout);
+
static void cma_query_handler(int status, struct sa_path_rec *path_rec,
void *context)
{
@@ -3809,6 +3844,7 @@ static int cma_connect_iw(struct rdma_id_private *id_priv,
return PTR_ERR(cm_id);
cm_id->tos = id_priv->tos;
+ cm_id->tos_set = id_priv->tos_set;
id_priv->cm_id.iw = cm_id;
memcpy(&cm_id->local_addr, cma_src_addr(id_priv),
diff --git a/drivers/infiniband/core/cma_priv.h b/drivers/infiniband/core/cma_priv.h
index cf47c69436a7..ca7307277518 100644
--- a/drivers/infiniband/core/cma_priv.h
+++ b/drivers/infiniband/core/cma_priv.h
@@ -84,9 +84,11 @@ struct rdma_id_private {
u32 options;
u8 srq;
u8 tos;
- bool tos_set;
+ u8 tos_set:1;
+ u8 timeout_set:1;
u8 reuseaddr;
u8 afonly;
+ u8 timeout;
enum ib_gid_type gid_type;
/*
diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h
index bcb3e3029a9b..a1826f4c2e23 100644
--- a/drivers/infiniband/core/core_priv.h
+++ b/drivers/infiniband/core/core_priv.h
@@ -181,7 +181,7 @@ int ib_get_cached_subnet_prefix(struct ib_device *device,
u64 *sn_pfx);
#ifdef CONFIG_SECURITY_INFINIBAND
-void ib_security_destroy_port_pkey_list(struct ib_device *device);
+void ib_security_release_port_pkey_list(struct ib_device *device);
void ib_security_cache_change(struct ib_device *device,
u8 port_num,
@@ -202,8 +202,9 @@ int ib_mad_agent_security_setup(struct ib_mad_agent *agent,
enum ib_qp_type qp_type);
void ib_mad_agent_security_cleanup(struct ib_mad_agent *agent);
int ib_mad_enforce_security(struct ib_mad_agent_private *map, u16 pkey_index);
+void ib_mad_agent_security_change(void);
#else
-static inline void ib_security_destroy_port_pkey_list(struct ib_device *device)
+static inline void ib_security_release_port_pkey_list(struct ib_device *device)
{
}
@@ -267,6 +268,10 @@ static inline int ib_mad_enforce_security(struct ib_mad_agent_private *map,
{
return 0;
}
+
+static inline void ib_mad_agent_security_change(void)
+{
+}
#endif
struct ib_device *ib_device_get_by_index(u32 ifindex);
diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index 55221990d946..3325be4f91a5 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -37,7 +37,6 @@
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/init.h>
-#include <linux/mutex.h>
#include <linux/netdevice.h>
#include <linux/security.h>
#include <linux/notifier.h>
@@ -51,39 +50,88 @@ MODULE_AUTHOR("Roland Dreier");
MODULE_DESCRIPTION("core kernel InfiniBand API");
MODULE_LICENSE("Dual BSD/GPL");
-struct ib_client_data {
- struct list_head list;
- struct ib_client *client;
- void * data;
- /* The device or client is going down. Do not call client or device
- * callbacks other than remove(). */
- bool going_down;
-};
-
struct workqueue_struct *ib_comp_wq;
struct workqueue_struct *ib_comp_unbound_wq;
struct workqueue_struct *ib_wq;
EXPORT_SYMBOL_GPL(ib_wq);
-/* The device_list and client_list contain devices and clients after their
- * registration has completed, and the devices and clients are removed
- * during unregistration. */
-static LIST_HEAD(device_list);
-static LIST_HEAD(client_list);
+/*
+ * Each of the three rwsem locks (devices, clients, client_data) protects the
+ * xarray of the same name. Specifically it allows the caller to assert that
+ * the MARK will/will not be changing under the lock, and for devices and
+ * clients, that the value in the xarray is still a valid pointer. Change of
+ * the MARK is linked to the object state, so holding the lock and testing the
+ * MARK also asserts that the contained object is in a certain state.
+ *
+ * This is used to build a two stage register/unregister flow where objects
+ * can continue to be in the xarray even though they are still in progress to
+ * register/unregister.
+ *
+ * The xarray itself provides additional locking, and restartable iteration,
+ * which is also relied on.
+ *
+ * Locks should not be nested, with the exception of client_data, which is
+ * allowed to nest under the read side of the other two locks.
+ *
+ * The devices_rwsem also protects the device name list. Any change or
+ * assignment of a device name must also hold the write side to guarantee
+ * unique names.
+ */
/*
- * device_mutex and lists_rwsem protect access to both device_list and
- * client_list. device_mutex protects writer access by device and client
- * registration / de-registration. lists_rwsem protects reader access to
- * these lists. Iterators of these lists must lock it for read, while updates
- * to the lists must be done with a write lock. A special case is when the
- * device_mutex is locked. In this case locking the lists for read access is
- * not necessary as the device_mutex implies it.
+ * devices contains devices that have had their names assigned. The
+ * devices may not be registered. Users that care about the registration
+ * status need to call ib_device_try_get() on the device to ensure it is
+ * registered, and keep it registered, for the required duration.
*
- * lists_rwsem also protects access to the client data list.
*/
-static DEFINE_MUTEX(device_mutex);
-static DECLARE_RWSEM(lists_rwsem);
+static DEFINE_XARRAY_FLAGS(devices, XA_FLAGS_ALLOC);
+static DECLARE_RWSEM(devices_rwsem);
+#define DEVICE_REGISTERED XA_MARK_1
+
+static LIST_HEAD(client_list);
+#define CLIENT_REGISTERED XA_MARK_1
+static DEFINE_XARRAY_FLAGS(clients, XA_FLAGS_ALLOC);
+static DECLARE_RWSEM(clients_rwsem);
+
+/*
+ * If client_data is registered then the corresponding client must also still
+ * be registered.
+ */
+#define CLIENT_DATA_REGISTERED XA_MARK_1
+/*
+ * xarray has this behavior where it won't iterate over NULL values stored in
+ * allocated arrays. So we need our own iterator to see all values stored in
+ * the array. This does the same thing as xa_for_each except that it also
+ * returns NULL valued entries if the array is allocating. Simplified to only
+ * work on simple xarrays.
+ */
+static void *xan_find_marked(struct xarray *xa, unsigned long *indexp,
+ xa_mark_t filter)
+{
+ XA_STATE(xas, xa, *indexp);
+ void *entry;
+
+ rcu_read_lock();
+ do {
+ entry = xas_find_marked(&xas, ULONG_MAX, filter);
+ if (xa_is_zero(entry))
+ break;
+ } while (xas_retry(&xas, entry));
+ rcu_read_unlock();
+
+ if (entry) {
+ *indexp = xas.xa_index;
+ if (xa_is_zero(entry))
+ return NULL;
+ return entry;
+ }
+ return XA_ERROR(-ENOENT);
+}
+#define xan_for_each_marked(xa, index, entry, filter) \
+ for (index = 0, entry = xan_find_marked(xa, &(index), filter); \
+ !xa_is_err(entry); \
+ (index)++, entry = xan_find_marked(xa, &(index), filter))
static int ib_security_change(struct notifier_block *nb, unsigned long event,
void *lsm_data);
@@ -133,17 +181,6 @@ static int ib_device_check_mandatory(struct ib_device *device)
return 0;
}
-static struct ib_device *__ib_device_get_by_index(u32 index)
-{
- struct ib_device *device;
-
- list_for_each_entry(device, &device_list, core_list)
- if (device->index == index)
- return device;
-
- return NULL;
-}
-
/*
* Caller must perform ib_device_put() to return the device reference count
* when ib_device_get_by_index() returns valid device pointer.
@@ -152,13 +189,13 @@ struct ib_device *ib_device_get_by_index(u32 index)
{
struct ib_device *device;
- down_read(&lists_rwsem);
- device = __ib_device_get_by_index(index);
+ down_read(&devices_rwsem);
+ device = xa_load(&devices, index);
if (device) {
if (!ib_device_try_get(device))
device = NULL;
}
- up_read(&lists_rwsem);
+ up_read(&devices_rwsem);
return device;
}
@@ -179,8 +216,9 @@ EXPORT_SYMBOL(ib_device_put);
static struct ib_device *__ib_device_get_by_name(const char *name)
{
struct ib_device *device;
+ unsigned long index;
- list_for_each_entry(device, &device_list, core_list)
+ xa_for_each (&devices, index, device)
if (!strcmp(name, dev_name(&device->dev)))
return device;
@@ -189,12 +227,14 @@ static struct ib_device *__ib_device_get_by_name(const char *name)
int ib_device_rename(struct ib_device *ibdev, const char *name)
{
- int ret = 0;
+ int ret;
- if (!strcmp(name, dev_name(&ibdev->dev)))
- return ret;
+ down_write(&devices_rwsem);
+ if (!strcmp(name, dev_name(&ibdev->dev))) {
+ ret = 0;
+ goto out;
+ }
- mutex_lock(&device_mutex);
if (__ib_device_get_by_name(name)) {
ret = -EEXIST;
goto out;
@@ -205,52 +245,56 @@ int ib_device_rename(struct ib_device *ibdev, const char *name)
goto out;
strlcpy(ibdev->name, name, IB_DEVICE_NAME_MAX);
out:
- mutex_unlock(&device_mutex);
+ up_write(&devices_rwsem);
return ret;
}
static int alloc_name(struct ib_device *ibdev, const char *name)
{
- unsigned long *inuse;
struct ib_device *device;
+ unsigned long index;
+ struct ida inuse;
+ int rc;
int i;
- inuse = (unsigned long *) get_zeroed_page(GFP_KERNEL);
- if (!inuse)
- return -ENOMEM;
-
- list_for_each_entry(device, &device_list, core_list) {
+ lockdep_assert_held_exclusive(&devices_rwsem);
+ ida_init(&inuse);
+ xa_for_each (&devices, index, device) {
char buf[IB_DEVICE_NAME_MAX];
if (sscanf(dev_name(&device->dev), name, &i) != 1)
continue;
- if (i < 0 || i >= PAGE_SIZE * 8)
+ if (i < 0 || i >= INT_MAX)
continue;
snprintf(buf, sizeof buf, name, i);
- if (!strcmp(buf, dev_name(&device->dev)))
- set_bit(i, inuse);
+ if (strcmp(buf, dev_name(&device->dev)) != 0)
+ continue;
+
+ rc = ida_alloc_range(&inuse, i, i, GFP_KERNEL);
+ if (rc < 0)
+ goto out;
}
- i = find_first_zero_bit(inuse, PAGE_SIZE * 8);
- free_page((unsigned long) inuse);
+ rc = ida_alloc(&inuse, GFP_KERNEL);
+ if (rc < 0)
+ goto out;
- return dev_set_name(&ibdev->dev, name, i);
+ rc = dev_set_name(&ibdev->dev, name, rc);
+out:
+ ida_destroy(&inuse);
+ return rc;
}
static void ib_device_release(struct device *device)
{
struct ib_device *dev = container_of(device, struct ib_device, dev);
- WARN_ON(dev->reg_state == IB_DEV_REGISTERED);
- if (dev->reg_state == IB_DEV_UNREGISTERED) {
- /*
- * In IB_DEV_UNINITIALIZED state, cache or port table
- * is not even created. Free cache and port table only when
- * device reaches UNREGISTERED state.
- */
- ib_cache_release_one(dev);
- kfree(dev->port_immutable);
- }
+ WARN_ON(refcount_read(&dev->refcount));
+ ib_cache_release_one(dev);
+ ib_security_release_port_pkey_list(dev);
+ kfree(dev->port_pkey_list);
+ kfree(dev->port_immutable);
+ xa_destroy(&dev->client_data);
kfree(dev);
}
@@ -301,8 +345,12 @@ struct ib_device *_ib_alloc_device(size_t size)
INIT_LIST_HEAD(&device->event_handler_list);
spin_lock_init(&device->event_handler_lock);
- rwlock_init(&device->client_data_lock);
- INIT_LIST_HEAD(&device->client_data_list);
+ /*
+ * client_data must be an allocating xarray because we don't want our mark
+ * to be destroyed if the user stores NULL in the client data.
+ */
+ xa_init_flags(&device->client_data, XA_FLAGS_ALLOC);
+ init_rwsem(&device->client_data_rwsem);
INIT_LIST_HEAD(&device->port_list);
init_completion(&device->unreg_completion);
@@ -318,36 +366,93 @@ EXPORT_SYMBOL(_ib_alloc_device);
*/
void ib_dealloc_device(struct ib_device *device)
{
- WARN_ON(!list_empty(&device->client_data_list));
- WARN_ON(device->reg_state != IB_DEV_UNREGISTERED &&
- device->reg_state != IB_DEV_UNINITIALIZED);
+ WARN_ON(!xa_empty(&device->client_data));
+ WARN_ON(refcount_read(&device->refcount));
rdma_restrack_clean(device);
put_device(&device->dev);
}
EXPORT_SYMBOL(ib_dealloc_device);
-static int add_client_context(struct ib_device *device, struct ib_client *client)
+/*
+ * add_client_context() and remove_client_context() must be safe against
+ * parallel calls on the same device - registration/unregistration of both the
+ * device and client can be occurring in parallel.
+ *
+ * The routines need to be a fence, any caller must not return until the add
+ * or remove is fully completed.
+ */
+static int add_client_context(struct ib_device *device,
+ struct ib_client *client)
{
- struct ib_client_data *context;
+ int ret = 0;
if (!device->kverbs_provider && !client->no_kverbs_req)
- return -EOPNOTSUPP;
+ return 0;
- context = kmalloc(sizeof(*context), GFP_KERNEL);
- if (!context)
- return -ENOMEM;
+ down_write(&device->client_data_rwsem);
+ /*
+ * Another caller to add_client_context got here first and has already
+ * completely initialized context.
+ */
+ if (xa_get_mark(&device->client_data, client->client_id,
+ CLIENT_DATA_REGISTERED))
+ goto out;
- context->client = client;
- context->data = NULL;
- context->going_down = false;
+ ret = xa_err(xa_store(&device->client_data, client->client_id, NULL,
+ GFP_KERNEL));
+ if (ret)
+ goto out;
+ downgrade_write(&device->client_data_rwsem);
+ if (client->add)
+ client->add(device);
+
+ /* Readers shall not see a client until add has been completed */
+ xa_set_mark(&device->client_data, client->client_id,
+ CLIENT_DATA_REGISTERED);
+ up_read(&device->client_data_rwsem);
+ return 0;
- down_write(&lists_rwsem);
- write_lock_irq(&device->client_data_lock);
- list_add(&context->list, &device->client_data_list);
- write_unlock_irq(&device->client_data_lock);
- up_write(&lists_rwsem);
+out:
+ up_write(&device->client_data_rwsem);
+ return ret;
+}
- return 0;
+static void remove_client_context(struct ib_device *device,
+ unsigned int client_id)
+{
+ struct ib_client *client;
+ void *client_data;
+
+ down_write(&device->client_data_rwsem);
+ if (!xa_get_mark(&device->client_data, client_id,
+ CLIENT_DATA_REGISTERED)) {
+ up_write(&device->client_data_rwsem);
+ return;
+ }
+ client_data = xa_load(&device->client_data, client_id);
+ xa_clear_mark(&device->client_data, client_id, CLIENT_DATA_REGISTERED);
+ client = xa_load(&clients, client_id);
+ downgrade_write(&device->client_data_rwsem);
+
+ /*
+ * Notice we cannot be holding any exclusive locks when calling the
+ * remove callback as the remove callback can recurse back into any
+ * public functions in this module and thus try for any locks those
+ * functions take.
+ *
+ * For this reason clients and drivers should not call the
+ * unregistration functions while holding any locks.
+ *
+ * It is tempting to drop the client_data_rwsem too, but this is required
+ * to ensure that unregister_client does not return until all clients
+ * are completely unregistered, which is required to avoid module
+ * unloading races.
+ */
+ if (client->remove)
+ client->remove(device, client_data);
+
+ xa_erase(&device->client_data, client_id);
+ up_read(&device->client_data_rwsem);
}
static int verify_immutable(const struct ib_device *dev, u8 port)
@@ -424,9 +529,10 @@ static int setup_port_pkey_list(struct ib_device *device)
static void ib_policy_change_task(struct work_struct *work)
{
struct ib_device *dev;
+ unsigned long index;
- down_read(&lists_rwsem);
- list_for_each_entry(dev, &device_list, core_list) {
+ down_read(&devices_rwsem);
+ xa_for_each_marked (&devices, index, dev, DEVICE_REGISTERED) {
int i;
for (i = rdma_start_port(dev); i <= rdma_end_port(dev); i++) {
@@ -442,7 +548,7 @@ static void ib_policy_change_task(struct work_struct *work)
ib_security_cache_change(dev, i, sp);
}
}
- up_read(&lists_rwsem);
+ up_read(&devices_rwsem);
}
static int ib_security_change(struct notifier_block *nb, unsigned long event,
@@ -452,32 +558,58 @@ static int ib_security_change(struct notifier_block *nb, unsigned long event,
return NOTIFY_DONE;
schedule_work(&ib_policy_change_work);
+ ib_mad_agent_security_change();
return NOTIFY_OK;
}
-/**
- * __dev_new_index - allocate an device index
- *
- * Returns a suitable unique value for a new device interface
- * number. It assumes that there are less than 2^32-1 ib devices
- * will be present in the system.
+/*
+ * Assign the unique string device name and the unique device index.
*/
-static u32 __dev_new_index(void)
+static int assign_name(struct ib_device *device, const char *name)
{
- /*
- * The device index to allow stable naming.
- * Similar to struct net -> ifindex.
- */
- static u32 index;
+ static u32 last_id;
+ int ret;
- for (;;) {
- if (!(++index))
- index = 1;
+ down_write(&devices_rwsem);
+ /* Assign a unique name to the device */
+ if (strchr(name, '%'))
+ ret = alloc_name(device, name);
+ else
+ ret = dev_set_name(&device->dev, name);
+ if (ret)
+ goto out;
- if (!__ib_device_get_by_index(index))
- return index;
+ if (__ib_device_get_by_name(dev_name(&device->dev))) {
+ ret = -ENFILE;
+ goto out;
}
+ strlcpy(device->name, dev_name(&device->dev), IB_DEVICE_NAME_MAX);
+
+ /* Cyclically allocate a user visible ID for the device */
+ device->index = last_id;
+ ret = xa_alloc(&devices, &device->index, INT_MAX, device, GFP_KERNEL);
+ if (ret == -ENOSPC) {
+ device->index = 0;
+ ret = xa_alloc(&devices, &device->index, INT_MAX, device,
+ GFP_KERNEL);
+ }
+ if (ret)
+ goto out;
+ last_id = device->index + 1;
+
+ ret = 0;
+
+out:
+ up_write(&devices_rwsem);
+ return ret;
+}
+
+static void release_name(struct ib_device *device)
+{
+ down_write(&devices_rwsem);
+ xa_erase(&devices, device->index);
+ up_write(&devices_rwsem);
}
static void setup_dma_device(struct ib_device *device)
@@ -515,19 +647,18 @@ static void setup_dma_device(struct ib_device *device)
}
}
-static void cleanup_device(struct ib_device *device)
-{
- ib_cache_cleanup_one(device);
- ib_cache_release_one(device);
- kfree(device->port_pkey_list);
- kfree(device->port_immutable);
-}
-
+/*
+ * setup_device() allocates memory and sets up data that requires calling the
+ * device ops; this is the only reason these actions are not done during
+ * ib_alloc_device(). It is undone by ib_dealloc_device().
+ */
static int setup_device(struct ib_device *device)
{
struct ib_udata uhw = {.outlen = 0, .inlen = 0};
int ret;
+ setup_dma_device(device);
+
ret = ib_device_check_mandatory(device);
if (ret)
return ret;
@@ -544,28 +675,64 @@ static int setup_device(struct ib_device *device)
if (ret) {
dev_warn(&device->dev,
"Couldn't query the device attributes\n");
- goto port_cleanup;
+ return ret;
}
ret = setup_port_pkey_list(device);
if (ret) {
dev_warn(&device->dev, "Couldn't create per port_pkey_list\n");
- goto port_cleanup;
+ return ret;
}
- ret = ib_cache_setup_one(device);
- if (ret) {
- dev_warn(&device->dev,
- "Couldn't set up InfiniBand P_Key/GID cache\n");
- goto pkey_cleanup;
- }
return 0;
+}
-pkey_cleanup:
- kfree(device->port_pkey_list);
-port_cleanup:
- kfree(device->port_immutable);
- return ret;
+static void disable_device(struct ib_device *device)
+{
+ struct ib_client *client;
+
+ WARN_ON(!refcount_read(&device->refcount));
+
+ down_write(&devices_rwsem);
+ xa_clear_mark(&devices, device->index, DEVICE_REGISTERED);
+ up_write(&devices_rwsem);
+
+ down_read(&clients_rwsem);
+ list_for_each_entry_reverse(client, &client_list, list)
+ remove_client_context(device, client->client_id);
+ up_read(&clients_rwsem);
+
+ /* Pairs with refcount_set in enable_device */
+ ib_device_put(device);
+ wait_for_completion(&device->unreg_completion);
+}
+
+/*
+ * An enabled device is visible to all clients and to all the public facing
+ * APIs that return a device pointer.
+ */
+static int enable_device(struct ib_device *device)
+{
+ struct ib_client *client;
+ unsigned long index;
+ int ret;
+
+ refcount_set(&device->refcount, 1);
+ down_write(&devices_rwsem);
+ xa_set_mark(&devices, device->index, DEVICE_REGISTERED);
+ up_write(&devices_rwsem);
+
+ down_read(&clients_rwsem);
+ xa_for_each_marked (&clients, index, client, CLIENT_REGISTERED) {
+ ret = add_client_context(device, client);
+ if (ret) {
+ up_read(&clients_rwsem);
+ disable_device(device);
+ return ret;
+ }
+ }
+ up_read(&clients_rwsem);
+ return 0;
}
/**
@@ -580,32 +747,21 @@ port_cleanup:
int ib_register_device(struct ib_device *device, const char *name)
{
int ret;
- struct ib_client *client;
-
- setup_dma_device(device);
-
- mutex_lock(&device_mutex);
- if (strchr(name, '%')) {
- ret = alloc_name(device, name);
- if (ret)
- goto out;
- } else {
- ret = dev_set_name(&device->dev, name);
- if (ret)
- goto out;
- }
- if (__ib_device_get_by_name(dev_name(&device->dev))) {
- ret = -ENFILE;
- goto out;
- }
- strlcpy(device->name, dev_name(&device->dev), IB_DEVICE_NAME_MAX);
+ ret = assign_name(device, name);
+ if (ret)
+ return ret;
ret = setup_device(device);
if (ret)
goto out;
- device->index = __dev_new_index();
+ ret = ib_cache_setup_one(device);
+ if (ret) {
+ dev_warn(&device->dev,
+ "Couldn't set up InfiniBand P_Key/GID cache\n");
+ goto out;
+ }
ib_device_register_rdmacg(device);
@@ -616,24 +772,19 @@ int ib_register_device(struct ib_device *device, const char *name)
goto cg_cleanup;
}
- refcount_set(&device->refcount, 1);
- device->reg_state = IB_DEV_REGISTERED;
-
- list_for_each_entry(client, &client_list, list)
- if (!add_client_context(device, client) && client->add)
- client->add(device);
+ ret = enable_device(device);
+ if (ret)
+ goto sysfs_cleanup;
- down_write(&lists_rwsem);
- list_add_tail(&device->core_list, &device_list);
- up_write(&lists_rwsem);
- mutex_unlock(&device_mutex);
return 0;
+sysfs_cleanup:
+ ib_device_unregister_sysfs(device);
cg_cleanup:
ib_device_unregister_rdmacg(device);
- cleanup_device(device);
+ ib_cache_cleanup_one(device);
out:
- mutex_unlock(&device_mutex);
+ release_name(device);
return ret;
}
EXPORT_SYMBOL(ib_register_device);
@@ -646,55 +797,44 @@ EXPORT_SYMBOL(ib_register_device);
*/
void ib_unregister_device(struct ib_device *device)
{
- struct ib_client_data *context, *tmp;
- unsigned long flags;
-
- /*
- * Wait for all netlink command callers to finish working on the
- * device.
- */
- ib_device_put(device);
- wait_for_completion(&device->unreg_completion);
-
- mutex_lock(&device_mutex);
-
- down_write(&lists_rwsem);
- list_del(&device->core_list);
- write_lock_irq(&device->client_data_lock);
- list_for_each_entry(context, &device->client_data_list, list)
- context->going_down = true;
- write_unlock_irq(&device->client_data_lock);
- downgrade_write(&lists_rwsem);
-
- list_for_each_entry(context, &device->client_data_list, list) {
- if (context->client->remove)
- context->client->remove(device, context->data);
- }
- up_read(&lists_rwsem);
-
+ disable_device(device);
ib_device_unregister_sysfs(device);
ib_device_unregister_rdmacg(device);
-
- mutex_unlock(&device_mutex);
-
ib_cache_cleanup_one(device);
+ release_name(device);
+}
+EXPORT_SYMBOL(ib_unregister_device);
- ib_security_destroy_port_pkey_list(device);
- kfree(device->port_pkey_list);
+static int assign_client_id(struct ib_client *client)
+{
+ int ret;
- down_write(&lists_rwsem);
- write_lock_irqsave(&device->client_data_lock, flags);
- list_for_each_entry_safe(context, tmp, &device->client_data_list,
- list) {
- list_del(&context->list);
- kfree(context);
- }
- write_unlock_irqrestore(&device->client_data_lock, flags);
- up_write(&lists_rwsem);
+ down_write(&clients_rwsem);
+ /*
+ * The add/remove callbacks must be called in FIFO/LIFO order. To
+ * achieve this we assign client_ids so they are sorted in
+ * registration order, and retain a linked list we can reverse iterate
+ * to get the LIFO order. The extra linked list can go away if xarray
+ * learns to reverse iterate.
+ */
+ if (list_empty(&client_list))
+ client->client_id = 0;
+ else
+ client->client_id =
+ list_last_entry(&client_list, struct ib_client, list)
+ ->client_id;
+ ret = xa_alloc(&clients, &client->client_id, INT_MAX, client,
+ GFP_KERNEL);
+ if (ret)
+ goto out;
- device->reg_state = IB_DEV_UNREGISTERED;
+ xa_set_mark(&clients, client->client_id, CLIENT_REGISTERED);
+ list_add_tail(&client->list, &client_list);
+
+out:
+ up_write(&clients_rwsem);
+ return ret;
}
-EXPORT_SYMBOL(ib_unregister_device);
/**
* ib_register_client - Register an IB client
@@ -712,19 +852,23 @@ EXPORT_SYMBOL(ib_unregister_device);
int ib_register_client(struct ib_client *client)
{
struct ib_device *device;
+ unsigned long index;
+ int ret;
- mutex_lock(&device_mutex);
-
- list_for_each_entry(device, &device_list, core_list)
- if (!add_client_context(device, client) && client->add)
- client->add(device);
-
- down_write(&lists_rwsem);
- list_add_tail(&client->list, &client_list);
- up_write(&lists_rwsem);
-
- mutex_unlock(&device_mutex);
+ ret = assign_client_id(client);
+ if (ret)
+ return ret;
+ down_read(&devices_rwsem);
+ xa_for_each_marked (&devices, index, device, DEVICE_REGISTERED) {
+ ret = add_client_context(device, client);
+ if (ret) {
+ up_read(&devices_rwsem);
+ ib_unregister_client(client);
+ return ret;
+ }
+ }
+ up_read(&devices_rwsem);
return 0;
}
EXPORT_SYMBOL(ib_register_client);
@@ -736,108 +880,56 @@ EXPORT_SYMBOL(ib_register_client);
* Upper level users use ib_unregister_client() to remove their client
* registration. When ib_unregister_client() is called, the client
* will receive a remove callback for each IB device still registered.
+ *
+ * This is a full fence: once it returns, no client callbacks will be called
+ * or be running in another thread.
*/
void ib_unregister_client(struct ib_client *client)
{
- struct ib_client_data *context;
struct ib_device *device;
+ unsigned long index;
- mutex_lock(&device_mutex);
+ down_write(&clients_rwsem);
+ xa_clear_mark(&clients, client->client_id, CLIENT_REGISTERED);
+ up_write(&clients_rwsem);
+ /*
+ * Every device still known must be serialized to make sure we are
+ * done with the client callbacks before we return.
+ */
+ down_read(&devices_rwsem);
+ xa_for_each (&devices, index, device)
+ remove_client_context(device, client->client_id);
+ up_read(&devices_rwsem);
- down_write(&lists_rwsem);
+ down_write(&clients_rwsem);
list_del(&client->list);
- up_write(&lists_rwsem);
-
- list_for_each_entry(device, &device_list, core_list) {
- struct ib_client_data *found_context = NULL;
-
- down_write(&lists_rwsem);
- write_lock_irq(&device->client_data_lock);
- list_for_each_entry(context, &device->client_data_list, list)
- if (context->client == client) {
- context->going_down = true;
- found_context = context;
- break;
- }
- write_unlock_irq(&device->client_data_lock);
- up_write(&lists_rwsem);
-
- if (client->remove)
- client->remove(device, found_context ?
- found_context->data : NULL);
-
- if (!found_context) {
- dev_warn(&device->dev,
- "No client context found for %s\n",
- client->name);
- continue;
- }
-
- down_write(&lists_rwsem);
- write_lock_irq(&device->client_data_lock);
- list_del(&found_context->list);
- write_unlock_irq(&device->client_data_lock);
- up_write(&lists_rwsem);
- kfree(found_context);
- }
-
- mutex_unlock(&device_mutex);
+ xa_erase(&clients, client->client_id);
+ up_write(&clients_rwsem);
}
EXPORT_SYMBOL(ib_unregister_client);
/**
- * ib_get_client_data - Get IB client context
- * @device:Device to get context for
- * @client:Client to get context for
- *
- * ib_get_client_data() returns client context set with
- * ib_set_client_data().
- */
-void *ib_get_client_data(struct ib_device *device, struct ib_client *client)
-{
- struct ib_client_data *context;
- void *ret = NULL;
- unsigned long flags;
-
- read_lock_irqsave(&device->client_data_lock, flags);
- list_for_each_entry(context, &device->client_data_list, list)
- if (context->client == client) {
- ret = context->data;
- break;
- }
- read_unlock_irqrestore(&device->client_data_lock, flags);
-
- return ret;
-}
-EXPORT_SYMBOL(ib_get_client_data);
-
-/**
* ib_set_client_data - Set IB client context
* @device:Device to set context for
* @client:Client to set context for
* @data:Context to set
*
- * ib_set_client_data() sets client context that can be retrieved with
- * ib_get_client_data().
+ * ib_set_client_data() sets client context data that can be retrieved with
+ * ib_get_client_data(). This can only be called while the client is
+ * registered to the device; once the ib_client remove() callback returns,
+ * it must not be called.
*/
void ib_set_client_data(struct ib_device *device, struct ib_client *client,
void *data)
{
- struct ib_client_data *context;
- unsigned long flags;
-
- write_lock_irqsave(&device->client_data_lock, flags);
- list_for_each_entry(context, &device->client_data_list, list)
- if (context->client == client) {
- context->data = data;
- goto out;
- }
+ void *rc;
- dev_warn(&device->dev, "No client context found for %s\n",
- client->name);
+ if (WARN_ON(IS_ERR(data)))
+ data = NULL;
-out:
- write_unlock_irqrestore(&device->client_data_lock, flags);
+ rc = xa_store(&device->client_data, client->client_id, data,
+ GFP_KERNEL);
+ WARN_ON(xa_is_err(rc));
}
EXPORT_SYMBOL(ib_set_client_data);
@@ -995,11 +1087,12 @@ void ib_enum_all_roce_netdevs(roce_netdev_filter filter,
void *cookie)
{
struct ib_device *dev;
+ unsigned long index;
- down_read(&lists_rwsem);
- list_for_each_entry(dev, &device_list, core_list)
+ down_read(&devices_rwsem);
+ xa_for_each_marked (&devices, index, dev, DEVICE_REGISTERED)
ib_enum_roce_netdev(dev, filter, filter_cookie, cb, cookie);
- up_read(&lists_rwsem);
+ up_read(&devices_rwsem);
}
/**
@@ -1011,19 +1104,19 @@ void ib_enum_all_roce_netdevs(roce_netdev_filter filter,
int ib_enum_all_devs(nldev_callback nldev_cb, struct sk_buff *skb,
struct netlink_callback *cb)
{
+ unsigned long index;
struct ib_device *dev;
unsigned int idx = 0;
int ret = 0;
- down_read(&lists_rwsem);
- list_for_each_entry(dev, &device_list, core_list) {
+ down_read(&devices_rwsem);
+ xa_for_each_marked (&devices, index, dev, DEVICE_REGISTERED) {
ret = nldev_cb(dev, skb, cb, idx);
if (ret)
break;
idx++;
}
-
- up_read(&lists_rwsem);
+ up_read(&devices_rwsem);
return ret;
}
@@ -1181,6 +1274,7 @@ EXPORT_SYMBOL(ib_find_pkey);
* @gid: A GID that the net_dev uses to communicate.
* @addr: Contains the IP address that the request specified as its
* destination.
+ *
*/
struct net_device *ib_get_net_dev_by_params(struct ib_device *dev,
u8 port,
@@ -1189,29 +1283,30 @@ struct net_device *ib_get_net_dev_by_params(struct ib_device *dev,
const struct sockaddr *addr)
{
struct net_device *net_dev = NULL;
- struct ib_client_data *context;
+ unsigned long index;
+ void *client_data;
if (!rdma_protocol_ib(dev, port))
return NULL;
- down_read(&lists_rwsem);
-
- list_for_each_entry(context, &dev->client_data_list, list) {
- struct ib_client *client = context->client;
+ /*
+ * Holding the read side guarantees that the client will not become
+ * unregistered while we are calling get_net_dev_by_params()
+ */
+ down_read(&dev->client_data_rwsem);
+ xan_for_each_marked (&dev->client_data, index, client_data,
+ CLIENT_DATA_REGISTERED) {
+ struct ib_client *client = xa_load(&clients, index);
- if (context->going_down)
+ if (!client || !client->get_net_dev_by_params)
continue;
- if (client->get_net_dev_by_params) {
- net_dev = client->get_net_dev_by_params(dev, port, pkey,
- gid, addr,
- context->data);
- if (net_dev)
- break;
- }
+ net_dev = client->get_net_dev_by_params(dev, port, pkey, gid,
+ addr, client_data);
+ if (net_dev)
+ break;
}
-
- up_read(&lists_rwsem);
+ up_read(&dev->client_data_rwsem);
return net_dev;
}
@@ -1227,6 +1322,8 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops)
(ptr)->name = ops->name; \
} while (0)
+#define SET_OBJ_SIZE(ptr, name) SET_DEVICE_OP(ptr, size_##name)
+
SET_DEVICE_OP(dev_ops, add_gid);
SET_DEVICE_OP(dev_ops, advise_mr);
SET_DEVICE_OP(dev_ops, alloc_dm);
@@ -1316,6 +1413,8 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops)
SET_DEVICE_OP(dev_ops, set_vf_guid);
SET_DEVICE_OP(dev_ops, set_vf_link_state);
SET_DEVICE_OP(dev_ops, unmap_fmr);
+
+ SET_OBJ_SIZE(dev_ops, ib_pd);
}
EXPORT_SYMBOL(ib_set_device_ops);
@@ -1434,6 +1533,8 @@ static void __exit ib_core_cleanup(void)
destroy_workqueue(ib_comp_wq);
/* Make sure that any pending umem accounting work is done. */
destroy_workqueue(ib_wq);
+ WARN_ON(!xa_empty(&clients));
+ WARN_ON(!xa_empty(&devices));
}
MODULE_ALIAS_RDMA_NETLINK(RDMA_NL_LS, 4);
diff --git a/drivers/infiniband/core/security.c b/drivers/infiniband/core/security.c
index 1efadbccf394..dad6a94a43f3 100644
--- a/drivers/infiniband/core/security.c
+++ b/drivers/infiniband/core/security.c
@@ -39,6 +39,10 @@
#include "core_priv.h"
#include "mad_priv.h"
+static LIST_HEAD(mad_agent_list);
+/* Lock to protect mad_agent_list */
+static DEFINE_SPINLOCK(mad_agent_list_lock);
+
static struct pkey_index_qp_list *get_pkey_idx_qp_list(struct ib_port_pkey *pp)
{
struct pkey_index_qp_list *pkey = NULL;
@@ -554,13 +558,12 @@ void ib_security_cache_change(struct ib_device *device,
}
}
-void ib_security_destroy_port_pkey_list(struct ib_device *device)
+void ib_security_release_port_pkey_list(struct ib_device *device)
{
struct pkey_index_qp_list *pkey, *tmp_pkey;
int i;
for (i = rdma_start_port(device); i <= rdma_end_port(device); i++) {
- spin_lock(&device->port_pkey_list[i].list_lock);
list_for_each_entry_safe(pkey,
tmp_pkey,
&device->port_pkey_list[i].pkey_list,
@@ -568,7 +571,6 @@ void ib_security_destroy_port_pkey_list(struct ib_device *device)
list_del(&pkey->pkey_index_list);
kfree(pkey);
}
- spin_unlock(&device->port_pkey_list[i].list_lock);
}
}
@@ -676,19 +678,18 @@ static int ib_security_pkey_access(struct ib_device *dev,
return security_ib_pkey_access(sec, subnet_prefix, pkey);
}
-static int ib_mad_agent_security_change(struct notifier_block *nb,
- unsigned long event,
- void *data)
+void ib_mad_agent_security_change(void)
{
- struct ib_mad_agent *ag = container_of(nb, struct ib_mad_agent, lsm_nb);
-
- if (event != LSM_POLICY_CHANGE)
- return NOTIFY_DONE;
-
- ag->smp_allowed = !security_ib_endport_manage_subnet(
- ag->security, dev_name(&ag->device->dev), ag->port_num);
-
- return NOTIFY_OK;
+ struct ib_mad_agent *ag;
+
+ spin_lock(&mad_agent_list_lock);
+ list_for_each_entry(ag,
+ &mad_agent_list,
+ mad_agent_sec_list)
+ WRITE_ONCE(ag->smp_allowed,
+ !security_ib_endport_manage_subnet(ag->security,
+ dev_name(&ag->device->dev), ag->port_num));
+ spin_unlock(&mad_agent_list_lock);
}
int ib_mad_agent_security_setup(struct ib_mad_agent *agent,
@@ -699,6 +700,8 @@ int ib_mad_agent_security_setup(struct ib_mad_agent *agent,
if (!rdma_protocol_ib(agent->device, agent->port_num))
return 0;
+ INIT_LIST_HEAD(&agent->mad_agent_sec_list);
+
ret = security_ib_alloc_security(&agent->security);
if (ret)
return ret;
@@ -706,20 +709,22 @@ int ib_mad_agent_security_setup(struct ib_mad_agent *agent,
if (qp_type != IB_QPT_SMI)
return 0;
+ spin_lock(&mad_agent_list_lock);
ret = security_ib_endport_manage_subnet(agent->security,
dev_name(&agent->device->dev),
agent->port_num);
if (ret)
- return ret;
-
- agent->lsm_nb.notifier_call = ib_mad_agent_security_change;
- ret = register_lsm_notifier(&agent->lsm_nb);
- if (ret)
- return ret;
+ goto free_security;
- agent->smp_allowed = true;
- agent->lsm_nb_reg = true;
+ WRITE_ONCE(agent->smp_allowed, true);
+ list_add(&agent->mad_agent_sec_list, &mad_agent_list);
+ spin_unlock(&mad_agent_list_lock);
return 0;
+
+free_security:
+ spin_unlock(&mad_agent_list_lock);
+ security_ib_free_security(agent->security);
+ return ret;
}
void ib_mad_agent_security_cleanup(struct ib_mad_agent *agent)
@@ -727,9 +732,13 @@ void ib_mad_agent_security_cleanup(struct ib_mad_agent *agent)
if (!rdma_protocol_ib(agent->device, agent->port_num))
return;
+ if (agent->qp->qp_type == IB_QPT_SMI) {
+ spin_lock(&mad_agent_list_lock);
+ list_del(&agent->mad_agent_sec_list);
+ spin_unlock(&mad_agent_list_lock);
+ }
+
security_ib_free_security(agent->security);
- if (agent->lsm_nb_reg)
- unregister_lsm_notifier(&agent->lsm_nb);
}
int ib_mad_enforce_security(struct ib_mad_agent_private *map, u16 pkey_index)
@@ -738,7 +747,7 @@ int ib_mad_enforce_security(struct ib_mad_agent_private *map, u16 pkey_index)
return 0;
if (map->agent.qp->qp_type == IB_QPT_SMI) {
- if (!map->agent.smp_allowed)
+ if (!READ_ONCE(map->agent.smp_allowed))
return -EACCES;
return 0;
}
diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c
index 01d68ed46c1b..7468b26b8a01 100644
--- a/drivers/infiniband/core/ucma.c
+++ b/drivers/infiniband/core/ucma.c
@@ -1236,6 +1236,13 @@ static int ucma_set_option_id(struct ucma_context *ctx, int optname,
}
ret = rdma_set_afonly(ctx->cm_id, *((int *) optval) ? 1 : 0);
break;
+ case RDMA_OPTION_ID_ACK_TIMEOUT:
+ if (optlen != sizeof(u8)) {
+ ret = -EINVAL;
+ break;
+ }
+ ret = rdma_set_ack_timeout(ctx->cm_id, *((u8 *)optval));
+ break;
default:
ret = -ENOSYS;
}
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index aa260cafbd85..5ac143f22df0 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -407,9 +407,9 @@ static int ib_uverbs_alloc_pd(struct uverbs_attr_bundle *attrs)
if (IS_ERR(uobj))
return PTR_ERR(uobj);
- pd = ib_dev->ops.alloc_pd(ib_dev, uobj->context, &attrs->driver_udata);
- if (IS_ERR(pd)) {
- ret = PTR_ERR(pd);
+ pd = rdma_zalloc_drv_obj(ib_dev, ib_pd);
+ if (!pd) {
+ ret = -ENOMEM;
goto err;
}
@@ -417,11 +417,15 @@ static int ib_uverbs_alloc_pd(struct uverbs_attr_bundle *attrs)
pd->uobject = uobj;
pd->__internal_mr = NULL;
atomic_set(&pd->usecnt, 0);
+ pd->res.type = RDMA_RESTRACK_PD;
+
+ ret = ib_dev->ops.alloc_pd(pd, uobj->context, &attrs->driver_udata);
+ if (ret)
+ goto err_alloc;
uobj->object = pd;
memset(&resp, 0, sizeof resp);
resp.pd_handle = uobj->id;
- pd->res.type = RDMA_RESTRACK_PD;
rdma_restrack_uadd(&pd->res);
ret = uverbs_response(attrs, &resp, sizeof(resp));
@@ -432,7 +436,10 @@ static int ib_uverbs_alloc_pd(struct uverbs_attr_bundle *attrs)
err_copy:
ib_dealloc_pd(pd);
-
+ /* ib_dealloc_pd() already freed pd; make the kfree() below a no-op */
+ pd = NULL;
+err_alloc:
+ kfree(pd);
err:
uobj_alloc_abort(uobj);
return ret;
diff --git a/drivers/infiniband/core/uverbs_std_types.c b/drivers/infiniband/core/uverbs_std_types.c
index cbc72312eb41..f224cb727224 100644
--- a/drivers/infiniband/core/uverbs_std_types.c
+++ b/drivers/infiniband/core/uverbs_std_types.c
@@ -188,7 +188,7 @@ static int uverbs_free_pd(struct ib_uobject *uobject,
if (ret)
return ret;
- ib_dealloc_pd((struct ib_pd *)uobject->object);
+ ib_dealloc_pd(pd);
return 0;
}
diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
index 3220fb42ecce..de5d895a5054 100644
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c
@@ -254,10 +254,11 @@ struct ib_pd *__ib_alloc_pd(struct ib_device *device, unsigned int flags,
{
struct ib_pd *pd;
int mr_access_flags = 0;
+ int ret;
- pd = device->ops.alloc_pd(device, NULL, NULL);
- if (IS_ERR(pd))
- return pd;
+ pd = rdma_zalloc_drv_obj(device, ib_pd);
+ if (!pd)
+ return ERR_PTR(-ENOMEM);
pd->device = device;
pd->uobject = NULL;
@@ -265,6 +266,16 @@ struct ib_pd *__ib_alloc_pd(struct ib_device *device, unsigned int flags,
atomic_set(&pd->usecnt, 0);
pd->flags = flags;
+ pd->res.type = RDMA_RESTRACK_PD;
+ rdma_restrack_set_task(&pd->res, caller);
+
+ ret = device->ops.alloc_pd(pd, NULL, NULL);
+ if (ret) {
+ kfree(pd);
+ return ERR_PTR(ret);
+ }
+ rdma_restrack_kadd(&pd->res);
+
if (device->attrs.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY)
pd->local_dma_lkey = device->local_dma_lkey;
else
@@ -275,10 +286,6 @@ struct ib_pd *__ib_alloc_pd(struct ib_device *device, unsigned int flags,
mr_access_flags |= IB_ACCESS_REMOTE_READ | IB_ACCESS_REMOTE_WRITE;
}
- pd->res.type = RDMA_RESTRACK_PD;
- rdma_restrack_set_task(&pd->res, caller);
- rdma_restrack_kadd(&pd->res);
-
if (mr_access_flags) {
struct ib_mr *mr;
@@ -329,10 +336,8 @@ void ib_dealloc_pd(struct ib_pd *pd)
WARN_ON(atomic_read(&pd->usecnt));
rdma_restrack_del(&pd->res);
- /* Making delalloc_pd a void return is a WIP, no driver should return
- an error here. */
- ret = pd->device->ops.dealloc_pd(pd);
- WARN_ONCE(ret, "Infiniband HW driver failed dealloc_pd");
+ pd->device->ops.dealloc_pd(pd);
+ kfree(pd);
}
EXPORT_SYMBOL(ib_dealloc_pd);