summaryrefslogtreecommitdiff
path: root/drivers/infiniband/hw/mlx5
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/infiniband/hw/mlx5')
-rw-r--r--drivers/infiniband/hw/mlx5/ah.c13
-rw-r--r--drivers/infiniband/hw/mlx5/counters.c12
-rw-r--r--drivers/infiniband/hw/mlx5/cq.c2
-rw-r--r--drivers/infiniband/hw/mlx5/devx.c2
-rw-r--r--drivers/infiniband/hw/mlx5/dm.c2
-rw-r--r--drivers/infiniband/hw/mlx5/main.c33
-rw-r--r--drivers/infiniband/hw/mlx5/mr.c115
-rw-r--r--drivers/infiniband/hw/mlx5/odp.c67
-rw-r--r--drivers/infiniband/hw/mlx5/qp.c10
-rw-r--r--drivers/infiniband/hw/mlx5/qp.h1
-rw-r--r--drivers/infiniband/hw/mlx5/qpc.c30
-rw-r--r--drivers/infiniband/hw/mlx5/umr.c89
12 files changed, 283 insertions, 93 deletions
diff --git a/drivers/infiniband/hw/mlx5/ah.c b/drivers/infiniband/hw/mlx5/ah.c
index 505bc47fd575..531a57f9ee7e 100644
--- a/drivers/infiniband/hw/mlx5/ah.c
+++ b/drivers/infiniband/hw/mlx5/ah.c
@@ -50,11 +50,12 @@ static __be16 mlx5_ah_get_udp_sport(const struct mlx5_ib_dev *dev,
return sport;
}
-static void create_ib_ah(struct mlx5_ib_dev *dev, struct mlx5_ib_ah *ah,
+static int create_ib_ah(struct mlx5_ib_dev *dev, struct mlx5_ib_ah *ah,
struct rdma_ah_init_attr *init_attr)
{
struct rdma_ah_attr *ah_attr = init_attr->ah_attr;
enum ib_gid_type gid_type;
+ int rate_val;
if (rdma_ah_get_ah_flags(ah_attr) & IB_AH_GRH) {
const struct ib_global_route *grh = rdma_ah_read_grh(ah_attr);
@@ -67,7 +68,10 @@ static void create_ib_ah(struct mlx5_ib_dev *dev, struct mlx5_ib_ah *ah,
ah->av.tclass = grh->traffic_class;
}
- ah->av.stat_rate_sl = (rdma_ah_get_static_rate(ah_attr) << 4);
+ rate_val = mlx5r_ib_rate(dev, rdma_ah_get_static_rate(ah_attr));
+ if (rate_val < 0)
+ return rate_val;
+ ah->av.stat_rate_sl = rate_val << 4;
if (ah_attr->type == RDMA_AH_ATTR_TYPE_ROCE) {
if (init_attr->xmit_slave)
@@ -88,6 +92,8 @@ static void create_ib_ah(struct mlx5_ib_dev *dev, struct mlx5_ib_ah *ah,
ah->av.fl_mlid = rdma_ah_get_path_bits(ah_attr) & 0x7f;
ah->av.stat_rate_sl |= (rdma_ah_get_sl(ah_attr) & 0xf);
}
+
+ return 0;
}
int mlx5_ib_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr,
@@ -120,8 +126,7 @@ int mlx5_ib_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr,
return err;
}
- create_ib_ah(dev, ah, init_attr);
- return 0;
+ return create_ib_ah(dev, ah, init_attr);
}
int mlx5_ib_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr)
diff --git a/drivers/infiniband/hw/mlx5/counters.c b/drivers/infiniband/hw/mlx5/counters.c
index 4f6c1968a2ee..ad6c195d077b 100644
--- a/drivers/infiniband/hw/mlx5/counters.c
+++ b/drivers/infiniband/hw/mlx5/counters.c
@@ -391,7 +391,7 @@ static int do_get_hw_stats(struct ib_device *ibdev,
return ret;
/* We don't expose device counters over Vports */
- if (is_mdev_switchdev_mode(dev->mdev) && port_num != 0)
+ if (is_mdev_switchdev_mode(dev->mdev) && dev->is_rep && port_num != 0)
goto done;
if (MLX5_CAP_PCAM_FEATURE(dev->mdev, rx_icrc_encapsulated_counter)) {
@@ -411,7 +411,7 @@ static int do_get_hw_stats(struct ib_device *ibdev,
*/
goto done;
}
- ret = mlx5_lag_query_cong_counters(dev->mdev,
+ ret = mlx5_lag_query_cong_counters(mdev,
stats->value +
cnts->num_q_counters,
cnts->num_cong_counters,
@@ -546,6 +546,7 @@ static int mlx5_ib_counter_bind_qp(struct rdma_counter *counter,
struct ib_qp *qp)
{
struct mlx5_ib_dev *dev = to_mdev(qp->device);
+ bool new = false;
int err;
if (!counter->id) {
@@ -560,6 +561,7 @@ static int mlx5_ib_counter_bind_qp(struct rdma_counter *counter,
return err;
counter->id =
MLX5_GET(alloc_q_counter_out, out, counter_set_id);
+ new = true;
}
err = mlx5_ib_qp_set_counter(qp, counter);
@@ -569,8 +571,10 @@ static int mlx5_ib_counter_bind_qp(struct rdma_counter *counter,
return 0;
fail_set_counter:
- mlx5_ib_counter_dealloc(counter);
- counter->id = 0;
+ if (new) {
+ mlx5_ib_counter_dealloc(counter);
+ counter->id = 0;
+ }
return err;
}
diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c
index 4c54dc578069..1aa5311b03e9 100644
--- a/drivers/infiniband/hw/mlx5/cq.c
+++ b/drivers/infiniband/hw/mlx5/cq.c
@@ -490,7 +490,7 @@ repoll:
}
qpn = ntohl(cqe64->sop_drop_qpn) & 0xffffff;
- if (!*cur_qp || (qpn != (*cur_qp)->ibqp.qp_num)) {
+ if (!*cur_qp || (qpn != (*cur_qp)->trans_qp.base.mqp.qpn)) {
/* We do not have to take the QP table lock here,
* because CQs will be locked while QPs are removed
* from the table.
diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c
index 69999d8d24f3..f49f78b69ab9 100644
--- a/drivers/infiniband/hw/mlx5/devx.c
+++ b/drivers/infiniband/hw/mlx5/devx.c
@@ -1914,6 +1914,7 @@ subscribe_event_xa_alloc(struct mlx5_devx_event_table *devx_event_table,
/* Level1 is valid for future use, no need to free */
return -ENOMEM;
+ INIT_LIST_HEAD(&obj_event->obj_sub_list);
err = xa_insert(&event->object_ids,
key_level2,
obj_event,
@@ -1922,7 +1923,6 @@ subscribe_event_xa_alloc(struct mlx5_devx_event_table *devx_event_table,
kfree(obj_event);
return err;
}
- INIT_LIST_HEAD(&obj_event->obj_sub_list);
}
return 0;
diff --git a/drivers/infiniband/hw/mlx5/dm.c b/drivers/infiniband/hw/mlx5/dm.c
index b4c97fb62abf..9ded2b7c1e31 100644
--- a/drivers/infiniband/hw/mlx5/dm.c
+++ b/drivers/infiniband/hw/mlx5/dm.c
@@ -282,7 +282,7 @@ static struct ib_dm *handle_alloc_dm_memic(struct ib_ucontext *ctx,
int err;
u64 address;
- if (!MLX5_CAP_DEV_MEM(dm_db->dev, memic))
+ if (!dm_db || !MLX5_CAP_DEV_MEM(dm_db->dev, memic))
return ERR_PTR(-EOPNOTSUPP);
dm = kzalloc(sizeof(*dm), GFP_KERNEL);
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index 8c47cb4edd0a..435c456a4fd5 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -1766,6 +1766,33 @@ static void deallocate_uars(struct mlx5_ib_dev *dev,
context->devx_uid);
}
+static int mlx5_ib_enable_lb_mp(struct mlx5_core_dev *master,
+ struct mlx5_core_dev *slave)
+{
+ int err;
+
+ err = mlx5_nic_vport_update_local_lb(master, true);
+ if (err)
+ return err;
+
+ err = mlx5_nic_vport_update_local_lb(slave, true);
+ if (err)
+ goto out;
+
+ return 0;
+
+out:
+ mlx5_nic_vport_update_local_lb(master, false);
+ return err;
+}
+
+static void mlx5_ib_disable_lb_mp(struct mlx5_core_dev *master,
+ struct mlx5_core_dev *slave)
+{
+ mlx5_nic_vport_update_local_lb(slave, false);
+ mlx5_nic_vport_update_local_lb(master, false);
+}
+
int mlx5_ib_enable_lb(struct mlx5_ib_dev *dev, bool td, bool qp)
{
int err = 0;
@@ -3448,6 +3475,8 @@ static void mlx5_ib_unbind_slave_port(struct mlx5_ib_dev *ibdev,
lockdep_assert_held(&mlx5_ib_multiport_mutex);
+ mlx5_ib_disable_lb_mp(ibdev->mdev, mpi->mdev);
+
mlx5_core_mp_event_replay(ibdev->mdev,
MLX5_DRIVER_EVENT_AFFILIATION_REMOVED,
NULL);
@@ -3543,6 +3572,10 @@ static bool mlx5_ib_bind_slave_port(struct mlx5_ib_dev *ibdev,
MLX5_DRIVER_EVENT_AFFILIATION_DONE,
&key);
+ err = mlx5_ib_enable_lb_mp(ibdev->mdev, mpi->mdev);
+ if (err)
+ goto unbind;
+
return true;
unbind:
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index 45d9dc9c6c8f..726b81b6330c 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -56,7 +56,7 @@ static void
create_mkey_callback(int status, struct mlx5_async_work *context);
static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, struct ib_umem *umem,
u64 iova, int access_flags,
- unsigned int page_size, bool populate,
+ unsigned long page_size, bool populate,
int access_mode);
static int __mlx5_ib_dereg_mr(struct ib_mr *ibmr);
@@ -919,6 +919,25 @@ mkeys_err:
return ERR_PTR(ret);
}
+static void mlx5r_destroy_cache_entries(struct mlx5_ib_dev *dev)
+{
+ struct rb_root *root = &dev->cache.rb_root;
+ struct mlx5_cache_ent *ent;
+ struct rb_node *node;
+
+ mutex_lock(&dev->cache.rb_lock);
+ node = rb_first(root);
+ while (node) {
+ ent = rb_entry(node, struct mlx5_cache_ent, node);
+ node = rb_next(node);
+ clean_keys(dev, ent);
+ rb_erase(&ent->node, root);
+ mlx5r_mkeys_uninit(ent);
+ kfree(ent);
+ }
+ mutex_unlock(&dev->cache.rb_lock);
+}
+
int mlx5_mkey_cache_init(struct mlx5_ib_dev *dev)
{
struct mlx5_mkey_cache *cache = &dev->cache;
@@ -970,6 +989,8 @@ int mlx5_mkey_cache_init(struct mlx5_ib_dev *dev)
err:
mutex_unlock(&cache->rb_lock);
mlx5_mkey_cache_debugfs_cleanup(dev);
+ mlx5r_destroy_cache_entries(dev);
+ destroy_workqueue(cache->wq);
mlx5_ib_warn(dev, "failed to create mkey cache entry\n");
return ret;
}
@@ -1003,17 +1024,7 @@ void mlx5_mkey_cache_cleanup(struct mlx5_ib_dev *dev)
mlx5_cmd_cleanup_async_ctx(&dev->async_ctx);
/* At this point all entries are disabled and have no concurrent work. */
- mutex_lock(&dev->cache.rb_lock);
- node = rb_first(root);
- while (node) {
- ent = rb_entry(node, struct mlx5_cache_ent, node);
- node = rb_next(node);
- clean_keys(dev, ent);
- rb_erase(&ent->node, root);
- mlx5r_mkeys_uninit(ent);
- kfree(ent);
- }
- mutex_unlock(&dev->cache.rb_lock);
+ mlx5r_destroy_cache_entries(dev);
destroy_workqueue(dev->cache.wq);
del_timer_sync(&dev->delay_timer);
@@ -1115,7 +1126,7 @@ static struct mlx5_ib_mr *alloc_cacheable_mr(struct ib_pd *pd,
struct mlx5r_cache_rb_key rb_key = {};
struct mlx5_cache_ent *ent;
struct mlx5_ib_mr *mr;
- unsigned int page_size;
+ unsigned long page_size;
if (umem->is_dmabuf)
page_size = mlx5_umem_dmabuf_default_pgsz(umem, iova);
@@ -1219,7 +1230,7 @@ err_1:
*/
static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, struct ib_umem *umem,
u64 iova, int access_flags,
- unsigned int page_size, bool populate,
+ unsigned long page_size, bool populate,
int access_mode)
{
struct mlx5_ib_dev *dev = to_mdev(pd->device);
@@ -1425,7 +1436,7 @@ static struct ib_mr *create_real_mr(struct ib_pd *pd, struct ib_umem *umem,
mr = alloc_cacheable_mr(pd, umem, iova, access_flags,
MLX5_MKC_ACCESS_MODE_MTT);
} else {
- unsigned int page_size =
+ unsigned long page_size =
mlx5_umem_mkc_find_best_pgsz(dev, umem, iova);
mutex_lock(&dev->slow_path_mutex);
@@ -1550,7 +1561,7 @@ static void mlx5_ib_dmabuf_invalidate_cb(struct dma_buf_attachment *attach)
dma_resv_assert_held(umem_dmabuf->attach->dmabuf->resv);
- if (!umem_dmabuf->sgt)
+ if (!umem_dmabuf->sgt || !mr)
return;
mlx5r_umr_update_mr_pas(mr, MLX5_IB_UPD_XLT_ZAP);
@@ -1935,7 +1946,8 @@ err:
static void
mlx5_free_priv_descs(struct mlx5_ib_mr *mr)
{
- if (!mr->umem && !mr->data_direct && mr->descs) {
+ if (!mr->umem && !mr->data_direct &&
+ mr->ibmr.type != IB_MR_TYPE_DM && mr->descs) {
struct ib_device *device = mr->ibmr.device;
int size = mr->max_descs * mr->desc_size;
struct mlx5_ib_dev *dev = to_mdev(device);
@@ -1956,7 +1968,6 @@ static int cache_ent_find_and_store(struct mlx5_ib_dev *dev,
if (mr->mmkey.cache_ent) {
spin_lock_irq(&mr->mmkey.cache_ent->mkeys_queue.lock);
- mr->mmkey.cache_ent->in_use--;
goto end;
}
@@ -2017,15 +2028,55 @@ void mlx5_ib_revoke_data_direct_mrs(struct mlx5_ib_dev *dev)
}
}
-static int mlx5_revoke_mr(struct mlx5_ib_mr *mr)
+static int mlx5_umr_revoke_mr_with_lock(struct mlx5_ib_mr *mr)
{
+ bool is_odp_dma_buf = is_dmabuf_mr(mr) &&
+ !to_ib_umem_dmabuf(mr->umem)->pinned;
+ bool is_odp = is_odp_mr(mr);
+ int ret;
+
+ if (is_odp)
+ mutex_lock(&to_ib_umem_odp(mr->umem)->umem_mutex);
+
+ if (is_odp_dma_buf)
+ dma_resv_lock(to_ib_umem_dmabuf(mr->umem)->attach->dmabuf->resv,
+ NULL);
+
+ ret = mlx5r_umr_revoke_mr(mr);
+
+ if (is_odp) {
+ if (!ret)
+ to_ib_umem_odp(mr->umem)->private = NULL;
+ mutex_unlock(&to_ib_umem_odp(mr->umem)->umem_mutex);
+ }
+
+ if (is_odp_dma_buf) {
+ if (!ret)
+ to_ib_umem_dmabuf(mr->umem)->private = NULL;
+ dma_resv_unlock(
+ to_ib_umem_dmabuf(mr->umem)->attach->dmabuf->resv);
+ }
+
+ return ret;
+}
+
+static int mlx5r_handle_mkey_cleanup(struct mlx5_ib_mr *mr)
+{
+ bool is_odp_dma_buf = is_dmabuf_mr(mr) &&
+ !to_ib_umem_dmabuf(mr->umem)->pinned;
struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.device);
struct mlx5_cache_ent *ent = mr->mmkey.cache_ent;
+ bool is_odp = is_odp_mr(mr);
+ bool from_cache = !!ent;
+ int ret;
- if (mr->mmkey.cacheable && !mlx5r_umr_revoke_mr(mr) && !cache_ent_find_and_store(dev, mr)) {
+ if (mr->mmkey.cacheable && !mlx5_umr_revoke_mr_with_lock(mr) &&
+ !cache_ent_find_and_store(dev, mr)) {
ent = mr->mmkey.cache_ent;
/* upon storing to a clean temp entry - schedule its cleanup */
spin_lock_irq(&ent->mkeys_queue.lock);
+ if (from_cache)
+ ent->in_use--;
if (ent->is_tmp && !ent->tmp_cleanup_scheduled) {
mod_delayed_work(ent->dev->cache.wq, &ent->dwork,
msecs_to_jiffies(30 * 1000));
@@ -2041,7 +2092,27 @@ static int mlx5_revoke_mr(struct mlx5_ib_mr *mr)
mr->mmkey.cache_ent = NULL;
spin_unlock_irq(&ent->mkeys_queue.lock);
}
- return destroy_mkey(dev, mr);
+
+ if (is_odp)
+ mutex_lock(&to_ib_umem_odp(mr->umem)->umem_mutex);
+
+ if (is_odp_dma_buf)
+ dma_resv_lock(to_ib_umem_dmabuf(mr->umem)->attach->dmabuf->resv,
+ NULL);
+ ret = destroy_mkey(dev, mr);
+ if (is_odp) {
+ if (!ret)
+ to_ib_umem_odp(mr->umem)->private = NULL;
+ mutex_unlock(&to_ib_umem_odp(mr->umem)->umem_mutex);
+ }
+
+ if (is_odp_dma_buf) {
+ if (!ret)
+ to_ib_umem_dmabuf(mr->umem)->private = NULL;
+ dma_resv_unlock(
+ to_ib_umem_dmabuf(mr->umem)->attach->dmabuf->resv);
+ }
+ return ret;
}
static int __mlx5_ib_dereg_mr(struct ib_mr *ibmr)
@@ -2089,7 +2160,7 @@ static int __mlx5_ib_dereg_mr(struct ib_mr *ibmr)
}
/* Stop DMA */
- rc = mlx5_revoke_mr(mr);
+ rc = mlx5r_handle_mkey_cleanup(mr);
if (rc)
return rc;
diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c
index 4b37446758fd..98a76c9db7ab 100644
--- a/drivers/infiniband/hw/mlx5/odp.c
+++ b/drivers/infiniband/hw/mlx5/odp.c
@@ -228,13 +228,28 @@ static void destroy_unused_implicit_child_mr(struct mlx5_ib_mr *mr)
unsigned long idx = ib_umem_start(odp) >> MLX5_IMR_MTT_SHIFT;
struct mlx5_ib_mr *imr = mr->parent;
+ /*
+ * If userspace is racing freeing the parent implicit ODP MR then we can
+ * loose the race with parent destruction. In this case
+ * mlx5_ib_free_odp_mr() will free everything in the implicit_children
+ * xarray so NOP is fine. This child MR cannot be destroyed here because
+ * we are under its umem_mutex.
+ */
if (!refcount_inc_not_zero(&imr->mmkey.usecount))
return;
- xa_erase(&imr->implicit_children, idx);
+ xa_lock(&imr->implicit_children);
+ if (__xa_cmpxchg(&imr->implicit_children, idx, mr, NULL, GFP_KERNEL) !=
+ mr) {
+ xa_unlock(&imr->implicit_children);
+ mlx5r_deref_odp_mkey(&imr->mmkey);
+ return;
+ }
+
if (MLX5_CAP_ODP(mr_to_mdev(mr)->mdev, mem_page_fault))
xa_erase(&mr_to_mdev(mr)->odp_mkeys,
mlx5_base_mkey(mr->mmkey.key));
+ xa_unlock(&imr->implicit_children);
/* Freeing a MR is a sleeping operation, so bounce to a work queue */
INIT_WORK(&mr->odp_destroy.work, free_implicit_child_mr_work);
@@ -268,6 +283,8 @@ static bool mlx5_ib_invalidate_range(struct mmu_interval_notifier *mni,
if (!umem_odp->npages)
goto out;
mr = umem_odp->private;
+ if (!mr)
+ goto out;
start = max_t(u64, ib_umem_start(umem_odp), range->start);
end = min_t(u64, ib_umem_end(umem_odp), range->end);
@@ -292,9 +309,6 @@ static bool mlx5_ib_invalidate_range(struct mmu_interval_notifier *mni,
blk_start_idx = idx;
in_block = 1;
}
-
- /* Count page invalidations */
- invalidations += idx - blk_start_idx + 1;
} else {
u64 umr_offset = idx & umr_block_mask;
@@ -304,14 +318,19 @@ static bool mlx5_ib_invalidate_range(struct mmu_interval_notifier *mni,
MLX5_IB_UPD_XLT_ZAP |
MLX5_IB_UPD_XLT_ATOMIC);
in_block = 0;
+ /* Count page invalidations */
+ invalidations += idx - blk_start_idx + 1;
}
}
}
- if (in_block)
+ if (in_block) {
mlx5r_umr_update_xlt(mr, blk_start_idx,
idx - blk_start_idx + 1, 0,
MLX5_IB_UPD_XLT_ZAP |
MLX5_IB_UPD_XLT_ATOMIC);
+ /* Count page invalidations */
+ invalidations += idx - blk_start_idx + 1;
+ }
mlx5_update_odp_stats(mr, invalidations, invalidations);
@@ -500,18 +519,18 @@ static struct mlx5_ib_mr *implicit_get_child_mr(struct mlx5_ib_mr *imr,
refcount_inc(&ret->mmkey.usecount);
goto out_lock;
}
- xa_unlock(&imr->implicit_children);
if (MLX5_CAP_ODP(dev->mdev, mem_page_fault)) {
ret = xa_store(&dev->odp_mkeys, mlx5_base_mkey(mr->mmkey.key),
&mr->mmkey, GFP_KERNEL);
if (xa_is_err(ret)) {
ret = ERR_PTR(xa_err(ret));
- xa_erase(&imr->implicit_children, idx);
- goto out_mr;
+ __xa_erase(&imr->implicit_children, idx);
+ goto out_lock;
}
mr->mmkey.type = MLX5_MKEY_IMPLICIT_CHILD;
}
+ xa_unlock(&imr->implicit_children);
mlx5_ib_dbg(mr_to_mdev(imr), "key %x mr %p\n", mr->mmkey.key, mr);
return mr;
@@ -944,8 +963,7 @@ out:
/*
* Handle a single data segment in a page-fault WQE or RDMA region.
*
- * Returns number of OS pages retrieved on success. The caller may continue to
- * the next data segment.
+ * Returns zero on success. The caller may continue to the next data segment.
* Can return the following error codes:
* -EAGAIN to designate a temporary error. The caller will abort handling the
* page fault and resolve it.
@@ -958,7 +976,7 @@ static int pagefault_single_data_segment(struct mlx5_ib_dev *dev,
u32 *bytes_committed,
u32 *bytes_mapped)
{
- int npages = 0, ret, i, outlen, cur_outlen = 0, depth = 0;
+ int ret, i, outlen, cur_outlen = 0, depth = 0, pages_in_range;
struct pf_frame *head = NULL, *frame;
struct mlx5_ib_mkey *mmkey;
struct mlx5_ib_mr *mr;
@@ -993,13 +1011,20 @@ next_mr:
case MLX5_MKEY_MR:
mr = container_of(mmkey, struct mlx5_ib_mr, mmkey);
+ pages_in_range = (ALIGN(io_virt + bcnt, PAGE_SIZE) -
+ (io_virt & PAGE_MASK)) >>
+ PAGE_SHIFT;
ret = pagefault_mr(mr, io_virt, bcnt, bytes_mapped, 0, false);
if (ret < 0)
goto end;
mlx5_update_odp_stats(mr, faults, ret);
- npages += ret;
+ if (ret < pages_in_range) {
+ ret = -EFAULT;
+ goto end;
+ }
+
ret = 0;
break;
@@ -1090,7 +1115,7 @@ end:
kfree(out);
*bytes_committed = 0;
- return ret ? ret : npages;
+ return ret;
}
/*
@@ -1109,8 +1134,7 @@ end:
* the committed bytes).
* @receive_queue: receive WQE end of sg list
*
- * Returns the number of pages loaded if positive, zero for an empty WQE, or a
- * negative error code.
+ * Returns zero for success or a negative error code.
*/
static int pagefault_data_segments(struct mlx5_ib_dev *dev,
struct mlx5_pagefault *pfault,
@@ -1118,7 +1142,7 @@ static int pagefault_data_segments(struct mlx5_ib_dev *dev,
void *wqe_end, u32 *bytes_mapped,
u32 *total_wqe_bytes, bool receive_queue)
{
- int ret = 0, npages = 0;
+ int ret = 0;
u64 io_virt;
__be32 key;
u32 byte_count;
@@ -1175,10 +1199,9 @@ static int pagefault_data_segments(struct mlx5_ib_dev *dev,
bytes_mapped);
if (ret < 0)
break;
- npages += ret;
}
- return ret < 0 ? ret : npages;
+ return ret;
}
/*
@@ -1414,12 +1437,6 @@ resolve_page_fault:
free_page((unsigned long)wqe_start);
}
-static int pages_in_range(u64 address, u32 length)
-{
- return (ALIGN(address + length, PAGE_SIZE) -
- (address & PAGE_MASK)) >> PAGE_SHIFT;
-}
-
static void mlx5_ib_mr_rdma_pfault_handler(struct mlx5_ib_dev *dev,
struct mlx5_pagefault *pfault)
{
@@ -1458,7 +1475,7 @@ static void mlx5_ib_mr_rdma_pfault_handler(struct mlx5_ib_dev *dev,
if (ret == -EAGAIN) {
/* We're racing with an invalidation, don't prefetch */
prefetch_activated = 0;
- } else if (ret < 0 || pages_in_range(address, length) > ret) {
+ } else if (ret < 0) {
mlx5_ib_page_fault_resume(dev, pfault, 1);
if (ret != -ENOENT)
mlx5_ib_dbg(dev, "PAGE FAULT error %d. QP 0x%llx, type: 0x%x\n",
diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
index 10ce3b44f645..ded139b4e87a 100644
--- a/drivers/infiniband/hw/mlx5/qp.c
+++ b/drivers/infiniband/hw/mlx5/qp.c
@@ -3420,11 +3420,11 @@ static int ib_to_mlx5_rate_map(u8 rate)
return 0;
}
-static int ib_rate_to_mlx5(struct mlx5_ib_dev *dev, u8 rate)
+int mlx5r_ib_rate(struct mlx5_ib_dev *dev, u8 rate)
{
u32 stat_rate_support;
- if (rate == IB_RATE_PORT_CURRENT)
+ if (rate == IB_RATE_PORT_CURRENT || rate == IB_RATE_800_GBPS)
return 0;
if (rate < IB_RATE_2_5_GBPS || rate > IB_RATE_800_GBPS)
@@ -3569,7 +3569,7 @@ static int mlx5_set_path(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
sizeof(grh->dgid.raw));
}
- err = ib_rate_to_mlx5(dev, rdma_ah_get_static_rate(ah));
+ err = mlx5r_ib_rate(dev, rdma_ah_get_static_rate(ah));
if (err < 0)
return err;
MLX5_SET(ads, path, stat_rate, err);
@@ -4547,6 +4547,8 @@ static int mlx5_ib_modify_dct(struct ib_qp *ibqp, struct ib_qp_attr *attr,
set_id = mlx5_ib_get_counters_id(dev, attr->port_num - 1);
MLX5_SET(dctc, dctc, counter_set_id, set_id);
+
+ qp->port = attr->port_num;
} else if (cur_state == IB_QPS_INIT && new_state == IB_QPS_RTR) {
struct mlx5_ib_modify_qp_resp resp = {};
u32 out[MLX5_ST_SZ_DW(create_dct_out)] = {};
@@ -5033,7 +5035,7 @@ static int mlx5_ib_dct_query_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *mqp,
}
if (qp_attr_mask & IB_QP_PORT)
- qp_attr->port_num = MLX5_GET(dctc, dctc, port);
+ qp_attr->port_num = mqp->port;
if (qp_attr_mask & IB_QP_MIN_RNR_TIMER)
qp_attr->min_rnr_timer = MLX5_GET(dctc, dctc, min_rnr_nak);
if (qp_attr_mask & IB_QP_AV) {
diff --git a/drivers/infiniband/hw/mlx5/qp.h b/drivers/infiniband/hw/mlx5/qp.h
index b6ee7c3ee1ca..2530e7730635 100644
--- a/drivers/infiniband/hw/mlx5/qp.h
+++ b/drivers/infiniband/hw/mlx5/qp.h
@@ -56,4 +56,5 @@ int mlx5_core_xrcd_dealloc(struct mlx5_ib_dev *dev, u32 xrcdn);
int mlx5_ib_qp_set_counter(struct ib_qp *qp, struct rdma_counter *counter);
int mlx5_ib_qp_event_init(void);
void mlx5_ib_qp_event_cleanup(void);
+int mlx5r_ib_rate(struct mlx5_ib_dev *dev, u8 rate);
#endif /* _MLX5_IB_QP_H */
diff --git a/drivers/infiniband/hw/mlx5/qpc.c b/drivers/infiniband/hw/mlx5/qpc.c
index d3dcc272200a..146d03ae40bd 100644
--- a/drivers/infiniband/hw/mlx5/qpc.c
+++ b/drivers/infiniband/hw/mlx5/qpc.c
@@ -21,8 +21,10 @@ mlx5_get_rsc(struct mlx5_qp_table *table, u32 rsn)
spin_lock_irqsave(&table->lock, flags);
common = radix_tree_lookup(&table->tree, rsn);
- if (common)
+ if (common && !common->invalid)
refcount_inc(&common->refcount);
+ else
+ common = NULL;
spin_unlock_irqrestore(&table->lock, flags);
@@ -178,6 +180,18 @@ static int create_resource_common(struct mlx5_ib_dev *dev,
return 0;
}
+static void modify_resource_common_state(struct mlx5_ib_dev *dev,
+ struct mlx5_core_qp *qp,
+ bool invalid)
+{
+ struct mlx5_qp_table *table = &dev->qp_table;
+ unsigned long flags;
+
+ spin_lock_irqsave(&table->lock, flags);
+ qp->common.invalid = invalid;
+ spin_unlock_irqrestore(&table->lock, flags);
+}
+
static void destroy_resource_common(struct mlx5_ib_dev *dev,
struct mlx5_core_qp *qp)
{
@@ -609,8 +623,20 @@ err_destroy_rq:
int mlx5_core_destroy_rq_tracked(struct mlx5_ib_dev *dev,
struct mlx5_core_qp *rq)
{
+ int ret;
+
+ /* The rq destruction can be called again in case it fails, hence we
+ * mark the common resource as invalid and only once FW destruction
+ * is completed successfully we actually destroy the resources.
+ */
+ modify_resource_common_state(dev, rq, true);
+ ret = destroy_rq_tracked(dev, rq->qpn, rq->uid);
+ if (ret) {
+ modify_resource_common_state(dev, rq, false);
+ return ret;
+ }
destroy_resource_common(dev, rq);
- return destroy_rq_tracked(dev, rq->qpn, rq->uid);
+ return 0;
}
static void destroy_sq_tracked(struct mlx5_ib_dev *dev, u32 sqn, u16 uid)
diff --git a/drivers/infiniband/hw/mlx5/umr.c b/drivers/infiniband/hw/mlx5/umr.c
index 887fd6fa3ba9..80c665d15218 100644
--- a/drivers/infiniband/hw/mlx5/umr.c
+++ b/drivers/infiniband/hw/mlx5/umr.c
@@ -32,13 +32,15 @@ static __be64 get_umr_disable_mr_mask(void)
return cpu_to_be64(result);
}
-static __be64 get_umr_update_translation_mask(void)
+static __be64 get_umr_update_translation_mask(struct mlx5_ib_dev *dev)
{
u64 result;
result = MLX5_MKEY_MASK_LEN |
MLX5_MKEY_MASK_PAGE_SIZE |
MLX5_MKEY_MASK_START_ADDR;
+ if (MLX5_CAP_GEN_2(dev->mdev, umr_log_entity_size_5))
+ result |= MLX5_MKEY_MASK_PAGE_SIZE_5;
return cpu_to_be64(result);
}
@@ -231,30 +233,6 @@ void mlx5r_umr_cleanup(struct mlx5_ib_dev *dev)
ib_dealloc_pd(dev->umrc.pd);
}
-static int mlx5r_umr_recover(struct mlx5_ib_dev *dev)
-{
- struct umr_common *umrc = &dev->umrc;
- struct ib_qp_attr attr;
- int err;
-
- attr.qp_state = IB_QPS_RESET;
- err = ib_modify_qp(umrc->qp, &attr, IB_QP_STATE);
- if (err) {
- mlx5_ib_dbg(dev, "Couldn't modify UMR QP\n");
- goto err;
- }
-
- err = mlx5r_umr_qp_rst2rts(dev, umrc->qp);
- if (err)
- goto err;
-
- umrc->state = MLX5_UMR_STATE_ACTIVE;
- return 0;
-
-err:
- umrc->state = MLX5_UMR_STATE_ERR;
- return err;
-}
static int mlx5r_umr_post_send(struct ib_qp *ibqp, u32 mkey, struct ib_cqe *cqe,
struct mlx5r_umr_wqe *wqe, bool with_data)
@@ -302,6 +280,61 @@ out:
return err;
}
+static int mlx5r_umr_recover(struct mlx5_ib_dev *dev, u32 mkey,
+ struct mlx5r_umr_context *umr_context,
+ struct mlx5r_umr_wqe *wqe, bool with_data)
+{
+ struct umr_common *umrc = &dev->umrc;
+ struct ib_qp_attr attr;
+ int err;
+
+ mutex_lock(&umrc->lock);
+ /* Preventing any further WRs to be sent now */
+ if (umrc->state != MLX5_UMR_STATE_RECOVER) {
+ mlx5_ib_warn(dev, "UMR recovery encountered an unexpected state=%d\n",
+ umrc->state);
+ umrc->state = MLX5_UMR_STATE_RECOVER;
+ }
+ mutex_unlock(&umrc->lock);
+
+ /* Sending a final/barrier WR (the failed one) and wait for its completion.
+ * This will ensure that all the previous WRs got a completion before
+ * we set the QP state to RESET.
+ */
+ err = mlx5r_umr_post_send(umrc->qp, mkey, &umr_context->cqe, wqe,
+ with_data);
+ if (err) {
+ mlx5_ib_warn(dev, "UMR recovery post send failed, err %d\n", err);
+ goto err;
+ }
+
+ /* Since the QP is in an error state, it will only receive
+ * IB_WC_WR_FLUSH_ERR. However, as it serves only as a barrier
+ * we don't care about its status.
+ */
+ wait_for_completion(&umr_context->done);
+
+ attr.qp_state = IB_QPS_RESET;
+ err = ib_modify_qp(umrc->qp, &attr, IB_QP_STATE);
+ if (err) {
+ mlx5_ib_warn(dev, "Couldn't modify UMR QP to RESET, err=%d\n", err);
+ goto err;
+ }
+
+ err = mlx5r_umr_qp_rst2rts(dev, umrc->qp);
+ if (err) {
+ mlx5_ib_warn(dev, "Couldn't modify UMR QP to RTS, err=%d\n", err);
+ goto err;
+ }
+
+ umrc->state = MLX5_UMR_STATE_ACTIVE;
+ return 0;
+
+err:
+ umrc->state = MLX5_UMR_STATE_ERR;
+ return err;
+}
+
static void mlx5r_umr_done(struct ib_cq *cq, struct ib_wc *wc)
{
struct mlx5_ib_umr_context *context =
@@ -366,9 +399,7 @@ static int mlx5r_umr_post_send_wait(struct mlx5_ib_dev *dev, u32 mkey,
mlx5_ib_warn(dev,
"reg umr failed (%u). Trying to recover and resubmit the flushed WQEs, mkey = %u\n",
umr_context.status, mkey);
- mutex_lock(&umrc->lock);
- err = mlx5r_umr_recover(dev);
- mutex_unlock(&umrc->lock);
+ err = mlx5r_umr_recover(dev, mkey, &umr_context, wqe, with_data);
if (err)
mlx5_ib_warn(dev, "couldn't recover UMR, err %d\n",
err);
@@ -625,7 +656,7 @@ static void mlx5r_umr_final_update_xlt(struct mlx5_ib_dev *dev,
flags & MLX5_IB_UPD_XLT_ENABLE || flags & MLX5_IB_UPD_XLT_ADDR;
if (update_translation) {
- wqe->ctrl_seg.mkey_mask |= get_umr_update_translation_mask();
+ wqe->ctrl_seg.mkey_mask |= get_umr_update_translation_mask(dev);
if (!mr->ibmr.length)
MLX5_SET(mkc, &wqe->mkey_seg, length64, 1);
}