author    Jason Gunthorpe <jgg@nvidia.com>  2020-10-26 16:23:14 +0300
committer Jason Gunthorpe <jgg@nvidia.com>  2020-11-02 22:10:50 +0300
commit    d5c7916fe4613e9128b0f877f7e2dd0c85f5d2d2 (patch)
tree      69c1fb067c7b813ddbd2c5156f190e43bc01e9fa
parent    f1eaac37da20823816af274d69a9eed7444e9822 (diff)
RDMA/mlx5: Use ib_umem_find_best_pgsz() for mkc's
Now that all the PAS arrays or UMR XLT's for mkcs are filled using
rdma_for_each_block() we can use the common ib_umem_find_best_pgsz()
algorithm.

Link: https://lore.kernel.org/r/20201026132314.1336717-6-leon@kernel.org
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
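The pattern the series converges on looks roughly like the sketch below (a hedged illustration of a caller, not code from this patch; pgsz_bitmap, pas and i are illustrative stand-ins):

	struct ib_block_iter biter;
	unsigned long page_size;
	int i = 0;

	/* Pick the largest page size allowed by both the device bitmap
	 * and the umem layout; 0 means no usable size exists.
	 */
	page_size = ib_umem_find_best_pgsz(umem, pgsz_bitmap, iova);
	if (!page_size)
		return -EINVAL;

	/* Fill the PAS array in aligned DMA blocks of that size. */
	rdma_umem_for_each_dma_block(umem, &biter, page_size)
		pas[i++] = cpu_to_be64(rdma_block_iter_dma_address(&biter));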
-rw-r--r--  drivers/infiniband/core/umem.c        9
-rw-r--r--  drivers/infiniband/hw/mlx5/mlx5_ib.h  27
-rw-r--r--  drivers/infiniband/hw/mlx5/mr.c       37
3 files changed, 56 insertions(+), 17 deletions(-)
diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c
index e9fecbdf391b..f1fc7e39c782 100644
--- a/drivers/infiniband/core/umem.c
+++ b/drivers/infiniband/core/umem.c
@@ -84,6 +84,15 @@ unsigned long ib_umem_find_best_pgsz(struct ib_umem *umem,
dma_addr_t mask;
int i;
+ if (umem->is_odp) {
+ unsigned int page_size = BIT(to_ib_umem_odp(umem)->page_shift);
+
+ /* ODP must always be self consistent. */
+ if (!(pgsz_bitmap & page_size))
+ return 0;
+ return page_size;
+ }
+
/* rdma_for_each_block() has a bug if the page size is smaller than the
 * page size used to build the umem. For now prevent smaller page sizes
 * from being returned.
 */
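For ODP the page size was fixed when the umem was created, so the helper just validates it against the caller's bitmap rather than searching. A worked example with assumed values:

	/* ODP umem created with page_shift = 12, i.e. 4k pages */
	unsigned int page_size = BIT(12);          /* 0x1000 */

	/* Caller can handle 4k through 2M: */
	unsigned long pgsz_bitmap = GENMASK(21, 12);

	/* pgsz_bitmap & page_size is non-zero, so 0x1000 is returned;
	 * a bitmap without bit 12 set would get 0 back instead.
	 */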
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index aadd43425a58..bb44080170be 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -42,6 +42,33 @@
#define MLX5_MKEY_PAGE_SHIFT_MASK __mlx5_mask(mkc, log_page_size)
+static __always_inline unsigned long
+__mlx5_log_page_size_to_bitmap(unsigned int log_pgsz_bits,
+ unsigned int pgsz_shift)
+{
+ unsigned int largest_pg_shift =
+ min_t(unsigned long, (1ULL << log_pgsz_bits) - 1 + pgsz_shift,
+ BITS_PER_LONG - 1);
+
+ /*
+ * Despite a command allowing it, the device does not support lower than
+ * 4k page size.
+ */
+ pgsz_shift = max_t(unsigned int, MLX5_ADAPTER_PAGE_SHIFT, pgsz_shift);
+ return GENMASK(largest_pg_shift, pgsz_shift);
+}
+
+/*
+ * For mkc users, instead of a page_offset the command has a start_iova which
+ * specifies both the page_offset and the on-the-wire IOVA
+ */
+#define mlx5_umem_find_best_pgsz(umem, typ, log_pgsz_fld, pgsz_shift, iova) \
+ ib_umem_find_best_pgsz(umem, \
+ __mlx5_log_page_size_to_bitmap( \
+ __mlx5_bit_sz(typ, log_pgsz_fld), \
+ pgsz_shift), \
+ iova)
+
enum {
MLX5_IB_MMAP_OFFSET_START = 9,
MLX5_IB_MMAP_OFFSET_END = 255,
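Plugging in the mkc numbers shows what the new helper computes (assuming log_page_size is a 5-bit field in mlx5_ifc.h and MLX5_ADAPTER_PAGE_SHIFT is 12):

	/* __mlx5_log_page_size_to_bitmap(5, 0) evaluates as: */
	largest_pg_shift = min((1ULL << 5) - 1 + 0, 63);  /* = 31          */
	pgsz_shift       = max(12, 0);                    /* clamped to 4k */
	return GENMASK(31, 12);         /* 4k, 8k, ... up to 2G page sizes */

mlx5_umem_find_best_pgsz() then hands that bitmap to ib_umem_find_best_pgsz(), which picks the largest power-of-two size the umem layout and IOVA alignment permit.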
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index 3fa3809c2660..b091d84ba435 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -964,11 +964,13 @@ static struct mlx5_ib_mr *alloc_mr_from_cache(struct ib_pd *pd,
struct mlx5_ib_dev *dev = to_mdev(pd->device);
struct mlx5_cache_ent *ent;
struct mlx5_ib_mr *mr;
- int page_shift;
+ unsigned int page_size;
- mlx5_ib_cont_pages(umem, iova, MLX5_MKEY_PAGE_SHIFT_MASK, &page_shift);
- ent = mr_cache_ent_from_order(dev, order_base_2(ib_umem_num_dma_blocks(
- umem, 1UL << page_shift)));
+ page_size = mlx5_umem_find_best_pgsz(umem, mkc, log_page_size, 0, iova);
+ if (WARN_ON(!page_size))
+ return ERR_PTR(-EINVAL);
+ ent = mr_cache_ent_from_order(
+ dev, order_base_2(ib_umem_num_dma_blocks(umem, page_size)));
if (!ent)
return ERR_PTR(-E2BIG);
@@ -990,7 +992,7 @@ static struct mlx5_ib_mr *alloc_mr_from_cache(struct ib_pd *pd,
mr->mmkey.iova = iova;
mr->mmkey.size = umem->length;
mr->mmkey.pd = to_mpd(pd)->pdn;
- mr->page_shift = page_shift;
+ mr->page_shift = order_base_2(page_size);
return mr;
}
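Since the rest of the driver still thinks in shifts, the byte-sized result is converted back on the spot; order_base_2() is exact here because ib_umem_find_best_pgsz() only ever returns a power of two (illustrative values):

	page_size = 0x1000;                        /* from mlx5_umem_find_best_pgsz() */
	mr->page_shift = order_base_2(page_size);  /* == 12 */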
@@ -1280,8 +1282,8 @@ static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd,
int access_flags, bool populate)
{
struct mlx5_ib_dev *dev = to_mdev(pd->device);
+ unsigned int page_size;
struct mlx5_ib_mr *mr;
- int page_shift;
__be64 *pas;
void *mkc;
int inlen;
@@ -1289,22 +1291,23 @@ static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd,
int err;
bool pg_cap = !!(MLX5_CAP_GEN(dev->mdev, pg));
+ page_size =
+ mlx5_umem_find_best_pgsz(umem, mkc, log_page_size, 0, iova);
+ if (WARN_ON(!page_size))
+ return ERR_PTR(-EINVAL);
+
mr = ibmr ? to_mmr(ibmr) : kzalloc(sizeof(*mr), GFP_KERNEL);
if (!mr)
return ERR_PTR(-ENOMEM);
- mlx5_ib_cont_pages(umem, iova, MLX5_MKEY_PAGE_SHIFT_MASK, &page_shift);
-
- mr->page_shift = page_shift;
mr->ibmr.pd = pd;
mr->access_flags = access_flags;
+ mr->page_shift = order_base_2(page_size);
inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
if (populate)
- inlen +=
- sizeof(*pas) *
- roundup(ib_umem_num_dma_blocks(umem, 1UL << page_shift),
- 2);
+ inlen += sizeof(*pas) *
+ roundup(ib_umem_num_dma_blocks(umem, page_size), 2);
in = kvzalloc(inlen, GFP_KERNEL);
if (!in) {
err = -ENOMEM;
@@ -1316,7 +1319,7 @@ static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd,
err = -EINVAL;
goto err_2;
}
- mlx5_ib_populate_pas(umem, 1ULL << page_shift, pas,
+ mlx5_ib_populate_pas(umem, 1UL << mr->page_shift, pas,
pg_cap ? MLX5_IB_MTT_PRESENT : 0);
}
@@ -1334,11 +1337,11 @@ static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd,
MLX5_SET64(mkc, mkc, len, umem->length);
MLX5_SET(mkc, mkc, bsf_octword_size, 0);
MLX5_SET(mkc, mkc, translations_octword_size,
- get_octo_len(iova, umem->length, page_shift));
- MLX5_SET(mkc, mkc, log_page_size, page_shift);
+ get_octo_len(iova, umem->length, mr->page_shift));
+ MLX5_SET(mkc, mkc, log_page_size, mr->page_shift);
if (populate) {
MLX5_SET(create_mkey_in, in, translations_octword_actual_size,
- get_octo_len(iova, umem->length, page_shift));
+ get_octo_len(iova, umem->length, mr->page_shift));
}
err = mlx5_ib_create_mkey(dev, &mr->mmkey, in, inlen);
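For the translation sizes, get_octo_len() counts 16-byte octowords of MTT entries (two 8-byte entries per octword), rounding up and accounting for any partial page implied by the IOVA offset. A rough worked example with assumed values:

	/* page-aligned iova, length = 16k, mr->page_shift = 12 */
	npages   = 4;                  /* 16k / 4k pages */
	octo_len = (npages + 1) / 2;   /* = 2 octowords  */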