Diffstat (limited to 'drivers/infiniband/hw/mlx5/odp.c')
-rw-r--r--  drivers/infiniband/hw/mlx5/odp.c | 188
1 file changed, 139 insertions(+), 49 deletions(-)
diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c
index 01e0f6200631..335fd0c6ea2a 100644
--- a/drivers/infiniband/hw/mlx5/odp.c
+++ b/drivers/infiniband/hw/mlx5/odp.c
@@ -101,9 +101,9 @@ static int check_parent(struct ib_umem_odp *odp,
return mr && mr->parent == parent && !odp->dying;
}
-struct ib_ucontext_per_mm *mr_to_per_mm(struct mlx5_ib_mr *mr)
+static struct ib_ucontext_per_mm *mr_to_per_mm(struct mlx5_ib_mr *mr)
{
- if (WARN_ON(!mr || !mr->umem || !mr->umem->is_odp))
+ if (WARN_ON(!mr || !is_odp_mr(mr)))
return NULL;
return to_ib_umem_odp(mr->umem)->per_mm;
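
Note: this hunk replaces the open-coded "mr->umem && mr->umem->is_odp"
test with the is_odp_mr() helper used throughout the rest of the patch.
For reference, the helper in mlx5_ib.h amounts to the following
(paraphrased, not part of this diff):

static inline bool is_odp_mr(struct mlx5_ib_mr *mr)
{
	return IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING) && mr->umem &&
	       mr->umem->is_odp;
}
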
@@ -315,6 +315,9 @@ void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev)
if (MLX5_CAP_ODP(dev->mdev, ud_odp_caps.send))
caps->per_transport_caps.ud_odp_caps |= IB_ODP_SUPPORT_SEND;
+ if (MLX5_CAP_ODP(dev->mdev, ud_odp_caps.srq_receive))
+ caps->per_transport_caps.ud_odp_caps |= IB_ODP_SUPPORT_SRQ_RECV;
+
if (MLX5_CAP_ODP(dev->mdev, rc_odp_caps.send))
caps->per_transport_caps.rc_odp_caps |= IB_ODP_SUPPORT_SEND;
@@ -330,6 +333,27 @@ void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev)
if (MLX5_CAP_ODP(dev->mdev, rc_odp_caps.atomic))
caps->per_transport_caps.rc_odp_caps |= IB_ODP_SUPPORT_ATOMIC;
+ if (MLX5_CAP_ODP(dev->mdev, rc_odp_caps.srq_receive))
+ caps->per_transport_caps.rc_odp_caps |= IB_ODP_SUPPORT_SRQ_RECV;
+
+ if (MLX5_CAP_ODP(dev->mdev, xrc_odp_caps.send))
+ caps->per_transport_caps.xrc_odp_caps |= IB_ODP_SUPPORT_SEND;
+
+ if (MLX5_CAP_ODP(dev->mdev, xrc_odp_caps.receive))
+ caps->per_transport_caps.xrc_odp_caps |= IB_ODP_SUPPORT_RECV;
+
+ if (MLX5_CAP_ODP(dev->mdev, xrc_odp_caps.write))
+ caps->per_transport_caps.xrc_odp_caps |= IB_ODP_SUPPORT_WRITE;
+
+ if (MLX5_CAP_ODP(dev->mdev, xrc_odp_caps.read))
+ caps->per_transport_caps.xrc_odp_caps |= IB_ODP_SUPPORT_READ;
+
+ if (MLX5_CAP_ODP(dev->mdev, xrc_odp_caps.atomic))
+ caps->per_transport_caps.xrc_odp_caps |= IB_ODP_SUPPORT_ATOMIC;
+
+ if (MLX5_CAP_ODP(dev->mdev, xrc_odp_caps.srq_receive))
+ caps->per_transport_caps.xrc_odp_caps |= IB_ODP_SUPPORT_SRQ_RECV;
+
if (MLX5_CAP_GEN(dev->mdev, fixed_buffer_size) &&
MLX5_CAP_GEN(dev->mdev, null_mkey) &&
MLX5_CAP_GEN(dev->mdev, umr_extended_translation_offset))
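
Note: user space discovers the new SRQ_RECV and XRC bits through the
extended device query. A minimal check, assuming a libibverbs recent
enough to define IBV_ODP_SUPPORT_SRQ_RECV, might look like:

#include <infiniband/verbs.h>

/* Return non-zero if the device advertises ODP SRQ receive on RC,
 * mirroring the IB_ODP_SUPPORT_SRQ_RECV bit set in the hunk above. */
static int rc_srq_recv_odp_supported(struct ibv_context *ctx)
{
	struct ibv_device_attr_ex attr;

	if (ibv_query_device_ex(ctx, NULL, &attr))
		return 0;
	return !!(attr.odp_caps.per_transport_caps.rc_odp_caps &
		  IBV_ODP_SUPPORT_SRQ_RECV);
}
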
@@ -439,7 +463,7 @@ next_mr:
if (nentries)
nentries++;
} else {
- odp = ib_alloc_odp_umem(odp_mr->per_mm, addr,
+ odp = ib_alloc_odp_umem(odp_mr, addr,
MLX5_IMR_MTT_SIZE);
if (IS_ERR(odp)) {
mutex_unlock(&odp_mr->umem_mutex);
@@ -492,13 +516,13 @@ next_mr:
}
struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd,
+ struct ib_udata *udata,
int access_flags)
{
- struct ib_ucontext *ctx = pd->ibpd.uobject->context;
struct mlx5_ib_mr *imr;
struct ib_umem *umem;
- umem = ib_umem_get(ctx, 0, 0, IB_ACCESS_ON_DEMAND, 0);
+ umem = ib_umem_get(udata, 0, 0, IB_ACCESS_ON_DEMAND, 0);
if (IS_ERR(umem))
return ERR_CAST(umem);
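
Note: the ib_umem_get() call now resolves the ucontext from the udata
instead of dereferencing pd->ibpd.uobject->context. The signature being
called here, paraphrased from the call site rather than copied from the
header, is:

struct ib_umem *ib_umem_get(struct ib_udata *udata, unsigned long addr,
			    size_t size, int access, int dmasync);
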
@@ -685,6 +709,21 @@ struct pf_frame {
int depth;
};
+static int get_indirect_num_descs(struct mlx5_core_mkey *mmkey)
+{
+ struct mlx5_ib_mw *mw;
+ struct mlx5_ib_devx_mr *devx_mr;
+
+ if (mmkey->type == MLX5_MKEY_MW) {
+ mw = container_of(mmkey, struct mlx5_ib_mw, mmkey);
+ return mw->ndescs;
+ }
+
+ devx_mr = container_of(mmkey, struct mlx5_ib_devx_mr,
+ mmkey);
+ return devx_mr->ndescs;
+}
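
Note: get_indirect_num_descs() uses container_of() to recover the
wrapping mlx5_ib_mw or mlx5_ib_devx_mr from the embedded mmkey. A
stand-alone demo of the idiom, with invented struct names and a
simplified macro (the kernel's version adds type checking):

#include <stddef.h>
#include <stdio.h>

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct mkey    { int id; };
struct wrapper { int ndescs; struct mkey mmkey; };

int main(void)
{
	struct wrapper w = { .ndescs = 4 };
	struct mkey *k = &w.mmkey;

	/* Walk back from the member to the enclosing object. */
	printf("%d\n", container_of(k, struct wrapper, mmkey)->ndescs);
	return 0;
}
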
+
/*
* Handle a single data segment in a page-fault WQE or RDMA region.
*
@@ -705,11 +744,11 @@ static int pagefault_single_data_segment(struct mlx5_ib_dev *dev, u32 key,
bool prefetch = flags & MLX5_PF_FLAGS_PREFETCH;
struct pf_frame *head = NULL, *frame;
struct mlx5_core_mkey *mmkey;
- struct mlx5_ib_mw *mw;
struct mlx5_ib_mr *mr;
struct mlx5_klm *pklm;
u32 *out = NULL;
size_t offset;
+ int ndescs;
srcu_key = srcu_read_lock(&dev->mr_srcu);
@@ -739,12 +778,12 @@ next_mr:
goto srcu_unlock;
}
- if (prefetch && !mr->umem->is_odp) {
+ if (prefetch && !is_odp_mr(mr)) {
ret = -EINVAL;
goto srcu_unlock;
}
- if (!mr->umem->is_odp) {
+ if (!is_odp_mr(mr)) {
mlx5_ib_dbg(dev, "skipping non ODP MR (lkey=0x%06x) in page fault handler.\n",
key);
if (bytes_mapped)
@@ -762,7 +801,8 @@ next_mr:
break;
case MLX5_MKEY_MW:
- mw = container_of(mmkey, struct mlx5_ib_mw, mmkey);
+ case MLX5_MKEY_INDIRECT_DEVX:
+ ndescs = get_indirect_num_descs(mmkey);
if (depth >= MLX5_CAP_GEN(dev->mdev, max_indirection)) {
mlx5_ib_dbg(dev, "indirection level exceeded\n");
@@ -771,7 +811,7 @@ next_mr:
}
outlen = MLX5_ST_SZ_BYTES(query_mkey_out) +
- sizeof(*pklm) * (mw->ndescs - 2);
+ sizeof(*pklm) * (ndescs - 2);
if (outlen > cur_outlen) {
kfree(out);
@@ -786,14 +826,14 @@ next_mr:
pklm = (struct mlx5_klm *)MLX5_ADDR_OF(query_mkey_out, out,
bsf0_klm0_pas_mtt0_1);
- ret = mlx5_core_query_mkey(dev->mdev, &mw->mmkey, out, outlen);
+ ret = mlx5_core_query_mkey(dev->mdev, mmkey, out, outlen);
if (ret)
goto srcu_unlock;
offset = io_virt - MLX5_GET64(query_mkey_out, out,
memory_key_mkey_entry.start_addr);
- for (i = 0; bcnt && i < mw->ndescs; i++, pklm++) {
+ for (i = 0; bcnt && i < ndescs; i++, pklm++) {
if (offset >= be32_to_cpu(pklm->bcount)) {
offset -= be32_to_cpu(pklm->bcount);
continue;
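
Note: the loop above walks the KLM descriptor list, skipping whole
entries until the faulting offset lands inside one. The same
skip-then-consume walk in isolation, with invented types and without
the page-fault call:

#include <stdio.h>

struct klm { unsigned int bcount; };	/* bytes covered per descriptor */

/* Find the descriptor containing *offset and make the offset relative
 * to it; return -1 when the range is exhausted. */
static int find_desc(const struct klm *pklm, int ndescs,
		     unsigned long *offset)
{
	for (int i = 0; i < ndescs; i++) {
		if (*offset < pklm[i].bcount)
			return i;
		*offset -= pklm[i].bcount;
	}
	return -1;
}

int main(void)
{
	struct klm descs[] = { { 4096 }, { 8192 }, { 4096 } };
	unsigned long off = 5000;

	printf("desc %d, offset %lu\n", find_desc(descs, 3, &off), off);
	return 0;	/* prints: desc 1, offset 904 */
}
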
@@ -853,7 +893,6 @@ srcu_unlock:
/**
* Parse a series of data segments for page fault handling.
*
- * @qp the QP on which the fault occurred.
* @pfault contains page fault information.
* @wqe points at the first data segment in the WQE.
* @wqe_end points after the end of the WQE.
@@ -870,7 +909,7 @@ srcu_unlock:
*/
static int pagefault_data_segments(struct mlx5_ib_dev *dev,
struct mlx5_pagefault *pfault,
- struct mlx5_ib_qp *qp, void *wqe,
+ void *wqe,
void *wqe_end, u32 *bytes_mapped,
u32 *total_wqe_bytes, int receive_queue)
{
@@ -881,10 +920,6 @@ static int pagefault_data_segments(struct mlx5_ib_dev *dev,
size_t bcnt;
int inline_segment;
- /* Skip SRQ next-WQE segment. */
- if (receive_queue && qp->ibqp.srq)
- wqe += sizeof(struct mlx5_wqe_srq_next_seg);
-
if (bytes_mapped)
*bytes_mapped = 0;
if (total_wqe_bytes)
@@ -1009,6 +1044,10 @@ static int mlx5_ib_mr_initiator_pfault_handler(
MLX5_WQE_CTRL_OPCODE_MASK;
switch (qp->ibqp.qp_type) {
+ case IB_QPT_XRC_INI:
+ *wqe += sizeof(struct mlx5_wqe_xrc_seg);
+ transport_caps = dev->odp_caps.per_transport_caps.xrc_odp_caps;
+ break;
case IB_QPT_RC:
transport_caps = dev->odp_caps.per_transport_caps.rc_odp_caps;
break;
@@ -1028,7 +1067,7 @@ static int mlx5_ib_mr_initiator_pfault_handler(
return -EFAULT;
}
- if (qp->ibqp.qp_type != IB_QPT_RC) {
+ if (qp->ibqp.qp_type == IB_QPT_UD) {
av = *wqe;
if (av->dqp_dct & cpu_to_be32(MLX5_EXTENDED_UD_AV))
*wqe += sizeof(struct mlx5_av);
@@ -1053,21 +1092,34 @@ static int mlx5_ib_mr_initiator_pfault_handler(
}
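
Note: XRC initiator WQEs carry an extra XRC segment between the control
segment and the data segments, which is what the new IB_QPT_XRC_INI case
steps over before falling through to the common parsing. The same skip
in isolation, with simplified stand-in layouts (only the sizes matter
here, not the field names):

#include <stdint.h>

struct ctrl_seg { uint32_t opmod_idx_opcode; uint32_t qpn_ds; uint32_t rsvd[2]; };
struct xrc_seg  { uint32_t xrc_srqn; uint32_t rsvd; };

static void *skip_to_data(void *wqe, int is_xrc_ini)
{
	char *p = (char *)wqe + sizeof(struct ctrl_seg);

	if (is_xrc_ini)
		p += sizeof(struct xrc_seg);
	return p;
}
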
/*
- * Parse responder WQE. Advances the wqe pointer to point at the
- * scatter-gather list, and set wqe_end to the end of the WQE.
+ * Parse responder WQE and set wqe_end to the end of the WQE.
*/
-static int mlx5_ib_mr_responder_pfault_handler(
- struct mlx5_ib_dev *dev, struct mlx5_pagefault *pfault,
- struct mlx5_ib_qp *qp, void **wqe, void **wqe_end, int wqe_length)
+static int mlx5_ib_mr_responder_pfault_handler_srq(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_srq *srq,
+ void **wqe, void **wqe_end,
+ int wqe_length)
{
- struct mlx5_ib_wq *wq = &qp->rq;
- int wqe_size = 1 << wq->wqe_shift;
+ int wqe_size = 1 << srq->msrq.wqe_shift;
- if (qp->ibqp.srq) {
- mlx5_ib_err(dev, "ODP fault on SRQ is not supported\n");
+ if (wqe_size > wqe_length) {
+ mlx5_ib_err(dev, "Couldn't read all of the receive WQE's content\n");
return -EFAULT;
}
+ *wqe_end = *wqe + wqe_size;
+ *wqe += sizeof(struct mlx5_wqe_srq_next_seg);
+
+ return 0;
+}
+
+static int mlx5_ib_mr_responder_pfault_handler_rq(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_qp *qp,
+ void *wqe, void **wqe_end,
+ int wqe_length)
+{
+ struct mlx5_ib_wq *wq = &qp->rq;
+ int wqe_size = 1 << wq->wqe_shift;
+
if (qp->wq_sig) {
mlx5_ib_err(dev, "ODP fault with WQE signatures is not supported\n");
return -EFAULT;
@@ -1091,7 +1143,7 @@ invalid_transport_or_opcode:
return -EFAULT;
}
- *wqe_end = *wqe + wqe_size;
+ *wqe_end = wqe + wqe_size;
return 0;
}
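
Note: the SRQ variant both bounds the WQE and advances past the
next-WQE segment that heads every SRQ receive WQE, which is why the
generic skip could be deleted from pagefault_data_segments() above.
The same logic in isolation, with a simplified segment type:

#include <stdint.h>

struct srq_next_seg { uint16_t rsvd0; uint16_t next_wqe_index; uint32_t rsvd[3]; };

static int parse_srq_wqe(void **wqe, void **wqe_end,
			 int wqe_size, int bytes_copied)
{
	if (wqe_size > bytes_copied)
		return -1;	/* short read of the WQE: fail the fault */
	*wqe_end = (char *)*wqe + wqe_size;
	*wqe = (char *)*wqe + sizeof(struct srq_next_seg);
	return 0;
}
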
@@ -1099,22 +1151,25 @@ invalid_transport_or_opcode:
static inline struct mlx5_core_rsc_common *odp_get_rsc(struct mlx5_ib_dev *dev,
u32 wq_num, int pf_type)
{
- enum mlx5_res_type res_type;
+ struct mlx5_core_rsc_common *common = NULL;
+ struct mlx5_core_srq *srq;
switch (pf_type) {
case MLX5_WQE_PF_TYPE_RMP:
- res_type = MLX5_RES_SRQ;
+ srq = mlx5_cmd_get_srq(dev, wq_num);
+ if (srq)
+ common = &srq->common;
break;
case MLX5_WQE_PF_TYPE_REQ_SEND_OR_WRITE:
case MLX5_WQE_PF_TYPE_RESP:
case MLX5_WQE_PF_TYPE_REQ_READ_OR_ATOMIC:
- res_type = MLX5_RES_QP;
+ common = mlx5_core_res_hold(dev->mdev, wq_num, MLX5_RES_QP);
break;
default:
- return NULL;
+ break;
}
- return mlx5_core_res_hold(dev->mdev, wq_num, res_type);
+ return common;
}
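
Note: both lookup paths return the resource with an extra reference
held, which the fault handler drops when it is done; only how the
reference is taken differs (mlx5_cmd_get_srq() for SRQs,
mlx5_core_res_hold() for QPs). A user-space toy of that hold/put
contract, not the mlx5 helpers:

#include <stdatomic.h>
#include <stdlib.h>

struct res { atomic_int refcount; };

static struct res *res_hold(struct res *r)
{
	if (r)
		atomic_fetch_add(&r->refcount, 1);
	return r;
}

static void res_put(struct res *r)
{
	/* Dropping the last reference frees the object. */
	if (r && atomic_fetch_sub(&r->refcount, 1) == 1)
		free(r);
}
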
static inline struct mlx5_ib_qp *res_to_qp(struct mlx5_core_rsc_common *res)
@@ -1124,6 +1179,14 @@ static inline struct mlx5_ib_qp *res_to_qp(struct mlx5_core_rsc_common *res)
return to_mibqp(mqp);
}
+static inline struct mlx5_ib_srq *res_to_srq(struct mlx5_core_rsc_common *res)
+{
+ struct mlx5_core_srq *msrq =
+ container_of(res, struct mlx5_core_srq, common);
+
+ return to_mibsrq(msrq);
+}
+
static void mlx5_ib_mr_wqe_pfault_handler(struct mlx5_ib_dev *dev,
struct mlx5_pagefault *pfault)
{
@@ -1134,8 +1197,10 @@ static void mlx5_ib_mr_wqe_pfault_handler(struct mlx5_ib_dev *dev,
int resume_with_error = 1;
u16 wqe_index = pfault->wqe.wqe_index;
int requestor = pfault->type & MLX5_PFAULT_REQUESTOR;
- struct mlx5_core_rsc_common *res;
- struct mlx5_ib_qp *qp;
+ struct mlx5_core_rsc_common *res = NULL;
+ struct mlx5_ib_qp *qp = NULL;
+ struct mlx5_ib_srq *srq = NULL;
+ size_t bytes_copied;
res = odp_get_rsc(dev, pfault->wqe.wq_num, pfault->type);
if (!res) {
@@ -1147,6 +1212,10 @@ static void mlx5_ib_mr_wqe_pfault_handler(struct mlx5_ib_dev *dev,
case MLX5_RES_QP:
qp = res_to_qp(res);
break;
+ case MLX5_RES_SRQ:
+ case MLX5_RES_XSRQ:
+ srq = res_to_srq(res);
+ break;
default:
mlx5_ib_err(dev, "wqe page fault for unsupported type %d\n", pfault->type);
goto resolve_page_fault;
@@ -1158,9 +1227,23 @@ static void mlx5_ib_mr_wqe_pfault_handler(struct mlx5_ib_dev *dev,
goto resolve_page_fault;
}
- ret = mlx5_ib_read_user_wqe(qp, requestor, wqe_index, buffer,
- PAGE_SIZE, &qp->trans_qp.base);
- if (ret < 0) {
+ if (qp) {
+ if (requestor) {
+ ret = mlx5_ib_read_user_wqe_sq(qp, wqe_index,
+ buffer, PAGE_SIZE,
+ &bytes_copied);
+ } else {
+ ret = mlx5_ib_read_user_wqe_rq(qp, wqe_index,
+ buffer, PAGE_SIZE,
+ &bytes_copied);
+ }
+ } else {
+ ret = mlx5_ib_read_user_wqe_srq(srq, wqe_index,
+ buffer, PAGE_SIZE,
+ &bytes_copied);
+ }
+
+ if (ret) {
mlx5_ib_err(dev, "Failed reading a WQE following page fault, error=%d, wqe_index=%x, qpn=%x\n",
ret, wqe_index, pfault->token);
goto resolve_page_fault;
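
Note: the single mlx5_ib_read_user_wqe() is split here into _sq/_rq/_srq
variants that return a status code and hand the length back through
bytes_copied, so a short read can no longer be mistaken for an error
(hence the check changing from "ret < 0" to "ret"). A toy model of that
contract:

#include <errno.h>
#include <stddef.h>
#include <string.h>

static int read_wqe(const void *src, size_t srclen,
		    void *dst, size_t dstlen, size_t *bytes_copied)
{
	if (!src || !dst || !bytes_copied)
		return -EINVAL;
	/* Status in the return value, length via the out-parameter. */
	*bytes_copied = srclen < dstlen ? srclen : dstlen;
	memcpy(dst, src, *bytes_copied);
	return 0;
}
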
@@ -1168,11 +1251,18 @@ static void mlx5_ib_mr_wqe_pfault_handler(struct mlx5_ib_dev *dev,
wqe = buffer;
if (requestor)
- ret = mlx5_ib_mr_initiator_pfault_handler(dev, pfault, qp, &wqe,
- &wqe_end, ret);
+ ret = mlx5_ib_mr_initiator_pfault_handler(dev, pfault, qp,
+ &wqe, &wqe_end,
+ bytes_copied);
+ else if (qp)
+ ret = mlx5_ib_mr_responder_pfault_handler_rq(dev, qp,
+ wqe, &wqe_end,
+ bytes_copied);
else
- ret = mlx5_ib_mr_responder_pfault_handler(dev, pfault, qp, &wqe,
- &wqe_end, ret);
+ ret = mlx5_ib_mr_responder_pfault_handler_srq(dev, srq,
+ &wqe, &wqe_end,
+ bytes_copied);
+
if (ret < 0)
goto resolve_page_fault;
@@ -1181,7 +1271,7 @@ static void mlx5_ib_mr_wqe_pfault_handler(struct mlx5_ib_dev *dev,
goto resolve_page_fault;
}
- ret = pagefault_data_segments(dev, pfault, qp, wqe, wqe_end,
+ ret = pagefault_data_segments(dev, pfault, wqe, wqe_end,
&bytes_mapped, &total_wqe_bytes,
!requestor);
if (ret == -EAGAIN) {
@@ -1595,10 +1685,12 @@ static void mlx5_ib_prefetch_mr_work(struct work_struct *work)
struct prefetch_mr_work *w =
container_of(work, struct prefetch_mr_work, work);
- if (w->dev->ib_dev.reg_state == IB_DEV_REGISTERED)
+ if (ib_device_try_get(&w->dev->ib_dev)) {
mlx5_ib_prefetch_sg_list(w->dev, w->pf_flags, w->sg_list,
w->num_sge);
-
+ ib_device_put(&w->dev->ib_dev);
+ }
+ put_device(&w->dev->ib_dev.dev);
kfree(w);
}
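
Note: instead of peeking at reg_state, the work handler now pins the
ib_device with ib_device_try_get()/ib_device_put() and, pairing with
the get_device() taken at queue time below, drops the memory pin with
put_device() whether or not the try-get succeeded. A user-space
analogue of the try-get half:

#include <stdatomic.h>
#include <stdbool.h>

struct dev_model { atomic_int refs; };	/* 0 means unregistering */

/* Take a reference only while the count is still non-zero, i.e. ask
 * "is the device still registered?" and keep it that way if so. */
static bool dev_try_get(struct dev_model *d)
{
	int r = atomic_load(&d->refs);

	while (r > 0) {
		if (atomic_compare_exchange_weak(&d->refs, &r, r + 1))
			return true;
	}
	return false;
}
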
@@ -1617,15 +1709,13 @@ int mlx5_ib_advise_mr_prefetch(struct ib_pd *pd,
return mlx5_ib_prefetch_sg_list(dev, pf_flags, sg_list,
num_sge);
- if (dev->ib_dev.reg_state != IB_DEV_REGISTERED)
- return -ENODEV;
-
work = kvzalloc(struct_size(work, sg_list, num_sge), GFP_KERNEL);
if (!work)
return -ENOMEM;
memcpy(work->sg_list, sg_list, num_sge * sizeof(struct ib_sge));
+ get_device(&dev->ib_dev.dev);
work->dev = dev;
work->pf_flags = pf_flags;
work->num_sge = num_sge;