summaryrefslogtreecommitdiff
path: root/drivers/infiniband/hw/mlx4/qp.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/infiniband/hw/mlx4/qp.c')
-rw-r--r--drivers/infiniband/hw/mlx4/qp.c202
1 files changed, 130 insertions, 72 deletions
diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
index 40042184ad58..31a480e5b0d0 100644
--- a/drivers/infiniband/hw/mlx4/qp.c
+++ b/drivers/infiniband/hw/mlx4/qp.c
@@ -415,9 +415,11 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
return 0;
err_wrid:
- if (pd->uobject && !init_attr->srq)
- mlx4_ib_db_unmap_user(to_mucontext(pd->uobject->context), &qp->db);
- else {
+ if (pd->uobject) {
+ if (!init_attr->srq)
+ mlx4_ib_db_unmap_user(to_mucontext(pd->uobject->context),
+ &qp->db);
+ } else {
kfree(qp->sq.wrid);
kfree(qp->rq.wrid);
}
@@ -742,7 +744,7 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
if (attr->path_mtu < IB_MTU_256 || attr->path_mtu > IB_MTU_4096) {
printk(KERN_ERR "path MTU (%u) is invalid\n",
attr->path_mtu);
- return -EINVAL;
+ goto out;
}
context->mtu_msgmax = (attr->path_mtu << 5) | 31;
}
@@ -781,10 +783,8 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
if (attr_mask & IB_QP_AV) {
if (mlx4_set_path(dev, &attr->ah_attr, &context->pri_path,
- attr_mask & IB_QP_PORT ? attr->port_num : qp->port)) {
- err = -EINVAL;
+ attr_mask & IB_QP_PORT ? attr->port_num : qp->port))
goto out;
- }
optpar |= (MLX4_QP_OPTPAR_PRIMARY_ADDR_PATH |
MLX4_QP_OPTPAR_SCHED_QUEUE);
@@ -798,15 +798,15 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
if (attr_mask & IB_QP_ALT_PATH) {
if (attr->alt_port_num == 0 ||
attr->alt_port_num > dev->dev->caps.num_ports)
- return -EINVAL;
+ goto out;
if (attr->alt_pkey_index >=
dev->dev->caps.pkey_table_len[attr->alt_port_num])
- return -EINVAL;
+ goto out;
if (mlx4_set_path(dev, &attr->alt_ah_attr, &context->alt_path,
attr->alt_port_num))
- return -EINVAL;
+ goto out;
context->alt_path.pkey_index = attr->alt_pkey_index;
context->alt_path.ackto = attr->alt_timeout << 3;
@@ -1183,12 +1183,86 @@ static int mlx4_wq_overflow(struct mlx4_ib_wq *wq, int nreq, struct ib_cq *ib_cq
return cur + nreq >= wq->max_post;
}
+static __always_inline void set_raddr_seg(struct mlx4_wqe_raddr_seg *rseg,
+ u64 remote_addr, u32 rkey)
+{
+ rseg->raddr = cpu_to_be64(remote_addr);
+ rseg->rkey = cpu_to_be32(rkey);
+ rseg->reserved = 0;
+}
+
+static void set_atomic_seg(struct mlx4_wqe_atomic_seg *aseg, struct ib_send_wr *wr)
+{
+ if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP) {
+ aseg->swap_add = cpu_to_be64(wr->wr.atomic.swap);
+ aseg->compare = cpu_to_be64(wr->wr.atomic.compare_add);
+ } else {
+ aseg->swap_add = cpu_to_be64(wr->wr.atomic.compare_add);
+ aseg->compare = 0;
+ }
+
+}
+
+static void set_datagram_seg(struct mlx4_wqe_datagram_seg *dseg,
+ struct ib_send_wr *wr)
+{
+ memcpy(dseg->av, &to_mah(wr->wr.ud.ah)->av, sizeof (struct mlx4_av));
+ dseg->dqpn = cpu_to_be32(wr->wr.ud.remote_qpn);
+ dseg->qkey = cpu_to_be32(wr->wr.ud.remote_qkey);
+}
+
+static void set_mlx_icrc_seg(void *dseg)
+{
+ u32 *t = dseg;
+ struct mlx4_wqe_inline_seg *iseg = dseg;
+
+ t[1] = 0;
+
+ /*
+ * Need a barrier here before writing the byte_count field to
+ * make sure that all the data is visible before the
+ * byte_count field is set. Otherwise, if the segment begins
+ * a new cacheline, the HCA prefetcher could grab the 64-byte
+ * chunk and get a valid (!= * 0xffffffff) byte count but
+ * stale data, and end up sending the wrong data.
+ */
+ wmb();
+
+ iseg->byte_count = cpu_to_be32((1 << 31) | 4);
+}
+
+static void set_data_seg(struct mlx4_wqe_data_seg *dseg, struct ib_sge *sg)
+{
+ dseg->lkey = cpu_to_be32(sg->lkey);
+ dseg->addr = cpu_to_be64(sg->addr);
+
+ /*
+ * Need a barrier here before writing the byte_count field to
+ * make sure that all the data is visible before the
+ * byte_count field is set. Otherwise, if the segment begins
+ * a new cacheline, the HCA prefetcher could grab the 64-byte
+ * chunk and get a valid (!= * 0xffffffff) byte count but
+ * stale data, and end up sending the wrong data.
+ */
+ wmb();
+
+ dseg->byte_count = cpu_to_be32(sg->length);
+}
+
+static void __set_data_seg(struct mlx4_wqe_data_seg *dseg, struct ib_sge *sg)
+{
+ dseg->byte_count = cpu_to_be32(sg->length);
+ dseg->lkey = cpu_to_be32(sg->lkey);
+ dseg->addr = cpu_to_be64(sg->addr);
+}
+
int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
struct ib_send_wr **bad_wr)
{
struct mlx4_ib_qp *qp = to_mqp(ibqp);
void *wqe;
struct mlx4_wqe_ctrl_seg *ctrl;
+ struct mlx4_wqe_data_seg *dseg;
unsigned long flags;
int nreq;
int err = 0;
@@ -1238,26 +1312,13 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
switch (wr->opcode) {
case IB_WR_ATOMIC_CMP_AND_SWP:
case IB_WR_ATOMIC_FETCH_AND_ADD:
- ((struct mlx4_wqe_raddr_seg *) wqe)->raddr =
- cpu_to_be64(wr->wr.atomic.remote_addr);
- ((struct mlx4_wqe_raddr_seg *) wqe)->rkey =
- cpu_to_be32(wr->wr.atomic.rkey);
- ((struct mlx4_wqe_raddr_seg *) wqe)->reserved = 0;
-
+ set_raddr_seg(wqe, wr->wr.atomic.remote_addr,
+ wr->wr.atomic.rkey);
wqe += sizeof (struct mlx4_wqe_raddr_seg);
- if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP) {
- ((struct mlx4_wqe_atomic_seg *) wqe)->swap_add =
- cpu_to_be64(wr->wr.atomic.swap);
- ((struct mlx4_wqe_atomic_seg *) wqe)->compare =
- cpu_to_be64(wr->wr.atomic.compare_add);
- } else {
- ((struct mlx4_wqe_atomic_seg *) wqe)->swap_add =
- cpu_to_be64(wr->wr.atomic.compare_add);
- ((struct mlx4_wqe_atomic_seg *) wqe)->compare = 0;
- }
-
+ set_atomic_seg(wqe, wr);
wqe += sizeof (struct mlx4_wqe_atomic_seg);
+
size += (sizeof (struct mlx4_wqe_raddr_seg) +
sizeof (struct mlx4_wqe_atomic_seg)) / 16;
@@ -1266,15 +1327,10 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
case IB_WR_RDMA_READ:
case IB_WR_RDMA_WRITE:
case IB_WR_RDMA_WRITE_WITH_IMM:
- ((struct mlx4_wqe_raddr_seg *) wqe)->raddr =
- cpu_to_be64(wr->wr.rdma.remote_addr);
- ((struct mlx4_wqe_raddr_seg *) wqe)->rkey =
- cpu_to_be32(wr->wr.rdma.rkey);
- ((struct mlx4_wqe_raddr_seg *) wqe)->reserved = 0;
-
+ set_raddr_seg(wqe, wr->wr.rdma.remote_addr,
+ wr->wr.rdma.rkey);
wqe += sizeof (struct mlx4_wqe_raddr_seg);
size += sizeof (struct mlx4_wqe_raddr_seg) / 16;
-
break;
default:
@@ -1284,13 +1340,7 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
break;
case IB_QPT_UD:
- memcpy(((struct mlx4_wqe_datagram_seg *) wqe)->av,
- &to_mah(wr->wr.ud.ah)->av, sizeof (struct mlx4_av));
- ((struct mlx4_wqe_datagram_seg *) wqe)->dqpn =
- cpu_to_be32(wr->wr.ud.remote_qpn);
- ((struct mlx4_wqe_datagram_seg *) wqe)->qkey =
- cpu_to_be32(wr->wr.ud.remote_qkey);
-
+ set_datagram_seg(wqe, wr);
wqe += sizeof (struct mlx4_wqe_datagram_seg);
size += sizeof (struct mlx4_wqe_datagram_seg) / 16;
break;
@@ -1312,27 +1362,27 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
break;
}
- for (i = 0; i < wr->num_sge; ++i) {
- ((struct mlx4_wqe_data_seg *) wqe)->byte_count =
- cpu_to_be32(wr->sg_list[i].length);
- ((struct mlx4_wqe_data_seg *) wqe)->lkey =
- cpu_to_be32(wr->sg_list[i].lkey);
- ((struct mlx4_wqe_data_seg *) wqe)->addr =
- cpu_to_be64(wr->sg_list[i].addr);
+ /*
+ * Write data segments in reverse order, so as to
+ * overwrite cacheline stamp last within each
+ * cacheline. This avoids issues with WQE
+ * prefetching.
+ */
- wqe += sizeof (struct mlx4_wqe_data_seg);
- size += sizeof (struct mlx4_wqe_data_seg) / 16;
- }
+ dseg = wqe;
+ dseg += wr->num_sge - 1;
+ size += wr->num_sge * (sizeof (struct mlx4_wqe_data_seg) / 16);
/* Add one more inline data segment for ICRC for MLX sends */
- if (qp->ibqp.qp_type == IB_QPT_SMI || qp->ibqp.qp_type == IB_QPT_GSI) {
- ((struct mlx4_wqe_inline_seg *) wqe)->byte_count =
- cpu_to_be32((1 << 31) | 4);
- ((u32 *) wqe)[1] = 0;
- wqe += sizeof (struct mlx4_wqe_data_seg);
+ if (unlikely(qp->ibqp.qp_type == IB_QPT_SMI ||
+ qp->ibqp.qp_type == IB_QPT_GSI)) {
+ set_mlx_icrc_seg(dseg + 1);
size += sizeof (struct mlx4_wqe_data_seg) / 16;
}
+ for (i = wr->num_sge - 1; i >= 0; --i, --dseg)
+ set_data_seg(dseg, wr->sg_list + i);
+
ctrl->fence_size = (wr->send_flags & IB_SEND_FENCE ?
MLX4_WQE_CTRL_FENCE : 0) | size;
@@ -1421,11 +1471,8 @@ int mlx4_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
scat = get_recv_wqe(qp, ind);
- for (i = 0; i < wr->num_sge; ++i) {
- scat[i].byte_count = cpu_to_be32(wr->sg_list[i].length);
- scat[i].lkey = cpu_to_be32(wr->sg_list[i].lkey);
- scat[i].addr = cpu_to_be64(wr->sg_list[i].addr);
- }
+ for (i = 0; i < wr->num_sge; ++i)
+ __set_data_seg(scat + i, wr->sg_list + i);
if (i < qp->rq.max_gs) {
scat[i].byte_count = 0;
@@ -1498,7 +1545,7 @@ static int to_ib_qp_access_flags(int mlx4_flags)
static void to_ib_ah_attr(struct mlx4_dev *dev, struct ib_ah_attr *ib_ah_attr,
struct mlx4_qp_path *path)
{
- memset(ib_ah_attr, 0, sizeof *path);
+ memset(ib_ah_attr, 0, sizeof *ib_ah_attr);
ib_ah_attr->port_num = path->sched_queue & 0x40 ? 2 : 1;
if (ib_ah_attr->port_num == 0 || ib_ah_attr->port_num > dev->caps.num_ports)
@@ -1515,7 +1562,7 @@ static void to_ib_ah_attr(struct mlx4_dev *dev, struct ib_ah_attr *ib_ah_attr,
ib_ah_attr->grh.traffic_class =
(be32_to_cpu(path->tclass_flowlabel) >> 20) & 0xff;
ib_ah_attr->grh.flow_label =
- be32_to_cpu(path->tclass_flowlabel) & 0xffffff;
+ be32_to_cpu(path->tclass_flowlabel) & 0xfffff;
memcpy(ib_ah_attr->grh.dgid.raw,
path->rgid, sizeof ib_ah_attr->grh.dgid.raw);
}
@@ -1560,7 +1607,10 @@ int mlx4_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr
}
qp_attr->pkey_index = context.pri_path.pkey_index & 0x7f;
- qp_attr->port_num = context.pri_path.sched_queue & 0x40 ? 2 : 1;
+ if (qp_attr->qp_state == IB_QPS_INIT)
+ qp_attr->port_num = qp->port;
+ else
+ qp_attr->port_num = context.pri_path.sched_queue & 0x40 ? 2 : 1;
/* qp_attr->en_sqd_async_notify is only applicable in modify qp */
qp_attr->sq_draining = mlx4_state == MLX4_QP_STATE_SQ_DRAINING;
@@ -1578,17 +1628,25 @@ int mlx4_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr
done:
qp_attr->cur_qp_state = qp_attr->qp_state;
+ qp_attr->cap.max_recv_wr = qp->rq.wqe_cnt;
+ qp_attr->cap.max_recv_sge = qp->rq.max_gs;
+
if (!ibqp->uobject) {
- qp_attr->cap.max_send_wr = qp->sq.wqe_cnt;
- qp_attr->cap.max_recv_wr = qp->rq.wqe_cnt;
- qp_attr->cap.max_send_sge = qp->sq.max_gs;
- qp_attr->cap.max_recv_sge = qp->rq.max_gs;
- qp_attr->cap.max_inline_data = (1 << qp->sq.wqe_shift) -
- send_wqe_overhead(qp->ibqp.qp_type) -
- sizeof (struct mlx4_wqe_inline_seg);
- qp_init_attr->cap = qp_attr->cap;
+ qp_attr->cap.max_send_wr = qp->sq.wqe_cnt;
+ qp_attr->cap.max_send_sge = qp->sq.max_gs;
+ } else {
+ qp_attr->cap.max_send_wr = 0;
+ qp_attr->cap.max_send_sge = 0;
}
+ /*
+ * We don't support inline sends for kernel QPs (yet), and we
+ * don't know what userspace's value should be.
+ */
+ qp_attr->cap.max_inline_data = 0;
+
+ qp_init_attr->cap = qp_attr->cap;
+
return 0;
}