Diffstat (limited to 'drivers/infiniband/hw/mlx4/qp.c')
-rw-r--r-- | drivers/infiniband/hw/mlx4/qp.c | 202
1 file changed, 130 insertions, 72 deletions
diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
index 40042184ad58..31a480e5b0d0 100644
--- a/drivers/infiniband/hw/mlx4/qp.c
+++ b/drivers/infiniband/hw/mlx4/qp.c
@@ -415,9 +415,11 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
 	return 0;
 
 err_wrid:
-	if (pd->uobject && !init_attr->srq)
-		mlx4_ib_db_unmap_user(to_mucontext(pd->uobject->context), &qp->db);
-	else {
+	if (pd->uobject) {
+		if (!init_attr->srq)
+			mlx4_ib_db_unmap_user(to_mucontext(pd->uobject->context),
+					      &qp->db);
+	} else {
 		kfree(qp->sq.wrid);
 		kfree(qp->rq.wrid);
 	}
@@ -742,7 +744,7 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
 		if (attr->path_mtu < IB_MTU_256 || attr->path_mtu > IB_MTU_4096) {
 			printk(KERN_ERR "path MTU (%u) is invalid\n",
 			       attr->path_mtu);
-			return -EINVAL;
+			goto out;
 		}
 		context->mtu_msgmax = (attr->path_mtu << 5) | 31;
 	}
@@ -781,10 +783,8 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
 
 	if (attr_mask & IB_QP_AV) {
 		if (mlx4_set_path(dev, &attr->ah_attr, &context->pri_path,
-				  attr_mask & IB_QP_PORT ? attr->port_num : qp->port)) {
-			err = -EINVAL;
+				  attr_mask & IB_QP_PORT ? attr->port_num : qp->port))
 			goto out;
-		}
 
 		optpar |= (MLX4_QP_OPTPAR_PRIMARY_ADDR_PATH |
 			   MLX4_QP_OPTPAR_SCHED_QUEUE);
@@ -798,15 +798,15 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
 	if (attr_mask & IB_QP_ALT_PATH) {
 		if (attr->alt_port_num == 0 ||
 		    attr->alt_port_num > dev->dev->caps.num_ports)
-			return -EINVAL;
+			goto out;
 
 		if (attr->alt_pkey_index >=
 		    dev->dev->caps.pkey_table_len[attr->alt_port_num])
-			return -EINVAL;
+			goto out;
 
 		if (mlx4_set_path(dev, &attr->alt_ah_attr, &context->alt_path,
 				  attr->alt_port_num))
-			return -EINVAL;
+			goto out;
 
 		context->alt_path.pkey_index = attr->alt_pkey_index;
 		context->alt_path.ackto = attr->alt_timeout << 3;
@@ -1183,12 +1183,86 @@ static int mlx4_wq_overflow(struct mlx4_ib_wq *wq, int nreq, struct ib_cq *ib_cq
 	return cur + nreq >= wq->max_post;
 }
 
+static __always_inline void set_raddr_seg(struct mlx4_wqe_raddr_seg *rseg,
+					  u64 remote_addr, u32 rkey)
+{
+	rseg->raddr    = cpu_to_be64(remote_addr);
+	rseg->rkey     = cpu_to_be32(rkey);
+	rseg->reserved = 0;
+}
+
+static void set_atomic_seg(struct mlx4_wqe_atomic_seg *aseg, struct ib_send_wr *wr)
+{
+	if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP) {
+		aseg->swap_add = cpu_to_be64(wr->wr.atomic.swap);
+		aseg->compare  = cpu_to_be64(wr->wr.atomic.compare_add);
+	} else {
+		aseg->swap_add = cpu_to_be64(wr->wr.atomic.compare_add);
+		aseg->compare  = 0;
+	}
+
+}
+
+static void set_datagram_seg(struct mlx4_wqe_datagram_seg *dseg,
+			     struct ib_send_wr *wr)
+{
+	memcpy(dseg->av, &to_mah(wr->wr.ud.ah)->av, sizeof (struct mlx4_av));
+	dseg->dqpn = cpu_to_be32(wr->wr.ud.remote_qpn);
+	dseg->qkey = cpu_to_be32(wr->wr.ud.remote_qkey);
+}
+
+static void set_mlx_icrc_seg(void *dseg)
+{
+	u32 *t = dseg;
+	struct mlx4_wqe_inline_seg *iseg = dseg;
+
+	t[1] = 0;
+
+	/*
+	 * Need a barrier here before writing the byte_count field to
+	 * make sure that all the data is visible before the
+	 * byte_count field is set.  Otherwise, if the segment begins
+	 * a new cacheline, the HCA prefetcher could grab the 64-byte
+	 * chunk and get a valid (!= * 0xffffffff) byte count but
+	 * stale data, and end up sending the wrong data.
+	 */
+	wmb();
+
+	iseg->byte_count = cpu_to_be32((1 << 31) | 4);
+}
+
+static void set_data_seg(struct mlx4_wqe_data_seg *dseg, struct ib_sge *sg)
+{
+	dseg->lkey       = cpu_to_be32(sg->lkey);
+	dseg->addr       = cpu_to_be64(sg->addr);
+
+	/*
+	 * Need a barrier here before writing the byte_count field to
+	 * make sure that all the data is visible before the
+	 * byte_count field is set.  Otherwise, if the segment begins
+	 * a new cacheline, the HCA prefetcher could grab the 64-byte
+	 * chunk and get a valid (!= * 0xffffffff) byte count but
+	 * stale data, and end up sending the wrong data.
+	 */
+	wmb();
+
+	dseg->byte_count = cpu_to_be32(sg->length);
+}
+
+static void __set_data_seg(struct mlx4_wqe_data_seg *dseg, struct ib_sge *sg)
+{
+	dseg->byte_count = cpu_to_be32(sg->length);
+	dseg->lkey       = cpu_to_be32(sg->lkey);
+	dseg->addr       = cpu_to_be64(sg->addr);
+}
+
 int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 		      struct ib_send_wr **bad_wr)
 {
 	struct mlx4_ib_qp *qp = to_mqp(ibqp);
 	void *wqe;
 	struct mlx4_wqe_ctrl_seg *ctrl;
+	struct mlx4_wqe_data_seg *dseg;
 	unsigned long flags;
 	int nreq;
 	int err = 0;
@@ -1238,26 +1312,13 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 			switch (wr->opcode) {
 			case IB_WR_ATOMIC_CMP_AND_SWP:
 			case IB_WR_ATOMIC_FETCH_AND_ADD:
-				((struct mlx4_wqe_raddr_seg *) wqe)->raddr =
-					cpu_to_be64(wr->wr.atomic.remote_addr);
-				((struct mlx4_wqe_raddr_seg *) wqe)->rkey =
-					cpu_to_be32(wr->wr.atomic.rkey);
-				((struct mlx4_wqe_raddr_seg *) wqe)->reserved = 0;
-
+				set_raddr_seg(wqe, wr->wr.atomic.remote_addr,
+					      wr->wr.atomic.rkey);
 				wqe  += sizeof (struct mlx4_wqe_raddr_seg);
 
-				if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP) {
-					((struct mlx4_wqe_atomic_seg *) wqe)->swap_add =
-						cpu_to_be64(wr->wr.atomic.swap);
-					((struct mlx4_wqe_atomic_seg *) wqe)->compare =
-						cpu_to_be64(wr->wr.atomic.compare_add);
-				} else {
-					((struct mlx4_wqe_atomic_seg *) wqe)->swap_add =
-						cpu_to_be64(wr->wr.atomic.compare_add);
-					((struct mlx4_wqe_atomic_seg *) wqe)->compare = 0;
-				}
-
+				set_atomic_seg(wqe, wr);
 				wqe  += sizeof (struct mlx4_wqe_atomic_seg);
+
 				size += (sizeof (struct mlx4_wqe_raddr_seg) +
 					 sizeof (struct mlx4_wqe_atomic_seg)) / 16;
 
@@ -1266,15 +1327,10 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 			case IB_WR_RDMA_READ:
 			case IB_WR_RDMA_WRITE:
 			case IB_WR_RDMA_WRITE_WITH_IMM:
-				((struct mlx4_wqe_raddr_seg *) wqe)->raddr =
-					cpu_to_be64(wr->wr.rdma.remote_addr);
-				((struct mlx4_wqe_raddr_seg *) wqe)->rkey =
-					cpu_to_be32(wr->wr.rdma.rkey);
-				((struct mlx4_wqe_raddr_seg *) wqe)->reserved = 0;
-
+				set_raddr_seg(wqe, wr->wr.rdma.remote_addr,
+					      wr->wr.rdma.rkey);
 				wqe  += sizeof (struct mlx4_wqe_raddr_seg);
 				size += sizeof (struct mlx4_wqe_raddr_seg) / 16;
-
 				break;
 
 			default:
@@ -1284,13 +1340,7 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 			break;
 
 		case IB_QPT_UD:
-			memcpy(((struct mlx4_wqe_datagram_seg *) wqe)->av,
-			       &to_mah(wr->wr.ud.ah)->av, sizeof (struct mlx4_av));
-			((struct mlx4_wqe_datagram_seg *) wqe)->dqpn =
-				cpu_to_be32(wr->wr.ud.remote_qpn);
-			((struct mlx4_wqe_datagram_seg *) wqe)->qkey =
-				cpu_to_be32(wr->wr.ud.remote_qkey);
-
+			set_datagram_seg(wqe, wr);
 			wqe  += sizeof (struct mlx4_wqe_datagram_seg);
 			size += sizeof (struct mlx4_wqe_datagram_seg) / 16;
 			break;
@@ -1312,27 +1362,27 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 			break;
 		}
 
-		for (i = 0; i < wr->num_sge; ++i) {
-			((struct mlx4_wqe_data_seg *) wqe)->byte_count =
-				cpu_to_be32(wr->sg_list[i].length);
-			((struct mlx4_wqe_data_seg *) wqe)->lkey =
-				cpu_to_be32(wr->sg_list[i].lkey);
-			((struct mlx4_wqe_data_seg *) wqe)->addr =
-				cpu_to_be64(wr->sg_list[i].addr);
+		/*
+		 * Write data segments in reverse order, so as to
+		 * overwrite cacheline stamp last within each
+		 * cacheline.  This avoids issues with WQE
+		 * prefetching.
+		 */
 
-			wqe  += sizeof (struct mlx4_wqe_data_seg);
-			size += sizeof (struct mlx4_wqe_data_seg) / 16;
-		}
+		dseg = wqe;
+		dseg += wr->num_sge - 1;
+		size += wr->num_sge * (sizeof (struct mlx4_wqe_data_seg) / 16);
 
 		/* Add one more inline data segment for ICRC for MLX sends */
-		if (qp->ibqp.qp_type == IB_QPT_SMI || qp->ibqp.qp_type == IB_QPT_GSI) {
-			((struct mlx4_wqe_inline_seg *) wqe)->byte_count =
-				cpu_to_be32((1 << 31) | 4);
-			((u32 *) wqe)[1] = 0;
-			wqe  += sizeof (struct mlx4_wqe_data_seg);
+		if (unlikely(qp->ibqp.qp_type == IB_QPT_SMI ||
+			     qp->ibqp.qp_type == IB_QPT_GSI)) {
+			set_mlx_icrc_seg(dseg + 1);
 			size += sizeof (struct mlx4_wqe_data_seg) / 16;
 		}
 
+		for (i = wr->num_sge - 1; i >= 0; --i, --dseg)
+			set_data_seg(dseg, wr->sg_list + i);
+
 		ctrl->fence_size = (wr->send_flags & IB_SEND_FENCE ?
 				    MLX4_WQE_CTRL_FENCE : 0) | size;
 
@@ -1421,11 +1471,8 @@ int mlx4_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
 
 		scat = get_recv_wqe(qp, ind);
 
-		for (i = 0; i < wr->num_sge; ++i) {
-			scat[i].byte_count = cpu_to_be32(wr->sg_list[i].length);
-			scat[i].lkey       = cpu_to_be32(wr->sg_list[i].lkey);
-			scat[i].addr       = cpu_to_be64(wr->sg_list[i].addr);
-		}
+		for (i = 0; i < wr->num_sge; ++i)
+			__set_data_seg(scat + i, wr->sg_list + i);
 
 		if (i < qp->rq.max_gs) {
 			scat[i].byte_count = 0;
@@ -1498,7 +1545,7 @@ static int to_ib_qp_access_flags(int mlx4_flags)
 static void to_ib_ah_attr(struct mlx4_dev *dev, struct ib_ah_attr *ib_ah_attr,
 			  struct mlx4_qp_path *path)
 {
-	memset(ib_ah_attr, 0, sizeof *path);
+	memset(ib_ah_attr, 0, sizeof *ib_ah_attr);
 	ib_ah_attr->port_num	  = path->sched_queue & 0x40 ? 2 : 1;
 
 	if (ib_ah_attr->port_num == 0 || ib_ah_attr->port_num > dev->caps.num_ports)
@@ -1515,7 +1562,7 @@ static void to_ib_ah_attr(struct mlx4_dev *dev, struct ib_ah_attr *ib_ah_attr,
 		ib_ah_attr->grh.traffic_class =
 			(be32_to_cpu(path->tclass_flowlabel) >> 20) & 0xff;
 		ib_ah_attr->grh.flow_label =
-			be32_to_cpu(path->tclass_flowlabel) & 0xffffff;
+			be32_to_cpu(path->tclass_flowlabel) & 0xfffff;
 		memcpy(ib_ah_attr->grh.dgid.raw, path->rgid,
 		       sizeof ib_ah_attr->grh.dgid.raw);
 	}
@@ -1560,7 +1607,10 @@ int mlx4_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr
 	}
 
 	qp_attr->pkey_index     = context.pri_path.pkey_index & 0x7f;
-	qp_attr->port_num       = context.pri_path.sched_queue & 0x40 ? 2 : 1;
+	if (qp_attr->qp_state == IB_QPS_INIT)
+		qp_attr->port_num = qp->port;
+	else
+		qp_attr->port_num = context.pri_path.sched_queue & 0x40 ? 2 : 1;
 
 	/* qp_attr->en_sqd_async_notify is only applicable in modify qp */
 	qp_attr->sq_draining = mlx4_state == MLX4_QP_STATE_SQ_DRAINING;
@@ -1578,17 +1628,25 @@ int mlx4_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr
 
 done:
 	qp_attr->cur_qp_state	     = qp_attr->qp_state;
+	qp_attr->cap.max_recv_wr     = qp->rq.wqe_cnt;
+	qp_attr->cap.max_recv_sge    = qp->rq.max_gs;
+
 	if (!ibqp->uobject) {
-		qp_attr->cap.max_send_wr     = qp->sq.wqe_cnt;
-		qp_attr->cap.max_recv_wr     = qp->rq.wqe_cnt;
-		qp_attr->cap.max_send_sge    = qp->sq.max_gs;
-		qp_attr->cap.max_recv_sge    = qp->rq.max_gs;
-		qp_attr->cap.max_inline_data = (1 << qp->sq.wqe_shift) -
-			send_wqe_overhead(qp->ibqp.qp_type) -
-			sizeof (struct mlx4_wqe_inline_seg);
-		qp_init_attr->cap	     = qp_attr->cap;
+		qp_attr->cap.max_send_wr  = qp->sq.wqe_cnt;
+		qp_attr->cap.max_send_sge = qp->sq.max_gs;
+	} else {
+		qp_attr->cap.max_send_wr  = 0;
+		qp_attr->cap.max_send_sge = 0;
 	}
 
+	/*
+	 * We don't support inline sends for kernel QPs (yet), and we
+	 * don't know what userspace's value should be.
+	 */
+	qp_attr->cap.max_inline_data = 0;
+
+	qp_init_attr->cap	     = qp_attr->cap;
+
 	return 0;
 }
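Note on the post_send changes above: the HCA may prefetch a 64-byte WQE chunk as soon as it sees a plausible byte_count, so the driver fills each data segment's lkey/addr first, issues a write barrier, and only then writes byte_count; it also walks the scatter list in reverse so the word at the start of each cacheline is overwritten last. The fragment below is a minimal standalone userspace sketch of that ordering pattern, not the driver code: struct wqe_data_seg, struct sge, store_fence(), fill_data_seg() and fill_data_segs() are simplified stand-ins invented for illustration (store_fence() stands in for the kernel's wmb(), htobe32/htobe64 for cpu_to_be32/cpu_to_be64).

	#include <endian.h>	/* htobe32/htobe64 (glibc) */
	#include <stdint.h>

	/* Simplified stand-in for struct mlx4_wqe_data_seg: byte_count first,
	 * which is the field the hardware treats as "this segment is valid". */
	struct wqe_data_seg {
		uint32_t byte_count;	/* big-endian on the wire */
		uint32_t lkey;
		uint64_t addr;
	};

	/* Simplified stand-in for struct ib_sge. */
	struct sge {
		uint64_t addr;
		uint32_t length;
		uint32_t lkey;
	};

	/* Stand-in for the kernel's wmb(): keep earlier stores ordered
	 * before the later byte_count store. */
	static inline void store_fence(void)
	{
		__atomic_thread_fence(__ATOMIC_RELEASE);
	}

	/* Same idea as set_data_seg() in the diff: fill lkey/addr, fence,
	 * then make the segment valid by writing byte_count last. */
	static void fill_data_seg(struct wqe_data_seg *dseg, const struct sge *sg)
	{
		dseg->lkey = htobe32(sg->lkey);
		dseg->addr = htobe64(sg->addr);

		store_fence();

		dseg->byte_count = htobe32(sg->length);
	}

	/* Walk the scatter list in reverse, as the driver comment describes,
	 * so the segment at the start of each cacheline is completed last. */
	static void fill_data_segs(struct wqe_data_seg *first,
				   const struct sge *sg_list, int num_sge)
	{
		struct wqe_data_seg *dseg = first + num_sge - 1;
		int i;

		for (i = num_sge - 1; i >= 0; --i, --dseg)
			fill_data_seg(dseg, &sg_list[i]);
	}

The sketch only mirrors the ordering discipline; the real driver additionally accounts for the segment sizes in the WQE size field and appends the ICRC inline segment for MLX (SMI/GSI) sends, as shown in the diff.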