diff options
author | Eli Cohen <eli@mellanox.co.il> | 2008-07-15 10:48:44 +0400 |
---|---|---|
committer | Roland Dreier <rolandd@cisco.com> | 2008-07-15 10:48:44 +0400 |
commit | 9670e553915e67fb68f13258644342c68dc26b84 (patch) | |
tree | 68730cafe26cdb14d591f5f9c80bdeb1d6c85642 /drivers/infiniband/hw/mlx4 | |
parent | 164ba0893c27a216557396320b6063fdac040392 (diff) | |
download | linux-9670e553915e67fb68f13258644342c68dc26b84.tar.xz |
IB/mlx4: Optimize QP stamping
The idea is that for QPs with fixed size work requests (eg selective
signaling QPs), before stamping the WQE, we read the value of the DS
field, which gives the effective size of the descriptor as used in the
previous post. Then we stamp only that area, since the rest of the
descriptor is already stamped.
When initializing the send queue buffer, make sure the DS field is
initialized to the max descriptor size so that the subsequent stamping
will be done on the entire descriptor area.
Signed-off-by: Eli Cohen <eli@mellanox.co.il>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
Diffstat (limited to 'drivers/infiniband/hw/mlx4')
-rw-r--r-- | drivers/infiniband/hw/mlx4/qp.c | 8 |
1 files changed, 6 insertions, 2 deletions
diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index a80df22deae8..4b0ac5d68c46 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -129,9 +129,10 @@ static void stamp_send_wqe(struct mlx4_ib_qp *qp, int n, int size) int ind; void *buf; __be32 stamp; + struct mlx4_wqe_ctrl_seg *ctrl; - s = roundup(size, 1U << qp->sq.wqe_shift); if (qp->sq_max_wqes_per_wr > 1) { + s = roundup(size, 1U << qp->sq.wqe_shift); for (i = 0; i < s; i += 64) { ind = (i >> qp->sq.wqe_shift) + n; stamp = ind & qp->sq.wqe_cnt ? cpu_to_be32(0x7fffffff) : @@ -141,7 +142,8 @@ static void stamp_send_wqe(struct mlx4_ib_qp *qp, int n, int size) *wqe = stamp; } } else { - buf = get_send_wqe(qp, n & (qp->sq.wqe_cnt - 1)); + ctrl = buf = get_send_wqe(qp, n & (qp->sq.wqe_cnt - 1)); + s = (ctrl->fence_size & 0x3f) << 4; for (i = 64; i < s; i += 64) { wqe = buf + i; *wqe = cpu_to_be32(0xffffffff); @@ -1063,6 +1065,8 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, for (i = 0; i < qp->sq.wqe_cnt; ++i) { ctrl = get_send_wqe(qp, i); ctrl->owner_opcode = cpu_to_be32(1 << 31); + if (qp->sq_max_wqes_per_wr == 1) + ctrl->fence_size = 1 << (qp->sq.wqe_shift - 4); stamp_send_wqe(qp, i, 1 << qp->sq.wqe_shift); } |