summaryrefslogtreecommitdiff
path: root/drivers/net/ethernet/mellanox/mlx4/en_tx.c
diff options
context:
space:
mode:
author: Alexander Duyck <alexander.h.duyck@redhat.com> 2015-04-09 04:49:36 +0300
committer: David S. Miller <davem@davemloft.net> 2015-04-09 21:25:25 +0300
commit: 12b3375f3963536ba3ad47d2db49f72067b4905e (patch)
tree: 1aa5051a9936718911a87d8cc4eed864d703132b /drivers/net/ethernet/mellanox/mlx4/en_tx.c
parent: 019be1cff44bdfed23163be7469419be4f38589b (diff)
download: linux-12b3375f3963536ba3ad47d2db49f72067b4905e.tar.xz
mlx4/mlx5: Use dma_wmb/rmb where appropriate
This patch should help to improve the performance of the mlx4 and mlx5 drivers on a number of architectures. For example, on x86 the dma_wmb/rmb equates out to a barrier() call as the architecture is already strongly ordered, and on PowerPC the call works out to an lwsync, which is significantly less expensive than the sync call that was being used for wmb. I placed the new barriers between any spots that seemed to be trying to order memory/memory reads or writes; in any spots that involved MMIO I left the existing wmb in place, as the new barriers cannot order transactions between coherent and non-coherent memories. v2: Reduced the replacements to just the spots where I could clearly identify the usage pattern. Cc: Amir Vadai <amirv@mellanox.com> Cc: Ido Shamay <idos@mellanox.com> Cc: Eli Cohen <eli@mellanox.com> Signed-off-by: Alexander Duyck <alexander.h.duyck@redhat.com> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'drivers/net/ethernet/mellanox/mlx4/en_tx.c')
-rw-r--r-- drivers/net/ethernet/mellanox/mlx4/en_tx.c | 12
1 files changed, 6 insertions, 6 deletions
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
index 55f9f5c5344e..1783705273d8 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
@@ -416,7 +416,7 @@ static bool mlx4_en_process_tx_cq(struct net_device *dev,
* make sure we read the CQE after we read the
* ownership bit
*/
- rmb();
+ dma_rmb();
if (unlikely((cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) ==
MLX4_CQE_OPCODE_ERROR)) {
@@ -667,7 +667,7 @@ static void build_inline_wqe(struct mlx4_en_tx_desc *tx_desc,
skb_frag_size(&shinfo->frags[0]));
}
- wmb();
+ dma_wmb();
inl->byte_count = cpu_to_be32(1 << 31 | (skb->len - spc));
}
}
@@ -804,7 +804,7 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev)
data->addr = cpu_to_be64(dma);
data->lkey = ring->mr_key;
- wmb();
+ dma_wmb();
data->byte_count = cpu_to_be32(byte_count);
--data;
}
@@ -821,7 +821,7 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev)
data->addr = cpu_to_be64(dma);
data->lkey = ring->mr_key;
- wmb();
+ dma_wmb();
data->byte_count = cpu_to_be32(byte_count);
}
/* tx completion can avoid cache line miss for common cases */
@@ -938,7 +938,7 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev)
/* Ensure new descriptor hits memory
* before setting ownership of this descriptor to HW
*/
- wmb();
+ dma_wmb();
tx_desc->ctrl.owner_opcode = op_own;
wmb();
@@ -958,7 +958,7 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev)
/* Ensure new descriptor hits memory
* before setting ownership of this descriptor to HW
*/
- wmb();
+ dma_wmb();
tx_desc->ctrl.owner_opcode = op_own;
if (send_doorbell) {
wmb();