From a61d5ce9cc56e2e41bbb1ad62ca7a16d7e7567bd Mon Sep 17 00:00:00 2001
From: Or Gerlitz
Date: Thu, 8 Dec 2016 12:58:45 +0200
Subject: net/mlx5: Fix static checker warnings

For some reason, sparse doesn't like using an expression of the form
(!x) with a bitwise | and &. In order to mitigate that, we use a local
variable.

This removes the following sparse complaints on the core driver
(and similar ones on the IB driver too):

drivers/net/ethernet/mellanox/mlx5/core/srq.c:83:9: warning: dubious: !x & y
drivers/net/ethernet/mellanox/mlx5/core/srq.c:96:9: warning: dubious: !x & y
drivers/net/ethernet/mellanox/mlx5/core/port.c:59:9: warning: dubious: !x & y
drivers/net/ethernet/mellanox/mlx5/core/vport.c:561:9: warning: dubious: !x & y

Signed-off-by: Or Gerlitz
Signed-off-by: Matan Barak
Reported-by: Bart Van Assche
Signed-off-by: Saeed Mahameed
---
 include/linux/mlx5/device.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h
index 7b6cd67a263f..dd9a263ed368 100644
--- a/include/linux/mlx5/device.h
+++ b/include/linux/mlx5/device.h
@@ -67,10 +67,11 @@

 /* insert a value to a struct */
 #define MLX5_SET(typ, p, fld, v) do { \
+	u32 _v = v; \
 	BUILD_BUG_ON(__mlx5_st_sz_bits(typ) % 32); \
 	*((__be32 *)(p) + __mlx5_dw_off(typ, fld)) = \
 	cpu_to_be32((be32_to_cpu(*((__be32 *)(p) + __mlx5_dw_off(typ, fld))) & \
-		     (~__mlx5_dw_mask(typ, fld))) | (((v) & __mlx5_mask(typ, fld)) \
+		     (~__mlx5_dw_mask(typ, fld))) | (((_v) & __mlx5_mask(typ, fld)) \
		     << __mlx5_dw_bit_off(typ, fld))); \
 } while (0)

--
cgit v1.2.3
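
The pattern is easy to reproduce outside the kernel. Below is a minimal
userspace sketch of the macro shape before and after the fix, using
simplified stand-in names rather than the real mlx5 definitions; running
sparse over it shows the same "dubious: !x & y" complaint against the old
shape only.

/* Stand-ins for __mlx5_mask()/__mlx5_dw_bit_off(); one 1-bit field. */
#include <stdint.h>
#include <stdio.h>

#define FLD_MASK  0x1u
#define FLD_SHIFT 3

/* Old shape: (v) & mask expands to (!x) & mask, which sparse flags. */
#define SET_OLD(reg, v) \
	((reg) = ((reg) & ~(FLD_MASK << FLD_SHIFT)) | \
		 (((v) & FLD_MASK) << FLD_SHIFT))

/* New shape: the local variable keeps (!x) away from & and |. */
#define SET_NEW(reg, v) do { \
	uint32_t _v = (v); \
	(reg) = ((reg) & ~(FLD_MASK << FLD_SHIFT)) | \
		((_v & FLD_MASK) << FLD_SHIFT); \
} while (0)

int main(void)
{
	uint32_t reg = 0;
	int x = 0;

	SET_OLD(reg, !x);	/* sparse: warning: dubious: !x & y */
	SET_NEW(reg, !x);	/* no warning */
	printf("reg = 0x%x\n", (unsigned)reg);
	return 0;
}

Passing !x as the value is common in driver code (boolean capability
flags), which is why the warning fired in srq.c, port.c and vport.c.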
From f32f5bd2eb7e91a5090c06bbe1ed14bffbbda5d3 Mon Sep 17 00:00:00 2001
From: Daniel Jurgens
Date: Thu, 19 Nov 2015 17:12:26 +0200
Subject: net/mlx5: Configure cache line size for start and end padding

There is a hardware feature that will pad the start or end of a DMA to
be cache line aligned to avoid RMWs on the last cache line. The default
cache line size setting for this feature is 64B. This change configures
the hardware to use 128B alignment on systems with 128B cache lines.

In addition, we lower-bound the MPWRQ stride by the HCA cache line in
mlx5e: the MPWRQ stride should be at least the HCA cache line size. The
current default is 64B, and when the HCA_CAP.cache_line_128byte
capability is set, the MPWRQ RX stride is automatically aligned to 128B.

Signed-off-by: Daniel Jurgens
Signed-off-by: Saeed Mahameed
---
 drivers/net/ethernet/mellanox/mlx5/core/en.h      | 9 +++++++--
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 4 ++--
 drivers/net/ethernet/mellanox/mlx5/core/main.c    | 6 ++++++
 include/linux/mlx5/mlx5_ifc.h                     | 6 ++++--
 4 files changed, 19 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index 9b52c58cd528..9b23d3329847 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -70,8 +70,13 @@

 #define MLX5_RX_HEADROOM NET_SKB_PAD

-#define MLX5_MPWRQ_LOG_STRIDE_SIZE		6  /* >= 6, HW restriction */
-#define MLX5_MPWRQ_LOG_STRIDE_SIZE_CQE_COMPRESS	8  /* >= 6, HW restriction */
+#define MLX5_MPWRQ_MIN_LOG_STRIDE_SZ(mdev) \
+	(6 + MLX5_CAP_GEN(mdev, cache_line_128byte)) /* HW restriction */
+#define MLX5_MPWRQ_LOG_STRIDE_SZ(mdev, req) \
+	max_t(u32, MLX5_MPWRQ_MIN_LOG_STRIDE_SZ(mdev), req)
+#define MLX5_MPWRQ_DEF_LOG_STRIDE_SZ(mdev)       MLX5_MPWRQ_LOG_STRIDE_SZ(mdev, 6)
+#define MLX5_MPWRQ_CQE_CMPRS_LOG_STRIDE_SZ(mdev) MLX5_MPWRQ_LOG_STRIDE_SZ(mdev, 8)
+
 #define MLX5_MPWRQ_LOG_WQE_SZ			18
 #define MLX5_MPWRQ_WQE_PAGE_ORDER  (MLX5_MPWRQ_LOG_WQE_SZ - PAGE_SHIFT > 0 ? \
				    MLX5_MPWRQ_LOG_WQE_SZ - PAGE_SHIFT : 0)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index ab6f4d3b8063..1b7fe43ab22b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -89,8 +89,8 @@ static void mlx5e_set_rq_type_params(struct mlx5e_priv *priv, u8 rq_type)
			MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE_MPW;
		priv->params.mpwqe_log_stride_sz =
			MLX5E_GET_PFLAG(priv, MLX5E_PFLAG_RX_CQE_COMPRESS) ?
-			MLX5_MPWRQ_LOG_STRIDE_SIZE_CQE_COMPRESS :
-			MLX5_MPWRQ_LOG_STRIDE_SIZE;
+			MLX5_MPWRQ_CQE_CMPRS_LOG_STRIDE_SZ(priv->mdev) :
+			MLX5_MPWRQ_DEF_LOG_STRIDE_SZ(priv->mdev);
		priv->params.mpwqe_log_num_strides = MLX5_MPWRQ_LOG_WQE_SZ -
			priv->params.mpwqe_log_stride_sz;
		break;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index f7e50ba67f94..c4242a4e8130 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -543,6 +543,12 @@ static int handle_hca_cap(struct mlx5_core_dev *dev)
	MLX5_SET(cmd_hca_cap, set_hca_cap, log_uar_page_sz,
		 PAGE_SHIFT - 12);

+	if (MLX5_CAP_GEN_MAX(dev, cache_line_128byte))
+		MLX5_SET(cmd_hca_cap,
+			 set_hca_cap,
+			 cache_line_128byte,
+			 cache_line_size() == 128 ? 1 : 0);
+
	err = set_caps(dev, set_ctx, set_sz,
		       MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE);

diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index a919dfb920ae..cc8ae860cd45 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -804,10 +804,12 @@ struct mlx5_ifc_cmd_hca_cap_bits {
	u8         reserved_at_150[0xa];
	u8         log_max_ra_res_qp[0x6];

-	u8         pad_cap[0x1];
+	u8         end_pad[0x1];
	u8         cc_query_allowed[0x1];
	u8         cc_modify_allowed[0x1];
-	u8         reserved_at_163[0xd];
+	u8         start_pad[0x1];
+	u8         cache_line_128byte[0x1];
+	u8         reserved_at_163[0xb];
	u8         gid_table_size[0x10];

	u8         out_of_seq_cnt[0x1];
--
cgit v1.2.3
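
The stride lower bound above is simple enough to model in isolation. A
small sketch, with a plain struct field assumed in place of the
MLX5_CAP_GEN(mdev, cache_line_128byte) accessor:

#include <stdio.h>

/* Hypothetical stand-in for the cache_line_128byte capability bit. */
struct hca_caps {
	unsigned int cache_line_128byte;	/* 0 or 1 */
};

/* Mirrors MLX5_MPWRQ_MIN_LOG_STRIDE_SZ: log 6 (64B) on 64B parts,
 * log 7 (128B) when the 128B-cache-line capability is set. */
static unsigned int mpwrq_min_log_stride(const struct hca_caps *caps)
{
	return 6 + caps->cache_line_128byte;
}

/* Mirrors MLX5_MPWRQ_LOG_STRIDE_SZ: clamp the request up to the min. */
static unsigned int mpwrq_log_stride(const struct hca_caps *caps,
				     unsigned int req)
{
	unsigned int min = mpwrq_min_log_stride(caps);

	return req > min ? req : min;
}

int main(void)
{
	struct hca_caps line64  = { .cache_line_128byte = 0 };
	struct hca_caps line128 = { .cache_line_128byte = 1 };

	/* default request is log 6, CQE-compress request is log 8 */
	printf("64B HCA:  %uB default, %uB cqe-compress\n",
	       1u << mpwrq_log_stride(&line64, 6),
	       1u << mpwrq_log_stride(&line64, 8));
	printf("128B HCA: %uB default, %uB cqe-compress\n",
	       1u << mpwrq_log_stride(&line128, 6),
	       1u << mpwrq_log_stride(&line128, 8));
	return 0;
}

On a 128B-cache-line HCA the default stride doubles to 128B, while the
CQE-compress stride (256B) already satisfies the bound and is unchanged.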
From 2b31f7ae5f645edd852addfca445895b5806f3f9 Mon Sep 17 00:00:00 2001
From: Saeed Mahameed
Date: Mon, 28 Nov 2016 18:04:50 +0200
Subject: net/mlx5: TX WQE update

Add new TX WQE fields for ConnectX-5 vlan insertion support: type and
vlan_tci. When type = MLX5_ETH_WQE_INSERT_VLAN, the HW will insert the
vlan and prio fields (vlan_tci) into the packet.

Those bits and the inline header fields are mutually exclusive, and
valid only when:
MLX5_CAP_ETH(mdev, wqe_inline_mode) == MLX5_CAP_INLINE_MODE_NOT_REQUIRED
and MLX5_CAP_ETH(mdev, wqe_vlan_insert), which will be set in ConnectX-5
and later HW generations.

Signed-off-by: Saeed Mahameed
Reviewed-by: Tariq Toukan
---
 drivers/infiniband/hw/mlx5/qp.c                 |  6 +++---
 drivers/net/ethernet/mellanox/mlx5/core/en_rx.c |  4 ++--
 drivers/net/ethernet/mellanox/mlx5/core/en_tx.c |  8 ++++----
 include/linux/mlx5/mlx5_ifc.h                   |  3 ++-
 include/linux/mlx5/qp.h                         | 16 ++++++++++++++--
 5 files changed, 25 insertions(+), 12 deletions(-)

diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
index 6a83fb32599d..e31bf11ae64f 100644
--- a/drivers/infiniband/hw/mlx5/qp.c
+++ b/drivers/infiniband/hw/mlx5/qp.c
@@ -2984,20 +2984,20 @@ static void *set_eth_seg(struct mlx5_wqe_eth_seg *eseg,

	if (wr->opcode == IB_WR_LSO) {
		struct ib_ud_wr *ud_wr = container_of(wr, struct ib_ud_wr, wr);
-		int size_of_inl_hdr_start = sizeof(eseg->inline_hdr_start);
+		int size_of_inl_hdr_start = sizeof(eseg->inline_hdr.start);
		u64 left, leftlen, copysz;
		void *pdata = ud_wr->header;

		left = ud_wr->hlen;
		eseg->mss = cpu_to_be16(ud_wr->mss);
-		eseg->inline_hdr_sz = cpu_to_be16(left);
+		eseg->inline_hdr.sz = cpu_to_be16(left);

		/*
		 * check if there is space till the end of queue, if yes,
		 * copy all in one shot, otherwise copy till the end of queue,
		 * rollback and than the copy the left
		 */
-		leftlen = qend - (void *)eseg->inline_hdr_start;
+		leftlen = qend - (void *)eseg->inline_hdr.start;
		copysz = min_t(u64, leftlen, left);

		memcpy(seg - size_of_inl_hdr_start, pdata, copysz);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
index fd8dff6acc12..965e69e9ff1e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
@@ -687,8 +687,8 @@ static inline bool mlx5e_xmit_xdp_frame(struct mlx5e_rq *rq,
	memset(wqe, 0, sizeof(*wqe));

	/* copy the inline part */
-	memcpy(eseg->inline_hdr_start, xdp->data, MLX5E_XDP_MIN_INLINE);
-	eseg->inline_hdr_sz = cpu_to_be16(MLX5E_XDP_MIN_INLINE);
+	memcpy(eseg->inline_hdr.start, xdp->data, MLX5E_XDP_MIN_INLINE);
+	eseg->inline_hdr.sz = cpu_to_be16(MLX5E_XDP_MIN_INLINE);

	dseg = (struct mlx5_wqe_data_seg *)cseg + (MLX5E_XDP_TX_DS_COUNT - 1);

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
index cfb68371c397..678c07c8fbb0 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
@@ -284,18 +284,18 @@ static netdev_tx_t mlx5e_sq_xmit(struct mlx5e_sq *sq, struct sk_buff *skb)
	wi->num_bytes = num_bytes;

	if (skb_vlan_tag_present(skb)) {
-		mlx5e_insert_vlan(eseg->inline_hdr_start, skb, ihs, &skb_data,
+		mlx5e_insert_vlan(eseg->inline_hdr.start, skb, ihs, &skb_data,
				  &skb_len);
		ihs += VLAN_HLEN;
	} else {
-		memcpy(eseg->inline_hdr_start, skb_data, ihs);
+		memcpy(eseg->inline_hdr.start, skb_data, ihs);
		mlx5e_tx_skb_pull_inline(&skb_data, &skb_len, ihs);
	}

-	eseg->inline_hdr_sz = cpu_to_be16(ihs);
+	eseg->inline_hdr.sz = cpu_to_be16(ihs);

	ds_cnt  = sizeof(*wqe) / MLX5_SEND_WQE_DS;
-	ds_cnt += DIV_ROUND_UP(ihs - sizeof(eseg->inline_hdr_start),
+	ds_cnt += DIV_ROUND_UP(ihs - sizeof(eseg->inline_hdr.start),
			       MLX5_SEND_WQE_DS);
	dseg = (struct mlx5_wqe_data_seg *)cseg + ds_cnt;

diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index cc8ae860cd45..afcd4736d8df 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -577,7 +577,8 @@ struct mlx5_ifc_per_protocol_networking_offload_caps_bits {
	u8         lro_cap[0x1];
	u8         lro_psh_flag[0x1];
	u8         lro_time_stamp[0x1];
-	u8         reserved_at_5[0x3];
+	u8         reserved_at_5[0x2];
+	u8         wqe_vlan_insert[0x1];
	u8         self_lb_en_modifiable[0x1];
	u8         reserved_at_9[0x2];
	u8         max_lso_cap[0x5];
diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h
index 219c699c17b7..3096370fe831 100644
--- a/include/linux/mlx5/qp.h
+++ b/include/linux/mlx5/qp.h
@@ -221,14 +221,26 @@ enum {
	MLX5_ETH_WQE_L4_CSUM            = 1 << 7,
 };

+enum {
+	MLX5_ETH_WQE_INSERT_VLAN        = 1 << 15,
+};
+
 struct mlx5_wqe_eth_seg {
	u8              rsvd0[4];
	u8              cs_flags;
	u8              rsvd1;
	__be16          mss;
	__be32          rsvd2;
-	__be16          inline_hdr_sz;
-	u8              inline_hdr_start[2];
+	union {
+		struct {
+			__be16 sz;
+			u8     start[2];
+		} inline_hdr;
+		struct {
+			__be16 type;
+			__be16 vlan_tci;
+		} insert;
+	};
 };

 struct mlx5_wqe_xrc_seg {
--
cgit v1.2.3
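
Because the two layouts share storage, a WQE carries either inline
headers or a vlan-insertion request, never both. A userspace sketch of
the union introduced by this patch, with htons() standing in for
cpu_to_be16() and hypothetical helper names:

#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <arpa/inet.h>	/* htons()/ntohs() as userspace cpu_to_be16() */

#define ETH_WQE_INSERT_VLAN (1 << 15)	/* mirrors MLX5_ETH_WQE_INSERT_VLAN */

/* Just the tail of the eth segment; the union is the point here. */
struct eth_seg_tail {
	union {
		struct {
			uint16_t sz;		/* big endian on the wire */
			uint8_t  start[2];	/* first 2 inline bytes */
		} inline_hdr;
		struct {
			uint16_t type;
			uint16_t vlan_tci;
		} insert;
	};
};

/* A given WQE uses one layout or the other, never both. */
static void use_vlan_insert(struct eth_seg_tail *t, uint16_t tci)
{
	t->insert.type     = htons(ETH_WQE_INSERT_VLAN);
	t->insert.vlan_tci = htons(tci);
}

static void use_inline_hdr(struct eth_seg_tail *t, const uint8_t *hdr,
			   uint16_t len)
{
	t->inline_hdr.sz = htons(len);
	memcpy(t->inline_hdr.start, hdr, 2);	/* rest spills to later DS */
}

int main(void)
{
	struct eth_seg_tail t;
	uint8_t hdr[14] = { 0 };

	memset(&t, 0, sizeof(t));
	use_inline_hdr(&t, hdr, sizeof(hdr));
	use_vlan_insert(&t, 100);	/* overwrites the inline fields */
	printf("type=0x%04x tci=%u\n", ntohs(t.insert.type),
	       ntohs(t.insert.vlan_tci));
	return 0;
}

Note the design keeps the eth segment the same size: the insert fields
simply reinterpret the four bytes the inline header fields occupied.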
From a6f402e4990145252ce4fde59b273fa7e4f91e1b Mon Sep 17 00:00:00 2001
From: Saeed Mahameed
Date: Tue, 6 Dec 2016 13:53:49 +0200
Subject: net/mlx5e: Tx, no inline copy on ConnectX-5

ConnectX-5 and later HW generations will report min inline mode ==
MLX5_INLINE_MODE_NONE, which means the driver is not required to copy
packet headers to the inline fields of the TX WQE.

When inline is not required, vlan insertion is handled in the TX
descriptor rather than by an inline copy. For the LSO case, the driver
is still required to copy the headers, so the HW can duplicate them on
the wire.

This will improve CPU utilization and boost TX performance.

Tested with pktgen burst single flow:
CPU: Intel(R) Xeon(R) CPU E5-2680 v3 @ 2.50GHz
HCA: Mellanox Technologies MT28800 Family [ConnectX-5 Ex]
Before: 15.1Mpps
After:  17.2Mpps
Improvement: 14%

Signed-off-by: Saeed Mahameed
Reviewed-by: Tariq Toukan
---
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 13 +++++++---
 drivers/net/ethernet/mellanox/mlx5/core/en_tx.c   | 30 +++++++++++++----------
 2 files changed, 26 insertions(+), 17 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 1b7fe43ab22b..9cd38401fdc9 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -1029,9 +1029,7 @@ static int mlx5e_create_sq(struct mlx5e_channel *c,
	sq->bf_buf_size = (1 << MLX5_CAP_GEN(mdev, log_bf_reg_size)) / 2;
	sq->max_inline  = param->max_inline;
-	sq->min_inline_mode =
-		MLX5_CAP_ETH(mdev, wqe_inline_mode) == MLX5_CAP_INLINE_MODE_VPORT_CONTEXT ?
-		param->min_inline_mode : 0;
+	sq->min_inline_mode = param->min_inline_mode;

	err = mlx5e_alloc_sq_db(sq, cpu_to_node(c->cpu));
	if (err)
@@ -1095,7 +1093,10 @@ static int mlx5e_enable_sq(struct mlx5e_sq *sq, struct mlx5e_sq_param *param)
	MLX5_SET(sqc,  sqc, tis_num_0, param->type == MLX5E_SQ_ICO ?
				       0 : priv->tisn[sq->tc]);
	MLX5_SET(sqc,  sqc, cqn, sq->cq.mcq.cqn);
-	MLX5_SET(sqc,  sqc, min_wqe_inline_mode, sq->min_inline_mode);
+
+	if (MLX5_CAP_ETH(mdev, wqe_inline_mode) == MLX5_CAP_INLINE_MODE_VPORT_CONTEXT)
+		MLX5_SET(sqc, sqc, min_wqe_inline_mode, sq->min_inline_mode);
+
	MLX5_SET(sqc,  sqc, state, MLX5_SQC_STATE_RST);
	MLX5_SET(sqc,  sqc, tis_lst_sz, param->type == MLX5E_SQ_ICO ? 0 : 1);

@@ -3533,6 +3534,10 @@ static void mlx5e_build_nic_netdev_priv(struct mlx5_core_dev *mdev,
		MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_PKTS;
	priv->params.tx_max_inline         = mlx5e_get_max_inline_cap(mdev);
	mlx5_query_min_inline(mdev, &priv->params.tx_min_inline_mode);
+	if (priv->params.tx_min_inline_mode == MLX5_INLINE_MODE_NONE &&
+	    !MLX5_CAP_ETH(mdev, wqe_vlan_insert))
+		priv->params.tx_min_inline_mode = MLX5_INLINE_MODE_L2;
+
	priv->params.num_tc                = 1;
	priv->params.rss_hfunc             = ETH_RSS_HASH_XOR;

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
index 678c07c8fbb0..f193128bac4b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
@@ -154,6 +154,8 @@ static inline unsigned int mlx5e_calc_min_inline(enum mlx5_inline_modes mode,
	int hlen;

	switch (mode) {
+	case MLX5_INLINE_MODE_NONE:
+		return 0;
	case MLX5_INLINE_MODE_TCP_UDP:
		hlen = eth_get_headlen(skb->data, skb_headlen(skb));
		if (hlen == ETH_HLEN && !skb_vlan_tag_present(skb))
@@ -283,21 +285,23 @@ static netdev_tx_t mlx5e_sq_xmit(struct mlx5e_sq *sq, struct sk_buff *skb)

	wi->num_bytes = num_bytes;

-	if (skb_vlan_tag_present(skb)) {
-		mlx5e_insert_vlan(eseg->inline_hdr.start, skb, ihs, &skb_data,
-				  &skb_len);
-		ihs += VLAN_HLEN;
-	} else {
-		memcpy(eseg->inline_hdr.start, skb_data, ihs);
-		mlx5e_tx_skb_pull_inline(&skb_data, &skb_len, ihs);
+	ds_cnt = sizeof(*wqe) / MLX5_SEND_WQE_DS;
+	if (ihs) {
+		if (skb_vlan_tag_present(skb)) {
+			mlx5e_insert_vlan(eseg->inline_hdr.start, skb, ihs, &skb_data, &skb_len);
+			ihs += VLAN_HLEN;
+		} else {
+			memcpy(eseg->inline_hdr.start, skb_data, ihs);
+			mlx5e_tx_skb_pull_inline(&skb_data, &skb_len, ihs);
+		}
+		eseg->inline_hdr.sz = cpu_to_be16(ihs);
+		ds_cnt += DIV_ROUND_UP(ihs - sizeof(eseg->inline_hdr.start), MLX5_SEND_WQE_DS);
+	} else if (skb_vlan_tag_present(skb)) {
+		eseg->insert.type = cpu_to_be16(MLX5_ETH_WQE_INSERT_VLAN);
+		eseg->insert.vlan_tci = cpu_to_be16(skb_vlan_tag_get(skb));
	}

-	eseg->inline_hdr.sz = cpu_to_be16(ihs);
-
-	ds_cnt  = sizeof(*wqe) / MLX5_SEND_WQE_DS;
-	ds_cnt += DIV_ROUND_UP(ihs - sizeof(eseg->inline_hdr.start),
-			       MLX5_SEND_WQE_DS);
-	dseg = (struct mlx5_wqe_data_seg *)cseg + ds_cnt;
+	dseg = (struct mlx5_wqe_data_seg *)cseg + ds_cnt;

	wi->num_dma = 0;

--
cgit v1.2.3
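
A condensed model of the resulting xmit decision — how many header bytes
are copied inline and where the vlan goes when none are. The helper and
enum values here are illustrative stand-ins, not driver code (the real
logic lives in mlx5e_calc_min_inline() and mlx5e_sq_xmit()):

#include <stdbool.h>
#include <stdio.h>

enum inline_mode { INLINE_MODE_NONE, INLINE_MODE_L2 };

#define ETH_HLEN  14
#define VLAN_HLEN 4

/* Hypothetical packet descriptor standing in for the skb. */
struct pkt {
	bool vlan_tag;		/* skb_vlan_tag_present() */
	bool lso;		/* GSO/LSO packet */
	unsigned int hdr_len;	/* L2..L4 header bytes, used for LSO */
};

/* After this patch: LSO always copies headers (HW must duplicate them
 * per segment on the wire); otherwise INLINE_MODE_NONE copies nothing. */
static unsigned int inline_hdr_size(enum inline_mode mode,
				    const struct pkt *p)
{
	if (p->lso)
		return p->hdr_len;
	if (mode == INLINE_MODE_NONE)
		return 0;
	return ETH_HLEN + (p->vlan_tag ? VLAN_HLEN : 0);
}

int main(void)
{
	struct pkt p = { .vlan_tag = true, .lso = false };
	unsigned int ihs = inline_hdr_size(INLINE_MODE_NONE, &p);

	if (ihs == 0 && p.vlan_tag)
		printf("no inline copy; vlan_tci goes in eseg->insert\n");
	else
		printf("copy %u header bytes inline\n", ihs);
	return 0;
}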
From b70149dd7dc98661fbbfe7281f127caa408028fe Mon Sep 17 00:00:00 2001
From: Saeed Mahameed
Date: Tue, 6 Dec 2016 14:04:05 +0200
Subject: net/mlx5e: XDP Tx, no inline copy on ConnectX-5

ConnectX-5 and later HW generations will report min inline mode ==
MLX5_INLINE_MODE_NONE, which means the driver is not required to copy
packet headers to the inline fields of the TX WQE.

Avoid the copy to the inline segment in the XDP TX routine when the HW
inline mode doesn't require it.

This will improve CPU utilization and boost XDP TX performance.

Tested with xdp2 single flow:
CPU: Intel(R) Xeon(R) CPU E5-2680 v3 @ 2.50GHz
HCA: Mellanox Technologies MT28800 Family [ConnectX-5 Ex]
Before: 7.4Mpps
After:  7.8Mpps
Improvement: 5%

Signed-off-by: Saeed Mahameed
Reviewed-by: Tariq Toukan
---
 drivers/net/ethernet/mellanox/mlx5/core/en.h      |  3 +--
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c |  3 +--
 drivers/net/ethernet/mellanox/mlx5/core/en_rx.c   | 20 +++++++++++++-------
 3 files changed, 15 insertions(+), 11 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index 9b23d3329847..8be4b12b5545 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -120,8 +120,7 @@
 #define MLX5E_XDP_IHS_DS_COUNT \
	DIV_ROUND_UP(MLX5E_XDP_MIN_INLINE - 2, MLX5_SEND_WQE_DS)
 #define MLX5E_XDP_TX_DS_COUNT \
-	(MLX5E_XDP_IHS_DS_COUNT + \
-	 (sizeof(struct mlx5e_tx_wqe) / MLX5_SEND_WQE_DS) + 1 /* SG DS */)
+	((sizeof(struct mlx5e_tx_wqe) / MLX5_SEND_WQE_DS) + 1 /* SG DS */)
 #define MLX5E_XDP_TX_WQEBBS \
	DIV_ROUND_UP(MLX5E_XDP_TX_DS_COUNT, MLX5_SEND_WQEBB_NUM_DS)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 9cd38401fdc9..ed230757d9c5 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -1806,8 +1806,7 @@ static void mlx5e_build_xdpsq_param(struct mlx5e_priv *priv,
	MLX5_SET(wq, wq, log_wq_sz,     priv->params.log_sq_size);

	param->max_inline = priv->params.tx_max_inline;
-	/* FOR XDP SQs will support only L2 inline mode */
-	param->min_inline_mode = MLX5_INLINE_MODE_NONE;
+	param->min_inline_mode = priv->params.tx_min_inline_mode;
	param->type = MLX5E_SQ_XDP;
 }

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
index 965e69e9ff1e..b039b87742a6 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
@@ -657,9 +657,10 @@ static inline bool mlx5e_xmit_xdp_frame(struct mlx5e_rq *rq,
	struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl;
	struct mlx5_wqe_eth_seg  *eseg = &wqe->eth;
	struct mlx5_wqe_data_seg *dseg;
+	u8 ds_cnt = MLX5E_XDP_TX_DS_COUNT;

	ptrdiff_t data_offset = xdp->data - xdp->data_hard_start;
-	dma_addr_t dma_addr  = di->addr + data_offset + MLX5E_XDP_MIN_INLINE;
+	dma_addr_t dma_addr  = di->addr + data_offset;
	unsigned int dma_len = xdp->data_end - xdp->data;

	if (unlikely(dma_len < MLX5E_XDP_MIN_INLINE ||
@@ -680,17 +681,22 @@ static inline bool mlx5e_xmit_xdp_frame(struct mlx5e_rq *rq,
		return false;
	}

-	dma_len -= MLX5E_XDP_MIN_INLINE;
	dma_sync_single_for_device(sq->pdev, dma_addr, dma_len,
				   PCI_DMA_TODEVICE);

	memset(wqe, 0, sizeof(*wqe));

-	/* copy the inline part */
-	memcpy(eseg->inline_hdr.start, xdp->data, MLX5E_XDP_MIN_INLINE);
-	eseg->inline_hdr.sz = cpu_to_be16(MLX5E_XDP_MIN_INLINE);
+	dseg = (struct mlx5_wqe_data_seg *)eseg + 1;
+	/* copy the inline part if required */
+	if (sq->min_inline_mode != MLX5_INLINE_MODE_NONE) {
+		memcpy(eseg->inline_hdr.start, xdp->data, MLX5E_XDP_MIN_INLINE);
+		eseg->inline_hdr.sz = cpu_to_be16(MLX5E_XDP_MIN_INLINE);
+		dma_len  -= MLX5E_XDP_MIN_INLINE;
+		dma_addr += MLX5E_XDP_MIN_INLINE;

-	dseg = (struct mlx5_wqe_data_seg *)cseg + (MLX5E_XDP_TX_DS_COUNT - 1);
+		ds_cnt += MLX5E_XDP_IHS_DS_COUNT;
+		dseg++;
+	}

	/* write the dma part */
	dseg->addr       = cpu_to_be64(dma_addr);
@@ -698,7 +704,7 @@ static inline bool mlx5e_xmit_xdp_frame(struct mlx5e_rq *rq,
	dseg->lkey       = sq->mkey_be;

	cseg->opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | MLX5_OPCODE_SEND);
-	cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | MLX5E_XDP_TX_DS_COUNT);
+	cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt);

	sq->db.xdp.di[pi] = *di;
	wi->opcode = MLX5_OPCODE_SEND;
--
cgit v1.2.3
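
The DS-count bookkeeping can be checked with standalone arithmetic. A
sketch assuming a 16-byte DS unit and an 18-byte minimum inline (both
values are assumptions here, not quoted from the driver):

#include <stdio.h>

#define SEND_WQE_DS	16u	/* assumed bytes per WQE data segment */
#define TX_WQE_HDR	32u	/* ctrl + eth segments, simplified */
#define XDP_MIN_INLINE	18u	/* assumed: L2 header + vlan */

/* Mirrors the shape of MLX5E_XDP_TX_DS_COUNT after the patch: the WQE
 * segments plus one gather (SG) segment; the inline-header segments
 * are now added only when an inline copy is still required. */
static unsigned int xdp_tx_ds_count(int inline_required)
{
	unsigned int ds_cnt = TX_WQE_HDR / SEND_WQE_DS + 1;

	/* the first 2 inline bytes live inside the eth segment itself;
	 * the remainder spills into extra data segments */
	if (inline_required)
		ds_cnt += (XDP_MIN_INLINE - 2 + SEND_WQE_DS - 1) / SEND_WQE_DS;

	return ds_cnt;
}

int main(void)
{
	printf("inline copy:    %u DS per XDP TX WQE\n", xdp_tx_ds_count(1));
	printf("no inline copy: %u DS per XDP TX WQE\n", xdp_tx_ds_count(0));
	return 0;
}

One fewer DS per WQE is what lets the control segment's qpn_ds field be
computed at runtime (ds_cnt) instead of from a compile-time constant.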
From 8ca967ab67671f07ac7daef4f854559bc66799a3 Mon Sep 17 00:00:00 2001
From: Saeed Mahameed
Date: Mon, 9 Jan 2017 21:57:49 +0200
Subject: net/mlx5e: Bring back bfreg uar map dedicated pointer

The 4K UAR series modified the mlx5e driver to use the new bfreg API,
and mistakenly removed the sq->uar_map iomem data path dedicated
pointer, which was meant to be read from the xmit path for cache
locality.

Fix that by returning that pointer to the SQ struct.

Fixes: 7309cb4ad71e ("IB/mlx5: Support 4k UAR for libmlx5")
Signed-off-by: Saeed Mahameed
Reviewed-by: Tariq Toukan
---
 drivers/net/ethernet/mellanox/mlx5/core/en.h      | 5 +++--
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 1 +
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index 8be4b12b5545..95ca03c0d9f5 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -475,6 +475,7 @@ struct mlx5e_sq {
	/* read only */
	struct mlx5_wq_cyc         wq;
	u32                        dma_fifo_mask;
+	void __iomem              *uar_map;
	struct netdev_queue       *txq;
	u32                        sqn;
	u16                        bf_buf_size;
@@ -831,9 +832,9 @@ static inline void mlx5e_tx_notify_hw(struct mlx5e_sq *sq,
	 */
	wmb();
	if (bf_sz)
-		__iowrite64_copy(sq->bfreg.map + ofst, ctrl, bf_sz);
+		__iowrite64_copy(sq->uar_map + ofst, ctrl, bf_sz);
	else
-		mlx5_write64((__be32 *)ctrl, sq->bfreg.map + ofst, NULL);
+		mlx5_write64((__be32 *)ctrl, sq->uar_map + ofst, NULL);

	/* flush the write-combining mapped buffer */
	wmb();

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index ed230757d9c5..3cce6281e075 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -1016,6 +1016,7 @@ static int mlx5e_create_sq(struct mlx5e_channel *c,
	if (err)
		return err;

+	sq->uar_map = sq->bfreg.map;
	param->wq.db_numa_node = cpu_to_node(c->cpu);
	err = mlx5_wq_cyc_create(mdev, &param->wq, sqc_wq, &sq->wq,
				 &sq->wq_ctrl);
--
cgit v1.2.3
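
The underlying idea is a hot/cold field split: mirror the iomem pointer
into the read-mostly part of the SQ that the xmit path already has in
cache. A hypothetical sketch of the pattern, with an invented struct
layout rather than the real mlx5e_sq:

#include <stdio.h>

struct bfreg {
	void *map;	/* the doorbell/blueflame mapping */
	/* ... allocator bookkeeping the xmit path never needs ... */
	int index;
};

struct sq {
	/* hot, read-mostly: everything xmit touches, packed together */
	void *uar_map;
	unsigned int pc;

	/* cold control-path state */
	struct bfreg bfreg;
};

/* At SQ create time, mirror the mapping into the hot section so the
 * doorbell write in xmit doesn't pull the cold bfreg cache line. */
static void sq_cache_uar_map(struct sq *sq)
{
	sq->uar_map = sq->bfreg.map;
}

int main(void)
{
	char uar_page[16];	/* dummy mapping for the sketch */
	struct sq sq = { .bfreg = { .map = uar_page } };

	sq_cache_uar_map(&sq);
	printf("uar_map=%p\n", sq.uar_map);
	return 0;
}

The two pointers alias the same mapping; only their placement relative
to the other hot fields differs, which is the entire point of the fix.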