summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2021-10-27 16:39:57 +0300
committerDavid S. Miller <davem@davemloft.net>2021-10-27 16:39:57 +0300
commitc230dc8627de832fe13bee64613a52a46c5b54ab (patch)
tree64d4d8693b501984fd3807eba748ebec24f422fb
parent06338ceff92510544a732380dbb2d621bd3775bf (diff)
parent8ca9caee851c444810b70d9c167e7321f3ff68f8 (diff)
downloadlinux-c230dc8627de832fe13bee64613a52a46c5b54ab.tar.xz
Merge tag 'mlx5-updates-2021-10-26' of git://git.kernel.org/pub/scm/linux/kernel/git/saeed/linux
Saeed Mahameed says: ==================== mlx5-updates-2021-10-26 HW-GRO support in mlx5 Beside the HW GRO this series includes two trivial non-mlx5 patches: - net: Prevent HW-GRO and LRO features operate together - lib: bitmap: Introduce node-aware alloc API Khalid Manaa Says: ================== This series implements the HW-GRO offload using the HW feature SHAMPO. HW-GRO: Hardware offload for the Generic Receive Offload feature. SHAMPO: Split Headers And Merge Payload Offload. This feature performs headers data split for each received packed and merge the payloads of the packets of the same session. There are new HW components for this feature: The headers buffer: – cyclic buffer where the packets headers will be located Reservation buffer: – capability to divide RQ WQEs to reservations, a definite size in granularity of 4KB, the reservation is used to define the largest segment that we can create by packets stitching. Each reservation will have a session and the new received packet can be merged to the session, terminate it, or open a new one according to the match criteria. When a new packet is received the headers will be written to the headers buffer and the data will be written to the reservation, in case the packet matches the session the data will be written continuously otherwise it will be written after performing an alignment. SHAMPO RQ, WQ and CQE changes: ----------------------------- RQ (receive queue) new params: -shampo_no_match_alignment_granularity: the HW alignment granularity in case the received packet doesn't match the current session. -shampo_match_criteria_type: the type of match criteria. -reservation_timeout: the maximum time that the HW will hold the reservation. -Each RQ has SKB that represents the current opened flow. WQ (work queue) new params: -headers_mkey: mkey that represents the headers buffer, where the packets headers will be written by the HW. -shampo_enable: flag to verify if the WQ supports SHAMPO feature. -log_reservation_size: the log of the reservation size where the data of the packet will be written by the HW. -log_max_num_of_packets_per_reservation: log of the maximum number of packets that can be written to the same reservation. -log_headers_entry_size: log of the header entry size of the headers buffer. -log_headers_buffer_entry_num: log of the entries number of the headers buffer. CQEs (Completion queue entry) SHAMPO fields: -match: in case it is set, then the current packet matches the opened session. -flush: in case it is set, the opened session must be flushed. -header_size: the size of the packet’s headers. -header_entry_index: the entry index in the headers buffer of the received packet headers. -data_offset: the offset of the received packet data in the WQE. HW-GRO works as follow: ---------------------- The feature can be enabled on the interface using the ethtool command by setting on rx-gro-hw. When the feature is on the mlx5 driver will reopen the RQ to support the SHAMPO feature: Will allocate the headers buffer and fill the parameters regarding the reservation and the match criteria. Receive packet flow: each RQ will hold SKB that represents the current GRO opened session. The driver has a new CQE handler mlx5e_handle_rx_cqe_mpwrq_shampo which will use the CQE SHAMPO params to extract the location of the packet’s headers in the headers buffer and the location of the packets data in the RQ. Also, the CQE has two flags flush and match that indicate if the current packet matches the current session or not and if we need to close the session. In case there is an opened session, and we receive a matched packet then the handler will merge the packet's payload to the current SKB, in case we receive no match then the handler will flush the SKB and create a new one for the new packet. In case the flash flag is set then the driver will close the session, the SKB will be passed to the network stack. In case the driver merges packets in the SKB, before passing the SKB to the network stack the driver will update the checksum of the packet’s headers. SKB build: --------- The driver will build a new SKB in the following situations: in case there is no current opened session. In case the current packet doesn’t match the current session. In case there is no place to add the packets data to the SKB that represents the current session. Otherwise, the driver will add the packet’s data to the SKB. When the driver builds a new SKB, the linear area will contain only the packet headers and the data will be added to the SKB fragments. In case the entry size of the headers buffer is sufficient to build the SKB it will be used, otherwise the driver will allocate new memory to build the SKB. ================== ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en.h75
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/params.c163
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/params.h18
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/rss.c23
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/rss.h7
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.c25
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.h5
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tir.c32
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tir.h6
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h6
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c9
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c6
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_main.c285
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_rep.c5
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_rx.c646
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_stats.c15
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_stats.h10
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fw.c6
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c7
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lag/mp.h2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/main.c1
-rw-r--r--include/linux/bitmap.h2
-rw-r--r--include/linux/mlx5/device.h28
-rw-r--r--include/linux/mlx5/mlx5_ifc.h64
-rw-r--r--lib/bitmap.c13
-rw-r--r--net/core/dev.c5
26 files changed, 1300 insertions, 164 deletions
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index a3a4fece0cac..67453dd75b93 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -79,6 +79,11 @@ struct page_pool;
SKB_DATA_ALIGN(sizeof(struct skb_shared_info)))
#define MLX5E_RX_MAX_HEAD (256)
+#define MLX5E_SHAMPO_LOG_MAX_HEADER_ENTRY_SIZE (9)
+#define MLX5E_SHAMPO_WQ_HEADER_PER_PAGE (PAGE_SIZE >> MLX5E_SHAMPO_LOG_MAX_HEADER_ENTRY_SIZE)
+#define MLX5E_SHAMPO_WQ_BASE_HEAD_ENTRY_SIZE (64)
+#define MLX5E_SHAMPO_WQ_RESRV_SIZE (64 * 1024)
+#define MLX5E_SHAMPO_WQ_BASE_RESRV_SIZE (4096)
#define MLX5_MPWRQ_MIN_LOG_STRIDE_SZ(mdev) \
(6 + MLX5_CAP_GEN(mdev, cache_line_128byte)) /* HW restriction */
@@ -152,6 +157,25 @@ struct page_pool;
#define MLX5E_UMR_WQEBBS \
(DIV_ROUND_UP(MLX5E_UMR_WQE_INLINE_SZ, MLX5_SEND_WQE_BB))
+#define MLX5E_KLM_UMR_WQE_SZ(sgl_len)\
+ (sizeof(struct mlx5e_umr_wqe) +\
+ (sizeof(struct mlx5_klm) * (sgl_len)))
+
+#define MLX5E_KLM_UMR_WQEBBS(klm_entries) \
+ (DIV_ROUND_UP(MLX5E_KLM_UMR_WQE_SZ(klm_entries), MLX5_SEND_WQE_BB))
+
+#define MLX5E_KLM_UMR_DS_CNT(klm_entries)\
+ (DIV_ROUND_UP(MLX5E_KLM_UMR_WQE_SZ(klm_entries), MLX5_SEND_WQE_DS))
+
+#define MLX5E_KLM_MAX_ENTRIES_PER_WQE(wqe_size)\
+ (((wqe_size) - sizeof(struct mlx5e_umr_wqe)) / sizeof(struct mlx5_klm))
+
+#define MLX5E_KLM_ENTRIES_PER_WQE(wqe_size)\
+ ALIGN_DOWN(MLX5E_KLM_MAX_ENTRIES_PER_WQE(wqe_size), MLX5_UMR_KLM_ALIGNMENT)
+
+#define MLX5E_MAX_KLM_PER_WQE(mdev) \
+ MLX5E_KLM_ENTRIES_PER_WQE(MLX5E_TX_MPW_MAX_NUM_DS << MLX5_MKEY_BSF_OCTO_SIZE)
+
#define MLX5E_MSG_LEVEL NETIF_MSG_LINK
#define mlx5e_dbg(mlevel, priv, format, ...) \
@@ -217,7 +241,10 @@ struct mlx5e_umr_wqe {
struct mlx5_wqe_ctrl_seg ctrl;
struct mlx5_wqe_umr_ctrl_seg uctrl;
struct mlx5_mkey_seg mkc;
- struct mlx5_mtt inline_mtts[0];
+ union {
+ struct mlx5_mtt inline_mtts[0];
+ struct mlx5_klm inline_klms[0];
+ };
};
enum mlx5e_priv_flag {
@@ -242,6 +269,21 @@ enum mlx5e_priv_flag {
#define MLX5E_GET_PFLAG(params, pflag) (!!((params)->pflags & (BIT(pflag))))
+enum packet_merge {
+ MLX5E_PACKET_MERGE_NONE,
+ MLX5E_PACKET_MERGE_LRO,
+ MLX5E_PACKET_MERGE_SHAMPO,
+};
+
+struct mlx5e_packet_merge_param {
+ enum packet_merge type;
+ u32 timeout;
+ struct {
+ u8 match_criteria_type;
+ u8 alignment_granularity;
+ } shampo;
+};
+
struct mlx5e_params {
u8 log_sq_size;
u8 rq_wq_type;
@@ -259,13 +301,12 @@ struct mlx5e_params {
bool tunneled_offload_en;
struct dim_cq_moder rx_cq_moderation;
struct dim_cq_moder tx_cq_moderation;
- bool lro_en;
+ struct mlx5e_packet_merge_param packet_merge;
u8 tx_min_inline_mode;
bool vlan_strip_disable;
bool scatter_fcs_en;
bool rx_dim_enabled;
bool tx_dim_enabled;
- u32 lro_timeout;
u32 pflags;
struct bpf_prog *xdp_prog;
struct mlx5e_xsk *xsk;
@@ -287,7 +328,8 @@ enum {
MLX5E_RQ_STATE_NO_CSUM_COMPLETE,
MLX5E_RQ_STATE_CSUM_FULL, /* cqe_csum_full hw bit is set */
MLX5E_RQ_STATE_FPGA_TLS, /* FPGA TLS enabled */
- MLX5E_RQ_STATE_MINI_CQE_HW_STRIDX /* set when mini_cqe_resp_stride_index cap is used */
+ MLX5E_RQ_STATE_MINI_CQE_HW_STRIDX, /* set when mini_cqe_resp_stride_index cap is used */
+ MLX5E_RQ_STATE_SHAMPO, /* set when SHAMPO cap is used */
};
struct mlx5e_cq {
@@ -578,6 +620,7 @@ typedef struct sk_buff *
struct mlx5e_wqe_frag_info *wi, u32 cqe_bcnt);
typedef bool (*mlx5e_fp_post_rx_wqes)(struct mlx5e_rq *rq);
typedef void (*mlx5e_fp_dealloc_wqe)(struct mlx5e_rq*, u16);
+typedef void (*mlx5e_fp_shampo_dealloc_hd)(struct mlx5e_rq*, u16, u16, bool);
int mlx5e_rq_set_handlers(struct mlx5e_rq *rq, struct mlx5e_params *params, bool xsk);
void mlx5e_rq_set_trap_handlers(struct mlx5e_rq *rq, struct mlx5e_params *params);
@@ -599,6 +642,25 @@ struct mlx5e_rq_frags_info {
u8 wqe_bulk;
};
+struct mlx5e_shampo_hd {
+ struct mlx5_core_mkey mkey;
+ struct mlx5e_dma_info *info;
+ struct page *last_page;
+ u16 hd_per_wq;
+ u16 hd_per_wqe;
+ unsigned long *bitmap;
+ u16 pi;
+ u16 ci;
+ __be32 key;
+ u64 last_addr;
+};
+
+struct mlx5e_hw_gro_data {
+ struct sk_buff *skb;
+ struct flow_keys fk;
+ int second_ip_id;
+};
+
struct mlx5e_rq {
/* data path */
union {
@@ -620,6 +682,7 @@ struct mlx5e_rq {
u8 umr_in_progress;
u8 umr_last_bulk;
u8 umr_completed;
+ struct mlx5e_shampo_hd *shampo;
} mpwqe;
};
struct {
@@ -639,6 +702,8 @@ struct mlx5e_rq {
struct mlx5e_icosq *icosq;
struct mlx5e_priv *priv;
+ struct mlx5e_hw_gro_data *hw_gro_data;
+
mlx5e_fp_handle_rx_cqe handle_rx_cqe;
mlx5e_fp_post_rx_wqes post_wqes;
mlx5e_fp_dealloc_wqe dealloc_wqe;
@@ -886,6 +951,7 @@ struct mlx5e_priv {
struct mlx5e_rx_handlers {
mlx5e_fp_handle_rx_cqe handle_rx_cqe;
mlx5e_fp_handle_rx_cqe handle_rx_cqe_mpwqe;
+ mlx5e_fp_handle_rx_cqe handle_rx_cqe_mpwqe_shampo;
};
extern const struct mlx5e_rx_handlers mlx5e_rx_handlers_nic;
@@ -915,6 +981,7 @@ void mlx5e_build_ptys2ethtool_map(void);
bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev);
+void mlx5e_shampo_dealloc_hd(struct mlx5e_rq *rq, u16 len, u16 start, bool close);
void mlx5e_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats);
void mlx5e_fold_sw_stats64(struct mlx5e_priv *priv, struct rtnl_link_stats64 *s);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c
index 3cbb596821e8..f8c29022dbb2 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c
@@ -87,7 +87,8 @@ bool mlx5e_rx_is_linear_skb(struct mlx5e_params *params,
u32 linear_frag_sz = max(mlx5e_rx_get_linear_frag_sz(params, xsk),
mlx5e_rx_get_linear_frag_sz(params, NULL));
- return !params->lro_en && linear_frag_sz <= PAGE_SIZE;
+ return params->packet_merge.type == MLX5E_PACKET_MERGE_NONE &&
+ linear_frag_sz <= PAGE_SIZE;
}
bool mlx5e_verify_rx_mpwqe_strides(struct mlx5_core_dev *mdev,
@@ -138,6 +139,27 @@ u8 mlx5e_mpwqe_get_log_rq_size(struct mlx5e_params *params,
return params->log_rq_mtu_frames - log_pkts_per_wqe;
}
+u8 mlx5e_shampo_get_log_hd_entry_size(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params)
+{
+ return order_base_2(DIV_ROUND_UP(MLX5E_RX_MAX_HEAD, MLX5E_SHAMPO_WQ_BASE_HEAD_ENTRY_SIZE));
+}
+
+u8 mlx5e_shampo_get_log_rsrv_size(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params)
+{
+ return order_base_2(MLX5E_SHAMPO_WQ_RESRV_SIZE / MLX5E_SHAMPO_WQ_BASE_RESRV_SIZE);
+}
+
+u8 mlx5e_shampo_get_log_pkt_per_rsrv(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params)
+{
+ u32 resrv_size = BIT(mlx5e_shampo_get_log_rsrv_size(mdev, params)) *
+ PAGE_SIZE;
+
+ return order_base_2(DIV_ROUND_UP(resrv_size, params->sw_mtu));
+}
+
u8 mlx5e_mpwqe_get_log_stride_size(struct mlx5_core_dev *mdev,
struct mlx5e_params *params,
struct mlx5e_xsk_param *xsk)
@@ -164,19 +186,8 @@ u16 mlx5e_get_rq_headroom(struct mlx5_core_dev *mdev,
mlx5e_rx_is_linear_skb(params, xsk) :
mlx5e_rx_mpwqe_is_linear_skb(mdev, params, xsk);
- return is_linear_skb ? mlx5e_get_linear_rq_headroom(params, xsk) : 0;
-}
-
-struct mlx5e_lro_param mlx5e_get_lro_param(struct mlx5e_params *params)
-{
- struct mlx5e_lro_param lro_param;
-
- lro_param = (struct mlx5e_lro_param) {
- .enabled = params->lro_en,
- .timeout = params->lro_timeout,
- };
-
- return lro_param;
+ return is_linear_skb || params->packet_merge.type == MLX5E_PACKET_MERGE_SHAMPO ?
+ mlx5e_get_linear_rq_headroom(params, xsk) : 0;
}
u16 mlx5e_calc_sq_stop_room(struct mlx5_core_dev *mdev, struct mlx5e_params *params)
@@ -453,6 +464,23 @@ static void mlx5e_build_common_cq_param(struct mlx5_core_dev *mdev,
MLX5_SET(cqc, cqc, cqe_sz, CQE_STRIDE_128_PAD);
}
+static u32 mlx5e_shampo_get_log_cq_size(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk)
+{
+ int rsrv_size = BIT(mlx5e_shampo_get_log_rsrv_size(mdev, params)) * PAGE_SIZE;
+ u16 num_strides = BIT(mlx5e_mpwqe_get_log_num_strides(mdev, params, xsk));
+ int pkt_per_rsrv = BIT(mlx5e_shampo_get_log_pkt_per_rsrv(mdev, params));
+ u8 log_stride_sz = mlx5e_mpwqe_get_log_stride_size(mdev, params, xsk);
+ int wq_size = BIT(mlx5e_mpwqe_get_log_rq_size(params, xsk));
+ int wqe_size = BIT(log_stride_sz) * num_strides;
+
+ /* +1 is for the case that the pkt_per_rsrv dont consume the reservation
+ * so we get a filler cqe for the rest of the reservation.
+ */
+ return order_base_2((wqe_size / rsrv_size) * wq_size * (pkt_per_rsrv + 1));
+}
+
static void mlx5e_build_rx_cq_param(struct mlx5_core_dev *mdev,
struct mlx5e_params *params,
struct mlx5e_xsk_param *xsk,
@@ -464,9 +492,12 @@ static void mlx5e_build_rx_cq_param(struct mlx5_core_dev *mdev,
switch (params->rq_wq_type) {
case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
- log_cq_size = mlx5e_mpwqe_get_log_rq_size(params, xsk) +
- mlx5e_mpwqe_get_log_num_strides(mdev, params, xsk);
hw_stridx = MLX5_CAP_GEN(mdev, mini_cqe_resp_stride_index);
+ if (params->packet_merge.type == MLX5E_PACKET_MERGE_SHAMPO)
+ log_cq_size = mlx5e_shampo_get_log_cq_size(mdev, params, xsk);
+ else
+ log_cq_size = mlx5e_mpwqe_get_log_rq_size(params, xsk) +
+ mlx5e_mpwqe_get_log_num_strides(mdev, params, xsk);
break;
default: /* MLX5_WQ_TYPE_CYCLIC */
log_cq_size = params->log_rq_mtu_frames;
@@ -485,10 +516,11 @@ static void mlx5e_build_rx_cq_param(struct mlx5_core_dev *mdev,
static u8 rq_end_pad_mode(struct mlx5_core_dev *mdev, struct mlx5e_params *params)
{
+ bool lro_en = params->packet_merge.type == MLX5E_PACKET_MERGE_LRO;
bool ro = pcie_relaxed_ordering_enabled(mdev->pdev) &&
MLX5_CAP_GEN(mdev, relaxed_ordering_write);
- return ro && params->lro_en ?
+ return ro && lro_en ?
MLX5_WQ_END_PAD_MODE_NONE : MLX5_WQ_END_PAD_MODE_ALIGN;
}
@@ -520,6 +552,22 @@ int mlx5e_build_rq_param(struct mlx5_core_dev *mdev,
MLX5_SET(wq, wq, log_wqe_stride_size,
log_wqe_stride_size - MLX5_MPWQE_LOG_STRIDE_SZ_BASE);
MLX5_SET(wq, wq, log_wq_sz, mlx5e_mpwqe_get_log_rq_size(params, xsk));
+ if (params->packet_merge.type == MLX5E_PACKET_MERGE_SHAMPO) {
+ MLX5_SET(wq, wq, shampo_enable, true);
+ MLX5_SET(wq, wq, log_reservation_size,
+ mlx5e_shampo_get_log_rsrv_size(mdev, params));
+ MLX5_SET(wq, wq,
+ log_max_num_of_packets_per_reservation,
+ mlx5e_shampo_get_log_pkt_per_rsrv(mdev, params));
+ MLX5_SET(wq, wq, log_headers_entry_size,
+ mlx5e_shampo_get_log_hd_entry_size(mdev, params));
+ MLX5_SET(rqc, rqc, reservation_timeout,
+ params->packet_merge.timeout);
+ MLX5_SET(rqc, rqc, shampo_match_criteria_type,
+ params->packet_merge.shampo.match_criteria_type);
+ MLX5_SET(rqc, rqc, shampo_no_match_alignment_granularity,
+ params->packet_merge.shampo.alignment_granularity);
+ }
break;
}
default: /* MLX5_WQ_TYPE_CYCLIC */
@@ -620,17 +668,80 @@ static u8 mlx5e_get_rq_log_wq_sz(void *rqc)
return MLX5_GET(wq, wq, log_wq_sz);
}
-static u8 mlx5e_build_icosq_log_wq_sz(struct mlx5e_params *params,
+/* This function calculates the maximum number of headers entries that are needed
+ * per WQE, the formula is based on the size of the reservations and the
+ * restriction we have about max packets for reservation that is equal to max
+ * headers per reservation.
+ */
+u32 mlx5e_shampo_hd_per_wqe(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params,
+ struct mlx5e_rq_param *rq_param)
+{
+ int resv_size = BIT(mlx5e_shampo_get_log_rsrv_size(mdev, params)) * PAGE_SIZE;
+ u16 num_strides = BIT(mlx5e_mpwqe_get_log_num_strides(mdev, params, NULL));
+ int pkt_per_resv = BIT(mlx5e_shampo_get_log_pkt_per_rsrv(mdev, params));
+ u8 log_stride_sz = mlx5e_mpwqe_get_log_stride_size(mdev, params, NULL);
+ int wqe_size = BIT(log_stride_sz) * num_strides;
+ u32 hd_per_wqe;
+
+ /* Assumption: hd_per_wqe % 8 == 0. */
+ hd_per_wqe = (wqe_size / resv_size) * pkt_per_resv;
+ mlx5_core_dbg(mdev, "%s hd_per_wqe = %d rsrv_size = %d wqe_size = %d pkt_per_resv = %d\n",
+ __func__, hd_per_wqe, resv_size, wqe_size, pkt_per_resv);
+ return hd_per_wqe;
+}
+
+/* This function calculates the maximum number of headers entries that are needed
+ * for the WQ, this value is uesed to allocate the header buffer in HW, thus
+ * must be a pow of 2.
+ */
+u32 mlx5e_shampo_hd_per_wq(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params,
+ struct mlx5e_rq_param *rq_param)
+{
+ void *wqc = MLX5_ADDR_OF(rqc, rq_param->rqc, wq);
+ int wq_size = BIT(MLX5_GET(wq, wqc, log_wq_sz));
+ u32 hd_per_wqe, hd_per_wq;
+
+ hd_per_wqe = mlx5e_shampo_hd_per_wqe(mdev, params, rq_param);
+ hd_per_wq = roundup_pow_of_two(hd_per_wqe * wq_size);
+ return hd_per_wq;
+}
+
+static u32 mlx5e_shampo_icosq_sz(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params,
+ struct mlx5e_rq_param *rq_param)
+{
+ int max_num_of_umr_per_wqe, max_hd_per_wqe, max_klm_per_umr, rest;
+ void *wqc = MLX5_ADDR_OF(rqc, rq_param->rqc, wq);
+ int wq_size = BIT(MLX5_GET(wq, wqc, log_wq_sz));
+ u32 wqebbs;
+
+ max_klm_per_umr = MLX5E_MAX_KLM_PER_WQE(mdev);
+ max_hd_per_wqe = mlx5e_shampo_hd_per_wqe(mdev, params, rq_param);
+ max_num_of_umr_per_wqe = max_hd_per_wqe / max_klm_per_umr;
+ rest = max_hd_per_wqe % max_klm_per_umr;
+ wqebbs = MLX5E_KLM_UMR_WQEBBS(max_klm_per_umr) * max_num_of_umr_per_wqe;
+ if (rest)
+ wqebbs += MLX5E_KLM_UMR_WQEBBS(rest);
+ wqebbs *= wq_size;
+ return wqebbs;
+}
+
+static u8 mlx5e_build_icosq_log_wq_sz(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params,
struct mlx5e_rq_param *rqp)
{
- switch (params->rq_wq_type) {
- case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
- return max_t(u8, MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE,
- order_base_2(MLX5E_UMR_WQEBBS) +
- mlx5e_get_rq_log_wq_sz(rqp->rqc));
- default: /* MLX5_WQ_TYPE_CYCLIC */
+ u32 wqebbs;
+
+ /* MLX5_WQ_TYPE_CYCLIC */
+ if (params->rq_wq_type != MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ)
return MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE;
- }
+
+ wqebbs = MLX5E_UMR_WQEBBS * BIT(mlx5e_get_rq_log_wq_sz(rqp->rqc));
+ if (params->packet_merge.type == MLX5E_PACKET_MERGE_SHAMPO)
+ wqebbs += mlx5e_shampo_icosq_sz(mdev, params, rqp);
+ return max_t(u8, MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE, order_base_2(wqebbs));
}
static u8 mlx5e_build_async_icosq_log_wq_sz(struct mlx5_core_dev *mdev)
@@ -697,7 +808,7 @@ int mlx5e_build_channel_param(struct mlx5_core_dev *mdev,
if (err)
return err;
- icosq_log_wq_sz = mlx5e_build_icosq_log_wq_sz(params, &cparam->rq);
+ icosq_log_wq_sz = mlx5e_build_icosq_log_wq_sz(mdev, params, &cparam->rq);
async_icosq_log_wq_sz = mlx5e_build_async_icosq_log_wq_sz(mdev);
mlx5e_build_sq_param(mdev, params, &cparam->txq_sq);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.h b/drivers/net/ethernet/mellanox/mlx5/core/en/params.h
index 879ad46d754e..433e6967692d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.h
@@ -11,11 +11,6 @@ struct mlx5e_xsk_param {
u16 chunk_size;
};
-struct mlx5e_lro_param {
- bool enabled;
- u32 timeout;
-};
-
struct mlx5e_cq_param {
u32 cqc[MLX5_ST_SZ_DW(cqc)];
struct mlx5_wq_param wq;
@@ -116,6 +111,18 @@ bool mlx5e_rx_mpwqe_is_linear_skb(struct mlx5_core_dev *mdev,
struct mlx5e_xsk_param *xsk);
u8 mlx5e_mpwqe_get_log_rq_size(struct mlx5e_params *params,
struct mlx5e_xsk_param *xsk);
+u8 mlx5e_shampo_get_log_hd_entry_size(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params);
+u8 mlx5e_shampo_get_log_rsrv_size(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params);
+u8 mlx5e_shampo_get_log_pkt_per_rsrv(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params);
+u32 mlx5e_shampo_hd_per_wqe(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params,
+ struct mlx5e_rq_param *rq_param);
+u32 mlx5e_shampo_hd_per_wq(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params,
+ struct mlx5e_rq_param *rq_param);
u8 mlx5e_mpwqe_get_log_stride_size(struct mlx5_core_dev *mdev,
struct mlx5e_params *params,
struct mlx5e_xsk_param *xsk);
@@ -125,7 +132,6 @@ u8 mlx5e_mpwqe_get_log_num_strides(struct mlx5_core_dev *mdev,
u16 mlx5e_get_rq_headroom(struct mlx5_core_dev *mdev,
struct mlx5e_params *params,
struct mlx5e_xsk_param *xsk);
-struct mlx5e_lro_param mlx5e_get_lro_param(struct mlx5e_params *params);
/* Build queue parameters */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rss.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rss.c
index b8b481b335cf..c1cdd8c2e37a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/rss.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rss.c
@@ -127,7 +127,7 @@ mlx5e_rss_get_tt_config(struct mlx5e_rss *rss, enum mlx5_traffic_types tt)
static int mlx5e_rss_create_tir(struct mlx5e_rss *rss,
enum mlx5_traffic_types tt,
- const struct mlx5e_lro_param *init_lro_param,
+ const struct mlx5e_packet_merge_param *init_pkt_merge_param,
bool inner)
{
struct mlx5e_rss_params_traffic_type rss_tt;
@@ -161,7 +161,7 @@ static int mlx5e_rss_create_tir(struct mlx5e_rss *rss,
rqtn = mlx5e_rqt_get_rqtn(&rss->rqt);
mlx5e_tir_builder_build_rqt(builder, rss->mdev->mlx5e_res.hw_objs.td.tdn,
rqtn, rss->inner_ft_support);
- mlx5e_tir_builder_build_lro(builder, init_lro_param);
+ mlx5e_tir_builder_build_packet_merge(builder, init_pkt_merge_param);
rss_tt = mlx5e_rss_get_tt_config(rss, tt);
mlx5e_tir_builder_build_rss(builder, &rss->hash, &rss_tt, inner);
@@ -198,14 +198,14 @@ static void mlx5e_rss_destroy_tir(struct mlx5e_rss *rss, enum mlx5_traffic_types
}
static int mlx5e_rss_create_tirs(struct mlx5e_rss *rss,
- const struct mlx5e_lro_param *init_lro_param,
+ const struct mlx5e_packet_merge_param *init_pkt_merge_param,
bool inner)
{
enum mlx5_traffic_types tt, max_tt;
int err;
for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) {
- err = mlx5e_rss_create_tir(rss, tt, init_lro_param, inner);
+ err = mlx5e_rss_create_tir(rss, tt, init_pkt_merge_param, inner);
if (err)
goto err_destroy_tirs;
}
@@ -297,7 +297,7 @@ int mlx5e_rss_init_no_tirs(struct mlx5e_rss *rss, struct mlx5_core_dev *mdev,
int mlx5e_rss_init(struct mlx5e_rss *rss, struct mlx5_core_dev *mdev,
bool inner_ft_support, u32 drop_rqn,
- const struct mlx5e_lro_param *init_lro_param)
+ const struct mlx5e_packet_merge_param *init_pkt_merge_param)
{
int err;
@@ -305,12 +305,12 @@ int mlx5e_rss_init(struct mlx5e_rss *rss, struct mlx5_core_dev *mdev,
if (err)
goto err_out;
- err = mlx5e_rss_create_tirs(rss, init_lro_param, false);
+ err = mlx5e_rss_create_tirs(rss, init_pkt_merge_param, false);
if (err)
goto err_destroy_rqt;
if (inner_ft_support) {
- err = mlx5e_rss_create_tirs(rss, init_lro_param, true);
+ err = mlx5e_rss_create_tirs(rss, init_pkt_merge_param, true);
if (err)
goto err_destroy_tirs;
}
@@ -372,7 +372,7 @@ u32 mlx5e_rss_get_tirn(struct mlx5e_rss *rss, enum mlx5_traffic_types tt,
*/
int mlx5e_rss_obtain_tirn(struct mlx5e_rss *rss,
enum mlx5_traffic_types tt,
- const struct mlx5e_lro_param *init_lro_param,
+ const struct mlx5e_packet_merge_param *init_pkt_merge_param,
bool inner, u32 *tirn)
{
struct mlx5e_tir *tir;
@@ -381,7 +381,7 @@ int mlx5e_rss_obtain_tirn(struct mlx5e_rss *rss,
if (!tir) { /* TIR doesn't exist, create one */
int err;
- err = mlx5e_rss_create_tir(rss, tt, init_lro_param, inner);
+ err = mlx5e_rss_create_tir(rss, tt, init_pkt_merge_param, inner);
if (err)
return err;
tir = rss_get_tir(rss, tt, inner);
@@ -419,7 +419,8 @@ void mlx5e_rss_disable(struct mlx5e_rss *rss)
mlx5e_rqt_get_rqtn(&rss->rqt), rss->drop_rqn, err);
}
-int mlx5e_rss_lro_set_param(struct mlx5e_rss *rss, struct mlx5e_lro_param *lro_param)
+int mlx5e_rss_packet_merge_set_param(struct mlx5e_rss *rss,
+ struct mlx5e_packet_merge_param *pkt_merge_param)
{
struct mlx5e_tir_builder *builder;
enum mlx5_traffic_types tt;
@@ -429,7 +430,7 @@ int mlx5e_rss_lro_set_param(struct mlx5e_rss *rss, struct mlx5e_lro_param *lro_p
if (!builder)
return -ENOMEM;
- mlx5e_tir_builder_build_lro(builder, lro_param);
+ mlx5e_tir_builder_build_packet_merge(builder, pkt_merge_param);
final_err = 0;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rss.h b/drivers/net/ethernet/mellanox/mlx5/core/en/rss.h
index d522a10dadf3..c6b216416344 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/rss.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rss.h
@@ -17,7 +17,7 @@ struct mlx5e_rss *mlx5e_rss_alloc(void);
void mlx5e_rss_free(struct mlx5e_rss *rss);
int mlx5e_rss_init(struct mlx5e_rss *rss, struct mlx5_core_dev *mdev,
bool inner_ft_support, u32 drop_rqn,
- const struct mlx5e_lro_param *init_lro_param);
+ const struct mlx5e_packet_merge_param *init_pkt_merge_param);
int mlx5e_rss_init_no_tirs(struct mlx5e_rss *rss, struct mlx5_core_dev *mdev,
bool inner_ft_support, u32 drop_rqn);
int mlx5e_rss_cleanup(struct mlx5e_rss *rss);
@@ -30,13 +30,14 @@ u32 mlx5e_rss_get_tirn(struct mlx5e_rss *rss, enum mlx5_traffic_types tt,
bool inner);
int mlx5e_rss_obtain_tirn(struct mlx5e_rss *rss,
enum mlx5_traffic_types tt,
- const struct mlx5e_lro_param *init_lro_param,
+ const struct mlx5e_packet_merge_param *init_pkt_merge_param,
bool inner, u32 *tirn);
void mlx5e_rss_enable(struct mlx5e_rss *rss, u32 *rqns, unsigned int num_rqns);
void mlx5e_rss_disable(struct mlx5e_rss *rss);
-int mlx5e_rss_lro_set_param(struct mlx5e_rss *rss, struct mlx5e_lro_param *lro_param);
+int mlx5e_rss_packet_merge_set_param(struct mlx5e_rss *rss,
+ struct mlx5e_packet_merge_param *pkt_merge_param);
int mlx5e_rss_get_rxfh(struct mlx5e_rss *rss, u32 *indir, u8 *key, u8 *hfunc);
int mlx5e_rss_set_rxfh(struct mlx5e_rss *rss, const u32 *indir,
const u8 *key, const u8 *hfunc,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.c
index 13056cb9757d..142953847996 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.c
@@ -34,7 +34,7 @@ struct mlx5e_rx_res {
/* API for rx_res_rss_* */
static int mlx5e_rx_res_rss_init_def(struct mlx5e_rx_res *res,
- const struct mlx5e_lro_param *init_lro_param,
+ const struct mlx5e_packet_merge_param *init_pkt_merge_param,
unsigned int init_nch)
{
bool inner_ft_support = res->features & MLX5E_RX_RES_FEATURE_INNER_FT;
@@ -49,7 +49,7 @@ static int mlx5e_rx_res_rss_init_def(struct mlx5e_rx_res *res,
return -ENOMEM;
err = mlx5e_rss_init(rss, res->mdev, inner_ft_support, res->drop_rqn,
- init_lro_param);
+ init_pkt_merge_param);
if (err)
goto err_rss_free;
@@ -275,7 +275,7 @@ struct mlx5e_rx_res *mlx5e_rx_res_alloc(void)
}
static int mlx5e_rx_res_channels_init(struct mlx5e_rx_res *res,
- const struct mlx5e_lro_param *init_lro_param)
+ const struct mlx5e_packet_merge_param *init_pkt_merge_param)
{
bool inner_ft_support = res->features & MLX5E_RX_RES_FEATURE_INNER_FT;
struct mlx5e_tir_builder *builder;
@@ -306,7 +306,7 @@ static int mlx5e_rx_res_channels_init(struct mlx5e_rx_res *res,
mlx5e_tir_builder_build_rqt(builder, res->mdev->mlx5e_res.hw_objs.td.tdn,
mlx5e_rqt_get_rqtn(&res->channels[ix].direct_rqt),
inner_ft_support);
- mlx5e_tir_builder_build_lro(builder, init_lro_param);
+ mlx5e_tir_builder_build_packet_merge(builder, init_pkt_merge_param);
mlx5e_tir_builder_build_direct(builder);
err = mlx5e_tir_init(&res->channels[ix].direct_tir, builder, res->mdev, true);
@@ -336,7 +336,7 @@ static int mlx5e_rx_res_channels_init(struct mlx5e_rx_res *res,
mlx5e_tir_builder_build_rqt(builder, res->mdev->mlx5e_res.hw_objs.td.tdn,
mlx5e_rqt_get_rqtn(&res->channels[ix].xsk_rqt),
inner_ft_support);
- mlx5e_tir_builder_build_lro(builder, init_lro_param);
+ mlx5e_tir_builder_build_packet_merge(builder, init_pkt_merge_param);
mlx5e_tir_builder_build_direct(builder);
err = mlx5e_tir_init(&res->channels[ix].xsk_tir, builder, res->mdev, true);
@@ -437,7 +437,7 @@ static void mlx5e_rx_res_ptp_destroy(struct mlx5e_rx_res *res)
int mlx5e_rx_res_init(struct mlx5e_rx_res *res, struct mlx5_core_dev *mdev,
enum mlx5e_rx_res_features features, unsigned int max_nch,
- u32 drop_rqn, const struct mlx5e_lro_param *init_lro_param,
+ u32 drop_rqn, const struct mlx5e_packet_merge_param *init_pkt_merge_param,
unsigned int init_nch)
{
int err;
@@ -447,11 +447,11 @@ int mlx5e_rx_res_init(struct mlx5e_rx_res *res, struct mlx5_core_dev *mdev,
res->max_nch = max_nch;
res->drop_rqn = drop_rqn;
- err = mlx5e_rx_res_rss_init_def(res, init_lro_param, init_nch);
+ err = mlx5e_rx_res_rss_init_def(res, init_pkt_merge_param, init_nch);
if (err)
goto err_out;
- err = mlx5e_rx_res_channels_init(res, init_lro_param);
+ err = mlx5e_rx_res_channels_init(res, init_pkt_merge_param);
if (err)
goto err_rss_destroy;
@@ -645,7 +645,8 @@ int mlx5e_rx_res_xsk_deactivate(struct mlx5e_rx_res *res, unsigned int ix)
return err;
}
-int mlx5e_rx_res_lro_set_param(struct mlx5e_rx_res *res, struct mlx5e_lro_param *lro_param)
+int mlx5e_rx_res_packet_merge_set_param(struct mlx5e_rx_res *res,
+ struct mlx5e_packet_merge_param *pkt_merge_param)
{
struct mlx5e_tir_builder *builder;
int err, final_err;
@@ -655,7 +656,7 @@ int mlx5e_rx_res_lro_set_param(struct mlx5e_rx_res *res, struct mlx5e_lro_param
if (!builder)
return -ENOMEM;
- mlx5e_tir_builder_build_lro(builder, lro_param);
+ mlx5e_tir_builder_build_packet_merge(builder, pkt_merge_param);
final_err = 0;
@@ -665,7 +666,7 @@ int mlx5e_rx_res_lro_set_param(struct mlx5e_rx_res *res, struct mlx5e_lro_param
if (!rss)
continue;
- err = mlx5e_rss_lro_set_param(rss, lro_param);
+ err = mlx5e_rss_packet_merge_set_param(rss, pkt_merge_param);
if (err)
final_err = final_err ? : err;
}
@@ -673,7 +674,7 @@ int mlx5e_rx_res_lro_set_param(struct mlx5e_rx_res *res, struct mlx5e_lro_param
for (ix = 0; ix < res->max_nch; ix++) {
err = mlx5e_tir_modify(&res->channels[ix].direct_tir, builder);
if (err) {
- mlx5_core_warn(res->mdev, "Failed to update LRO state of direct TIR %#x for channel %u: err = %d\n",
+ mlx5_core_warn(res->mdev, "Failed to update packet merge state of direct TIR %#x for channel %u: err = %d\n",
mlx5e_tir_get_tirn(&res->channels[ix].direct_tir), ix, err);
if (!final_err)
final_err = err;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.h b/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.h
index 4a15942d79f7..d09f7d174a51 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.h
@@ -25,7 +25,7 @@ enum mlx5e_rx_res_features {
struct mlx5e_rx_res *mlx5e_rx_res_alloc(void);
int mlx5e_rx_res_init(struct mlx5e_rx_res *res, struct mlx5_core_dev *mdev,
enum mlx5e_rx_res_features features, unsigned int max_nch,
- u32 drop_rqn, const struct mlx5e_lro_param *init_lro_param,
+ u32 drop_rqn, const struct mlx5e_packet_merge_param *init_pkt_merge_param,
unsigned int init_nch);
void mlx5e_rx_res_destroy(struct mlx5e_rx_res *res);
void mlx5e_rx_res_free(struct mlx5e_rx_res *res);
@@ -57,7 +57,8 @@ int mlx5e_rx_res_rss_set_rxfh(struct mlx5e_rx_res *res, u32 rss_idx,
u8 mlx5e_rx_res_rss_get_hash_fields(struct mlx5e_rx_res *res, enum mlx5_traffic_types tt);
int mlx5e_rx_res_rss_set_hash_fields(struct mlx5e_rx_res *res, enum mlx5_traffic_types tt,
u8 rx_hash_fields);
-int mlx5e_rx_res_lro_set_param(struct mlx5e_rx_res *res, struct mlx5e_lro_param *lro_param);
+int mlx5e_rx_res_packet_merge_set_param(struct mlx5e_rx_res *res,
+ struct mlx5e_packet_merge_param *pkt_merge_param);
int mlx5e_rx_res_rss_init(struct mlx5e_rx_res *res, u32 *rss_idx, unsigned int init_nch);
int mlx5e_rx_res_rss_destroy(struct mlx5e_rx_res *res, u32 rss_idx);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tir.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tir.c
index de936dc4bc48..da169b816665 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tir.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tir.c
@@ -70,24 +70,30 @@ void mlx5e_tir_builder_build_rqt(struct mlx5e_tir_builder *builder, u32 tdn,
MLX5_SET(tirc, tirc, tunneled_offload_en, inner_ft_support);
}
-void mlx5e_tir_builder_build_lro(struct mlx5e_tir_builder *builder,
- const struct mlx5e_lro_param *lro_param)
+void mlx5e_tir_builder_build_packet_merge(struct mlx5e_tir_builder *builder,
+ const struct mlx5e_packet_merge_param *pkt_merge_param)
{
void *tirc = mlx5e_tir_builder_get_tirc(builder);
const unsigned int rough_max_l2_l3_hdr_sz = 256;
if (builder->modify)
- MLX5_SET(modify_tir_in, builder->in, bitmask.lro, 1);
-
- if (!lro_param->enabled)
- return;
-
- MLX5_SET(tirc, tirc, lro_enable_mask,
- MLX5_TIRC_LRO_ENABLE_MASK_IPV4_LRO |
- MLX5_TIRC_LRO_ENABLE_MASK_IPV6_LRO);
- MLX5_SET(tirc, tirc, lro_max_ip_payload_size,
- (MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ - rough_max_l2_l3_hdr_sz) >> 8);
- MLX5_SET(tirc, tirc, lro_timeout_period_usecs, lro_param->timeout);
+ MLX5_SET(modify_tir_in, builder->in, bitmask.packet_merge, 1);
+
+ switch (pkt_merge_param->type) {
+ case MLX5E_PACKET_MERGE_LRO:
+ MLX5_SET(tirc, tirc, packet_merge_mask,
+ MLX5_TIRC_PACKET_MERGE_MASK_IPV4_LRO |
+ MLX5_TIRC_PACKET_MERGE_MASK_IPV6_LRO);
+ MLX5_SET(tirc, tirc, lro_max_ip_payload_size,
+ (MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ - rough_max_l2_l3_hdr_sz) >> 8);
+ MLX5_SET(tirc, tirc, lro_timeout_period_usecs, pkt_merge_param->timeout);
+ break;
+ case MLX5E_PACKET_MERGE_SHAMPO:
+ MLX5_SET(tirc, tirc, packet_merge_mask, MLX5_TIRC_PACKET_MERGE_MASK_SHAMPO);
+ break;
+ default:
+ break;
+ }
}
static int mlx5e_hfunc_to_hw(u8 hfunc)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tir.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tir.h
index e45149a78ed9..857a84bcd53a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tir.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tir.h
@@ -18,7 +18,7 @@ struct mlx5e_rss_params_traffic_type {
};
struct mlx5e_tir_builder;
-struct mlx5e_lro_param;
+struct mlx5e_packet_merge_param;
struct mlx5e_tir_builder *mlx5e_tir_builder_alloc(bool modify);
void mlx5e_tir_builder_free(struct mlx5e_tir_builder *builder);
@@ -27,8 +27,8 @@ void mlx5e_tir_builder_clear(struct mlx5e_tir_builder *builder);
void mlx5e_tir_builder_build_inline(struct mlx5e_tir_builder *builder, u32 tdn, u32 rqn);
void mlx5e_tir_builder_build_rqt(struct mlx5e_tir_builder *builder, u32 tdn,
u32 rqtn, bool inner_ft_support);
-void mlx5e_tir_builder_build_lro(struct mlx5e_tir_builder *builder,
- const struct mlx5e_lro_param *lro_param);
+void mlx5e_tir_builder_build_packet_merge(struct mlx5e_tir_builder *builder,
+ const struct mlx5e_packet_merge_param *pkt_merge_param);
void mlx5e_tir_builder_build_rss(struct mlx5e_tir_builder *builder,
const struct mlx5e_rss_params_hash *rss_hash,
const struct mlx5e_rss_params_traffic_type *rss_tt,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
index 055c3bc23733..4cdf8e5b24c2 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
@@ -36,6 +36,7 @@ ktime_t mlx5e_cqe_ts_to_ns(cqe_ts_to_ns func, struct mlx5_clock *clock, u64 cqe_
enum mlx5e_icosq_wqe_type {
MLX5E_ICOSQ_WQE_NOP,
MLX5E_ICOSQ_WQE_UMR_RX,
+ MLX5E_ICOSQ_WQE_SHAMPO_HD_UMR,
#ifdef CONFIG_MLX5_EN_TLS
MLX5E_ICOSQ_WQE_UMR_TLS,
MLX5E_ICOSQ_WQE_SET_PSV_TLS,
@@ -166,6 +167,10 @@ static inline u16 mlx5e_txqsq_get_next_pi(struct mlx5e_txqsq *sq, u16 size)
return pi;
}
+struct mlx5e_shampo_umr {
+ u16 len;
+};
+
struct mlx5e_icosq_wqe_info {
u8 wqe_type;
u8 num_wqebbs;
@@ -175,6 +180,7 @@ struct mlx5e_icosq_wqe_info {
struct {
struct mlx5e_rq *rq;
} umr;
+ struct mlx5e_shampo_umr shampo;
#ifdef CONFIG_MLX5_EN_TLS
struct {
struct mlx5e_ktls_offload_context_rx *priv_rx;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
index 25926e581d18..c2ea5fad48dd 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
@@ -1900,6 +1900,11 @@ int mlx5e_modify_rx_cqe_compression_locked(struct mlx5e_priv *priv, bool new_val
return -EINVAL;
}
+ if (priv->channels.params.packet_merge.type == MLX5E_PACKET_MERGE_SHAMPO) {
+ netdev_warn(priv->netdev, "Can't set CQE compression with HW-GRO, disable it first.\n");
+ return -EINVAL;
+ }
+
new_params = priv->channels.params;
MLX5E_SET_PFLAG(&new_params, MLX5E_PFLAG_RX_CQE_COMPRESS, new_val);
if (rx_filter)
@@ -1952,8 +1957,8 @@ static int set_pflag_rx_striding_rq(struct net_device *netdev, bool enable)
return -EOPNOTSUPP;
if (!mlx5e_striding_rq_possible(mdev, &priv->channels.params))
return -EINVAL;
- } else if (priv->channels.params.lro_en) {
- netdev_warn(netdev, "Can't set legacy RQ with LRO, disable LRO first\n");
+ } else if (priv->channels.params.packet_merge.type != MLX5E_PACKET_MERGE_NONE) {
+ netdev_warn(netdev, "Can't set legacy RQ with HW-GRO/LRO, disable them first\n");
return -EINVAL;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c
index 81ebf281cdb4..ad0d234632a3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c
@@ -411,7 +411,7 @@ static int flow_get_tirn(struct mlx5e_priv *priv,
u32 rss_context, u32 *tirn)
{
if (fs->flow_type & FLOW_RSS) {
- struct mlx5e_lro_param lro_param;
+ struct mlx5e_packet_merge_param pkt_merge_param;
struct mlx5e_rss *rss;
u32 flow_type;
int err;
@@ -426,8 +426,8 @@ static int flow_get_tirn(struct mlx5e_priv *priv,
if (tt < 0)
return -EINVAL;
- lro_param = mlx5e_get_lro_param(&priv->channels.params);
- err = mlx5e_rss_obtain_tirn(rss, tt, &lro_param, false, tirn);
+ pkt_merge_param = priv->channels.params.packet_merge;
+ err = mlx5e_rss_obtain_tirn(rss, tt, &pkt_merge_param, false, tirn);
if (err)
return err;
eth_rule->rss = rss;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index f3dec58026d9..6f398f636f01 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -218,6 +218,45 @@ static inline void mlx5e_build_umr_wqe(struct mlx5e_rq *rq,
ucseg->mkey_mask = cpu_to_be64(MLX5_MKEY_MASK_FREE);
}
+static int mlx5e_rq_shampo_hd_alloc(struct mlx5e_rq *rq, int node)
+{
+ rq->mpwqe.shampo = kvzalloc_node(sizeof(*rq->mpwqe.shampo),
+ GFP_KERNEL, node);
+ if (!rq->mpwqe.shampo)
+ return -ENOMEM;
+ return 0;
+}
+
+static void mlx5e_rq_shampo_hd_free(struct mlx5e_rq *rq)
+{
+ kvfree(rq->mpwqe.shampo);
+}
+
+static int mlx5e_rq_shampo_hd_info_alloc(struct mlx5e_rq *rq, int node)
+{
+ struct mlx5e_shampo_hd *shampo = rq->mpwqe.shampo;
+
+ shampo->bitmap = bitmap_zalloc_node(shampo->hd_per_wq, GFP_KERNEL,
+ node);
+ if (!shampo->bitmap)
+ return -ENOMEM;
+
+ shampo->info = kvzalloc_node(array_size(shampo->hd_per_wq,
+ sizeof(*shampo->info)),
+ GFP_KERNEL, node);
+ if (!shampo->info) {
+ kvfree(shampo->bitmap);
+ return -ENOMEM;
+ }
+ return 0;
+}
+
+static void mlx5e_rq_shampo_hd_info_free(struct mlx5e_rq *rq)
+{
+ kvfree(rq->mpwqe.shampo->bitmap);
+ kvfree(rq->mpwqe.shampo->info);
+}
+
static int mlx5e_rq_alloc_mpwqe_info(struct mlx5e_rq *rq, int node)
{
int wq_sz = mlx5_wq_ll_get_size(&rq->mpwqe.wq);
@@ -233,10 +272,10 @@ static int mlx5e_rq_alloc_mpwqe_info(struct mlx5e_rq *rq, int node)
return 0;
}
-static int mlx5e_create_umr_mkey(struct mlx5_core_dev *mdev,
- u64 npages, u8 page_shift,
- struct mlx5_core_mkey *umr_mkey,
- dma_addr_t filler_addr)
+static int mlx5e_create_umr_mtt_mkey(struct mlx5_core_dev *mdev,
+ u64 npages, u8 page_shift,
+ struct mlx5_core_mkey *umr_mkey,
+ dma_addr_t filler_addr)
{
struct mlx5_mtt *mtt;
int inlen;
@@ -284,12 +323,59 @@ static int mlx5e_create_umr_mkey(struct mlx5_core_dev *mdev,
return err;
}
+static int mlx5e_create_umr_klm_mkey(struct mlx5_core_dev *mdev,
+ u64 nentries,
+ struct mlx5_core_mkey *umr_mkey)
+{
+ int inlen;
+ void *mkc;
+ u32 *in;
+ int err;
+
+ inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
+
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in)
+ return -ENOMEM;
+
+ mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
+
+ MLX5_SET(mkc, mkc, free, 1);
+ MLX5_SET(mkc, mkc, umr_en, 1);
+ MLX5_SET(mkc, mkc, lw, 1);
+ MLX5_SET(mkc, mkc, lr, 1);
+ MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_KLMS);
+ mlx5e_mkey_set_relaxed_ordering(mdev, mkc);
+ MLX5_SET(mkc, mkc, qpn, 0xffffff);
+ MLX5_SET(mkc, mkc, pd, mdev->mlx5e_res.hw_objs.pdn);
+ MLX5_SET(mkc, mkc, translations_octword_size, nentries);
+ MLX5_SET(mkc, mkc, length64, 1);
+ err = mlx5_core_create_mkey(mdev, umr_mkey, in, inlen);
+
+ kvfree(in);
+ return err;
+}
+
static int mlx5e_create_rq_umr_mkey(struct mlx5_core_dev *mdev, struct mlx5e_rq *rq)
{
u64 num_mtts = MLX5E_REQUIRED_MTTS(mlx5_wq_ll_get_size(&rq->mpwqe.wq));
- return mlx5e_create_umr_mkey(mdev, num_mtts, PAGE_SHIFT, &rq->umr_mkey,
- rq->wqe_overflow.addr);
+ return mlx5e_create_umr_mtt_mkey(mdev, num_mtts, PAGE_SHIFT,
+ &rq->umr_mkey, rq->wqe_overflow.addr);
+}
+
+static int mlx5e_create_rq_hd_umr_mkey(struct mlx5_core_dev *mdev,
+ struct mlx5e_rq *rq)
+{
+ u32 max_klm_size = BIT(MLX5_CAP_GEN(mdev, log_max_klm_list_size));
+
+ if (max_klm_size < rq->mpwqe.shampo->hd_per_wq) {
+ mlx5_core_err(mdev, "max klm list size 0x%x is smaller than shampo header buffer list size 0x%x\n",
+ max_klm_size, rq->mpwqe.shampo->hd_per_wq);
+ return -EINVAL;
+ }
+ return mlx5e_create_umr_klm_mkey(mdev, rq->mpwqe.shampo->hd_per_wq,
+ &rq->mpwqe.shampo->mkey);
}
static u64 mlx5e_get_mpwqe_offset(u16 wqe_ix)
@@ -403,6 +489,65 @@ static int mlx5e_init_rxq_rq(struct mlx5e_channel *c, struct mlx5e_params *param
return xdp_rxq_info_reg(&rq->xdp_rxq, rq->netdev, rq->ix, 0);
}
+static int mlx5_rq_shampo_alloc(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params,
+ struct mlx5e_rq_param *rqp,
+ struct mlx5e_rq *rq,
+ u32 *pool_size,
+ int node)
+{
+ void *wqc = MLX5_ADDR_OF(rqc, rqp->rqc, wq);
+ int wq_size;
+ int err;
+
+ if (!test_bit(MLX5E_RQ_STATE_SHAMPO, &rq->state))
+ return 0;
+ err = mlx5e_rq_shampo_hd_alloc(rq, node);
+ if (err)
+ goto out;
+ rq->mpwqe.shampo->hd_per_wq =
+ mlx5e_shampo_hd_per_wq(mdev, params, rqp);
+ err = mlx5e_create_rq_hd_umr_mkey(mdev, rq);
+ if (err)
+ goto err_shampo_hd;
+ err = mlx5e_rq_shampo_hd_info_alloc(rq, node);
+ if (err)
+ goto err_shampo_info;
+ rq->hw_gro_data = kvzalloc_node(sizeof(*rq->hw_gro_data), GFP_KERNEL, node);
+ if (!rq->hw_gro_data) {
+ err = -ENOMEM;
+ goto err_hw_gro_data;
+ }
+ rq->mpwqe.shampo->key =
+ cpu_to_be32(rq->mpwqe.shampo->mkey.key);
+ rq->mpwqe.shampo->hd_per_wqe =
+ mlx5e_shampo_hd_per_wqe(mdev, params, rqp);
+ wq_size = BIT(MLX5_GET(wq, wqc, log_wq_sz));
+ *pool_size += (rq->mpwqe.shampo->hd_per_wqe * wq_size) /
+ MLX5E_SHAMPO_WQ_HEADER_PER_PAGE;
+ return 0;
+
+err_hw_gro_data:
+ mlx5e_rq_shampo_hd_info_free(rq);
+err_shampo_info:
+ mlx5_core_destroy_mkey(mdev, &rq->mpwqe.shampo->mkey);
+err_shampo_hd:
+ mlx5e_rq_shampo_hd_free(rq);
+out:
+ return err;
+}
+
+static void mlx5e_rq_free_shampo(struct mlx5e_rq *rq)
+{
+ if (!test_bit(MLX5E_RQ_STATE_SHAMPO, &rq->state))
+ return;
+
+ kvfree(rq->hw_gro_data);
+ mlx5e_rq_shampo_hd_info_free(rq);
+ mlx5_core_destroy_mkey(rq->mdev, &rq->mpwqe.shampo->mkey);
+ mlx5e_rq_shampo_hd_free(rq);
+}
+
static int mlx5e_alloc_rq(struct mlx5e_params *params,
struct mlx5e_xsk_param *xsk,
struct mlx5e_rq_param *rqp,
@@ -460,6 +605,11 @@ static int mlx5e_alloc_rq(struct mlx5e_params *params,
err = mlx5e_rq_alloc_mpwqe_info(rq, node);
if (err)
goto err_rq_mkey;
+
+ err = mlx5_rq_shampo_alloc(mdev, params, rqp, rq, &pool_size, node);
+ if (err)
+ goto err_free_by_rq_type;
+
break;
default: /* MLX5_WQ_TYPE_CYCLIC */
err = mlx5_wq_cyc_create(mdev, &rqp->wq, rqc_wq, &rq->wqe.wq,
@@ -512,14 +662,14 @@ static int mlx5e_alloc_rq(struct mlx5e_params *params,
if (IS_ERR(rq->page_pool)) {
err = PTR_ERR(rq->page_pool);
rq->page_pool = NULL;
- goto err_free_by_rq_type;
+ goto err_free_shampo;
}
if (xdp_rxq_info_is_reg(&rq->xdp_rxq))
err = xdp_rxq_info_reg_mem_model(&rq->xdp_rxq,
MEM_TYPE_PAGE_POOL, rq->page_pool);
}
if (err)
- goto err_free_by_rq_type;
+ goto err_free_shampo;
for (i = 0; i < wq_sz; i++) {
if (rq->wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) {
@@ -528,8 +678,10 @@ static int mlx5e_alloc_rq(struct mlx5e_params *params,
u32 byte_count =
rq->mpwqe.num_strides << rq->mpwqe.log_stride_sz;
u64 dma_offset = mlx5e_get_mpwqe_offset(i);
+ u16 headroom = test_bit(MLX5E_RQ_STATE_SHAMPO, &rq->state) ?
+ 0 : rq->buff.headroom;
- wqe->data[0].addr = cpu_to_be64(dma_offset + rq->buff.headroom);
+ wqe->data[0].addr = cpu_to_be64(dma_offset + headroom);
wqe->data[0].byte_count = cpu_to_be32(byte_count);
wqe->data[0].lkey = rq->mkey_be;
} else {
@@ -569,6 +721,8 @@ static int mlx5e_alloc_rq(struct mlx5e_params *params,
return 0;
+err_free_shampo:
+ mlx5e_rq_free_shampo(rq);
err_free_by_rq_type:
switch (rq->wq_type) {
case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
@@ -609,6 +763,7 @@ static void mlx5e_free_rq(struct mlx5e_rq *rq)
kvfree(rq->mpwqe.info);
mlx5_core_destroy_mkey(rq->mdev, &rq->umr_mkey);
mlx5e_free_mpwqe_rq_drop_page(rq);
+ mlx5e_rq_free_shampo(rq);
break;
default: /* MLX5_WQ_TYPE_CYCLIC */
kvfree(rq->wqe.frags);
@@ -662,6 +817,12 @@ int mlx5e_create_rq(struct mlx5e_rq *rq, struct mlx5e_rq_param *param)
MLX5_ADAPTER_PAGE_SHIFT);
MLX5_SET64(wq, wq, dbr_addr, rq->wq_ctrl.db.dma);
+ if (test_bit(MLX5E_RQ_STATE_SHAMPO, &rq->state)) {
+ MLX5_SET(wq, wq, log_headers_buffer_entry_num,
+ order_base_2(rq->mpwqe.shampo->hd_per_wq));
+ MLX5_SET(wq, wq, headers_mkey, rq->mpwqe.shampo->mkey.key);
+ }
+
mlx5_fill_page_frag_array(&rq->wq_ctrl.buf,
(__be64 *)MLX5_ADDR_OF(wq, wq, pas));
@@ -801,6 +962,15 @@ void mlx5e_free_rx_in_progress_descs(struct mlx5e_rq *rq)
head = mlx5_wq_ll_get_wqe_next_ix(wq, head);
}
+ if (test_bit(MLX5E_RQ_STATE_SHAMPO, &rq->state)) {
+ u16 len;
+
+ len = (rq->mpwqe.shampo->pi - rq->mpwqe.shampo->ci) &
+ (rq->mpwqe.shampo->hd_per_wq - 1);
+ mlx5e_shampo_dealloc_hd(rq, len, rq->mpwqe.shampo->ci, false);
+ rq->mpwqe.shampo->pi = rq->mpwqe.shampo->ci;
+ }
+
rq->mpwqe.actual_wq_head = wq->head;
rq->mpwqe.umr_in_progress = 0;
rq->mpwqe.umr_completed = 0;
@@ -826,6 +996,10 @@ void mlx5e_free_rx_descs(struct mlx5e_rq *rq)
mlx5_wq_ll_pop(wq, wqe_ix_be,
&wqe->next.next_wqe_index);
}
+
+ if (test_bit(MLX5E_RQ_STATE_SHAMPO, &rq->state))
+ mlx5e_shampo_dealloc_hd(rq, rq->mpwqe.shampo->hd_per_wq,
+ 0, true);
} else {
struct mlx5_wq_cyc *wq = &rq->wqe.wq;
@@ -845,6 +1019,9 @@ int mlx5e_open_rq(struct mlx5e_params *params, struct mlx5e_rq_param *param,
struct mlx5_core_dev *mdev = rq->mdev;
int err;
+ if (params->packet_merge.type == MLX5E_PACKET_MERGE_SHAMPO)
+ __set_bit(MLX5E_RQ_STATE_SHAMPO, &rq->state);
+
err = mlx5e_alloc_rq(params, xsk, param, node, rq);
if (err)
return err;
@@ -2222,17 +2399,14 @@ void mlx5e_close_channels(struct mlx5e_channels *chs)
chs->num = 0;
}
-static int mlx5e_modify_tirs_lro(struct mlx5e_priv *priv)
+static int mlx5e_modify_tirs_packet_merge(struct mlx5e_priv *priv)
{
struct mlx5e_rx_res *res = priv->rx_res;
- struct mlx5e_lro_param lro_param;
-
- lro_param = mlx5e_get_lro_param(&priv->channels.params);
- return mlx5e_rx_res_lro_set_param(res, &lro_param);
+ return mlx5e_rx_res_packet_merge_set_param(res, &priv->channels.params.packet_merge);
}
-static MLX5E_DEFINE_PREACTIVATE_WRAPPER_CTX(mlx5e_modify_tirs_lro);
+static MLX5E_DEFINE_PREACTIVATE_WRAPPER_CTX(mlx5e_modify_tirs_packet_merge);
static int mlx5e_set_mtu(struct mlx5_core_dev *mdev,
struct mlx5e_params *params, u16 mtu)
@@ -3351,16 +3525,59 @@ static int set_feature_lro(struct net_device *netdev, bool enable)
}
new_params = *cur_params;
- new_params.lro_en = enable;
- if (cur_params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) {
- if (mlx5e_rx_mpwqe_is_linear_skb(mdev, cur_params, NULL) ==
- mlx5e_rx_mpwqe_is_linear_skb(mdev, &new_params, NULL))
- reset = false;
+ if (enable)
+ new_params.packet_merge.type = MLX5E_PACKET_MERGE_LRO;
+ else if (new_params.packet_merge.type == MLX5E_PACKET_MERGE_LRO)
+ new_params.packet_merge.type = MLX5E_PACKET_MERGE_NONE;
+ else
+ goto out;
+
+ if (!(cur_params->packet_merge.type == MLX5E_PACKET_MERGE_SHAMPO &&
+ new_params.packet_merge.type == MLX5E_PACKET_MERGE_LRO)) {
+ if (cur_params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) {
+ if (mlx5e_rx_mpwqe_is_linear_skb(mdev, cur_params, NULL) ==
+ mlx5e_rx_mpwqe_is_linear_skb(mdev, &new_params, NULL))
+ reset = false;
+ }
+ }
+
+ err = mlx5e_safe_switch_params(priv, &new_params,
+ mlx5e_modify_tirs_packet_merge_ctx, NULL, reset);
+out:
+ mutex_unlock(&priv->state_lock);
+ return err;
+}
+
+static int set_feature_hw_gro(struct net_device *netdev, bool enable)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct mlx5e_params new_params;
+ bool reset = true;
+ int err = 0;
+
+ mutex_lock(&priv->state_lock);
+ new_params = priv->channels.params;
+
+ if (enable) {
+ if (MLX5E_GET_PFLAG(&new_params, MLX5E_PFLAG_RX_CQE_COMPRESS)) {
+ netdev_warn(netdev, "Can't set HW-GRO when CQE compress is active\n");
+ err = -EINVAL;
+ goto out;
+ }
+ new_params.packet_merge.type = MLX5E_PACKET_MERGE_SHAMPO;
+ new_params.packet_merge.shampo.match_criteria_type =
+ MLX5_RQC_SHAMPO_MATCH_CRITERIA_TYPE_EXTENDED;
+ new_params.packet_merge.shampo.alignment_granularity =
+ MLX5_RQC_SHAMPO_NO_MATCH_ALIGNMENT_GRANULARITY_STRIDE;
+ } else if (new_params.packet_merge.type == MLX5E_PACKET_MERGE_SHAMPO) {
+ new_params.packet_merge.type = MLX5E_PACKET_MERGE_NONE;
+ } else {
+ goto out;
}
err = mlx5e_safe_switch_params(priv, &new_params,
- mlx5e_modify_tirs_lro_ctx, NULL, reset);
+ mlx5e_modify_tirs_packet_merge_ctx, NULL, reset);
out:
mutex_unlock(&priv->state_lock);
return err;
@@ -3539,6 +3756,7 @@ int mlx5e_set_features(struct net_device *netdev, netdev_features_t features)
mlx5e_handle_feature(netdev, &oper_features, features, feature, handler)
err |= MLX5E_HANDLE_FEATURE(NETIF_F_LRO, set_feature_lro);
+ err |= MLX5E_HANDLE_FEATURE(NETIF_F_GRO_HW, set_feature_hw_gro);
err |= MLX5E_HANDLE_FEATURE(NETIF_F_HW_VLAN_CTAG_FILTER,
set_feature_cvlan_filter);
err |= MLX5E_HANDLE_FEATURE(NETIF_F_HW_TC, set_feature_hw_tc);
@@ -3599,6 +3817,10 @@ static netdev_features_t mlx5e_fix_features(struct net_device *netdev,
netdev_warn(netdev, "Disabling LRO, not supported in legacy RQ\n");
features &= ~NETIF_F_LRO;
}
+ if (features & NETIF_F_GRO_HW) {
+ netdev_warn(netdev, "Disabling HW-GRO, not supported in legacy RQ\n");
+ features &= ~NETIF_F_GRO_HW;
+ }
}
if (MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS)) {
@@ -3687,7 +3909,7 @@ int mlx5e_change_mtu(struct net_device *netdev, int new_mtu,
goto out;
}
- if (params->lro_en)
+ if (params->packet_merge.type == MLX5E_PACKET_MERGE_LRO)
reset = false;
if (params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) {
@@ -4144,8 +4366,8 @@ static int mlx5e_xdp_allowed(struct mlx5e_priv *priv, struct bpf_prog *prog)
struct net_device *netdev = priv->netdev;
struct mlx5e_params new_params;
- if (priv->channels.params.lro_en) {
- netdev_warn(netdev, "can't set XDP while LRO is on, disable LRO first\n");
+ if (priv->channels.params.packet_merge.type != MLX5E_PACKET_MERGE_NONE) {
+ netdev_warn(netdev, "can't set XDP while HW-GRO/LRO is on, disable them first\n");
return -EINVAL;
}
@@ -4402,9 +4624,10 @@ void mlx5e_build_nic_params(struct mlx5e_priv *priv, struct mlx5e_xsk *xsk, u16
params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) {
/* No XSK params: checking the availability of striding RQ in general. */
if (!mlx5e_rx_mpwqe_is_linear_skb(mdev, params, NULL))
- params->lro_en = !slow_pci_heuristic(mdev);
+ params->packet_merge.type = slow_pci_heuristic(mdev) ?
+ MLX5E_PACKET_MERGE_NONE : MLX5E_PACKET_MERGE_LRO;
}
- params->lro_timeout = mlx5e_choose_lro_timeout(mdev, MLX5E_DEFAULT_LRO_TIMEOUT);
+ params->packet_merge.timeout = mlx5e_choose_lro_timeout(mdev, MLX5E_DEFAULT_LRO_TIMEOUT);
/* CQ moderation params */
rx_cq_period_mode = MLX5_CAP_GEN(mdev, cq_period_start_from_cqe) ?
@@ -4539,6 +4762,10 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev)
netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_FILTER;
netdev->hw_features |= NETIF_F_HW_VLAN_STAG_TX;
+ if (!!MLX5_CAP_GEN(mdev, shampo) &&
+ mlx5e_check_fragmented_striding_rq_cap(mdev))
+ netdev->hw_features |= NETIF_F_GRO_HW;
+
if (mlx5e_tunnel_any_tx_proto_supported(mdev)) {
netdev->hw_enc_features |= NETIF_F_HW_CSUM;
netdev->hw_enc_features |= NETIF_F_TSO;
@@ -4589,6 +4816,7 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev)
if (fcs_enabled)
netdev->features &= ~NETIF_F_RXALL;
netdev->features &= ~NETIF_F_LRO;
+ netdev->features &= ~NETIF_F_GRO_HW;
netdev->features &= ~NETIF_F_RXFCS;
#define FT_CAP(f) MLX5_CAP_FLOWTABLE(mdev, flow_table_properties_nic_receive.f)
@@ -4693,7 +4921,6 @@ static int mlx5e_init_nic_rx(struct mlx5e_priv *priv)
{
struct mlx5_core_dev *mdev = priv->mdev;
enum mlx5e_rx_res_features features;
- struct mlx5e_lro_param lro_param;
int err;
priv->rx_res = mlx5e_rx_res_alloc();
@@ -4711,9 +4938,9 @@ static int mlx5e_init_nic_rx(struct mlx5e_priv *priv)
features = MLX5E_RX_RES_FEATURE_XSK | MLX5E_RX_RES_FEATURE_PTP;
if (priv->channels.params.tunneled_offload_en)
features |= MLX5E_RX_RES_FEATURE_INNER_FT;
- lro_param = mlx5e_get_lro_param(&priv->channels.params);
err = mlx5e_rx_res_init(priv->rx_res, priv->mdev, features,
- priv->max_nch, priv->drop_rq.rqn, &lro_param,
+ priv->max_nch, priv->drop_rq.rqn,
+ &priv->channels.params.packet_merge,
priv->channels.params.num_channels);
if (err)
goto err_close_drop_rq;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
index 0684ac6699b2..5230e0422cae 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
@@ -793,7 +793,6 @@ int mlx5e_rep_bond_update(struct mlx5e_priv *priv, bool cleanup)
static int mlx5e_init_rep_rx(struct mlx5e_priv *priv)
{
struct mlx5_core_dev *mdev = priv->mdev;
- struct mlx5e_lro_param lro_param;
int err;
priv->rx_res = mlx5e_rx_res_alloc();
@@ -808,9 +807,9 @@ static int mlx5e_init_rep_rx(struct mlx5e_priv *priv)
return err;
}
- lro_param = mlx5e_get_lro_param(&priv->channels.params);
err = mlx5e_rx_res_init(priv->rx_res, priv->mdev, 0,
- priv->max_nch, priv->drop_rq.rqn, &lro_param,
+ priv->max_nch, priv->drop_rq.rqn,
+ &priv->channels.params.packet_merge,
priv->channels.params.num_channels);
if (err)
goto err_close_drop_rq;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
index 29a6586ef28d..fe979edd96dc 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
@@ -33,9 +33,12 @@
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/tcp.h>
+#include <linux/bitmap.h>
#include <net/ip6_checksum.h>
#include <net/page_pool.h>
#include <net/inet_ecn.h>
+#include <net/udp.h>
+#include <net/tcp.h>
#include "en.h"
#include "en/txrx.h"
#include "en_tc.h"
@@ -62,10 +65,12 @@ mlx5e_skb_from_cqe_mpwrq_nonlinear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *w
u16 cqe_bcnt, u32 head_offset, u32 page_idx);
static void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe);
static void mlx5e_handle_rx_cqe_mpwrq(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe);
+static void mlx5e_handle_rx_cqe_mpwrq_shampo(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe);
const struct mlx5e_rx_handlers mlx5e_rx_handlers_nic = {
.handle_rx_cqe = mlx5e_handle_rx_cqe,
.handle_rx_cqe_mpwqe = mlx5e_handle_rx_cqe_mpwrq,
+ .handle_rx_cqe_mpwqe_shampo = mlx5e_handle_rx_cqe_mpwrq_shampo,
};
static inline bool mlx5e_rx_hw_stamp(struct hwtstamp_config *config)
@@ -185,8 +190,9 @@ static inline u32 mlx5e_decompress_cqes_cont(struct mlx5e_rq *rq,
mlx5e_read_mini_arr_slot(wq, cqd, cqcc);
mlx5e_decompress_cqe_no_hash(rq, wq, cqcc);
- INDIRECT_CALL_2(rq->handle_rx_cqe, mlx5e_handle_rx_cqe_mpwrq,
- mlx5e_handle_rx_cqe, rq, &cqd->title);
+ INDIRECT_CALL_3(rq->handle_rx_cqe, mlx5e_handle_rx_cqe_mpwrq,
+ mlx5e_handle_rx_cqe_mpwrq_shampo, mlx5e_handle_rx_cqe,
+ rq, &cqd->title);
}
mlx5e_cqes_update_owner(wq, cqcc - wq->cc);
wq->cc = cqcc;
@@ -206,8 +212,9 @@ static inline u32 mlx5e_decompress_cqes_start(struct mlx5e_rq *rq,
mlx5e_read_title_slot(rq, wq, cc);
mlx5e_read_mini_arr_slot(wq, cqd, cc + 1);
mlx5e_decompress_cqe(rq, wq, cc);
- INDIRECT_CALL_2(rq->handle_rx_cqe, mlx5e_handle_rx_cqe_mpwrq,
- mlx5e_handle_rx_cqe, rq, &cqd->title);
+ INDIRECT_CALL_3(rq->handle_rx_cqe, mlx5e_handle_rx_cqe_mpwrq,
+ mlx5e_handle_rx_cqe_mpwrq_shampo, mlx5e_handle_rx_cqe,
+ rq, &cqd->title);
cqd->mini_arr_idx++;
return mlx5e_decompress_cqes_cont(rq, wq, 1, budget_rem) - 1;
@@ -448,13 +455,13 @@ mlx5e_add_skb_frag(struct mlx5e_rq *rq, struct sk_buff *skb,
static inline void
mlx5e_copy_skb_header(struct device *pdev, struct sk_buff *skb,
struct mlx5e_dma_info *dma_info,
- int offset_from, u32 headlen)
+ int offset_from, int dma_offset, u32 headlen)
{
const void *from = page_address(dma_info->page) + offset_from;
/* Aligning len to sizeof(long) optimizes memcpy performance */
unsigned int len = ALIGN(headlen, sizeof(long));
- dma_sync_single_for_cpu(pdev, dma_info->addr + offset_from, len,
+ dma_sync_single_for_cpu(pdev, dma_info->addr + dma_offset, len,
DMA_FROM_DEVICE);
skb_copy_to_linear_data(skb, from, len);
}
@@ -494,6 +501,157 @@ static void mlx5e_post_rx_mpwqe(struct mlx5e_rq *rq, u8 n)
mlx5_wq_ll_update_db_record(wq);
}
+/* This function returns the size of the continuous free space inside a bitmap
+ * that starts from first and no longer than len including circular ones.
+ */
+static int bitmap_find_window(unsigned long *bitmap, int len,
+ int bitmap_size, int first)
+{
+ int next_one, count;
+
+ next_one = find_next_bit(bitmap, bitmap_size, first);
+ if (next_one == bitmap_size) {
+ if (bitmap_size - first >= len)
+ return len;
+ next_one = find_next_bit(bitmap, bitmap_size, 0);
+ count = next_one + bitmap_size - first;
+ } else {
+ count = next_one - first;
+ }
+
+ return min(len, count);
+}
+
+static void build_klm_umr(struct mlx5e_icosq *sq, struct mlx5e_umr_wqe *umr_wqe,
+ __be32 key, u16 offset, u16 klm_len, u16 wqe_bbs)
+{
+ memset(umr_wqe, 0, offsetof(struct mlx5e_umr_wqe, inline_klms));
+ umr_wqe->ctrl.opmod_idx_opcode =
+ cpu_to_be32((sq->pc << MLX5_WQE_CTRL_WQE_INDEX_SHIFT) |
+ MLX5_OPCODE_UMR);
+ umr_wqe->ctrl.umr_mkey = key;
+ umr_wqe->ctrl.qpn_ds = cpu_to_be32((sq->sqn << MLX5_WQE_CTRL_QPN_SHIFT)
+ | MLX5E_KLM_UMR_DS_CNT(klm_len));
+ umr_wqe->uctrl.flags = MLX5_UMR_TRANSLATION_OFFSET_EN | MLX5_UMR_INLINE;
+ umr_wqe->uctrl.xlt_offset = cpu_to_be16(offset);
+ umr_wqe->uctrl.xlt_octowords = cpu_to_be16(klm_len);
+ umr_wqe->uctrl.mkey_mask = cpu_to_be64(MLX5_MKEY_MASK_FREE);
+}
+
+static int mlx5e_build_shampo_hd_umr(struct mlx5e_rq *rq,
+ struct mlx5e_icosq *sq,
+ u16 klm_entries, u16 index)
+{
+ struct mlx5e_shampo_hd *shampo = rq->mpwqe.shampo;
+ u16 entries, pi, i, header_offset, err, wqe_bbs, new_entries;
+ u32 lkey = rq->mdev->mlx5e_res.hw_objs.mkey.key;
+ struct page *page = shampo->last_page;
+ u64 addr = shampo->last_addr;
+ struct mlx5e_dma_info *dma_info;
+ struct mlx5e_umr_wqe *umr_wqe;
+ int headroom;
+
+ headroom = rq->buff.headroom;
+ new_entries = klm_entries - (shampo->pi & (MLX5_UMR_KLM_ALIGNMENT - 1));
+ entries = ALIGN(klm_entries, MLX5_UMR_KLM_ALIGNMENT);
+ wqe_bbs = MLX5E_KLM_UMR_WQEBBS(entries);
+ pi = mlx5e_icosq_get_next_pi(sq, wqe_bbs);
+ umr_wqe = mlx5_wq_cyc_get_wqe(&sq->wq, pi);
+ build_klm_umr(sq, umr_wqe, shampo->key, index, entries, wqe_bbs);
+
+ for (i = 0; i < entries; i++, index++) {
+ dma_info = &shampo->info[index];
+ if (i >= klm_entries || (index < shampo->pi && shampo->pi - index <
+ MLX5_UMR_KLM_ALIGNMENT))
+ goto update_klm;
+ header_offset = (index & (MLX5E_SHAMPO_WQ_HEADER_PER_PAGE - 1)) <<
+ MLX5E_SHAMPO_LOG_MAX_HEADER_ENTRY_SIZE;
+ if (!(header_offset & (PAGE_SIZE - 1))) {
+ err = mlx5e_page_alloc(rq, dma_info);
+ if (unlikely(err))
+ goto err_unmap;
+ addr = dma_info->addr;
+ page = dma_info->page;
+ } else {
+ dma_info->addr = addr + header_offset;
+ dma_info->page = page;
+ }
+
+update_klm:
+ umr_wqe->inline_klms[i].bcount =
+ cpu_to_be32(MLX5E_RX_MAX_HEAD);
+ umr_wqe->inline_klms[i].key = cpu_to_be32(lkey);
+ umr_wqe->inline_klms[i].va =
+ cpu_to_be64(dma_info->addr + headroom);
+ }
+
+ sq->db.wqe_info[pi] = (struct mlx5e_icosq_wqe_info) {
+ .wqe_type = MLX5E_ICOSQ_WQE_SHAMPO_HD_UMR,
+ .num_wqebbs = wqe_bbs,
+ .shampo.len = new_entries,
+ };
+
+ shampo->pi = (shampo->pi + new_entries) & (shampo->hd_per_wq - 1);
+ shampo->last_page = page;
+ shampo->last_addr = addr;
+ sq->pc += wqe_bbs;
+ sq->doorbell_cseg = &umr_wqe->ctrl;
+
+ return 0;
+
+err_unmap:
+ while (--i >= 0) {
+ if (--index < 0)
+ index = shampo->hd_per_wq - 1;
+ dma_info = &shampo->info[index];
+ if (!(i & (MLX5E_SHAMPO_WQ_HEADER_PER_PAGE - 1))) {
+ dma_info->addr = ALIGN_DOWN(dma_info->addr, PAGE_SIZE);
+ mlx5e_page_release(rq, dma_info, true);
+ }
+ }
+ rq->stats->buff_alloc_err++;
+ return err;
+}
+
+static int mlx5e_alloc_rx_hd_mpwqe(struct mlx5e_rq *rq)
+{
+ struct mlx5e_shampo_hd *shampo = rq->mpwqe.shampo;
+ u16 klm_entries, num_wqe, index, entries_before;
+ struct mlx5e_icosq *sq = rq->icosq;
+ int i, err, max_klm_entries, len;
+
+ max_klm_entries = MLX5E_MAX_KLM_PER_WQE(rq->mdev);
+ klm_entries = bitmap_find_window(shampo->bitmap,
+ shampo->hd_per_wqe,
+ shampo->hd_per_wq, shampo->pi);
+ if (!klm_entries)
+ return 0;
+
+ klm_entries += (shampo->pi & (MLX5_UMR_KLM_ALIGNMENT - 1));
+ index = ALIGN_DOWN(shampo->pi, MLX5_UMR_KLM_ALIGNMENT);
+ entries_before = shampo->hd_per_wq - index;
+
+ if (unlikely(entries_before < klm_entries))
+ num_wqe = DIV_ROUND_UP(entries_before, max_klm_entries) +
+ DIV_ROUND_UP(klm_entries - entries_before, max_klm_entries);
+ else
+ num_wqe = DIV_ROUND_UP(klm_entries, max_klm_entries);
+
+ for (i = 0; i < num_wqe; i++) {
+ len = (klm_entries > max_klm_entries) ? max_klm_entries :
+ klm_entries;
+ if (unlikely(index + len > shampo->hd_per_wq))
+ len = shampo->hd_per_wq - index;
+ err = mlx5e_build_shampo_hd_umr(rq, sq, len, index);
+ if (unlikely(err))
+ return err;
+ index = (index + len) & (rq->mpwqe.shampo->hd_per_wq - 1);
+ klm_entries -= len;
+ }
+
+ return 0;
+}
+
static int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix)
{
struct mlx5e_mpw_info *wi = &rq->mpwqe.info[ix];
@@ -514,6 +672,12 @@ static int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix)
goto err;
}
+ if (test_bit(MLX5E_RQ_STATE_SHAMPO, &rq->state)) {
+ err = mlx5e_alloc_rx_hd_mpwqe(rq);
+ if (unlikely(err))
+ goto err;
+ }
+
pi = mlx5e_icosq_get_next_pi(sq, MLX5E_UMR_WQEBBS);
umr_wqe = mlx5_wq_cyc_get_wqe(wq, pi);
memcpy(umr_wqe, &rq->mpwqe.umr_wqe, offsetof(struct mlx5e_umr_wqe, inline_mtts));
@@ -558,6 +722,44 @@ err:
return err;
}
+/* This function is responsible to dealloc SHAMPO header buffer.
+ * close == true specifies that we are in the middle of closing RQ operation so
+ * we go over all the entries and if they are not in use we free them,
+ * otherwise we only go over a specific range inside the header buffer that are
+ * not in use.
+ */
+void mlx5e_shampo_dealloc_hd(struct mlx5e_rq *rq, u16 len, u16 start, bool close)
+{
+ struct mlx5e_shampo_hd *shampo = rq->mpwqe.shampo;
+ int hd_per_wq = shampo->hd_per_wq;
+ struct page *deleted_page = NULL;
+ struct mlx5e_dma_info *hd_info;
+ int i, index = start;
+
+ for (i = 0; i < len; i++, index++) {
+ if (index == hd_per_wq)
+ index = 0;
+
+ if (close && !test_bit(index, shampo->bitmap))
+ continue;
+
+ hd_info = &shampo->info[index];
+ hd_info->addr = ALIGN_DOWN(hd_info->addr, PAGE_SIZE);
+ if (hd_info->page != deleted_page) {
+ deleted_page = hd_info->page;
+ mlx5e_page_release(rq, hd_info, false);
+ }
+ }
+
+ if (start + len > hd_per_wq) {
+ len -= hd_per_wq - start;
+ bitmap_clear(shampo->bitmap, start, hd_per_wq - start);
+ start = 0;
+ }
+
+ bitmap_clear(shampo->bitmap, start, len);
+}
+
static void mlx5e_dealloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix)
{
struct mlx5e_mpw_info *wi = &rq->mpwqe.info[ix];
@@ -629,6 +831,28 @@ void mlx5e_free_icosq_descs(struct mlx5e_icosq *sq)
sq->cc = sqcc;
}
+static void mlx5e_handle_shampo_hd_umr(struct mlx5e_shampo_umr umr,
+ struct mlx5e_icosq *sq)
+{
+ struct mlx5e_channel *c = container_of(sq, struct mlx5e_channel, icosq);
+ struct mlx5e_shampo_hd *shampo;
+ /* assume 1:1 relationship between RQ and icosq */
+ struct mlx5e_rq *rq = &c->rq;
+ int end, from, len = umr.len;
+
+ shampo = rq->mpwqe.shampo;
+ end = shampo->hd_per_wq;
+ from = shampo->ci;
+ if (from + len > shampo->hd_per_wq) {
+ len -= end - from;
+ bitmap_set(shampo->bitmap, from, end - from);
+ from = 0;
+ }
+
+ bitmap_set(shampo->bitmap, from, len);
+ shampo->ci = (shampo->ci + umr.len) & (shampo->hd_per_wq - 1);
+}
+
int mlx5e_poll_ico_cq(struct mlx5e_cq *cq)
{
struct mlx5e_icosq *sq = container_of(cq, struct mlx5e_icosq, cq);
@@ -685,6 +909,9 @@ int mlx5e_poll_ico_cq(struct mlx5e_cq *cq)
break;
case MLX5E_ICOSQ_WQE_NOP:
break;
+ case MLX5E_ICOSQ_WQE_SHAMPO_HD_UMR:
+ mlx5e_handle_shampo_hd_umr(wi->shampo, sq);
+ break;
#ifdef CONFIG_MLX5_EN_TLS
case MLX5E_ICOSQ_WQE_UMR_TLS:
break;
@@ -782,8 +1009,8 @@ static void mlx5e_lro_update_tcp_hdr(struct mlx5_cqe64 *cqe, struct tcphdr *tcp)
if (tcp_ack) {
tcp->ack = 1;
- tcp->ack_seq = cqe->lro_ack_seq_num;
- tcp->window = cqe->lro_tcp_win;
+ tcp->ack_seq = cqe->lro.ack_seq_num;
+ tcp->window = cqe->lro.tcp_win;
}
}
@@ -809,7 +1036,7 @@ static void mlx5e_lro_update_hdr(struct sk_buff *skb, struct mlx5_cqe64 *cqe,
tcp = ip_p + sizeof(struct iphdr);
skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
- ipv4->ttl = cqe->lro_min_ttl;
+ ipv4->ttl = cqe->lro.min_ttl;
ipv4->tot_len = cpu_to_be16(tot_len);
ipv4->check = 0;
ipv4->check = ip_fast_csum((unsigned char *)ipv4,
@@ -829,7 +1056,7 @@ static void mlx5e_lro_update_hdr(struct sk_buff *skb, struct mlx5_cqe64 *cqe,
tcp = ip_p + sizeof(struct ipv6hdr);
skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6;
- ipv6->hop_limit = cqe->lro_min_ttl;
+ ipv6->hop_limit = cqe->lro.min_ttl;
ipv6->payload_len = cpu_to_be16(payload_len);
mlx5e_lro_update_tcp_hdr(cqe, tcp);
@@ -841,6 +1068,142 @@ static void mlx5e_lro_update_hdr(struct sk_buff *skb, struct mlx5_cqe64 *cqe,
}
}
+static void *mlx5e_shampo_get_packet_hd(struct mlx5e_rq *rq, u16 header_index)
+{
+ struct mlx5e_dma_info *last_head = &rq->mpwqe.shampo->info[header_index];
+ u16 head_offset = (last_head->addr & (PAGE_SIZE - 1)) + rq->buff.headroom;
+
+ return page_address(last_head->page) + head_offset;
+}
+
+static void mlx5e_shampo_update_ipv4_udp_hdr(struct mlx5e_rq *rq, struct iphdr *ipv4)
+{
+ int udp_off = rq->hw_gro_data->fk.control.thoff;
+ struct sk_buff *skb = rq->hw_gro_data->skb;
+ struct udphdr *uh;
+
+ uh = (struct udphdr *)(skb->data + udp_off);
+ uh->len = htons(skb->len - udp_off);
+
+ if (uh->check)
+ uh->check = ~udp_v4_check(skb->len - udp_off, ipv4->saddr,
+ ipv4->daddr, 0);
+
+ skb->csum_start = (unsigned char *)uh - skb->head;
+ skb->csum_offset = offsetof(struct udphdr, check);
+
+ skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_L4;
+}
+
+static void mlx5e_shampo_update_ipv6_udp_hdr(struct mlx5e_rq *rq, struct ipv6hdr *ipv6)
+{
+ int udp_off = rq->hw_gro_data->fk.control.thoff;
+ struct sk_buff *skb = rq->hw_gro_data->skb;
+ struct udphdr *uh;
+
+ uh = (struct udphdr *)(skb->data + udp_off);
+ uh->len = htons(skb->len - udp_off);
+
+ if (uh->check)
+ uh->check = ~udp_v6_check(skb->len - udp_off, &ipv6->saddr,
+ &ipv6->daddr, 0);
+
+ skb->csum_start = (unsigned char *)uh - skb->head;
+ skb->csum_offset = offsetof(struct udphdr, check);
+
+ skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_L4;
+}
+
+static void mlx5e_shampo_update_fin_psh_flags(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe,
+ struct tcphdr *skb_tcp_hd)
+{
+ u16 header_index = be16_to_cpu(cqe->shampo.header_entry_index);
+ struct tcphdr *last_tcp_hd;
+ void *last_hd_addr;
+
+ last_hd_addr = mlx5e_shampo_get_packet_hd(rq, header_index);
+ last_tcp_hd = last_hd_addr + ETH_HLEN + rq->hw_gro_data->fk.control.thoff;
+ tcp_flag_word(skb_tcp_hd) |= tcp_flag_word(last_tcp_hd) & (TCP_FLAG_FIN | TCP_FLAG_PSH);
+}
+
+static void mlx5e_shampo_update_ipv4_tcp_hdr(struct mlx5e_rq *rq, struct iphdr *ipv4,
+ struct mlx5_cqe64 *cqe, bool match)
+{
+ int tcp_off = rq->hw_gro_data->fk.control.thoff;
+ struct sk_buff *skb = rq->hw_gro_data->skb;
+ struct tcphdr *tcp;
+
+ tcp = (struct tcphdr *)(skb->data + tcp_off);
+ if (match)
+ mlx5e_shampo_update_fin_psh_flags(rq, cqe, tcp);
+
+ tcp->check = ~tcp_v4_check(skb->len - tcp_off, ipv4->saddr,
+ ipv4->daddr, 0);
+ skb_shinfo(skb)->gso_type |= SKB_GSO_TCPV4;
+ if (ntohs(ipv4->id) == rq->hw_gro_data->second_ip_id)
+ skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_FIXEDID;
+
+ skb->csum_start = (unsigned char *)tcp - skb->head;
+ skb->csum_offset = offsetof(struct tcphdr, check);
+
+ if (tcp->cwr)
+ skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN;
+}
+
+static void mlx5e_shampo_update_ipv6_tcp_hdr(struct mlx5e_rq *rq, struct ipv6hdr *ipv6,
+ struct mlx5_cqe64 *cqe, bool match)
+{
+ int tcp_off = rq->hw_gro_data->fk.control.thoff;
+ struct sk_buff *skb = rq->hw_gro_data->skb;
+ struct tcphdr *tcp;
+
+ tcp = (struct tcphdr *)(skb->data + tcp_off);
+ if (match)
+ mlx5e_shampo_update_fin_psh_flags(rq, cqe, tcp);
+
+ tcp->check = ~tcp_v6_check(skb->len - tcp_off, &ipv6->saddr,
+ &ipv6->daddr, 0);
+ skb_shinfo(skb)->gso_type |= SKB_GSO_TCPV6;
+ skb->csum_start = (unsigned char *)tcp - skb->head;
+ skb->csum_offset = offsetof(struct tcphdr, check);
+
+ if (tcp->cwr)
+ skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN;
+}
+
+static void mlx5e_shampo_update_hdr(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe, bool match)
+{
+ bool is_ipv4 = (rq->hw_gro_data->fk.basic.n_proto == htons(ETH_P_IP));
+ struct sk_buff *skb = rq->hw_gro_data->skb;
+
+ skb_shinfo(skb)->gso_segs = NAPI_GRO_CB(skb)->count;
+ skb->ip_summed = CHECKSUM_PARTIAL;
+
+ if (is_ipv4) {
+ int nhoff = rq->hw_gro_data->fk.control.thoff - sizeof(struct iphdr);
+ struct iphdr *ipv4 = (struct iphdr *)(skb->data + nhoff);
+ __be16 newlen = htons(skb->len - nhoff);
+
+ csum_replace2(&ipv4->check, ipv4->tot_len, newlen);
+ ipv4->tot_len = newlen;
+
+ if (ipv4->protocol == IPPROTO_TCP)
+ mlx5e_shampo_update_ipv4_tcp_hdr(rq, ipv4, cqe, match);
+ else
+ mlx5e_shampo_update_ipv4_udp_hdr(rq, ipv4);
+ } else {
+ int nhoff = rq->hw_gro_data->fk.control.thoff - sizeof(struct ipv6hdr);
+ struct ipv6hdr *ipv6 = (struct ipv6hdr *)(skb->data + nhoff);
+
+ ipv6->payload_len = htons(skb->len - nhoff - sizeof(*ipv6));
+
+ if (ipv6->nexthdr == IPPROTO_TCP)
+ mlx5e_shampo_update_ipv6_tcp_hdr(rq, ipv6, cqe, match);
+ else
+ mlx5e_shampo_update_ipv6_udp_hdr(rq, ipv6);
+ }
+}
+
static inline void mlx5e_skb_set_hash(struct mlx5_cqe64 *cqe,
struct sk_buff *skb)
{
@@ -1090,6 +1453,27 @@ static inline void mlx5e_build_rx_skb(struct mlx5_cqe64 *cqe,
stats->mcast_packets++;
}
+static void mlx5e_shampo_complete_rx_cqe(struct mlx5e_rq *rq,
+ struct mlx5_cqe64 *cqe,
+ u32 cqe_bcnt,
+ struct sk_buff *skb)
+{
+ struct mlx5e_rq_stats *stats = rq->stats;
+
+ stats->packets++;
+ stats->gro_packets++;
+ stats->bytes += cqe_bcnt;
+ stats->gro_bytes += cqe_bcnt;
+ if (NAPI_GRO_CB(skb)->count != 1)
+ return;
+ mlx5e_build_rx_skb(cqe, cqe_bcnt, rq, skb);
+ skb_reset_network_header(skb);
+ if (!skb_flow_dissect_flow_keys(skb, &rq->hw_gro_data->fk, 0)) {
+ napi_gro_receive(rq->cq.napi, skb);
+ rq->hw_gro_data->skb = NULL;
+ }
+}
+
static inline void mlx5e_complete_rx_cqe(struct mlx5e_rq *rq,
struct mlx5_cqe64 *cqe,
u32 cqe_bcnt,
@@ -1199,7 +1583,8 @@ mlx5e_skb_from_cqe_nonlinear(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe,
}
/* copy header */
- mlx5e_copy_skb_header(rq->pdev, skb, head_wi->di, head_wi->offset, headlen);
+ mlx5e_copy_skb_header(rq->pdev, skb, head_wi->di, head_wi->offset, head_wi->offset,
+ headlen);
/* skb linear part was allocated with headlen and aligned to long */
skb->tail += headlen;
skb->len += headlen;
@@ -1395,6 +1780,30 @@ const struct mlx5e_rx_handlers mlx5e_rx_handlers_rep = {
};
#endif
+static void
+mlx5e_fill_skb_data(struct sk_buff *skb, struct mlx5e_rq *rq, struct mlx5e_dma_info *di,
+ u32 data_bcnt, u32 data_offset)
+{
+ net_prefetchw(skb->data);
+
+ while (data_bcnt) {
+ u32 pg_consumed_bytes = min_t(u32, PAGE_SIZE - data_offset, data_bcnt);
+ unsigned int truesize;
+
+ if (test_bit(MLX5E_RQ_STATE_SHAMPO, &rq->state))
+ truesize = pg_consumed_bytes;
+ else
+ truesize = ALIGN(pg_consumed_bytes, BIT(rq->mpwqe.log_stride_sz));
+
+ mlx5e_add_skb_frag(rq, skb, di, data_offset,
+ pg_consumed_bytes, truesize);
+
+ data_bcnt -= pg_consumed_bytes;
+ data_offset = 0;
+ di++;
+ }
+}
+
static struct sk_buff *
mlx5e_skb_from_cqe_mpwrq_nonlinear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi,
u16 cqe_bcnt, u32 head_offset, u32 page_idx)
@@ -1420,20 +1829,9 @@ mlx5e_skb_from_cqe_mpwrq_nonlinear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *w
frag_offset -= PAGE_SIZE;
}
- while (byte_cnt) {
- u32 pg_consumed_bytes =
- min_t(u32, PAGE_SIZE - frag_offset, byte_cnt);
- unsigned int truesize =
- ALIGN(pg_consumed_bytes, BIT(rq->mpwqe.log_stride_sz));
-
- mlx5e_add_skb_frag(rq, skb, di, frag_offset,
- pg_consumed_bytes, truesize);
- byte_cnt -= pg_consumed_bytes;
- frag_offset = 0;
- di++;
- }
+ mlx5e_fill_skb_data(skb, rq, di, byte_cnt, frag_offset);
/* copy header */
- mlx5e_copy_skb_header(rq->pdev, skb, head_di, head_offset, headlen);
+ mlx5e_copy_skb_header(rq->pdev, skb, head_di, head_offset, head_offset, headlen);
/* skb linear part was allocated with headlen and aligned to long */
skb->tail += headlen;
skb->len += headlen;
@@ -1487,6 +1885,181 @@ mlx5e_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi,
return skb;
}
+static void
+mlx5e_skb_from_cqe_shampo(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi,
+ struct mlx5_cqe64 *cqe, u16 header_index)
+{
+ struct mlx5e_dma_info *head = &rq->mpwqe.shampo->info[header_index];
+ u16 head_offset = head->addr & (PAGE_SIZE - 1);
+ u16 head_size = cqe->shampo.header_size;
+ u16 rx_headroom = rq->buff.headroom;
+ struct sk_buff *skb = NULL;
+ void *hdr, *data;
+ u32 frag_size;
+
+ hdr = page_address(head->page) + head_offset;
+ data = hdr + rx_headroom;
+ frag_size = MLX5_SKB_FRAG_SZ(rx_headroom + head_size);
+
+ if (likely(frag_size <= BIT(MLX5E_SHAMPO_LOG_MAX_HEADER_ENTRY_SIZE))) {
+ /* build SKB around header */
+ dma_sync_single_range_for_cpu(rq->pdev, head->addr, 0, frag_size, DMA_FROM_DEVICE);
+ prefetchw(hdr);
+ prefetch(data);
+ skb = mlx5e_build_linear_skb(rq, hdr, frag_size, rx_headroom, head_size);
+
+ if (unlikely(!skb))
+ return;
+
+ /* queue up for recycling/reuse */
+ page_ref_inc(head->page);
+
+ } else {
+ /* allocate SKB and copy header for large header */
+ rq->stats->gro_large_hds++;
+ skb = napi_alloc_skb(rq->cq.napi,
+ ALIGN(head_size, sizeof(long)));
+ if (unlikely(!skb)) {
+ rq->stats->buff_alloc_err++;
+ return;
+ }
+
+ prefetchw(skb->data);
+ mlx5e_copy_skb_header(rq->pdev, skb, head,
+ head_offset + rx_headroom,
+ rx_headroom, head_size);
+ /* skb linear part was allocated with headlen and aligned to long */
+ skb->tail += head_size;
+ skb->len += head_size;
+ }
+ rq->hw_gro_data->skb = skb;
+ NAPI_GRO_CB(skb)->count = 1;
+ skb_shinfo(skb)->gso_size = mpwrq_get_cqe_byte_cnt(cqe) - head_size;
+}
+
+static void
+mlx5e_shampo_align_fragment(struct sk_buff *skb, u8 log_stride_sz)
+{
+ skb_frag_t *last_frag = &skb_shinfo(skb)->frags[skb_shinfo(skb)->nr_frags - 1];
+ unsigned int frag_size = skb_frag_size(last_frag);
+ unsigned int frag_truesize;
+
+ frag_truesize = ALIGN(frag_size, BIT(log_stride_sz));
+ skb->truesize += frag_truesize - frag_size;
+}
+
+static void
+mlx5e_shampo_flush_skb(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe, bool match)
+{
+ struct sk_buff *skb = rq->hw_gro_data->skb;
+ struct mlx5e_rq_stats *stats = rq->stats;
+
+ stats->gro_skbs++;
+ if (likely(skb_shinfo(skb)->nr_frags))
+ mlx5e_shampo_align_fragment(skb, rq->mpwqe.log_stride_sz);
+ if (NAPI_GRO_CB(skb)->count > 1)
+ mlx5e_shampo_update_hdr(rq, cqe, match);
+ napi_gro_receive(rq->cq.napi, skb);
+ rq->hw_gro_data->skb = NULL;
+}
+
+static bool
+mlx5e_hw_gro_skb_has_enough_space(struct sk_buff *skb, u16 data_bcnt)
+{
+ int nr_frags = skb_shinfo(skb)->nr_frags;
+
+ return PAGE_SIZE * nr_frags + data_bcnt <= GSO_MAX_SIZE;
+}
+
+static void
+mlx5e_free_rx_shampo_hd_entry(struct mlx5e_rq *rq, u16 header_index)
+{
+ struct mlx5e_shampo_hd *shampo = rq->mpwqe.shampo;
+ u64 addr = shampo->info[header_index].addr;
+
+ if (((header_index + 1) & (MLX5E_SHAMPO_WQ_HEADER_PER_PAGE - 1)) == 0) {
+ shampo->info[header_index].addr = ALIGN_DOWN(addr, PAGE_SIZE);
+ mlx5e_page_release(rq, &shampo->info[header_index], true);
+ }
+ bitmap_clear(shampo->bitmap, header_index, 1);
+}
+
+static void mlx5e_handle_rx_cqe_mpwrq_shampo(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
+{
+ u16 data_bcnt = mpwrq_get_cqe_byte_cnt(cqe) - cqe->shampo.header_size;
+ u16 header_index = be16_to_cpu(cqe->shampo.header_entry_index);
+ u32 wqe_offset = be32_to_cpu(cqe->shampo.data_offset);
+ u16 cstrides = mpwrq_get_cqe_consumed_strides(cqe);
+ u32 data_offset = wqe_offset & (PAGE_SIZE - 1);
+ u32 cqe_bcnt = mpwrq_get_cqe_byte_cnt(cqe);
+ u16 wqe_id = be16_to_cpu(cqe->wqe_id);
+ u32 page_idx = wqe_offset >> PAGE_SHIFT;
+ struct sk_buff **skb = &rq->hw_gro_data->skb;
+ bool flush = cqe->shampo.flush;
+ bool match = cqe->shampo.match;
+ struct mlx5e_rq_stats *stats = rq->stats;
+ struct mlx5e_rx_wqe_ll *wqe;
+ struct mlx5e_dma_info *di;
+ struct mlx5e_mpw_info *wi;
+ struct mlx5_wq_ll *wq;
+
+ wi = &rq->mpwqe.info[wqe_id];
+ wi->consumed_strides += cstrides;
+
+ if (unlikely(MLX5E_RX_ERR_CQE(cqe))) {
+ trigger_report(rq, cqe);
+ stats->wqe_err++;
+ goto mpwrq_cqe_out;
+ }
+
+ if (unlikely(mpwrq_is_filler_cqe(cqe))) {
+ stats->mpwqe_filler_cqes++;
+ stats->mpwqe_filler_strides += cstrides;
+ goto mpwrq_cqe_out;
+ }
+
+ stats->gro_match_packets += match;
+
+ if (*skb && (!match || !(mlx5e_hw_gro_skb_has_enough_space(*skb, data_bcnt)))) {
+ match = false;
+ mlx5e_shampo_flush_skb(rq, cqe, match);
+ }
+
+ if (!*skb) {
+ mlx5e_skb_from_cqe_shampo(rq, wi, cqe, header_index);
+ if (unlikely(!*skb))
+ goto free_hd_entry;
+ } else {
+ NAPI_GRO_CB(*skb)->count++;
+ if (NAPI_GRO_CB(*skb)->count == 2 &&
+ rq->hw_gro_data->fk.basic.n_proto == htons(ETH_P_IP)) {
+ void *hd_addr = mlx5e_shampo_get_packet_hd(rq, header_index);
+ int nhoff = ETH_HLEN + rq->hw_gro_data->fk.control.thoff -
+ sizeof(struct iphdr);
+ struct iphdr *iph = (struct iphdr *)(hd_addr + nhoff);
+
+ rq->hw_gro_data->second_ip_id = ntohs(iph->id);
+ }
+ }
+
+ di = &wi->umr.dma_info[page_idx];
+ mlx5e_fill_skb_data(*skb, rq, di, data_bcnt, data_offset);
+
+ mlx5e_shampo_complete_rx_cqe(rq, cqe, cqe_bcnt, *skb);
+ if (flush)
+ mlx5e_shampo_flush_skb(rq, cqe, match);
+free_hd_entry:
+ mlx5e_free_rx_shampo_hd_entry(rq, header_index);
+mpwrq_cqe_out:
+ if (likely(wi->consumed_strides < rq->mpwqe.num_strides))
+ return;
+
+ wq = &rq->mpwqe.wq;
+ wqe = mlx5_wq_ll_get_wqe(wq, wqe_id);
+ mlx5e_free_rx_mpwqe(rq, wi, true);
+ mlx5_wq_ll_pop(wq, cqe->wqe_id, &wqe->next.next_wqe_index);
+}
+
static void mlx5e_handle_rx_cqe_mpwrq(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
{
u16 cstrides = mpwrq_get_cqe_consumed_strides(cqe);
@@ -1579,11 +2152,15 @@ int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget)
mlx5_cqwq_pop(cqwq);
- INDIRECT_CALL_2(rq->handle_rx_cqe, mlx5e_handle_rx_cqe_mpwrq,
- mlx5e_handle_rx_cqe, rq, cqe);
+ INDIRECT_CALL_3(rq->handle_rx_cqe, mlx5e_handle_rx_cqe_mpwrq,
+ mlx5e_handle_rx_cqe, mlx5e_handle_rx_cqe_mpwrq_shampo,
+ rq, cqe);
} while ((++work_done < budget) && (cqe = mlx5_cqwq_get_cqe(cqwq)));
out:
+ if (test_bit(MLX5E_RQ_STATE_SHAMPO, &rq->state) && rq->hw_gro_data->skb)
+ mlx5e_shampo_flush_skb(rq, NULL, false);
+
if (rcu_access_pointer(rq->xdp_prog))
mlx5e_xdp_rx_poll_complete(rq);
@@ -1784,15 +2361,24 @@ int mlx5e_rq_set_handlers(struct mlx5e_rq *rq, struct mlx5e_params *params, bool
rq->post_wqes = mlx5e_post_rx_mpwqes;
rq->dealloc_wqe = mlx5e_dealloc_rx_mpwqe;
- rq->handle_rx_cqe = priv->profile->rx_handlers->handle_rx_cqe_mpwqe;
if (mlx5_fpga_is_ipsec_device(mdev)) {
netdev_err(netdev, "MPWQE RQ with Innova IPSec offload not supported\n");
return -EINVAL;
}
- if (!rq->handle_rx_cqe) {
- netdev_err(netdev, "RX handler of MPWQE RQ is not set\n");
- return -EINVAL;
+ if (params->packet_merge.type == MLX5E_PACKET_MERGE_SHAMPO) {
+ rq->handle_rx_cqe = priv->profile->rx_handlers->handle_rx_cqe_mpwqe_shampo;
+ if (!rq->handle_rx_cqe) {
+ netdev_err(netdev, "RX handler of SHAMPO MPWQE RQ is not set\n");
+ return -EINVAL;
+ }
+ } else {
+ rq->handle_rx_cqe = priv->profile->rx_handlers->handle_rx_cqe_mpwqe;
+ if (!rq->handle_rx_cqe) {
+ netdev_err(netdev, "RX handler of MPWQE RQ is not set\n");
+ return -EINVAL;
+ }
}
+
break;
default: /* MLX5_WQ_TYPE_CYCLIC */
rq->wqe.skb_from_cqe = xsk ?
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
index e1dd17019030..2a9bfc3ffa2e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
@@ -128,6 +128,11 @@ static const struct counter_desc sw_stats_desc[] = {
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_lro_packets) },
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_lro_bytes) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_gro_packets) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_gro_bytes) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_gro_skbs) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_gro_match_packets) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_gro_large_hds) },
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_ecn_mark) },
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_removed_vlan_packets) },
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_unnecessary) },
@@ -313,6 +318,11 @@ static void mlx5e_stats_grp_sw_update_stats_rq_stats(struct mlx5e_sw_stats *s,
s->rx_bytes += rq_stats->bytes;
s->rx_lro_packets += rq_stats->lro_packets;
s->rx_lro_bytes += rq_stats->lro_bytes;
+ s->rx_gro_packets += rq_stats->gro_packets;
+ s->rx_gro_bytes += rq_stats->gro_bytes;
+ s->rx_gro_skbs += rq_stats->gro_skbs;
+ s->rx_gro_match_packets += rq_stats->gro_match_packets;
+ s->rx_gro_large_hds += rq_stats->gro_large_hds;
s->rx_ecn_mark += rq_stats->ecn_mark;
s->rx_removed_vlan_packets += rq_stats->removed_vlan_packets;
s->rx_csum_none += rq_stats->csum_none;
@@ -1760,6 +1770,11 @@ static const struct counter_desc rq_stats_desc[] = {
{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, xdp_redirect) },
{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, lro_packets) },
{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, lro_bytes) },
+ { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, gro_packets) },
+ { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, gro_bytes) },
+ { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, gro_skbs) },
+ { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, gro_match_packets) },
+ { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, gro_large_hds) },
{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, ecn_mark) },
{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, removed_vlan_packets) },
{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, wqe_err) },
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
index 139e59f30db0..2c1ed5b81be6 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
@@ -144,6 +144,11 @@ struct mlx5e_sw_stats {
u64 tx_mpwqe_pkts;
u64 rx_lro_packets;
u64 rx_lro_bytes;
+ u64 rx_gro_packets;
+ u64 rx_gro_bytes;
+ u64 rx_gro_skbs;
+ u64 rx_gro_match_packets;
+ u64 rx_gro_large_hds;
u64 rx_mcast_packets;
u64 rx_ecn_mark;
u64 rx_removed_vlan_packets;
@@ -322,6 +327,11 @@ struct mlx5e_rq_stats {
u64 csum_none;
u64 lro_packets;
u64 lro_bytes;
+ u64 gro_packets;
+ u64 gro_bytes;
+ u64 gro_skbs;
+ u64 gro_match_packets;
+ u64 gro_large_hds;
u64 mcast_packets;
u64 ecn_mark;
u64 removed_vlan_packets;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw.c b/drivers/net/ethernet/mellanox/mlx5/core/fw.c
index 1037e3629e7e..2d8406fab844 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fw.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fw.c
@@ -269,6 +269,12 @@ int mlx5_query_hca_caps(struct mlx5_core_dev *dev)
return err;
}
+ if (MLX5_CAP_GEN(dev, shampo)) {
+ err = mlx5_core_get_caps(dev, MLX5_CAP_DEV_SHAMPO);
+ if (err)
+ return err;
+ }
+
return 0;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
index 3b8d8ada1a01..84297cc1b509 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
@@ -67,7 +67,7 @@ static void mlx5i_build_nic_params(struct mlx5_core_dev *mdev,
MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE :
MLX5I_PARAMS_DEFAULT_LOG_RQ_SIZE;
- params->lro_en = false;
+ params->packet_merge.type = MLX5E_PACKET_MERGE_NONE;
params->hard_mtu = MLX5_IB_GRH_BYTES + MLX5_IPOIB_HARD_LEN;
params->tunneled_offload_en = false;
}
@@ -356,7 +356,6 @@ static void mlx5i_destroy_flow_steering(struct mlx5e_priv *priv)
static int mlx5i_init_rx(struct mlx5e_priv *priv)
{
struct mlx5_core_dev *mdev = priv->mdev;
- struct mlx5e_lro_param lro_param;
int err;
priv->rx_res = mlx5e_rx_res_alloc();
@@ -371,9 +370,9 @@ static int mlx5i_init_rx(struct mlx5e_priv *priv)
goto err_destroy_q_counters;
}
- lro_param = mlx5e_get_lro_param(&priv->channels.params);
err = mlx5e_rx_res_init(priv->rx_res, priv->mdev, 0,
- priv->max_nch, priv->drop_rq.rqn, &lro_param,
+ priv->max_nch, priv->drop_rq.rqn,
+ &priv->channels.params.packet_merge,
priv->channels.params.num_channels);
if (err)
goto err_close_drop_rq;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.h b/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.h
index dea199e79bed..57af962cad29 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.h
@@ -31,7 +31,7 @@ bool mlx5_lag_is_multipath(struct mlx5_core_dev *dev);
static inline void mlx5_lag_mp_reset(struct mlx5_lag *ldev) {};
static inline int mlx5_lag_mp_init(struct mlx5_lag *ldev) { return 0; }
static inline void mlx5_lag_mp_cleanup(struct mlx5_lag *ldev) {}
-bool mlx5_lag_is_multipath(struct mlx5_core_dev *dev) { return false; }
+static inline bool mlx5_lag_is_multipath(struct mlx5_core_dev *dev) { return false; }
#endif /* CONFIG_MLX5_ESWITCH */
#endif /* __MLX5_LAG_MP_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index f8446395163a..a92a92a52346 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -1417,6 +1417,7 @@ static const int types[] = {
MLX5_CAP_VDPA_EMULATION,
MLX5_CAP_IPSEC,
MLX5_CAP_PORT_SELECTION,
+ MLX5_CAP_DEV_SHAMPO,
};
static void mlx5_hca_caps_free(struct mlx5_core_dev *dev)
diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h
index 37f36dad18bd..a241dcf50f39 100644
--- a/include/linux/bitmap.h
+++ b/include/linux/bitmap.h
@@ -123,6 +123,8 @@ struct device;
*/
unsigned long *bitmap_alloc(unsigned int nbits, gfp_t flags);
unsigned long *bitmap_zalloc(unsigned int nbits, gfp_t flags);
+unsigned long *bitmap_alloc_node(unsigned int nbits, gfp_t flags, int node);
+unsigned long *bitmap_zalloc_node(unsigned int nbits, gfp_t flags, int node);
void bitmap_free(const unsigned long *bitmap);
/* Managed variants of the above. */
diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h
index f8a0bbb42c3b..56bcf95d4ab7 100644
--- a/include/linux/mlx5/device.h
+++ b/include/linux/mlx5/device.h
@@ -290,6 +290,7 @@ enum {
MLX5_UMR_INLINE = (1 << 7),
};
+#define MLX5_UMR_KLM_ALIGNMENT 4
#define MLX5_UMR_MTT_ALIGNMENT 0x40
#define MLX5_UMR_MTT_MASK (MLX5_UMR_MTT_ALIGNMENT - 1)
#define MLX5_UMR_MTT_MIN_CHUNK_SIZE MLX5_UMR_MTT_ALIGNMENT
@@ -799,10 +800,23 @@ struct mlx5_cqe64 {
u8 tls_outer_l3_tunneled;
u8 rsvd0;
__be16 wqe_id;
- u8 lro_tcppsh_abort_dupack;
- u8 lro_min_ttl;
- __be16 lro_tcp_win;
- __be32 lro_ack_seq_num;
+ union {
+ struct {
+ u8 tcppsh_abort_dupack;
+ u8 min_ttl;
+ __be16 tcp_win;
+ __be32 ack_seq_num;
+ } lro;
+ struct {
+ u8 reserved0:1;
+ u8 match:1;
+ u8 flush:1;
+ u8 reserved3:5;
+ u8 header_size;
+ __be16 header_entry_index;
+ __be32 data_offset;
+ } shampo;
+ };
__be32 rss_hash_result;
u8 rss_hash_type;
u8 ml_path;
@@ -872,7 +886,7 @@ static inline u8 get_cqe_opcode(struct mlx5_cqe64 *cqe)
static inline u8 get_cqe_lro_tcppsh(struct mlx5_cqe64 *cqe)
{
- return (cqe->lro_tcppsh_abort_dupack >> 6) & 1;
+ return (cqe->lro.tcppsh_abort_dupack >> 6) & 1;
}
static inline u8 get_cqe_l4_hdr_type(struct mlx5_cqe64 *cqe)
@@ -1186,6 +1200,7 @@ enum mlx5_cap_type {
MLX5_CAP_VDPA_EMULATION = 0x13,
MLX5_CAP_DEV_EVENT = 0x14,
MLX5_CAP_IPSEC,
+ MLX5_CAP_DEV_SHAMPO = 0x1d,
MLX5_CAP_GENERAL_2 = 0x20,
MLX5_CAP_PORT_SELECTION = 0x25,
/* NUM OF CAP Types */
@@ -1431,6 +1446,9 @@ enum mlx5_qcam_feature_groups {
#define MLX5_CAP_IPSEC(mdev, cap)\
MLX5_GET(ipsec_cap, (mdev)->caps.hca[MLX5_CAP_IPSEC]->cur, cap)
+#define MLX5_CAP_DEV_SHAMPO(mdev, cap)\
+ MLX5_GET(shampo_cap, mdev->caps.hca_cur[MLX5_CAP_DEV_SHAMPO], cap)
+
enum {
MLX5_CMD_STAT_OK = 0x0,
MLX5_CMD_STAT_INT_ERR = 0x1,
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 746381eccccf..0bb78c04336c 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -1350,7 +1350,9 @@ struct mlx5_ifc_cmd_hca_cap_bits {
u8 reserved_at_b0[0x1];
u8 uplink_follow[0x1];
u8 ts_cqe_to_dest_cqn[0x1];
- u8 reserved_at_b3[0xd];
+ u8 reserved_at_b3[0x7];
+ u8 shampo[0x1];
+ u8 reserved_at_bb[0x5];
u8 max_sgl_for_optimized_performance[0x8];
u8 log_max_cq_sz[0x8];
@@ -1893,7 +1895,21 @@ struct mlx5_ifc_wq_bits {
u8 reserved_at_139[0x4];
u8 log_wqe_stride_size[0x3];
- u8 reserved_at_140[0x4c0];
+ u8 reserved_at_140[0x80];
+
+ u8 headers_mkey[0x20];
+
+ u8 shampo_enable[0x1];
+ u8 reserved_at_1e1[0x4];
+ u8 log_reservation_size[0x3];
+ u8 reserved_at_1e8[0x5];
+ u8 log_max_num_of_packets_per_reservation[0x3];
+ u8 reserved_at_1f0[0x6];
+ u8 log_headers_entry_size[0x2];
+ u8 reserved_at_1f8[0x4];
+ u8 log_headers_buffer_entry_num[0x4];
+
+ u8 reserved_at_200[0x400];
struct mlx5_ifc_cmd_pas_bits pas[];
};
@@ -3169,6 +3185,20 @@ struct mlx5_ifc_roce_addr_layout_bits {
u8 reserved_at_e0[0x20];
};
+struct mlx5_ifc_shampo_cap_bits {
+ u8 reserved_at_0[0x3];
+ u8 shampo_log_max_reservation_size[0x5];
+ u8 reserved_at_8[0x3];
+ u8 shampo_log_min_reservation_size[0x5];
+ u8 shampo_min_mss_size[0x10];
+
+ u8 reserved_at_20[0x3];
+ u8 shampo_max_log_headers_entry_size[0x5];
+ u8 reserved_at_28[0x18];
+
+ u8 reserved_at_40[0x7c0];
+};
+
union mlx5_ifc_hca_cap_union_bits {
struct mlx5_ifc_cmd_hca_cap_bits cmd_hca_cap;
struct mlx5_ifc_cmd_hca_cap_2_bits cmd_hca_cap_2;
@@ -3187,6 +3217,7 @@ union mlx5_ifc_hca_cap_union_bits {
struct mlx5_ifc_tls_cap_bits tls_cap;
struct mlx5_ifc_device_mem_cap_bits device_mem_cap;
struct mlx5_ifc_virtio_emulation_cap_bits virtio_emulation_cap;
+ struct mlx5_ifc_shampo_cap_bits shampo_cap;
u8 reserved_at_0[0x8000];
};
@@ -3361,8 +3392,9 @@ enum {
};
enum {
- MLX5_TIRC_LRO_ENABLE_MASK_IPV4_LRO = 0x1,
- MLX5_TIRC_LRO_ENABLE_MASK_IPV6_LRO = 0x2,
+ MLX5_TIRC_PACKET_MERGE_MASK_IPV4_LRO = BIT(0),
+ MLX5_TIRC_PACKET_MERGE_MASK_IPV6_LRO = BIT(1),
+ MLX5_TIRC_PACKET_MERGE_MASK_SHAMPO = BIT(2),
};
enum {
@@ -3387,7 +3419,7 @@ struct mlx5_ifc_tirc_bits {
u8 reserved_at_80[0x4];
u8 lro_timeout_period_usecs[0x10];
- u8 lro_enable_mask[0x4];
+ u8 packet_merge_mask[0x4];
u8 lro_max_ip_payload_size[0x8];
u8 reserved_at_a0[0x40];
@@ -3569,6 +3601,18 @@ enum {
MLX5_RQC_STATE_ERR = 0x3,
};
+enum {
+ MLX5_RQC_SHAMPO_NO_MATCH_ALIGNMENT_GRANULARITY_BYTE = 0x0,
+ MLX5_RQC_SHAMPO_NO_MATCH_ALIGNMENT_GRANULARITY_STRIDE = 0x1,
+ MLX5_RQC_SHAMPO_NO_MATCH_ALIGNMENT_GRANULARITY_PAGE = 0x2,
+};
+
+enum {
+ MLX5_RQC_SHAMPO_MATCH_CRITERIA_TYPE_NO_MATCH = 0x0,
+ MLX5_RQC_SHAMPO_MATCH_CRITERIA_TYPE_EXTENDED = 0x1,
+ MLX5_RQC_SHAMPO_MATCH_CRITERIA_TYPE_FIVE_TUPLE = 0x2,
+};
+
struct mlx5_ifc_rqc_bits {
u8 rlky[0x1];
u8 delay_drop_en[0x1];
@@ -3601,7 +3645,13 @@ struct mlx5_ifc_rqc_bits {
u8 reserved_at_c0[0x10];
u8 hairpin_peer_vhca[0x10];
- u8 reserved_at_e0[0xa0];
+ u8 reserved_at_e0[0x46];
+ u8 shampo_no_match_alignment_granularity[0x2];
+ u8 reserved_at_128[0x6];
+ u8 shampo_match_criteria_type[0x2];
+ u8 reservation_timeout[0x10];
+
+ u8 reserved_at_140[0x40];
struct mlx5_ifc_wq_bits wq;
};
@@ -6657,7 +6707,7 @@ struct mlx5_ifc_modify_tir_bitmask_bits {
u8 reserved_at_3c[0x1];
u8 hash[0x1];
u8 reserved_at_3e[0x1];
- u8 lro[0x1];
+ u8 packet_merge[0x1];
};
struct mlx5_ifc_modify_tir_out_bits {
diff --git a/lib/bitmap.c b/lib/bitmap.c
index 663dd81967d4..926408883456 100644
--- a/lib/bitmap.c
+++ b/lib/bitmap.c
@@ -1398,6 +1398,19 @@ unsigned long *bitmap_zalloc(unsigned int nbits, gfp_t flags)
}
EXPORT_SYMBOL(bitmap_zalloc);
+unsigned long *bitmap_alloc_node(unsigned int nbits, gfp_t flags, int node)
+{
+ return kmalloc_array_node(BITS_TO_LONGS(nbits), sizeof(unsigned long),
+ flags, node);
+}
+EXPORT_SYMBOL(bitmap_alloc_node);
+
+unsigned long *bitmap_zalloc_node(unsigned int nbits, gfp_t flags, int node)
+{
+ return bitmap_alloc_node(nbits, flags | __GFP_ZERO, node);
+}
+EXPORT_SYMBOL(bitmap_zalloc_node);
+
void bitmap_free(const unsigned long *bitmap)
{
kfree(bitmap);
diff --git a/net/core/dev.c b/net/core/dev.c
index 4e3d19a06de4..e8754560e641 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -9922,6 +9922,11 @@ static netdev_features_t netdev_fix_features(struct net_device *dev,
}
}
+ if ((features & NETIF_F_GRO_HW) && (features & NETIF_F_LRO)) {
+ netdev_dbg(dev, "Dropping LRO feature since HW-GRO is requested.\n");
+ features &= ~NETIF_F_LRO;
+ }
+
if (features & NETIF_F_HW_TLS_TX) {
bool ip_csum = (features & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM)) ==
(NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM);