diff options
| author | Jakub Kicinski <kuba@kernel.org> | 2024-06-06 06:13:10 +0300 |
|---|---|---|
| committer | Jakub Kicinski <kuba@kernel.org> | 2024-06-06 06:20:47 +0300 |
| commit | 7da375e2c7e023957b71fce44a72107559cfa6d0 (patch) | |
| tree | ac187f11c0650989716a44f6f752477936b4b6df /include/linux | |
| parent | ed20142ed68c2b8819120508bc029e84d13cfe63 (diff) | |
| parent | 14ae2fd12be8c5089e43fee8a21cd8631699b97a (diff) | |
| download | linux-7da375e2c7e023957b71fce44a72107559cfa6d0.tar.xz | |
Merge branch 'net-mlx5e-shampo-enable-hw-gro-once-more'
Tariq Toukan says:
====================
net/mlx5e: SHAMPO, Enable HW GRO once more
This series enables hardware GRO for ConnectX-7 and newer NICs.
SHAMPO stands for Split Header And Merge Payload Offload.
The first part of the series contains important fixes and improvements.
The second part reworks the HW GRO counters.
Lastly, HW GRO is perf optimized and enabled.
Here are the bandwidth numbers for a simple iperf3 test over a single rq
where the application and irq are pinned to the same CPU:
+---------+--------+--------+-----------+-------------+
| streams | SW GRO | HW GRO | Unit | Improvement |
+---------+--------+--------+-----------+-------------+
| 1 | 36 | 57 | Gbits/sec | 1.6 x |
| 4 | 34 | 50 | Gbits/sec | 1.5 x |
| 8 | 31 | 43 | Gbits/sec | 1.4 x |
+---------+--------+--------+-----------+-------------+
Benchmark details:
VM based setup
CPU: Intel(R) Xeon(R) Platinum 8380 CPU, 24 cores
NIC: ConnectX-7 100GbE
iperf3 and irq running on same CPU over a single receive queue
====================
Link: https://lore.kernel.org/r/20240603212219.1037656-1-tariqt@nvidia.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Diffstat (limited to 'include/linux')
| -rw-r--r-- | include/linux/mlx5/device.h | 1 | ||||
| -rw-r--r-- | include/linux/mlx5/mlx5_ifc.h | 16 |
2 files changed, 12 insertions, 5 deletions
diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index d7bb31d9a446..da09bfaa7b81 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -294,6 +294,7 @@ enum { #define MLX5_UMR_FLEX_ALIGNMENT 0x40 #define MLX5_UMR_MTT_NUM_ENTRIES_ALIGNMENT (MLX5_UMR_FLEX_ALIGNMENT / sizeof(struct mlx5_mtt)) #define MLX5_UMR_KLM_NUM_ENTRIES_ALIGNMENT (MLX5_UMR_FLEX_ALIGNMENT / sizeof(struct mlx5_klm)) +#define MLX5_UMR_KSM_NUM_ENTRIES_ALIGNMENT (MLX5_UMR_FLEX_ALIGNMENT / sizeof(struct mlx5_ksm)) #define MLX5_USER_INDEX_LEN (MLX5_FLD_SZ_BYTES(qpc, user_index) * 8) diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 5df52e15f7d6..17acd0f3ca8e 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -1526,8 +1526,7 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 ts_cqe_to_dest_cqn[0x1]; u8 reserved_at_b3[0x6]; u8 go_back_n[0x1]; - u8 shampo[0x1]; - u8 reserved_at_bb[0x5]; + u8 reserved_at_ba[0x6]; u8 max_sgl_for_optimized_performance[0x8]; u8 log_max_cq_sz[0x8]; @@ -1744,7 +1743,9 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 reserved_at_280[0x10]; u8 max_wqe_sz_sq[0x10]; - u8 reserved_at_2a0[0x10]; + u8 reserved_at_2a0[0xb]; + u8 shampo[0x1]; + u8 reserved_at_2ac[0x4]; u8 max_wqe_sz_rq[0x10]; u8 max_flow_counter_31_16[0x10]; @@ -2017,7 +2018,8 @@ struct mlx5_ifc_cmd_hca_cap_2_bits { u8 reserved_at_250[0x10]; u8 reserved_at_260[0x120]; - u8 reserved_at_380[0x10]; + u8 reserved_at_380[0xb]; + u8 min_mkey_log_entity_size_fixed_buffer[0x5]; u8 ec_vf_vport_base[0x10]; u8 reserved_at_3a0[0x10]; @@ -2029,7 +2031,11 @@ struct mlx5_ifc_cmd_hca_cap_2_bits { u8 pcc_ifa2[0x1]; u8 reserved_at_3f1[0xf]; - u8 reserved_at_400[0x400]; + u8 reserved_at_400[0x1]; + u8 min_mkey_log_entity_size_fixed_buffer_valid[0x1]; + u8 reserved_at_402[0x1e]; + + u8 reserved_at_420[0x3e0]; }; enum mlx5_ifc_flow_destination_type { |
